diff --git a/README.md b/README.md index 3b0f0e952850a23210ed3e451c43bf1fff2a0a7a..bd5a5d669d6a6bdd984240b8e8bb0a3445b36cda 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,218 @@ --- -license: agpl-3.0 +library_name: peft +base_model: mistralai/Mixtral-8x7B-v0.1 --- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] + + +## Training procedure + +The following `bitsandbytes` quantization config was used during training: +- quant_method: bitsandbytes +- load_in_8bit: False +- load_in_4bit: True +- llm_int8_threshold: 6.0 +- llm_int8_skip_modules: None +- llm_int8_enable_fp32_cpu_offload: False +- llm_int8_has_fp16_weight: False +- bnb_4bit_quant_type: nf4 +- bnb_4bit_use_double_quant: True +- bnb_4bit_compute_dtype: bfloat16 + +### Framework versions + +- PEFT 0.7.0 \ No newline at end of file diff --git a/adapter_config.json b/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..1c60bdd91f1a6b73161ce005f7160d2490fd5c8a --- /dev/null +++ b/adapter_config.json @@ -0,0 +1,32 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "mistralai/Mixtral-8x7B-v0.1", + "bias": "none", + "fan_in_fan_out": null, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 64, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "k_proj", + "w1", + "gate", + "w2", + "q_proj", + "w3", + "o_proj", + "v_proj" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/adapter_model.safetensors b/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..39e3ef00a3d1f840c0bad9ae799106645d481de6 --- /dev/null +++ b/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:25a8dc900c45c16fe4060c56a74bcce93fd948f888817926ff3324f571a9d29c +size 3875879784 diff --git a/checkpoint-2499/README.md b/checkpoint-2499/README.md new file mode 100644 index 0000000000000000000000000000000000000000..bd5a5d669d6a6bdd984240b8e8bb0a3445b36cda --- /dev/null +++ b/checkpoint-2499/README.md @@ -0,0 +1,218 @@ +--- +library_name: peft +base_model: mistralai/Mixtral-8x7B-v0.1 +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] + + +## Training procedure + +The following `bitsandbytes` quantization config was used during training: +- quant_method: bitsandbytes +- load_in_8bit: False +- load_in_4bit: True +- llm_int8_threshold: 6.0 +- llm_int8_skip_modules: None +- llm_int8_enable_fp32_cpu_offload: False +- llm_int8_has_fp16_weight: False +- bnb_4bit_quant_type: nf4 +- bnb_4bit_use_double_quant: True +- bnb_4bit_compute_dtype: bfloat16 + +### Framework versions + +- PEFT 0.7.0 \ No newline at end of file diff --git a/checkpoint-2499/README.md:com.dropbox.attrs b/checkpoint-2499/README.md:com.dropbox.attrs new file mode 100644 index 0000000000000000000000000000000000000000..1808e0b593d04494a557f6700a0a684b4dea91a4 Binary files /dev/null and b/checkpoint-2499/README.md:com.dropbox.attrs differ diff --git a/checkpoint-2499/adapter_config.json b/checkpoint-2499/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..1c60bdd91f1a6b73161ce005f7160d2490fd5c8a --- /dev/null +++ b/checkpoint-2499/adapter_config.json @@ -0,0 +1,32 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "mistralai/Mixtral-8x7B-v0.1", + "bias": "none", + "fan_in_fan_out": null, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 64, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "k_proj", + "w1", + "gate", + "w2", + "q_proj", + "w3", + "o_proj", + "v_proj" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-2499/adapter_config.json:com.dropbox.attrs b/checkpoint-2499/adapter_config.json:com.dropbox.attrs new file mode 100644 index 0000000000000000000000000000000000000000..00aecb26753e4aea65baffa2d0881f0c47b1bd2c Binary files /dev/null and b/checkpoint-2499/adapter_config.json:com.dropbox.attrs differ diff --git a/checkpoint-2499/adapter_model.safetensors b/checkpoint-2499/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9223c1ecabcbcb75ec367af6667223c798711e15 --- /dev/null +++ b/checkpoint-2499/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d145f0793c75b00a347852ac3d1ff74246478adc3d2fbd80da5134d7c45216b3 +size 3875879784 diff --git a/checkpoint-2499/adapter_model.safetensors:com.dropbox.attrs b/checkpoint-2499/adapter_model.safetensors:com.dropbox.attrs new file mode 100644 index 0000000000000000000000000000000000000000..f897259a0e557e63001059025c0d0379605e90fb Binary files /dev/null and b/checkpoint-2499/adapter_model.safetensors:com.dropbox.attrs differ diff --git a/checkpoint-2499/optimizer.pt b/checkpoint-2499/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..d32e15c7914c85a2a8be4364ccb3a2c4c4a4a871 --- /dev/null +++ b/checkpoint-2499/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b19ff3b3f8e900f1a64f6fca67dbbce1331401ef929eae3800b067cfc39c4f7 +size 1943844127 diff --git a/checkpoint-2499/optimizer.pt:com.dropbox.attrs b/checkpoint-2499/optimizer.pt:com.dropbox.attrs new file mode 100644 index 0000000000000000000000000000000000000000..f383fb65bb72bdde60e257709707d96722ad58dc Binary files /dev/null and b/checkpoint-2499/optimizer.pt:com.dropbox.attrs differ diff --git a/checkpoint-2499/rng_state.pth b/checkpoint-2499/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..ebb27732da7a38b431129ebf31376f218c83f61e --- /dev/null +++ b/checkpoint-2499/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3674ba216d6f7f8897c3829f726223c3519635c58acbcadefc26dde709002fdd +size 14575 diff --git a/checkpoint-2499/rng_state.pth:com.dropbox.attrs b/checkpoint-2499/rng_state.pth:com.dropbox.attrs new file mode 100644 index 0000000000000000000000000000000000000000..1cc85d53bf48bf264e31895a66cbd48007d774c4 Binary files /dev/null and b/checkpoint-2499/rng_state.pth:com.dropbox.attrs differ diff --git a/checkpoint-2499/scheduler.pt b/checkpoint-2499/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..4f1a3892702919ebf655e78e56235f9a67f56091 --- /dev/null +++ b/checkpoint-2499/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bf09374ab62b3b49f9a578d2d4aee7daeb894d5ef4fb18b178a6d45b26190dc1 +size 627 diff --git a/checkpoint-2499/scheduler.pt:com.dropbox.attrs b/checkpoint-2499/scheduler.pt:com.dropbox.attrs new file mode 100644 index 0000000000000000000000000000000000000000..ae9c8204d36673eb197b758ca794f99bc8c4e03f Binary files /dev/null and b/checkpoint-2499/scheduler.pt:com.dropbox.attrs differ diff --git a/checkpoint-2499/trainer_state.json b/checkpoint-2499/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..decb845f8699fad9a5dc10c0342ab19c70c2d2a6 --- /dev/null +++ b/checkpoint-2499/trainer_state.json @@ -0,0 +1,15111 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 2.972388955582233, + "eval_steps": 209, + "global_step": 2499, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 2e-05, + "loss": 2.1426, + "step": 1 + }, + { + "epoch": 0.0, + "eval_loss": 2.071432113647461, + "eval_runtime": 279.6718, + "eval_samples_per_second": 0.737, + "eval_steps_per_second": 0.737, + "step": 1 + }, + { + "epoch": 0.0, + "learning_rate": 4e-05, + "loss": 2.4033, + "step": 2 + }, + { + "epoch": 0.0, + "learning_rate": 6e-05, + "loss": 2.1893, + "step": 3 + }, + { + "epoch": 0.0, + "learning_rate": 8e-05, + "loss": 2.3226, + "step": 4 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001, + "loss": 2.2485, + "step": 5 + }, + { + "epoch": 0.01, + "learning_rate": 0.00012, + "loss": 1.9704, + "step": 6 + }, + { + "epoch": 0.01, + "learning_rate": 0.00014, + "loss": 1.6929, + "step": 7 + }, + { + "epoch": 0.01, + "learning_rate": 0.00016, + "loss": 2.2957, + "step": 8 + }, + { + "epoch": 0.01, + "learning_rate": 0.00018, + "loss": 1.9907, + "step": 9 + }, + { + "epoch": 0.01, + "learning_rate": 0.0002, + "loss": 2.1295, + "step": 10 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999999287109068, + "loss": 2.2249, + "step": 11 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999997148436365, + "loss": 2.1733, + "step": 12 + }, + { + "epoch": 0.02, + "learning_rate": 0.000199999935839822, + "loss": 2.1404, + "step": 13 + }, + { + "epoch": 0.02, + "learning_rate": 0.00019999988593747084, + "loss": 2.0236, + "step": 14 + }, + { + "epoch": 0.02, + "learning_rate": 0.00019999982177731722, + "loss": 1.9639, + "step": 15 + }, + { + "epoch": 0.02, + "learning_rate": 0.00019999974335937034, + "loss": 1.692, + "step": 16 + }, + { + "epoch": 0.02, + "learning_rate": 0.00019999965068364137, + "loss": 2.3609, + "step": 17 + }, + { + "epoch": 0.02, + "learning_rate": 0.00019999954375014348, + "loss": 2.3553, + "step": 18 + }, + { + "epoch": 0.02, + "learning_rate": 0.00019999942255889198, + "loss": 1.5733, + "step": 19 + }, + { + "epoch": 0.02, + "learning_rate": 0.00019999928710990412, + "loss": 1.7505, + "step": 20 + }, + { + "epoch": 0.03, + "learning_rate": 0.00019999913740319922, + "loss": 2.3068, + "step": 21 + }, + { + "epoch": 0.03, + "learning_rate": 0.00019999897343879862, + "loss": 1.8371, + "step": 22 + }, + { + "epoch": 0.03, + "learning_rate": 0.0001999987952167257, + "loss": 1.9852, + "step": 23 + }, + { + "epoch": 0.03, + "learning_rate": 0.00019999860273700585, + "loss": 1.9625, + "step": 24 + }, + { + "epoch": 0.03, + "learning_rate": 0.00019999839599966655, + "loss": 2.1089, + "step": 25 + }, + { + "epoch": 0.03, + "learning_rate": 0.00019999817500473724, + "loss": 2.1086, + "step": 26 + }, + { + "epoch": 0.03, + "learning_rate": 0.00019999793975224945, + "loss": 2.0284, + "step": 27 + }, + { + "epoch": 0.03, + "learning_rate": 0.00019999769024223673, + "loss": 2.3641, + "step": 28 + }, + { + "epoch": 0.03, + "learning_rate": 0.00019999742647473464, + "loss": 1.963, + "step": 29 + }, + { + "epoch": 0.04, + "learning_rate": 0.00019999714844978078, + "loss": 2.0635, + "step": 30 + }, + { + "epoch": 0.04, + "learning_rate": 0.0001999968561674148, + "loss": 1.9304, + "step": 31 + }, + { + "epoch": 0.04, + "learning_rate": 0.00019999654962767839, + "loss": 1.4124, + "step": 32 + }, + { + "epoch": 0.04, + "learning_rate": 0.00019999622883061518, + "loss": 2.1444, + "step": 33 + }, + { + "epoch": 0.04, + "learning_rate": 0.00019999589377627102, + "loss": 1.6477, + "step": 34 + }, + { + "epoch": 0.04, + "learning_rate": 0.0001999955444646936, + "loss": 2.2601, + "step": 35 + }, + { + "epoch": 0.04, + "learning_rate": 0.00019999518089593282, + "loss": 1.6256, + "step": 36 + }, + { + "epoch": 0.04, + "learning_rate": 0.0001999948030700404, + "loss": 1.9155, + "step": 37 + }, + { + "epoch": 0.05, + "learning_rate": 0.00019999441098707025, + "loss": 2.1408, + "step": 38 + }, + { + "epoch": 0.05, + "learning_rate": 0.00019999400464707832, + "loss": 2.104, + "step": 39 + }, + { + "epoch": 0.05, + "learning_rate": 0.0001999935840501225, + "loss": 1.9841, + "step": 40 + }, + { + "epoch": 0.05, + "learning_rate": 0.00019999314919626272, + "loss": 1.5924, + "step": 41 + }, + { + "epoch": 0.05, + "learning_rate": 0.00019999270008556108, + "loss": 1.9956, + "step": 42 + }, + { + "epoch": 0.05, + "learning_rate": 0.00019999223671808154, + "loss": 1.4673, + "step": 43 + }, + { + "epoch": 0.05, + "learning_rate": 0.00019999175909389018, + "loss": 2.1595, + "step": 44 + }, + { + "epoch": 0.05, + "learning_rate": 0.00019999126721305513, + "loss": 1.8439, + "step": 45 + }, + { + "epoch": 0.06, + "learning_rate": 0.00019999076107564648, + "loss": 1.9961, + "step": 46 + }, + { + "epoch": 0.06, + "learning_rate": 0.00019999024068173638, + "loss": 2.1504, + "step": 47 + }, + { + "epoch": 0.06, + "learning_rate": 0.00019998970603139912, + "loss": 2.2907, + "step": 48 + }, + { + "epoch": 0.06, + "learning_rate": 0.0001999891571247108, + "loss": 1.5709, + "step": 49 + }, + { + "epoch": 0.06, + "learning_rate": 0.0001999885939617498, + "loss": 2.4504, + "step": 50 + }, + { + "epoch": 0.06, + "learning_rate": 0.00019998801654259632, + "loss": 2.3787, + "step": 51 + }, + { + "epoch": 0.06, + "learning_rate": 0.0001999874248673328, + "loss": 2.0434, + "step": 52 + }, + { + "epoch": 0.06, + "learning_rate": 0.00019998681893604347, + "loss": 2.1671, + "step": 53 + }, + { + "epoch": 0.06, + "learning_rate": 0.0001999861987488148, + "loss": 1.7432, + "step": 54 + }, + { + "epoch": 0.07, + "learning_rate": 0.00019998556430573521, + "loss": 1.7737, + "step": 55 + }, + { + "epoch": 0.07, + "learning_rate": 0.00019998491560689513, + "loss": 2.0122, + "step": 56 + }, + { + "epoch": 0.07, + "learning_rate": 0.0001999842526523871, + "loss": 1.7545, + "step": 57 + }, + { + "epoch": 0.07, + "learning_rate": 0.00019998357544230558, + "loss": 2.201, + "step": 58 + }, + { + "epoch": 0.07, + "learning_rate": 0.00019998288397674716, + "loss": 2.0396, + "step": 59 + }, + { + "epoch": 0.07, + "learning_rate": 0.0001999821782558104, + "loss": 1.9275, + "step": 60 + }, + { + "epoch": 0.07, + "learning_rate": 0.00019998145827959598, + "loss": 1.7797, + "step": 61 + }, + { + "epoch": 0.07, + "learning_rate": 0.0001999807240482065, + "loss": 2.1463, + "step": 62 + }, + { + "epoch": 0.08, + "learning_rate": 0.00019997997556174665, + "loss": 1.935, + "step": 63 + }, + { + "epoch": 0.08, + "learning_rate": 0.0001999792128203232, + "loss": 2.1182, + "step": 64 + }, + { + "epoch": 0.08, + "learning_rate": 0.0001999784358240448, + "loss": 2.2297, + "step": 65 + }, + { + "epoch": 0.08, + "learning_rate": 0.00019997764457302234, + "loss": 2.1052, + "step": 66 + }, + { + "epoch": 0.08, + "learning_rate": 0.0001999768390673686, + "loss": 2.0777, + "step": 67 + }, + { + "epoch": 0.08, + "learning_rate": 0.00019997601930719835, + "loss": 2.1419, + "step": 68 + }, + { + "epoch": 0.08, + "learning_rate": 0.0001999751852926286, + "loss": 2.2586, + "step": 69 + }, + { + "epoch": 0.08, + "learning_rate": 0.00019997433702377817, + "loss": 1.9089, + "step": 70 + }, + { + "epoch": 0.09, + "learning_rate": 0.00019997347450076801, + "loss": 2.0587, + "step": 71 + }, + { + "epoch": 0.09, + "learning_rate": 0.00019997259772372116, + "loss": 2.4143, + "step": 72 + }, + { + "epoch": 0.09, + "learning_rate": 0.00019997170669276256, + "loss": 1.947, + "step": 73 + }, + { + "epoch": 0.09, + "learning_rate": 0.00019997080140801932, + "loss": 2.008, + "step": 74 + }, + { + "epoch": 0.09, + "learning_rate": 0.00019996988186962041, + "loss": 2.4912, + "step": 75 + }, + { + "epoch": 0.09, + "learning_rate": 0.00019996894807769707, + "loss": 2.0279, + "step": 76 + }, + { + "epoch": 0.09, + "learning_rate": 0.00019996800003238232, + "loss": 1.9914, + "step": 77 + }, + { + "epoch": 0.09, + "learning_rate": 0.0001999670377338114, + "loss": 1.9091, + "step": 78 + }, + { + "epoch": 0.09, + "learning_rate": 0.00019996606118212148, + "loss": 1.8038, + "step": 79 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019996507037745183, + "loss": 2.3573, + "step": 80 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019996406531994364, + "loss": 2.3204, + "step": 81 + }, + { + "epoch": 0.1, + "learning_rate": 0.0001999630460097403, + "loss": 2.1619, + "step": 82 + }, + { + "epoch": 0.1, + "learning_rate": 0.0001999620124469871, + "loss": 1.9977, + "step": 83 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019996096463183142, + "loss": 2.195, + "step": 84 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019995990256442263, + "loss": 1.9909, + "step": 85 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019995882624491217, + "loss": 2.2001, + "step": 86 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019995773567345354, + "loss": 1.5795, + "step": 87 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019995663085020212, + "loss": 2.174, + "step": 88 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019995551177531557, + "loss": 1.9605, + "step": 89 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019995437844895334, + "loss": 2.1768, + "step": 90 + }, + { + "epoch": 0.11, + "learning_rate": 0.0001999532308712771, + "loss": 1.6906, + "step": 91 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019995206904245037, + "loss": 2.1029, + "step": 92 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019995089296263893, + "loss": 2.0652, + "step": 93 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019994970263201035, + "loss": 2.1733, + "step": 94 + }, + { + "epoch": 0.11, + "learning_rate": 0.0001999484980507344, + "loss": 1.9413, + "step": 95 + }, + { + "epoch": 0.12, + "learning_rate": 0.0001999472792189828, + "loss": 1.9538, + "step": 96 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019994604613692935, + "loss": 2.4158, + "step": 97 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019994479880474988, + "loss": 1.8964, + "step": 98 + }, + { + "epoch": 0.12, + "learning_rate": 0.0001999435372226222, + "loss": 2.3135, + "step": 99 + }, + { + "epoch": 0.12, + "learning_rate": 0.0001999422613907262, + "loss": 2.127, + "step": 100 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019994097130924374, + "loss": 1.9954, + "step": 101 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019993966697835883, + "loss": 2.1363, + "step": 102 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019993834839825738, + "loss": 1.7779, + "step": 103 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019993701556912742, + "loss": 2.0923, + "step": 104 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019993566849115898, + "loss": 1.9183, + "step": 105 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019993430716454413, + "loss": 1.7894, + "step": 106 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019993293158947694, + "loss": 2.0094, + "step": 107 + }, + { + "epoch": 0.13, + "learning_rate": 0.0001999315417661536, + "loss": 2.1469, + "step": 108 + }, + { + "epoch": 0.13, + "learning_rate": 0.0001999301376947722, + "loss": 1.6924, + "step": 109 + }, + { + "epoch": 0.13, + "learning_rate": 0.0001999287193755329, + "loss": 2.1794, + "step": 110 + }, + { + "epoch": 0.13, + "learning_rate": 0.000199927286808638, + "loss": 2.1338, + "step": 111 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019992583999429178, + "loss": 1.9988, + "step": 112 + }, + { + "epoch": 0.14, + "learning_rate": 0.0001999243789327004, + "loss": 2.0735, + "step": 113 + }, + { + "epoch": 0.14, + "learning_rate": 0.0001999229036240723, + "loss": 2.0521, + "step": 114 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019992141406861776, + "loss": 1.9441, + "step": 115 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019991991026654918, + "loss": 2.1244, + "step": 116 + }, + { + "epoch": 0.14, + "learning_rate": 0.0001999183922180809, + "loss": 1.7937, + "step": 117 + }, + { + "epoch": 0.14, + "learning_rate": 0.0001999168599234295, + "loss": 2.2603, + "step": 118 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019991531338281332, + "loss": 2.1846, + "step": 119 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019991375259645293, + "loss": 2.3241, + "step": 120 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019991217756457085, + "loss": 2.0926, + "step": 121 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019991058828739165, + "loss": 2.0092, + "step": 122 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019990898476514193, + "loss": 1.8076, + "step": 123 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019990736699805029, + "loss": 2.0369, + "step": 124 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019990573498634742, + "loss": 2.0488, + "step": 125 + }, + { + "epoch": 0.15, + "learning_rate": 0.000199904088730266, + "loss": 2.1534, + "step": 126 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019990242823004074, + "loss": 2.1406, + "step": 127 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019990075348590839, + "loss": 1.9379, + "step": 128 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019989906449810775, + "loss": 1.9781, + "step": 129 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019989736126687963, + "loss": 1.973, + "step": 130 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019989564379246683, + "loss": 1.6825, + "step": 131 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019989391207511428, + "loss": 2.0843, + "step": 132 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019989216611506887, + "loss": 1.8547, + "step": 133 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019989040591257952, + "loss": 1.7626, + "step": 134 + }, + { + "epoch": 0.16, + "learning_rate": 0.0001998886314678972, + "loss": 2.0531, + "step": 135 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019988684278127497, + "loss": 2.0031, + "step": 136 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019988503985296773, + "loss": 1.9342, + "step": 137 + }, + { + "epoch": 0.17, + "learning_rate": 0.00019988322268323268, + "loss": 2.3297, + "step": 138 + }, + { + "epoch": 0.17, + "learning_rate": 0.00019988139127232878, + "loss": 2.3401, + "step": 139 + }, + { + "epoch": 0.17, + "learning_rate": 0.00019987954562051725, + "loss": 1.8983, + "step": 140 + }, + { + "epoch": 0.17, + "learning_rate": 0.0001998776857280612, + "loss": 2.0621, + "step": 141 + }, + { + "epoch": 0.17, + "learning_rate": 0.00019987581159522578, + "loss": 2.0574, + "step": 142 + }, + { + "epoch": 0.17, + "learning_rate": 0.00019987392322227824, + "loss": 1.9516, + "step": 143 + }, + { + "epoch": 0.17, + "learning_rate": 0.00019987202060948783, + "loss": 2.1402, + "step": 144 + }, + { + "epoch": 0.17, + "learning_rate": 0.00019987010375712577, + "loss": 1.8903, + "step": 145 + }, + { + "epoch": 0.18, + "learning_rate": 0.00019986817266546539, + "loss": 1.8248, + "step": 146 + }, + { + "epoch": 0.18, + "learning_rate": 0.00019986622733478204, + "loss": 1.9877, + "step": 147 + }, + { + "epoch": 0.18, + "learning_rate": 0.00019986426776535306, + "loss": 1.6272, + "step": 148 + }, + { + "epoch": 0.18, + "learning_rate": 0.00019986229395745785, + "loss": 1.8605, + "step": 149 + }, + { + "epoch": 0.18, + "learning_rate": 0.00019986030591137783, + "loss": 1.6848, + "step": 150 + }, + { + "epoch": 0.18, + "learning_rate": 0.00019985830362739647, + "loss": 2.1922, + "step": 151 + }, + { + "epoch": 0.18, + "learning_rate": 0.0001998562871057992, + "loss": 2.0238, + "step": 152 + }, + { + "epoch": 0.18, + "learning_rate": 0.0001998542563468736, + "loss": 2.2246, + "step": 153 + }, + { + "epoch": 0.18, + "learning_rate": 0.00019985221135090914, + "loss": 1.9438, + "step": 154 + }, + { + "epoch": 0.19, + "learning_rate": 0.00019985015211819744, + "loss": 2.2136, + "step": 155 + }, + { + "epoch": 0.19, + "learning_rate": 0.0001998480786490321, + "loss": 2.4563, + "step": 156 + }, + { + "epoch": 0.19, + "learning_rate": 0.00019984599094370874, + "loss": 2.2138, + "step": 157 + }, + { + "epoch": 0.19, + "learning_rate": 0.00019984388900252503, + "loss": 2.2679, + "step": 158 + }, + { + "epoch": 0.19, + "learning_rate": 0.00019984177282578064, + "loss": 1.9537, + "step": 159 + }, + { + "epoch": 0.19, + "learning_rate": 0.0001998396424137773, + "loss": 2.0803, + "step": 160 + }, + { + "epoch": 0.19, + "learning_rate": 0.0001998374977668188, + "loss": 2.0282, + "step": 161 + }, + { + "epoch": 0.19, + "learning_rate": 0.00019983533888521087, + "loss": 2.0157, + "step": 162 + }, + { + "epoch": 0.2, + "learning_rate": 0.0001998331657692613, + "loss": 1.7837, + "step": 163 + }, + { + "epoch": 0.2, + "learning_rate": 0.00019983097841928, + "loss": 2.1556, + "step": 164 + }, + { + "epoch": 0.2, + "learning_rate": 0.00019982877683557879, + "loss": 2.1447, + "step": 165 + }, + { + "epoch": 0.2, + "learning_rate": 0.00019982656101847162, + "loss": 2.4139, + "step": 166 + }, + { + "epoch": 0.2, + "learning_rate": 0.0001998243309682743, + "loss": 1.6788, + "step": 167 + }, + { + "epoch": 0.2, + "learning_rate": 0.00019982208668530493, + "loss": 1.9008, + "step": 168 + }, + { + "epoch": 0.2, + "learning_rate": 0.0001998198281698834, + "loss": 2.173, + "step": 169 + }, + { + "epoch": 0.2, + "learning_rate": 0.00019981755542233177, + "loss": 2.1837, + "step": 170 + }, + { + "epoch": 0.21, + "learning_rate": 0.00019981526844297404, + "loss": 2.0639, + "step": 171 + }, + { + "epoch": 0.21, + "learning_rate": 0.00019981296723213632, + "loss": 2.3864, + "step": 172 + }, + { + "epoch": 0.21, + "learning_rate": 0.00019981065179014673, + "loss": 1.923, + "step": 173 + }, + { + "epoch": 0.21, + "learning_rate": 0.00019980832211733535, + "loss": 1.9192, + "step": 174 + }, + { + "epoch": 0.21, + "learning_rate": 0.00019980597821403438, + "loss": 2.0335, + "step": 175 + }, + { + "epoch": 0.21, + "learning_rate": 0.000199803620080578, + "loss": 1.8172, + "step": 176 + }, + { + "epoch": 0.21, + "learning_rate": 0.0001998012477173024, + "loss": 2.0294, + "step": 177 + }, + { + "epoch": 0.21, + "learning_rate": 0.00019979886112454586, + "loss": 2.2889, + "step": 178 + }, + { + "epoch": 0.21, + "learning_rate": 0.00019979646030264867, + "loss": 1.8498, + "step": 179 + }, + { + "epoch": 0.22, + "learning_rate": 0.0001997940452519531, + "loss": 2.0797, + "step": 180 + }, + { + "epoch": 0.22, + "learning_rate": 0.0001997916159728035, + "loss": 2.2356, + "step": 181 + }, + { + "epoch": 0.22, + "learning_rate": 0.0001997891724655462, + "loss": 2.1187, + "step": 182 + }, + { + "epoch": 0.22, + "learning_rate": 0.00019978671473052964, + "loss": 1.9301, + "step": 183 + }, + { + "epoch": 0.22, + "learning_rate": 0.00019978424276810423, + "loss": 1.8582, + "step": 184 + }, + { + "epoch": 0.22, + "learning_rate": 0.0001997817565786224, + "loss": 2.144, + "step": 185 + }, + { + "epoch": 0.22, + "learning_rate": 0.00019977925616243862, + "loss": 2.0595, + "step": 186 + }, + { + "epoch": 0.22, + "learning_rate": 0.00019977674151990945, + "loss": 1.9104, + "step": 187 + }, + { + "epoch": 0.23, + "learning_rate": 0.00019977421265139332, + "loss": 1.9727, + "step": 188 + }, + { + "epoch": 0.23, + "learning_rate": 0.00019977166955725088, + "loss": 1.8727, + "step": 189 + }, + { + "epoch": 0.23, + "learning_rate": 0.0001997691122378447, + "loss": 2.0611, + "step": 190 + }, + { + "epoch": 0.23, + "learning_rate": 0.0001997665406935394, + "loss": 2.0745, + "step": 191 + }, + { + "epoch": 0.23, + "learning_rate": 0.0001997639549247016, + "loss": 1.9974, + "step": 192 + }, + { + "epoch": 0.23, + "learning_rate": 0.00019976135493169996, + "loss": 1.9856, + "step": 193 + }, + { + "epoch": 0.23, + "learning_rate": 0.00019975874071490526, + "loss": 1.778, + "step": 194 + }, + { + "epoch": 0.23, + "learning_rate": 0.00019975611227469016, + "loss": 1.8347, + "step": 195 + }, + { + "epoch": 0.24, + "learning_rate": 0.0001997534696114294, + "loss": 1.5555, + "step": 196 + }, + { + "epoch": 0.24, + "learning_rate": 0.00019975081272549989, + "loss": 1.5625, + "step": 197 + }, + { + "epoch": 0.24, + "learning_rate": 0.00019974814161728032, + "loss": 1.9997, + "step": 198 + }, + { + "epoch": 0.24, + "learning_rate": 0.00019974545628715157, + "loss": 1.9523, + "step": 199 + }, + { + "epoch": 0.24, + "learning_rate": 0.00019974275673549654, + "loss": 2.1557, + "step": 200 + }, + { + "epoch": 0.24, + "learning_rate": 0.00019974004296270006, + "loss": 1.8306, + "step": 201 + }, + { + "epoch": 0.24, + "learning_rate": 0.00019973731496914914, + "loss": 2.0051, + "step": 202 + }, + { + "epoch": 0.24, + "learning_rate": 0.00019973457275523264, + "loss": 2.201, + "step": 203 + }, + { + "epoch": 0.24, + "learning_rate": 0.0001997318163213416, + "loss": 2.2446, + "step": 204 + }, + { + "epoch": 0.25, + "learning_rate": 0.00019972904566786903, + "loss": 2.1172, + "step": 205 + }, + { + "epoch": 0.25, + "learning_rate": 0.00019972626079520995, + "loss": 1.9849, + "step": 206 + }, + { + "epoch": 0.25, + "learning_rate": 0.00019972346170376142, + "loss": 1.9774, + "step": 207 + }, + { + "epoch": 0.25, + "learning_rate": 0.0001997206483939225, + "loss": 1.7625, + "step": 208 + }, + { + "epoch": 0.25, + "learning_rate": 0.00019971782086609436, + "loss": 2.2346, + "step": 209 + }, + { + "epoch": 0.25, + "eval_loss": 2.00066876411438, + "eval_runtime": 282.7648, + "eval_samples_per_second": 0.729, + "eval_steps_per_second": 0.729, + "step": 209 + }, + { + "epoch": 0.25, + "learning_rate": 0.00019971497912068013, + "loss": 2.4185, + "step": 210 + }, + { + "epoch": 0.25, + "learning_rate": 0.00019971212315808497, + "loss": 1.946, + "step": 211 + }, + { + "epoch": 0.25, + "learning_rate": 0.00019970925297871605, + "loss": 2.0049, + "step": 212 + }, + { + "epoch": 0.26, + "learning_rate": 0.00019970636858298267, + "loss": 1.9545, + "step": 213 + }, + { + "epoch": 0.26, + "learning_rate": 0.00019970346997129598, + "loss": 1.9636, + "step": 214 + }, + { + "epoch": 0.26, + "learning_rate": 0.00019970055714406938, + "loss": 1.9068, + "step": 215 + }, + { + "epoch": 0.26, + "learning_rate": 0.00019969763010171807, + "loss": 1.5749, + "step": 216 + }, + { + "epoch": 0.26, + "learning_rate": 0.00019969468884465942, + "loss": 1.7676, + "step": 217 + }, + { + "epoch": 0.26, + "learning_rate": 0.0001996917333733128, + "loss": 2.0329, + "step": 218 + }, + { + "epoch": 0.26, + "learning_rate": 0.0001996887636880996, + "loss": 1.9307, + "step": 219 + }, + { + "epoch": 0.26, + "learning_rate": 0.00019968577978944323, + "loss": 2.134, + "step": 220 + }, + { + "epoch": 0.27, + "learning_rate": 0.00019968278167776908, + "loss": 2.0911, + "step": 221 + }, + { + "epoch": 0.27, + "learning_rate": 0.00019967976935350467, + "loss": 2.5057, + "step": 222 + }, + { + "epoch": 0.27, + "learning_rate": 0.0001996767428170795, + "loss": 1.9267, + "step": 223 + }, + { + "epoch": 0.27, + "learning_rate": 0.00019967370206892503, + "loss": 2.3569, + "step": 224 + }, + { + "epoch": 0.27, + "learning_rate": 0.00019967064710947488, + "loss": 1.992, + "step": 225 + }, + { + "epoch": 0.27, + "learning_rate": 0.00019966757793916454, + "loss": 2.01, + "step": 226 + }, + { + "epoch": 0.27, + "learning_rate": 0.00019966449455843165, + "loss": 1.8037, + "step": 227 + }, + { + "epoch": 0.27, + "learning_rate": 0.00019966139696771587, + "loss": 2.2498, + "step": 228 + }, + { + "epoch": 0.27, + "learning_rate": 0.00019965828516745876, + "loss": 1.6563, + "step": 229 + }, + { + "epoch": 0.28, + "learning_rate": 0.0001996551591581041, + "loss": 1.979, + "step": 230 + }, + { + "epoch": 0.28, + "learning_rate": 0.0001996520189400975, + "loss": 2.1553, + "step": 231 + }, + { + "epoch": 0.28, + "learning_rate": 0.0001996488645138867, + "loss": 1.8743, + "step": 232 + }, + { + "epoch": 0.28, + "learning_rate": 0.00019964569587992148, + "loss": 2.1907, + "step": 233 + }, + { + "epoch": 0.28, + "learning_rate": 0.00019964251303865362, + "loss": 2.0644, + "step": 234 + }, + { + "epoch": 0.28, + "learning_rate": 0.00019963931599053692, + "loss": 2.1721, + "step": 235 + }, + { + "epoch": 0.28, + "learning_rate": 0.0001996361047360272, + "loss": 2.2267, + "step": 236 + }, + { + "epoch": 0.28, + "learning_rate": 0.0001996328792755823, + "loss": 1.9445, + "step": 237 + }, + { + "epoch": 0.29, + "learning_rate": 0.00019962963960966213, + "loss": 2.2003, + "step": 238 + }, + { + "epoch": 0.29, + "learning_rate": 0.0001996263857387286, + "loss": 2.3114, + "step": 239 + }, + { + "epoch": 0.29, + "learning_rate": 0.0001996231176632456, + "loss": 1.8553, + "step": 240 + }, + { + "epoch": 0.29, + "learning_rate": 0.00019961983538367914, + "loss": 2.1349, + "step": 241 + }, + { + "epoch": 0.29, + "learning_rate": 0.00019961653890049715, + "loss": 1.8784, + "step": 242 + }, + { + "epoch": 0.29, + "learning_rate": 0.0001996132282141697, + "loss": 2.0118, + "step": 243 + }, + { + "epoch": 0.29, + "learning_rate": 0.00019960990332516874, + "loss": 1.9938, + "step": 244 + }, + { + "epoch": 0.29, + "learning_rate": 0.00019960656423396834, + "loss": 2.2582, + "step": 245 + }, + { + "epoch": 0.3, + "learning_rate": 0.00019960321094104465, + "loss": 2.1807, + "step": 246 + }, + { + "epoch": 0.3, + "learning_rate": 0.00019959984344687578, + "loss": 1.9084, + "step": 247 + }, + { + "epoch": 0.3, + "learning_rate": 0.00019959646175194174, + "loss": 2.2879, + "step": 248 + }, + { + "epoch": 0.3, + "learning_rate": 0.0001995930658567248, + "loss": 1.942, + "step": 249 + }, + { + "epoch": 0.3, + "learning_rate": 0.00019958965576170908, + "loss": 2.1313, + "step": 250 + }, + { + "epoch": 0.3, + "learning_rate": 0.00019958623146738088, + "loss": 2.3202, + "step": 251 + }, + { + "epoch": 0.3, + "learning_rate": 0.0001995827929742283, + "loss": 1.7832, + "step": 252 + }, + { + "epoch": 0.3, + "learning_rate": 0.00019957934028274162, + "loss": 1.7103, + "step": 253 + }, + { + "epoch": 0.3, + "learning_rate": 0.00019957587339341321, + "loss": 1.9912, + "step": 254 + }, + { + "epoch": 0.31, + "learning_rate": 0.0001995723923067373, + "loss": 1.6686, + "step": 255 + }, + { + "epoch": 0.31, + "learning_rate": 0.00019956889702321023, + "loss": 1.966, + "step": 256 + }, + { + "epoch": 0.31, + "learning_rate": 0.00019956538754333034, + "loss": 2.2287, + "step": 257 + }, + { + "epoch": 0.31, + "learning_rate": 0.00019956186386759804, + "loss": 1.4866, + "step": 258 + }, + { + "epoch": 0.31, + "learning_rate": 0.0001995583259965157, + "loss": 1.9599, + "step": 259 + }, + { + "epoch": 0.31, + "learning_rate": 0.00019955477393058773, + "loss": 1.9273, + "step": 260 + }, + { + "epoch": 0.31, + "learning_rate": 0.0001995512076703206, + "loss": 1.847, + "step": 261 + }, + { + "epoch": 0.31, + "learning_rate": 0.00019954762721622279, + "loss": 2.0535, + "step": 262 + }, + { + "epoch": 0.32, + "learning_rate": 0.0001995440325688048, + "loss": 2.4403, + "step": 263 + }, + { + "epoch": 0.32, + "learning_rate": 0.00019954042372857908, + "loss": 1.8712, + "step": 264 + }, + { + "epoch": 0.32, + "learning_rate": 0.00019953680069606026, + "loss": 2.1837, + "step": 265 + }, + { + "epoch": 0.32, + "learning_rate": 0.00019953316347176488, + "loss": 2.0398, + "step": 266 + }, + { + "epoch": 0.32, + "learning_rate": 0.0001995295120562115, + "loss": 2.1135, + "step": 267 + }, + { + "epoch": 0.32, + "learning_rate": 0.00019952584644992075, + "loss": 2.0358, + "step": 268 + }, + { + "epoch": 0.32, + "learning_rate": 0.00019952216665341526, + "loss": 2.3282, + "step": 269 + }, + { + "epoch": 0.32, + "learning_rate": 0.0001995184726672197, + "loss": 1.9741, + "step": 270 + }, + { + "epoch": 0.33, + "learning_rate": 0.00019951476449186074, + "loss": 1.7523, + "step": 271 + }, + { + "epoch": 0.33, + "learning_rate": 0.00019951104212786712, + "loss": 2.1509, + "step": 272 + }, + { + "epoch": 0.33, + "learning_rate": 0.0001995073055757695, + "loss": 2.0865, + "step": 273 + }, + { + "epoch": 0.33, + "learning_rate": 0.00019950355483610067, + "loss": 1.8972, + "step": 274 + }, + { + "epoch": 0.33, + "learning_rate": 0.00019949978990939542, + "loss": 2.4693, + "step": 275 + }, + { + "epoch": 0.33, + "learning_rate": 0.0001994960107961905, + "loss": 1.9307, + "step": 276 + }, + { + "epoch": 0.33, + "learning_rate": 0.0001994922174970248, + "loss": 2.0097, + "step": 277 + }, + { + "epoch": 0.33, + "learning_rate": 0.0001994884100124391, + "loss": 1.6561, + "step": 278 + }, + { + "epoch": 0.33, + "learning_rate": 0.0001994845883429763, + "loss": 2.3069, + "step": 279 + }, + { + "epoch": 0.34, + "learning_rate": 0.00019948075248918124, + "loss": 2.0134, + "step": 280 + }, + { + "epoch": 0.34, + "learning_rate": 0.00019947690245160091, + "loss": 2.1061, + "step": 281 + }, + { + "epoch": 0.34, + "learning_rate": 0.00019947303823078416, + "loss": 2.0855, + "step": 282 + }, + { + "epoch": 0.34, + "learning_rate": 0.00019946915982728197, + "loss": 1.5672, + "step": 283 + }, + { + "epoch": 0.34, + "learning_rate": 0.0001994652672416473, + "loss": 1.7289, + "step": 284 + }, + { + "epoch": 0.34, + "learning_rate": 0.00019946136047443522, + "loss": 1.9013, + "step": 285 + }, + { + "epoch": 0.34, + "learning_rate": 0.00019945743952620268, + "loss": 2.3105, + "step": 286 + }, + { + "epoch": 0.34, + "learning_rate": 0.00019945350439750872, + "loss": 2.341, + "step": 287 + }, + { + "epoch": 0.35, + "learning_rate": 0.00019944955508891443, + "loss": 1.88, + "step": 288 + }, + { + "epoch": 0.35, + "learning_rate": 0.0001994455916009829, + "loss": 1.913, + "step": 289 + }, + { + "epoch": 0.35, + "learning_rate": 0.00019944161393427922, + "loss": 1.9513, + "step": 290 + }, + { + "epoch": 0.35, + "learning_rate": 0.00019943762208937053, + "loss": 2.3331, + "step": 291 + }, + { + "epoch": 0.35, + "learning_rate": 0.00019943361606682597, + "loss": 2.3024, + "step": 292 + }, + { + "epoch": 0.35, + "learning_rate": 0.00019942959586721672, + "loss": 2.2222, + "step": 293 + }, + { + "epoch": 0.35, + "learning_rate": 0.00019942556149111598, + "loss": 2.1003, + "step": 294 + }, + { + "epoch": 0.35, + "learning_rate": 0.0001994215129390989, + "loss": 1.9038, + "step": 295 + }, + { + "epoch": 0.36, + "learning_rate": 0.00019941745021174282, + "loss": 1.6068, + "step": 296 + }, + { + "epoch": 0.36, + "learning_rate": 0.00019941337330962693, + "loss": 1.8894, + "step": 297 + }, + { + "epoch": 0.36, + "learning_rate": 0.00019940928223333252, + "loss": 2.3158, + "step": 298 + }, + { + "epoch": 0.36, + "learning_rate": 0.0001994051769834429, + "loss": 2.1015, + "step": 299 + }, + { + "epoch": 0.36, + "learning_rate": 0.00019940105756054337, + "loss": 2.1519, + "step": 300 + }, + { + "epoch": 0.36, + "learning_rate": 0.00019939692396522127, + "loss": 1.7233, + "step": 301 + }, + { + "epoch": 0.36, + "learning_rate": 0.00019939277619806598, + "loss": 1.85, + "step": 302 + }, + { + "epoch": 0.36, + "learning_rate": 0.00019938861425966887, + "loss": 2.2368, + "step": 303 + }, + { + "epoch": 0.36, + "learning_rate": 0.00019938443815062335, + "loss": 1.765, + "step": 304 + }, + { + "epoch": 0.37, + "learning_rate": 0.0001993802478715248, + "loss": 1.6333, + "step": 305 + }, + { + "epoch": 0.37, + "learning_rate": 0.00019937604342297073, + "loss": 2.191, + "step": 306 + }, + { + "epoch": 0.37, + "learning_rate": 0.00019937182480556055, + "loss": 2.2402, + "step": 307 + }, + { + "epoch": 0.37, + "learning_rate": 0.00019936759201989577, + "loss": 2.0568, + "step": 308 + }, + { + "epoch": 0.37, + "learning_rate": 0.0001993633450665799, + "loss": 2.4314, + "step": 309 + }, + { + "epoch": 0.37, + "learning_rate": 0.00019935908394621844, + "loss": 2.0556, + "step": 310 + }, + { + "epoch": 0.37, + "learning_rate": 0.00019935480865941894, + "loss": 2.0988, + "step": 311 + }, + { + "epoch": 0.37, + "learning_rate": 0.00019935051920679094, + "loss": 2.0964, + "step": 312 + }, + { + "epoch": 0.38, + "learning_rate": 0.00019934621558894607, + "loss": 1.9365, + "step": 313 + }, + { + "epoch": 0.38, + "learning_rate": 0.0001993418978064979, + "loss": 1.6224, + "step": 314 + }, + { + "epoch": 0.38, + "learning_rate": 0.00019933756586006202, + "loss": 2.144, + "step": 315 + }, + { + "epoch": 0.38, + "learning_rate": 0.00019933321975025616, + "loss": 2.2899, + "step": 316 + }, + { + "epoch": 0.38, + "learning_rate": 0.00019932885947769992, + "loss": 1.8865, + "step": 317 + }, + { + "epoch": 0.38, + "learning_rate": 0.000199324485043015, + "loss": 2.3996, + "step": 318 + }, + { + "epoch": 0.38, + "learning_rate": 0.0001993200964468251, + "loss": 1.3858, + "step": 319 + }, + { + "epoch": 0.38, + "learning_rate": 0.00019931569368975588, + "loss": 2.2231, + "step": 320 + }, + { + "epoch": 0.39, + "learning_rate": 0.00019931127677243516, + "loss": 2.0537, + "step": 321 + }, + { + "epoch": 0.39, + "learning_rate": 0.00019930684569549264, + "loss": 2.1381, + "step": 322 + }, + { + "epoch": 0.39, + "learning_rate": 0.00019930240045956012, + "loss": 2.0152, + "step": 323 + }, + { + "epoch": 0.39, + "learning_rate": 0.0001992979410652714, + "loss": 2.0293, + "step": 324 + }, + { + "epoch": 0.39, + "learning_rate": 0.00019929346751326228, + "loss": 1.7457, + "step": 325 + }, + { + "epoch": 0.39, + "learning_rate": 0.00019928897980417057, + "loss": 1.987, + "step": 326 + }, + { + "epoch": 0.39, + "learning_rate": 0.00019928447793863616, + "loss": 2.2451, + "step": 327 + }, + { + "epoch": 0.39, + "learning_rate": 0.00019927996191730093, + "loss": 2.3312, + "step": 328 + }, + { + "epoch": 0.39, + "learning_rate": 0.0001992754317408087, + "loss": 1.8771, + "step": 329 + }, + { + "epoch": 0.4, + "learning_rate": 0.0001992708874098054, + "loss": 1.833, + "step": 330 + }, + { + "epoch": 0.4, + "learning_rate": 0.00019926632892493896, + "loss": 1.9343, + "step": 331 + }, + { + "epoch": 0.4, + "learning_rate": 0.00019926175628685937, + "loss": 2.2328, + "step": 332 + }, + { + "epoch": 0.4, + "learning_rate": 0.0001992571694962185, + "loss": 1.9916, + "step": 333 + }, + { + "epoch": 0.4, + "learning_rate": 0.0001992525685536704, + "loss": 1.9497, + "step": 334 + }, + { + "epoch": 0.4, + "learning_rate": 0.000199247953459871, + "loss": 2.029, + "step": 335 + }, + { + "epoch": 0.4, + "learning_rate": 0.00019924332421547835, + "loss": 2.0326, + "step": 336 + }, + { + "epoch": 0.4, + "learning_rate": 0.0001992386808211525, + "loss": 2.6406, + "step": 337 + }, + { + "epoch": 0.41, + "learning_rate": 0.00019923402327755546, + "loss": 2.3811, + "step": 338 + }, + { + "epoch": 0.41, + "learning_rate": 0.00019922935158535129, + "loss": 1.6143, + "step": 339 + }, + { + "epoch": 0.41, + "learning_rate": 0.00019922466574520608, + "loss": 2.2182, + "step": 340 + }, + { + "epoch": 0.41, + "learning_rate": 0.00019921996575778794, + "loss": 2.218, + "step": 341 + }, + { + "epoch": 0.41, + "learning_rate": 0.000199215251623767, + "loss": 1.8615, + "step": 342 + }, + { + "epoch": 0.41, + "learning_rate": 0.00019921052334381534, + "loss": 2.165, + "step": 343 + }, + { + "epoch": 0.41, + "learning_rate": 0.00019920578091860716, + "loss": 2.1627, + "step": 344 + }, + { + "epoch": 0.41, + "learning_rate": 0.0001992010243488186, + "loss": 2.154, + "step": 345 + }, + { + "epoch": 0.42, + "learning_rate": 0.00019919625363512786, + "loss": 1.5966, + "step": 346 + }, + { + "epoch": 0.42, + "learning_rate": 0.00019919146877821512, + "loss": 2.0903, + "step": 347 + }, + { + "epoch": 0.42, + "learning_rate": 0.0001991866697787626, + "loss": 2.2322, + "step": 348 + }, + { + "epoch": 0.42, + "learning_rate": 0.00019918185663745456, + "loss": 1.9319, + "step": 349 + }, + { + "epoch": 0.42, + "learning_rate": 0.00019917702935497725, + "loss": 2.1367, + "step": 350 + }, + { + "epoch": 0.42, + "learning_rate": 0.00019917218793201886, + "loss": 2.1767, + "step": 351 + }, + { + "epoch": 0.42, + "learning_rate": 0.00019916733236926976, + "loss": 2.1009, + "step": 352 + }, + { + "epoch": 0.42, + "learning_rate": 0.0001991624626674222, + "loss": 2.1286, + "step": 353 + }, + { + "epoch": 0.42, + "learning_rate": 0.0001991575788271705, + "loss": 2.181, + "step": 354 + }, + { + "epoch": 0.43, + "learning_rate": 0.00019915268084921101, + "loss": 2.12, + "step": 355 + }, + { + "epoch": 0.43, + "learning_rate": 0.00019914776873424206, + "loss": 1.9895, + "step": 356 + }, + { + "epoch": 0.43, + "learning_rate": 0.000199142842482964, + "loss": 1.9285, + "step": 357 + }, + { + "epoch": 0.43, + "learning_rate": 0.0001991379020960792, + "loss": 2.2376, + "step": 358 + }, + { + "epoch": 0.43, + "learning_rate": 0.0001991329475742921, + "loss": 2.1274, + "step": 359 + }, + { + "epoch": 0.43, + "learning_rate": 0.00019912797891830908, + "loss": 2.0043, + "step": 360 + }, + { + "epoch": 0.43, + "learning_rate": 0.00019912299612883852, + "loss": 2.022, + "step": 361 + }, + { + "epoch": 0.43, + "learning_rate": 0.00019911799920659093, + "loss": 1.7343, + "step": 362 + }, + { + "epoch": 0.44, + "learning_rate": 0.0001991129881522787, + "loss": 2.0621, + "step": 363 + }, + { + "epoch": 0.44, + "learning_rate": 0.00019910796296661632, + "loss": 1.5116, + "step": 364 + }, + { + "epoch": 0.44, + "learning_rate": 0.0001991029236503203, + "loss": 2.0485, + "step": 365 + }, + { + "epoch": 0.44, + "learning_rate": 0.00019909787020410907, + "loss": 1.971, + "step": 366 + }, + { + "epoch": 0.44, + "learning_rate": 0.00019909280262870324, + "loss": 1.9724, + "step": 367 + }, + { + "epoch": 0.44, + "learning_rate": 0.00019908772092482524, + "loss": 1.318, + "step": 368 + }, + { + "epoch": 0.44, + "learning_rate": 0.00019908262509319964, + "loss": 2.0539, + "step": 369 + }, + { + "epoch": 0.44, + "learning_rate": 0.00019907751513455302, + "loss": 2.1097, + "step": 370 + }, + { + "epoch": 0.45, + "learning_rate": 0.00019907239104961392, + "loss": 2.0632, + "step": 371 + }, + { + "epoch": 0.45, + "learning_rate": 0.00019906725283911296, + "loss": 2.1897, + "step": 372 + }, + { + "epoch": 0.45, + "learning_rate": 0.00019906210050378266, + "loss": 2.2002, + "step": 373 + }, + { + "epoch": 0.45, + "learning_rate": 0.00019905693404435773, + "loss": 1.9005, + "step": 374 + }, + { + "epoch": 0.45, + "learning_rate": 0.00019905175346157474, + "loss": 1.9873, + "step": 375 + }, + { + "epoch": 0.45, + "learning_rate": 0.00019904655875617233, + "loss": 1.7215, + "step": 376 + }, + { + "epoch": 0.45, + "learning_rate": 0.00019904134992889113, + "loss": 2.0434, + "step": 377 + }, + { + "epoch": 0.45, + "learning_rate": 0.00019903612698047383, + "loss": 2.4223, + "step": 378 + }, + { + "epoch": 0.45, + "learning_rate": 0.00019903088991166513, + "loss": 2.0837, + "step": 379 + }, + { + "epoch": 0.46, + "learning_rate": 0.00019902563872321172, + "loss": 2.2389, + "step": 380 + }, + { + "epoch": 0.46, + "learning_rate": 0.00019902037341586225, + "loss": 1.7205, + "step": 381 + }, + { + "epoch": 0.46, + "learning_rate": 0.0001990150939903675, + "loss": 1.9577, + "step": 382 + }, + { + "epoch": 0.46, + "learning_rate": 0.00019900980044748015, + "loss": 1.8778, + "step": 383 + }, + { + "epoch": 0.46, + "learning_rate": 0.000199004492787955, + "loss": 2.2213, + "step": 384 + }, + { + "epoch": 0.46, + "learning_rate": 0.00019899917101254874, + "loss": 2.0927, + "step": 385 + }, + { + "epoch": 0.46, + "learning_rate": 0.00019899383512202019, + "loss": 2.2921, + "step": 386 + }, + { + "epoch": 0.46, + "learning_rate": 0.0001989884851171301, + "loss": 2.2983, + "step": 387 + }, + { + "epoch": 0.47, + "learning_rate": 0.0001989831209986413, + "loss": 1.8052, + "step": 388 + }, + { + "epoch": 0.47, + "learning_rate": 0.00019897774276731857, + "loss": 1.7741, + "step": 389 + }, + { + "epoch": 0.47, + "learning_rate": 0.00019897235042392873, + "loss": 1.779, + "step": 390 + }, + { + "epoch": 0.47, + "learning_rate": 0.00019896694396924063, + "loss": 1.6924, + "step": 391 + }, + { + "epoch": 0.47, + "learning_rate": 0.00019896152340402509, + "loss": 2.036, + "step": 392 + }, + { + "epoch": 0.47, + "learning_rate": 0.00019895608872905494, + "loss": 2.04, + "step": 393 + }, + { + "epoch": 0.47, + "learning_rate": 0.0001989506399451051, + "loss": 2.1702, + "step": 394 + }, + { + "epoch": 0.47, + "learning_rate": 0.00019894517705295245, + "loss": 1.9429, + "step": 395 + }, + { + "epoch": 0.48, + "learning_rate": 0.00019893970005337584, + "loss": 2.0528, + "step": 396 + }, + { + "epoch": 0.48, + "learning_rate": 0.00019893420894715618, + "loss": 1.7906, + "step": 397 + }, + { + "epoch": 0.48, + "learning_rate": 0.0001989287037350764, + "loss": 2.3494, + "step": 398 + }, + { + "epoch": 0.48, + "learning_rate": 0.00019892318441792138, + "loss": 1.7415, + "step": 399 + }, + { + "epoch": 0.48, + "learning_rate": 0.0001989176509964781, + "loss": 2.0184, + "step": 400 + }, + { + "epoch": 0.48, + "learning_rate": 0.0001989121034715355, + "loss": 1.9277, + "step": 401 + }, + { + "epoch": 0.48, + "learning_rate": 0.0001989065418438845, + "loss": 2.2168, + "step": 402 + }, + { + "epoch": 0.48, + "learning_rate": 0.00019890096611431814, + "loss": 2.6114, + "step": 403 + }, + { + "epoch": 0.48, + "learning_rate": 0.00019889537628363133, + "loss": 2.0713, + "step": 404 + }, + { + "epoch": 0.49, + "learning_rate": 0.00019888977235262104, + "loss": 2.2966, + "step": 405 + }, + { + "epoch": 0.49, + "learning_rate": 0.00019888415432208636, + "loss": 2.5206, + "step": 406 + }, + { + "epoch": 0.49, + "learning_rate": 0.00019887852219282822, + "loss": 2.4503, + "step": 407 + }, + { + "epoch": 0.49, + "learning_rate": 0.00019887287596564966, + "loss": 2.102, + "step": 408 + }, + { + "epoch": 0.49, + "learning_rate": 0.00019886721564135572, + "loss": 2.3275, + "step": 409 + }, + { + "epoch": 0.49, + "learning_rate": 0.00019886154122075343, + "loss": 2.0481, + "step": 410 + }, + { + "epoch": 0.49, + "learning_rate": 0.00019885585270465182, + "loss": 1.8395, + "step": 411 + }, + { + "epoch": 0.49, + "learning_rate": 0.00019885015009386202, + "loss": 2.3535, + "step": 412 + }, + { + "epoch": 0.5, + "learning_rate": 0.000198844433389197, + "loss": 2.0147, + "step": 413 + }, + { + "epoch": 0.5, + "learning_rate": 0.0001988387025914719, + "loss": 2.1919, + "step": 414 + }, + { + "epoch": 0.5, + "learning_rate": 0.0001988329577015038, + "loss": 2.156, + "step": 415 + }, + { + "epoch": 0.5, + "learning_rate": 0.00019882719872011176, + "loss": 2.2672, + "step": 416 + }, + { + "epoch": 0.5, + "learning_rate": 0.00019882142564811694, + "loss": 2.3242, + "step": 417 + }, + { + "epoch": 0.5, + "learning_rate": 0.0001988156384863424, + "loss": 2.0259, + "step": 418 + }, + { + "epoch": 0.5, + "eval_loss": 1.9941134452819824, + "eval_runtime": 282.533, + "eval_samples_per_second": 0.729, + "eval_steps_per_second": 0.729, + "step": 418 + }, + { + "epoch": 0.5, + "learning_rate": 0.00019880983723561332, + "loss": 1.7039, + "step": 419 + }, + { + "epoch": 0.5, + "learning_rate": 0.00019880402189675678, + "loss": 2.1007, + "step": 420 + }, + { + "epoch": 0.51, + "learning_rate": 0.00019879819247060193, + "loss": 2.2297, + "step": 421 + }, + { + "epoch": 0.51, + "learning_rate": 0.00019879234895797996, + "loss": 1.6166, + "step": 422 + }, + { + "epoch": 0.51, + "learning_rate": 0.000198786491359724, + "loss": 2.408, + "step": 423 + }, + { + "epoch": 0.51, + "learning_rate": 0.00019878061967666915, + "loss": 1.686, + "step": 424 + }, + { + "epoch": 0.51, + "learning_rate": 0.0001987747339096527, + "loss": 2.0492, + "step": 425 + }, + { + "epoch": 0.51, + "learning_rate": 0.00019876883405951377, + "loss": 2.2179, + "step": 426 + }, + { + "epoch": 0.51, + "learning_rate": 0.00019876292012709356, + "loss": 1.8812, + "step": 427 + }, + { + "epoch": 0.51, + "learning_rate": 0.00019875699211323528, + "loss": 2.2888, + "step": 428 + }, + { + "epoch": 0.52, + "learning_rate": 0.00019875105001878409, + "loss": 2.0561, + "step": 429 + }, + { + "epoch": 0.52, + "learning_rate": 0.00019874509384458725, + "loss": 1.9299, + "step": 430 + }, + { + "epoch": 0.52, + "learning_rate": 0.00019873912359149397, + "loss": 2.1999, + "step": 431 + }, + { + "epoch": 0.52, + "learning_rate": 0.00019873313926035548, + "loss": 1.8509, + "step": 432 + }, + { + "epoch": 0.52, + "learning_rate": 0.00019872714085202503, + "loss": 1.8281, + "step": 433 + }, + { + "epoch": 0.52, + "learning_rate": 0.0001987211283673578, + "loss": 1.8359, + "step": 434 + }, + { + "epoch": 0.52, + "learning_rate": 0.0001987151018072111, + "loss": 2.2844, + "step": 435 + }, + { + "epoch": 0.52, + "learning_rate": 0.00019870906117244416, + "loss": 1.9397, + "step": 436 + }, + { + "epoch": 0.52, + "learning_rate": 0.00019870300646391824, + "loss": 2.302, + "step": 437 + }, + { + "epoch": 0.53, + "learning_rate": 0.00019869693768249661, + "loss": 2.1176, + "step": 438 + }, + { + "epoch": 0.53, + "learning_rate": 0.00019869085482904458, + "loss": 2.1909, + "step": 439 + }, + { + "epoch": 0.53, + "learning_rate": 0.0001986847579044294, + "loss": 2.2382, + "step": 440 + }, + { + "epoch": 0.53, + "learning_rate": 0.00019867864690952035, + "loss": 2.0988, + "step": 441 + }, + { + "epoch": 0.53, + "learning_rate": 0.00019867252184518878, + "loss": 2.2136, + "step": 442 + }, + { + "epoch": 0.53, + "learning_rate": 0.0001986663827123079, + "loss": 1.9324, + "step": 443 + }, + { + "epoch": 0.53, + "learning_rate": 0.00019866022951175308, + "loss": 2.1274, + "step": 444 + }, + { + "epoch": 0.53, + "learning_rate": 0.00019865406224440165, + "loss": 1.8625, + "step": 445 + }, + { + "epoch": 0.54, + "learning_rate": 0.00019864788091113287, + "loss": 2.0009, + "step": 446 + }, + { + "epoch": 0.54, + "learning_rate": 0.0001986416855128281, + "loss": 2.2245, + "step": 447 + }, + { + "epoch": 0.54, + "learning_rate": 0.00019863547605037063, + "loss": 2.0654, + "step": 448 + }, + { + "epoch": 0.54, + "learning_rate": 0.00019862925252464586, + "loss": 1.4339, + "step": 449 + }, + { + "epoch": 0.54, + "learning_rate": 0.00019862301493654108, + "loss": 2.1347, + "step": 450 + }, + { + "epoch": 0.54, + "learning_rate": 0.00019861676328694562, + "loss": 1.7029, + "step": 451 + }, + { + "epoch": 0.54, + "learning_rate": 0.00019861049757675088, + "loss": 2.0081, + "step": 452 + }, + { + "epoch": 0.54, + "learning_rate": 0.00019860421780685018, + "loss": 1.9994, + "step": 453 + }, + { + "epoch": 0.55, + "learning_rate": 0.0001985979239781389, + "loss": 1.9325, + "step": 454 + }, + { + "epoch": 0.55, + "learning_rate": 0.00019859161609151436, + "loss": 1.8502, + "step": 455 + }, + { + "epoch": 0.55, + "learning_rate": 0.000198585294147876, + "loss": 2.3779, + "step": 456 + }, + { + "epoch": 0.55, + "learning_rate": 0.00019857895814812509, + "loss": 2.0303, + "step": 457 + }, + { + "epoch": 0.55, + "learning_rate": 0.0001985726080931651, + "loss": 1.9898, + "step": 458 + }, + { + "epoch": 0.55, + "learning_rate": 0.00019856624398390137, + "loss": 1.7648, + "step": 459 + }, + { + "epoch": 0.55, + "learning_rate": 0.00019855986582124126, + "loss": 1.7822, + "step": 460 + }, + { + "epoch": 0.55, + "learning_rate": 0.0001985534736060942, + "loss": 1.9219, + "step": 461 + }, + { + "epoch": 0.55, + "learning_rate": 0.00019854706733937155, + "loss": 2.1789, + "step": 462 + }, + { + "epoch": 0.56, + "learning_rate": 0.00019854064702198675, + "loss": 1.9091, + "step": 463 + }, + { + "epoch": 0.56, + "learning_rate": 0.00019853421265485514, + "loss": 1.9941, + "step": 464 + }, + { + "epoch": 0.56, + "learning_rate": 0.0001985277642388941, + "loss": 1.904, + "step": 465 + }, + { + "epoch": 0.56, + "learning_rate": 0.00019852130177502316, + "loss": 1.6299, + "step": 466 + }, + { + "epoch": 0.56, + "learning_rate": 0.0001985148252641636, + "loss": 1.7712, + "step": 467 + }, + { + "epoch": 0.56, + "learning_rate": 0.00019850833470723886, + "loss": 1.6825, + "step": 468 + }, + { + "epoch": 0.56, + "learning_rate": 0.0001985018301051744, + "loss": 1.7408, + "step": 469 + }, + { + "epoch": 0.56, + "learning_rate": 0.00019849531145889758, + "loss": 2.0622, + "step": 470 + }, + { + "epoch": 0.57, + "learning_rate": 0.00019848877876933784, + "loss": 1.5699, + "step": 471 + }, + { + "epoch": 0.57, + "learning_rate": 0.0001984822320374266, + "loss": 2.0253, + "step": 472 + }, + { + "epoch": 0.57, + "learning_rate": 0.00019847567126409724, + "loss": 2.2186, + "step": 473 + }, + { + "epoch": 0.57, + "learning_rate": 0.00019846909645028523, + "loss": 2.0872, + "step": 474 + }, + { + "epoch": 0.57, + "learning_rate": 0.000198462507596928, + "loss": 1.9362, + "step": 475 + }, + { + "epoch": 0.57, + "learning_rate": 0.00019845590470496497, + "loss": 2.4109, + "step": 476 + }, + { + "epoch": 0.57, + "learning_rate": 0.00019844928777533753, + "loss": 2.2626, + "step": 477 + }, + { + "epoch": 0.57, + "learning_rate": 0.00019844265680898918, + "loss": 2.0874, + "step": 478 + }, + { + "epoch": 0.58, + "learning_rate": 0.0001984360118068653, + "loss": 2.1606, + "step": 479 + }, + { + "epoch": 0.58, + "learning_rate": 0.0001984293527699133, + "loss": 2.063, + "step": 480 + }, + { + "epoch": 0.58, + "learning_rate": 0.00019842267969908265, + "loss": 1.9065, + "step": 481 + }, + { + "epoch": 0.58, + "learning_rate": 0.0001984159925953248, + "loss": 1.9511, + "step": 482 + }, + { + "epoch": 0.58, + "learning_rate": 0.00019840929145959317, + "loss": 2.056, + "step": 483 + }, + { + "epoch": 0.58, + "learning_rate": 0.00019840257629284317, + "loss": 2.2353, + "step": 484 + }, + { + "epoch": 0.58, + "learning_rate": 0.00019839584709603226, + "loss": 1.9401, + "step": 485 + }, + { + "epoch": 0.58, + "learning_rate": 0.0001983891038701199, + "loss": 1.9648, + "step": 486 + }, + { + "epoch": 0.58, + "learning_rate": 0.00019838234661606748, + "loss": 1.753, + "step": 487 + }, + { + "epoch": 0.59, + "learning_rate": 0.00019837557533483846, + "loss": 1.7805, + "step": 488 + }, + { + "epoch": 0.59, + "learning_rate": 0.00019836879002739827, + "loss": 2.192, + "step": 489 + }, + { + "epoch": 0.59, + "learning_rate": 0.00019836199069471437, + "loss": 1.9112, + "step": 490 + }, + { + "epoch": 0.59, + "learning_rate": 0.00019835517733775615, + "loss": 2.0119, + "step": 491 + }, + { + "epoch": 0.59, + "learning_rate": 0.0001983483499574951, + "loss": 1.8932, + "step": 492 + }, + { + "epoch": 0.59, + "learning_rate": 0.00019834150855490464, + "loss": 1.5968, + "step": 493 + }, + { + "epoch": 0.59, + "learning_rate": 0.00019833465313096017, + "loss": 2.1493, + "step": 494 + }, + { + "epoch": 0.59, + "learning_rate": 0.00019832778368663917, + "loss": 1.8863, + "step": 495 + }, + { + "epoch": 0.6, + "learning_rate": 0.000198320900222921, + "loss": 2.2134, + "step": 496 + }, + { + "epoch": 0.6, + "learning_rate": 0.00019831400274078717, + "loss": 2.2831, + "step": 497 + }, + { + "epoch": 0.6, + "learning_rate": 0.00019830709124122112, + "loss": 2.0266, + "step": 498 + }, + { + "epoch": 0.6, + "learning_rate": 0.0001983001657252082, + "loss": 2.3392, + "step": 499 + }, + { + "epoch": 0.6, + "learning_rate": 0.00019829322619373588, + "loss": 1.8426, + "step": 500 + }, + { + "epoch": 0.6, + "learning_rate": 0.00019828627264779363, + "loss": 2.0742, + "step": 501 + }, + { + "epoch": 0.6, + "learning_rate": 0.0001982793050883728, + "loss": 1.9578, + "step": 502 + }, + { + "epoch": 0.6, + "learning_rate": 0.00019827232351646686, + "loss": 2.0863, + "step": 503 + }, + { + "epoch": 0.61, + "learning_rate": 0.0001982653279330712, + "loss": 2.2881, + "step": 504 + }, + { + "epoch": 0.61, + "learning_rate": 0.00019825831833918323, + "loss": 1.8869, + "step": 505 + }, + { + "epoch": 0.61, + "learning_rate": 0.0001982512947358024, + "loss": 1.8997, + "step": 506 + }, + { + "epoch": 0.61, + "learning_rate": 0.00019824425712393012, + "loss": 1.8945, + "step": 507 + }, + { + "epoch": 0.61, + "learning_rate": 0.00019823720550456977, + "loss": 1.9496, + "step": 508 + }, + { + "epoch": 0.61, + "learning_rate": 0.0001982301398787268, + "loss": 2.1066, + "step": 509 + }, + { + "epoch": 0.61, + "learning_rate": 0.00019822306024740852, + "loss": 1.958, + "step": 510 + }, + { + "epoch": 0.61, + "learning_rate": 0.00019821596661162447, + "loss": 2.1112, + "step": 511 + }, + { + "epoch": 0.61, + "learning_rate": 0.00019820885897238596, + "loss": 2.1012, + "step": 512 + }, + { + "epoch": 0.62, + "learning_rate": 0.0001982017373307064, + "loss": 2.2623, + "step": 513 + }, + { + "epoch": 0.62, + "learning_rate": 0.00019819460168760117, + "loss": 2.5058, + "step": 514 + }, + { + "epoch": 0.62, + "learning_rate": 0.0001981874520440877, + "loss": 2.1367, + "step": 515 + }, + { + "epoch": 0.62, + "learning_rate": 0.00019818028840118532, + "loss": 2.2743, + "step": 516 + }, + { + "epoch": 0.62, + "learning_rate": 0.00019817311075991543, + "loss": 1.5517, + "step": 517 + }, + { + "epoch": 0.62, + "learning_rate": 0.0001981659191213014, + "loss": 1.9569, + "step": 518 + }, + { + "epoch": 0.62, + "learning_rate": 0.00019815871348636863, + "loss": 2.0566, + "step": 519 + }, + { + "epoch": 0.62, + "learning_rate": 0.00019815149385614444, + "loss": 1.8859, + "step": 520 + }, + { + "epoch": 0.63, + "learning_rate": 0.00019814426023165825, + "loss": 2.0298, + "step": 521 + }, + { + "epoch": 0.63, + "learning_rate": 0.00019813701261394136, + "loss": 2.0614, + "step": 522 + }, + { + "epoch": 0.63, + "learning_rate": 0.00019812975100402715, + "loss": 2.221, + "step": 523 + }, + { + "epoch": 0.63, + "learning_rate": 0.00019812247540295096, + "loss": 2.1255, + "step": 524 + }, + { + "epoch": 0.63, + "learning_rate": 0.00019811518581175014, + "loss": 2.1885, + "step": 525 + }, + { + "epoch": 0.63, + "learning_rate": 0.000198107882231464, + "loss": 2.3918, + "step": 526 + }, + { + "epoch": 0.63, + "learning_rate": 0.00019810056466313392, + "loss": 2.2759, + "step": 527 + }, + { + "epoch": 0.63, + "learning_rate": 0.00019809323310780318, + "loss": 1.9727, + "step": 528 + }, + { + "epoch": 0.64, + "learning_rate": 0.0001980858875665171, + "loss": 2.0417, + "step": 529 + }, + { + "epoch": 0.64, + "learning_rate": 0.00019807852804032305, + "loss": 1.645, + "step": 530 + }, + { + "epoch": 0.64, + "learning_rate": 0.0001980711545302703, + "loss": 1.7943, + "step": 531 + }, + { + "epoch": 0.64, + "learning_rate": 0.00019806376703741015, + "loss": 1.8844, + "step": 532 + }, + { + "epoch": 0.64, + "learning_rate": 0.00019805636556279588, + "loss": 2.1128, + "step": 533 + }, + { + "epoch": 0.64, + "learning_rate": 0.0001980489501074828, + "loss": 2.0272, + "step": 534 + }, + { + "epoch": 0.64, + "learning_rate": 0.00019804152067252816, + "loss": 2.0916, + "step": 535 + }, + { + "epoch": 0.64, + "learning_rate": 0.00019803407725899131, + "loss": 1.7287, + "step": 536 + }, + { + "epoch": 0.64, + "learning_rate": 0.00019802661986793342, + "loss": 2.0667, + "step": 537 + }, + { + "epoch": 0.65, + "learning_rate": 0.00019801914850041784, + "loss": 2.4016, + "step": 538 + }, + { + "epoch": 0.65, + "learning_rate": 0.00019801166315750978, + "loss": 1.8557, + "step": 539 + }, + { + "epoch": 0.65, + "learning_rate": 0.0001980041638402765, + "loss": 1.8072, + "step": 540 + }, + { + "epoch": 0.65, + "learning_rate": 0.00019799665054978722, + "loss": 2.2252, + "step": 541 + }, + { + "epoch": 0.65, + "learning_rate": 0.00019798912328711322, + "loss": 2.1377, + "step": 542 + }, + { + "epoch": 0.65, + "learning_rate": 0.00019798158205332764, + "loss": 2.0306, + "step": 543 + }, + { + "epoch": 0.65, + "learning_rate": 0.00019797402684950576, + "loss": 1.7428, + "step": 544 + }, + { + "epoch": 0.65, + "learning_rate": 0.00019796645767672477, + "loss": 2.0843, + "step": 545 + }, + { + "epoch": 0.66, + "learning_rate": 0.00019795887453606388, + "loss": 1.9175, + "step": 546 + }, + { + "epoch": 0.66, + "learning_rate": 0.00019795127742860423, + "loss": 1.6673, + "step": 547 + }, + { + "epoch": 0.66, + "learning_rate": 0.0001979436663554291, + "loss": 1.5553, + "step": 548 + }, + { + "epoch": 0.66, + "learning_rate": 0.00019793604131762357, + "loss": 1.604, + "step": 549 + }, + { + "epoch": 0.66, + "learning_rate": 0.00019792840231627482, + "loss": 2.023, + "step": 550 + }, + { + "epoch": 0.66, + "learning_rate": 0.00019792074935247206, + "loss": 1.8399, + "step": 551 + }, + { + "epoch": 0.66, + "learning_rate": 0.00019791308242730638, + "loss": 1.8579, + "step": 552 + }, + { + "epoch": 0.66, + "learning_rate": 0.00019790540154187094, + "loss": 2.2135, + "step": 553 + }, + { + "epoch": 0.67, + "learning_rate": 0.00019789770669726087, + "loss": 1.7894, + "step": 554 + }, + { + "epoch": 0.67, + "learning_rate": 0.00019788999789457326, + "loss": 2.1723, + "step": 555 + }, + { + "epoch": 0.67, + "learning_rate": 0.00019788227513490723, + "loss": 2.0881, + "step": 556 + }, + { + "epoch": 0.67, + "learning_rate": 0.00019787453841936393, + "loss": 1.7181, + "step": 557 + }, + { + "epoch": 0.67, + "learning_rate": 0.00019786678774904638, + "loss": 1.8725, + "step": 558 + }, + { + "epoch": 0.67, + "learning_rate": 0.00019785902312505964, + "loss": 2.0544, + "step": 559 + }, + { + "epoch": 0.67, + "learning_rate": 0.00019785124454851084, + "loss": 1.7503, + "step": 560 + }, + { + "epoch": 0.67, + "learning_rate": 0.000197843452020509, + "loss": 2.01, + "step": 561 + }, + { + "epoch": 0.67, + "learning_rate": 0.00019783564554216518, + "loss": 1.748, + "step": 562 + }, + { + "epoch": 0.68, + "learning_rate": 0.0001978278251145924, + "loss": 2.0866, + "step": 563 + }, + { + "epoch": 0.68, + "learning_rate": 0.0001978199907389057, + "loss": 1.6046, + "step": 564 + }, + { + "epoch": 0.68, + "learning_rate": 0.00019781214241622208, + "loss": 1.9222, + "step": 565 + }, + { + "epoch": 0.68, + "learning_rate": 0.00019780428014766051, + "loss": 2.2003, + "step": 566 + }, + { + "epoch": 0.68, + "learning_rate": 0.00019779640393434206, + "loss": 2.0534, + "step": 567 + }, + { + "epoch": 0.68, + "learning_rate": 0.0001977885137773896, + "loss": 1.8609, + "step": 568 + }, + { + "epoch": 0.68, + "learning_rate": 0.00019778060967792817, + "loss": 2.0666, + "step": 569 + }, + { + "epoch": 0.68, + "learning_rate": 0.00019777269163708468, + "loss": 1.9512, + "step": 570 + }, + { + "epoch": 0.69, + "learning_rate": 0.00019776475965598814, + "loss": 1.8349, + "step": 571 + }, + { + "epoch": 0.69, + "learning_rate": 0.0001977568137357694, + "loss": 2.0507, + "step": 572 + }, + { + "epoch": 0.69, + "learning_rate": 0.00019774885387756138, + "loss": 1.7588, + "step": 573 + }, + { + "epoch": 0.69, + "learning_rate": 0.000197740880082499, + "loss": 2.0981, + "step": 574 + }, + { + "epoch": 0.69, + "learning_rate": 0.00019773289235171918, + "loss": 2.0953, + "step": 575 + }, + { + "epoch": 0.69, + "learning_rate": 0.00019772489068636077, + "loss": 2.0678, + "step": 576 + }, + { + "epoch": 0.69, + "learning_rate": 0.00019771687508756466, + "loss": 2.0136, + "step": 577 + }, + { + "epoch": 0.69, + "learning_rate": 0.0001977088455564736, + "loss": 1.9781, + "step": 578 + }, + { + "epoch": 0.7, + "learning_rate": 0.00019770080209423254, + "loss": 2.2185, + "step": 579 + }, + { + "epoch": 0.7, + "learning_rate": 0.00019769274470198827, + "loss": 1.8076, + "step": 580 + }, + { + "epoch": 0.7, + "learning_rate": 0.00019768467338088957, + "loss": 1.6888, + "step": 581 + }, + { + "epoch": 0.7, + "learning_rate": 0.00019767658813208726, + "loss": 2.1273, + "step": 582 + }, + { + "epoch": 0.7, + "learning_rate": 0.0001976684889567341, + "loss": 2.3232, + "step": 583 + }, + { + "epoch": 0.7, + "learning_rate": 0.00019766037585598487, + "loss": 2.366, + "step": 584 + }, + { + "epoch": 0.7, + "learning_rate": 0.00019765224883099635, + "loss": 1.8939, + "step": 585 + }, + { + "epoch": 0.7, + "learning_rate": 0.00019764410788292722, + "loss": 2.0162, + "step": 586 + }, + { + "epoch": 0.7, + "learning_rate": 0.00019763595301293822, + "loss": 2.2752, + "step": 587 + }, + { + "epoch": 0.71, + "learning_rate": 0.0001976277842221921, + "loss": 1.9461, + "step": 588 + }, + { + "epoch": 0.71, + "learning_rate": 0.0001976196015118535, + "loss": 1.9999, + "step": 589 + }, + { + "epoch": 0.71, + "learning_rate": 0.0001976114048830891, + "loss": 2.0169, + "step": 590 + }, + { + "epoch": 0.71, + "learning_rate": 0.00019760319433706757, + "loss": 2.1838, + "step": 591 + }, + { + "epoch": 0.71, + "learning_rate": 0.00019759496987495955, + "loss": 2.3513, + "step": 592 + }, + { + "epoch": 0.71, + "learning_rate": 0.0001975867314979377, + "loss": 1.9915, + "step": 593 + }, + { + "epoch": 0.71, + "learning_rate": 0.0001975784792071766, + "loss": 2.1973, + "step": 594 + }, + { + "epoch": 0.71, + "learning_rate": 0.00019757021300385286, + "loss": 2.3112, + "step": 595 + }, + { + "epoch": 0.72, + "learning_rate": 0.00019756193288914507, + "loss": 2.0992, + "step": 596 + }, + { + "epoch": 0.72, + "learning_rate": 0.00019755363886423376, + "loss": 2.4266, + "step": 597 + }, + { + "epoch": 0.72, + "learning_rate": 0.00019754533093030148, + "loss": 1.7649, + "step": 598 + }, + { + "epoch": 0.72, + "learning_rate": 0.0001975370090885328, + "loss": 1.7573, + "step": 599 + }, + { + "epoch": 0.72, + "learning_rate": 0.00019752867334011423, + "loss": 1.7949, + "step": 600 + }, + { + "epoch": 0.72, + "learning_rate": 0.0001975203236862342, + "loss": 2.0229, + "step": 601 + }, + { + "epoch": 0.72, + "learning_rate": 0.00019751196012808325, + "loss": 2.0519, + "step": 602 + }, + { + "epoch": 0.72, + "learning_rate": 0.00019750358266685383, + "loss": 2.0829, + "step": 603 + }, + { + "epoch": 0.73, + "learning_rate": 0.00019749519130374038, + "loss": 2.0153, + "step": 604 + }, + { + "epoch": 0.73, + "learning_rate": 0.00019748678603993933, + "loss": 1.8594, + "step": 605 + }, + { + "epoch": 0.73, + "learning_rate": 0.00019747836687664908, + "loss": 2.1385, + "step": 606 + }, + { + "epoch": 0.73, + "learning_rate": 0.00019746993381507003, + "loss": 2.1317, + "step": 607 + }, + { + "epoch": 0.73, + "learning_rate": 0.00019746148685640451, + "loss": 1.1676, + "step": 608 + }, + { + "epoch": 0.73, + "learning_rate": 0.0001974530260018569, + "loss": 2.2856, + "step": 609 + }, + { + "epoch": 0.73, + "learning_rate": 0.0001974445512526336, + "loss": 2.1973, + "step": 610 + }, + { + "epoch": 0.73, + "learning_rate": 0.00019743606260994278, + "loss": 1.6912, + "step": 611 + }, + { + "epoch": 0.73, + "learning_rate": 0.00019742756007499486, + "loss": 1.8091, + "step": 612 + }, + { + "epoch": 0.74, + "learning_rate": 0.00019741904364900208, + "loss": 2.0108, + "step": 613 + }, + { + "epoch": 0.74, + "learning_rate": 0.00019741051333317867, + "loss": 2.1061, + "step": 614 + }, + { + "epoch": 0.74, + "learning_rate": 0.00019740196912874087, + "loss": 1.8934, + "step": 615 + }, + { + "epoch": 0.74, + "learning_rate": 0.00019739341103690693, + "loss": 1.8599, + "step": 616 + }, + { + "epoch": 0.74, + "learning_rate": 0.00019738483905889703, + "loss": 2.0025, + "step": 617 + }, + { + "epoch": 0.74, + "learning_rate": 0.00019737625319593335, + "loss": 1.8247, + "step": 618 + }, + { + "epoch": 0.74, + "learning_rate": 0.00019736765344924005, + "loss": 2.222, + "step": 619 + }, + { + "epoch": 0.74, + "learning_rate": 0.00019735903982004324, + "loss": 2.116, + "step": 620 + }, + { + "epoch": 0.75, + "learning_rate": 0.0001973504123095711, + "loss": 1.9183, + "step": 621 + }, + { + "epoch": 0.75, + "learning_rate": 0.0001973417709190536, + "loss": 2.1507, + "step": 622 + }, + { + "epoch": 0.75, + "learning_rate": 0.00019733311564972296, + "loss": 1.7899, + "step": 623 + }, + { + "epoch": 0.75, + "learning_rate": 0.00019732444650281315, + "loss": 2.1005, + "step": 624 + }, + { + "epoch": 0.75, + "learning_rate": 0.0001973157634795602, + "loss": 2.2391, + "step": 625 + }, + { + "epoch": 0.75, + "learning_rate": 0.00019730706658120214, + "loss": 1.9466, + "step": 626 + }, + { + "epoch": 0.75, + "learning_rate": 0.000197298355808979, + "loss": 1.9854, + "step": 627 + }, + { + "epoch": 0.75, + "eval_loss": 1.9957869052886963, + "eval_runtime": 282.5544, + "eval_samples_per_second": 0.729, + "eval_steps_per_second": 0.729, + "step": 627 + }, + { + "epoch": 0.75, + "learning_rate": 0.00019728963116413266, + "loss": 2.1877, + "step": 628 + }, + { + "epoch": 0.76, + "learning_rate": 0.00019728089264790712, + "loss": 2.2194, + "step": 629 + }, + { + "epoch": 0.76, + "learning_rate": 0.00019727214026154827, + "loss": 1.9631, + "step": 630 + }, + { + "epoch": 0.76, + "learning_rate": 0.00019726337400630405, + "loss": 2.3506, + "step": 631 + }, + { + "epoch": 0.76, + "learning_rate": 0.00019725459388342432, + "loss": 2.0543, + "step": 632 + }, + { + "epoch": 0.76, + "learning_rate": 0.0001972457998941609, + "loss": 2.0402, + "step": 633 + }, + { + "epoch": 0.76, + "learning_rate": 0.00019723699203976766, + "loss": 1.9316, + "step": 634 + }, + { + "epoch": 0.76, + "learning_rate": 0.0001972281703215004, + "loss": 2.2024, + "step": 635 + }, + { + "epoch": 0.76, + "learning_rate": 0.00019721933474061692, + "loss": 1.6776, + "step": 636 + }, + { + "epoch": 0.76, + "learning_rate": 0.00019721048529837694, + "loss": 1.9757, + "step": 637 + }, + { + "epoch": 0.77, + "learning_rate": 0.00019720162199604222, + "loss": 1.7631, + "step": 638 + }, + { + "epoch": 0.77, + "learning_rate": 0.00019719274483487648, + "loss": 2.34, + "step": 639 + }, + { + "epoch": 0.77, + "learning_rate": 0.0001971838538161454, + "loss": 1.8469, + "step": 640 + }, + { + "epoch": 0.77, + "learning_rate": 0.00019717494894111662, + "loss": 2.3151, + "step": 641 + }, + { + "epoch": 0.77, + "learning_rate": 0.00019716603021105987, + "loss": 2.0661, + "step": 642 + }, + { + "epoch": 0.77, + "learning_rate": 0.00019715709762724667, + "loss": 2.0408, + "step": 643 + }, + { + "epoch": 0.77, + "learning_rate": 0.00019714815119095062, + "loss": 1.9848, + "step": 644 + }, + { + "epoch": 0.77, + "learning_rate": 0.00019713919090344736, + "loss": 2.3134, + "step": 645 + }, + { + "epoch": 0.78, + "learning_rate": 0.00019713021676601438, + "loss": 2.4947, + "step": 646 + }, + { + "epoch": 0.78, + "learning_rate": 0.0001971212287799312, + "loss": 2.0515, + "step": 647 + }, + { + "epoch": 0.78, + "learning_rate": 0.00019711222694647932, + "loss": 2.6216, + "step": 648 + }, + { + "epoch": 0.78, + "learning_rate": 0.00019710321126694216, + "loss": 1.6517, + "step": 649 + }, + { + "epoch": 0.78, + "learning_rate": 0.0001970941817426052, + "loss": 2.0408, + "step": 650 + }, + { + "epoch": 0.78, + "learning_rate": 0.00019708513837475588, + "loss": 1.8841, + "step": 651 + }, + { + "epoch": 0.78, + "learning_rate": 0.00019707608116468356, + "loss": 2.1966, + "step": 652 + }, + { + "epoch": 0.78, + "learning_rate": 0.00019706701011367955, + "loss": 1.7587, + "step": 653 + }, + { + "epoch": 0.79, + "learning_rate": 0.0001970579252230373, + "loss": 2.2196, + "step": 654 + }, + { + "epoch": 0.79, + "learning_rate": 0.00019704882649405198, + "loss": 1.8146, + "step": 655 + }, + { + "epoch": 0.79, + "learning_rate": 0.00019703971392802098, + "loss": 2.2932, + "step": 656 + }, + { + "epoch": 0.79, + "learning_rate": 0.00019703058752624353, + "loss": 1.923, + "step": 657 + }, + { + "epoch": 0.79, + "learning_rate": 0.0001970214472900208, + "loss": 2.2393, + "step": 658 + }, + { + "epoch": 0.79, + "learning_rate": 0.00019701229322065605, + "loss": 1.7338, + "step": 659 + }, + { + "epoch": 0.79, + "learning_rate": 0.00019700312531945442, + "loss": 1.7859, + "step": 660 + }, + { + "epoch": 0.79, + "learning_rate": 0.00019699394358772306, + "loss": 2.2719, + "step": 661 + }, + { + "epoch": 0.79, + "learning_rate": 0.00019698474802677107, + "loss": 1.576, + "step": 662 + }, + { + "epoch": 0.8, + "learning_rate": 0.00019697553863790956, + "loss": 2.3333, + "step": 663 + }, + { + "epoch": 0.8, + "learning_rate": 0.00019696631542245156, + "loss": 2.3508, + "step": 664 + }, + { + "epoch": 0.8, + "learning_rate": 0.00019695707838171216, + "loss": 2.1876, + "step": 665 + }, + { + "epoch": 0.8, + "learning_rate": 0.00019694782751700828, + "loss": 1.4863, + "step": 666 + }, + { + "epoch": 0.8, + "learning_rate": 0.00019693856282965898, + "loss": 1.8948, + "step": 667 + }, + { + "epoch": 0.8, + "learning_rate": 0.00019692928432098512, + "loss": 1.6867, + "step": 668 + }, + { + "epoch": 0.8, + "learning_rate": 0.00019691999199230963, + "loss": 1.7682, + "step": 669 + }, + { + "epoch": 0.8, + "learning_rate": 0.00019691068584495742, + "loss": 2.0914, + "step": 670 + }, + { + "epoch": 0.81, + "learning_rate": 0.00019690136588025535, + "loss": 2.1413, + "step": 671 + }, + { + "epoch": 0.81, + "learning_rate": 0.00019689203209953223, + "loss": 2.1275, + "step": 672 + }, + { + "epoch": 0.81, + "learning_rate": 0.0001968826845041188, + "loss": 1.9556, + "step": 673 + }, + { + "epoch": 0.81, + "learning_rate": 0.00019687332309534792, + "loss": 2.2209, + "step": 674 + }, + { + "epoch": 0.81, + "learning_rate": 0.00019686394787455424, + "loss": 1.9853, + "step": 675 + }, + { + "epoch": 0.81, + "learning_rate": 0.00019685455884307454, + "loss": 2.0877, + "step": 676 + }, + { + "epoch": 0.81, + "learning_rate": 0.00019684515600224743, + "loss": 2.1607, + "step": 677 + }, + { + "epoch": 0.81, + "learning_rate": 0.00019683573935341358, + "loss": 2.2664, + "step": 678 + }, + { + "epoch": 0.82, + "learning_rate": 0.00019682630889791556, + "loss": 1.8527, + "step": 679 + }, + { + "epoch": 0.82, + "learning_rate": 0.000196816864637098, + "loss": 1.8417, + "step": 680 + }, + { + "epoch": 0.82, + "learning_rate": 0.00019680740657230738, + "loss": 1.9853, + "step": 681 + }, + { + "epoch": 0.82, + "learning_rate": 0.00019679793470489228, + "loss": 1.8419, + "step": 682 + }, + { + "epoch": 0.82, + "learning_rate": 0.00019678844903620317, + "loss": 1.9971, + "step": 683 + }, + { + "epoch": 0.82, + "learning_rate": 0.00019677894956759246, + "loss": 1.9843, + "step": 684 + }, + { + "epoch": 0.82, + "learning_rate": 0.00019676943630041462, + "loss": 2.376, + "step": 685 + }, + { + "epoch": 0.82, + "learning_rate": 0.00019675990923602598, + "loss": 2.1558, + "step": 686 + }, + { + "epoch": 0.82, + "learning_rate": 0.00019675036837578494, + "loss": 1.5752, + "step": 687 + }, + { + "epoch": 0.83, + "learning_rate": 0.0001967408137210518, + "loss": 1.6704, + "step": 688 + }, + { + "epoch": 0.83, + "learning_rate": 0.00019673124527318881, + "loss": 2.1389, + "step": 689 + }, + { + "epoch": 0.83, + "learning_rate": 0.00019672166303356028, + "loss": 2.126, + "step": 690 + }, + { + "epoch": 0.83, + "learning_rate": 0.00019671206700353237, + "loss": 1.9402, + "step": 691 + }, + { + "epoch": 0.83, + "learning_rate": 0.00019670245718447335, + "loss": 1.6701, + "step": 692 + }, + { + "epoch": 0.83, + "learning_rate": 0.00019669283357775328, + "loss": 1.8134, + "step": 693 + }, + { + "epoch": 0.83, + "learning_rate": 0.0001966831961847443, + "loss": 2.1642, + "step": 694 + }, + { + "epoch": 0.83, + "learning_rate": 0.00019667354500682054, + "loss": 1.8455, + "step": 695 + }, + { + "epoch": 0.84, + "learning_rate": 0.000196663880045358, + "loss": 1.9646, + "step": 696 + }, + { + "epoch": 0.84, + "learning_rate": 0.0001966542013017347, + "loss": 1.9855, + "step": 697 + }, + { + "epoch": 0.84, + "learning_rate": 0.00019664450877733062, + "loss": 1.7029, + "step": 698 + }, + { + "epoch": 0.84, + "learning_rate": 0.00019663480247352773, + "loss": 1.9789, + "step": 699 + }, + { + "epoch": 0.84, + "learning_rate": 0.0001966250823917099, + "loss": 1.8751, + "step": 700 + }, + { + "epoch": 0.84, + "learning_rate": 0.00019661534853326301, + "loss": 2.3644, + "step": 701 + }, + { + "epoch": 0.84, + "learning_rate": 0.00019660560089957492, + "loss": 1.8006, + "step": 702 + }, + { + "epoch": 0.84, + "learning_rate": 0.0001965958394920354, + "loss": 2.2799, + "step": 703 + }, + { + "epoch": 0.85, + "learning_rate": 0.00019658606431203622, + "loss": 1.9258, + "step": 704 + }, + { + "epoch": 0.85, + "learning_rate": 0.0001965762753609711, + "loss": 1.9521, + "step": 705 + }, + { + "epoch": 0.85, + "learning_rate": 0.00019656647264023575, + "loss": 1.9675, + "step": 706 + }, + { + "epoch": 0.85, + "learning_rate": 0.00019655665615122783, + "loss": 2.3686, + "step": 707 + }, + { + "epoch": 0.85, + "learning_rate": 0.00019654682589534693, + "loss": 2.1448, + "step": 708 + }, + { + "epoch": 0.85, + "learning_rate": 0.00019653698187399466, + "loss": 2.2475, + "step": 709 + }, + { + "epoch": 0.85, + "learning_rate": 0.0001965271240885745, + "loss": 1.9417, + "step": 710 + }, + { + "epoch": 0.85, + "learning_rate": 0.0001965172525404921, + "loss": 2.154, + "step": 711 + }, + { + "epoch": 0.85, + "learning_rate": 0.00019650736723115475, + "loss": 2.0646, + "step": 712 + }, + { + "epoch": 0.86, + "learning_rate": 0.00019649746816197196, + "loss": 2.235, + "step": 713 + }, + { + "epoch": 0.86, + "learning_rate": 0.00019648755533435518, + "loss": 1.7122, + "step": 714 + }, + { + "epoch": 0.86, + "learning_rate": 0.00019647762874971765, + "loss": 2.0635, + "step": 715 + }, + { + "epoch": 0.86, + "learning_rate": 0.00019646768840947474, + "loss": 1.8904, + "step": 716 + }, + { + "epoch": 0.86, + "learning_rate": 0.00019645773431504373, + "loss": 1.608, + "step": 717 + }, + { + "epoch": 0.86, + "learning_rate": 0.00019644776646784388, + "loss": 2.2307, + "step": 718 + }, + { + "epoch": 0.86, + "learning_rate": 0.0001964377848692963, + "loss": 2.176, + "step": 719 + }, + { + "epoch": 0.86, + "learning_rate": 0.00019642778952082426, + "loss": 2.1984, + "step": 720 + }, + { + "epoch": 0.87, + "learning_rate": 0.0001964177804238528, + "loss": 2.2625, + "step": 721 + }, + { + "epoch": 0.87, + "learning_rate": 0.00019640775757980903, + "loss": 2.3142, + "step": 722 + }, + { + "epoch": 0.87, + "learning_rate": 0.00019639772099012197, + "loss": 2.2366, + "step": 723 + }, + { + "epoch": 0.87, + "learning_rate": 0.00019638767065622266, + "loss": 1.7823, + "step": 724 + }, + { + "epoch": 0.87, + "learning_rate": 0.000196377606579544, + "loss": 2.0677, + "step": 725 + }, + { + "epoch": 0.87, + "learning_rate": 0.00019636752876152095, + "loss": 1.3337, + "step": 726 + }, + { + "epoch": 0.87, + "learning_rate": 0.00019635743720359037, + "loss": 2.055, + "step": 727 + }, + { + "epoch": 0.87, + "learning_rate": 0.0001963473319071911, + "loss": 1.9888, + "step": 728 + }, + { + "epoch": 0.88, + "learning_rate": 0.00019633721287376393, + "loss": 1.9258, + "step": 729 + }, + { + "epoch": 0.88, + "learning_rate": 0.00019632708010475165, + "loss": 2.3768, + "step": 730 + }, + { + "epoch": 0.88, + "learning_rate": 0.0001963169336015989, + "loss": 1.993, + "step": 731 + }, + { + "epoch": 0.88, + "learning_rate": 0.00019630677336575242, + "loss": 2.1989, + "step": 732 + }, + { + "epoch": 0.88, + "learning_rate": 0.0001962965993986608, + "loss": 2.1216, + "step": 733 + }, + { + "epoch": 0.88, + "learning_rate": 0.00019628641170177464, + "loss": 2.2217, + "step": 734 + }, + { + "epoch": 0.88, + "learning_rate": 0.00019627621027654648, + "loss": 1.8809, + "step": 735 + }, + { + "epoch": 0.88, + "learning_rate": 0.00019626599512443077, + "loss": 2.0864, + "step": 736 + }, + { + "epoch": 0.88, + "learning_rate": 0.00019625576624688406, + "loss": 2.0627, + "step": 737 + }, + { + "epoch": 0.89, + "learning_rate": 0.00019624552364536473, + "loss": 2.1347, + "step": 738 + }, + { + "epoch": 0.89, + "learning_rate": 0.00019623526732133315, + "loss": 1.9998, + "step": 739 + }, + { + "epoch": 0.89, + "learning_rate": 0.00019622499727625162, + "loss": 2.1998, + "step": 740 + }, + { + "epoch": 0.89, + "learning_rate": 0.00019621471351158443, + "loss": 1.974, + "step": 741 + }, + { + "epoch": 0.89, + "learning_rate": 0.00019620441602879787, + "loss": 1.9425, + "step": 742 + }, + { + "epoch": 0.89, + "learning_rate": 0.00019619410482936008, + "loss": 2.6227, + "step": 743 + }, + { + "epoch": 0.89, + "learning_rate": 0.00019618377991474124, + "loss": 2.1209, + "step": 744 + }, + { + "epoch": 0.89, + "learning_rate": 0.00019617344128641345, + "loss": 2.0606, + "step": 745 + }, + { + "epoch": 0.9, + "learning_rate": 0.00019616308894585078, + "loss": 2.296, + "step": 746 + }, + { + "epoch": 0.9, + "learning_rate": 0.00019615272289452923, + "loss": 2.0415, + "step": 747 + }, + { + "epoch": 0.9, + "learning_rate": 0.0001961423431339268, + "loss": 1.9516, + "step": 748 + }, + { + "epoch": 0.9, + "learning_rate": 0.0001961319496655234, + "loss": 2.0468, + "step": 749 + }, + { + "epoch": 0.9, + "learning_rate": 0.0001961215424908009, + "loss": 1.877, + "step": 750 + }, + { + "epoch": 0.9, + "learning_rate": 0.0001961111216112432, + "loss": 1.8129, + "step": 751 + }, + { + "epoch": 0.9, + "learning_rate": 0.00019610068702833596, + "loss": 1.9984, + "step": 752 + }, + { + "epoch": 0.9, + "learning_rate": 0.00019609023874356707, + "loss": 1.9013, + "step": 753 + }, + { + "epoch": 0.91, + "learning_rate": 0.00019607977675842615, + "loss": 2.0546, + "step": 754 + }, + { + "epoch": 0.91, + "learning_rate": 0.00019606930107440485, + "loss": 2.2817, + "step": 755 + }, + { + "epoch": 0.91, + "learning_rate": 0.0001960588116929968, + "loss": 2.0578, + "step": 756 + }, + { + "epoch": 0.91, + "learning_rate": 0.00019604830861569755, + "loss": 2.3521, + "step": 757 + }, + { + "epoch": 0.91, + "learning_rate": 0.00019603779184400457, + "loss": 2.0392, + "step": 758 + }, + { + "epoch": 0.91, + "learning_rate": 0.0001960272613794174, + "loss": 1.9863, + "step": 759 + }, + { + "epoch": 0.91, + "learning_rate": 0.00019601671722343738, + "loss": 2.1889, + "step": 760 + }, + { + "epoch": 0.91, + "learning_rate": 0.0001960061593775679, + "loss": 2.0908, + "step": 761 + }, + { + "epoch": 0.91, + "learning_rate": 0.0001959955878433143, + "loss": 1.986, + "step": 762 + }, + { + "epoch": 0.92, + "learning_rate": 0.00019598500262218386, + "loss": 2.0339, + "step": 763 + }, + { + "epoch": 0.92, + "learning_rate": 0.00019597440371568574, + "loss": 2.0958, + "step": 764 + }, + { + "epoch": 0.92, + "learning_rate": 0.0001959637911253312, + "loss": 1.9866, + "step": 765 + }, + { + "epoch": 0.92, + "learning_rate": 0.00019595316485263327, + "loss": 2.2228, + "step": 766 + }, + { + "epoch": 0.92, + "learning_rate": 0.00019594252489910706, + "loss": 1.915, + "step": 767 + }, + { + "epoch": 0.92, + "learning_rate": 0.00019593187126626965, + "loss": 2.0741, + "step": 768 + }, + { + "epoch": 0.92, + "learning_rate": 0.00019592120395563994, + "loss": 2.5346, + "step": 769 + }, + { + "epoch": 0.92, + "learning_rate": 0.00019591052296873888, + "loss": 2.4908, + "step": 770 + }, + { + "epoch": 0.93, + "learning_rate": 0.00019589982830708937, + "loss": 2.1042, + "step": 771 + }, + { + "epoch": 0.93, + "learning_rate": 0.00019588911997221625, + "loss": 1.8676, + "step": 772 + }, + { + "epoch": 0.93, + "learning_rate": 0.0001958783979656462, + "loss": 1.9152, + "step": 773 + }, + { + "epoch": 0.93, + "learning_rate": 0.00019586766228890806, + "loss": 1.7784, + "step": 774 + }, + { + "epoch": 0.93, + "learning_rate": 0.0001958569129435324, + "loss": 2.0784, + "step": 775 + }, + { + "epoch": 0.93, + "learning_rate": 0.0001958461499310519, + "loss": 1.7262, + "step": 776 + }, + { + "epoch": 0.93, + "learning_rate": 0.00019583537325300118, + "loss": 2.4154, + "step": 777 + }, + { + "epoch": 0.93, + "learning_rate": 0.00019582458291091663, + "loss": 2.3185, + "step": 778 + }, + { + "epoch": 0.94, + "learning_rate": 0.00019581377890633684, + "loss": 2.0981, + "step": 779 + }, + { + "epoch": 0.94, + "learning_rate": 0.00019580296124080212, + "loss": 1.8952, + "step": 780 + }, + { + "epoch": 0.94, + "learning_rate": 0.00019579212991585493, + "loss": 1.7208, + "step": 781 + }, + { + "epoch": 0.94, + "learning_rate": 0.00019578128493303955, + "loss": 2.0209, + "step": 782 + }, + { + "epoch": 0.94, + "learning_rate": 0.00019577042629390217, + "loss": 2.1867, + "step": 783 + }, + { + "epoch": 0.94, + "learning_rate": 0.0001957595539999911, + "loss": 2.0805, + "step": 784 + }, + { + "epoch": 0.94, + "learning_rate": 0.00019574866805285645, + "loss": 2.0451, + "step": 785 + }, + { + "epoch": 0.94, + "learning_rate": 0.00019573776845405028, + "loss": 2.2056, + "step": 786 + }, + { + "epoch": 0.94, + "learning_rate": 0.0001957268552051267, + "loss": 2.0773, + "step": 787 + }, + { + "epoch": 0.95, + "learning_rate": 0.00019571592830764165, + "loss": 2.2036, + "step": 788 + }, + { + "epoch": 0.95, + "learning_rate": 0.00019570498776315309, + "loss": 1.7298, + "step": 789 + }, + { + "epoch": 0.95, + "learning_rate": 0.0001956940335732209, + "loss": 1.8931, + "step": 790 + }, + { + "epoch": 0.95, + "learning_rate": 0.0001956830657394069, + "loss": 2.1567, + "step": 791 + }, + { + "epoch": 0.95, + "learning_rate": 0.00019567208426327488, + "loss": 1.9471, + "step": 792 + }, + { + "epoch": 0.95, + "learning_rate": 0.00019566108914639054, + "loss": 1.8916, + "step": 793 + }, + { + "epoch": 0.95, + "learning_rate": 0.00019565008039032158, + "loss": 2.0111, + "step": 794 + }, + { + "epoch": 0.95, + "learning_rate": 0.00019563905799663752, + "loss": 2.1374, + "step": 795 + }, + { + "epoch": 0.96, + "learning_rate": 0.00019562802196691003, + "loss": 2.3083, + "step": 796 + }, + { + "epoch": 0.96, + "learning_rate": 0.00019561697230271254, + "loss": 2.0381, + "step": 797 + }, + { + "epoch": 0.96, + "learning_rate": 0.0001956059090056205, + "loss": 2.1909, + "step": 798 + }, + { + "epoch": 0.96, + "learning_rate": 0.00019559483207721133, + "loss": 1.9893, + "step": 799 + }, + { + "epoch": 0.96, + "learning_rate": 0.0001955837415190643, + "loss": 2.3178, + "step": 800 + }, + { + "epoch": 0.96, + "learning_rate": 0.0001955726373327607, + "loss": 2.0815, + "step": 801 + }, + { + "epoch": 0.96, + "learning_rate": 0.00019556151951988376, + "loss": 1.6012, + "step": 802 + }, + { + "epoch": 0.96, + "learning_rate": 0.00019555038808201865, + "loss": 1.4965, + "step": 803 + }, + { + "epoch": 0.97, + "learning_rate": 0.00019553924302075242, + "loss": 2.3069, + "step": 804 + }, + { + "epoch": 0.97, + "learning_rate": 0.00019552808433767415, + "loss": 2.2388, + "step": 805 + }, + { + "epoch": 0.97, + "learning_rate": 0.00019551691203437482, + "loss": 2.5662, + "step": 806 + }, + { + "epoch": 0.97, + "learning_rate": 0.00019550572611244738, + "loss": 1.9419, + "step": 807 + }, + { + "epoch": 0.97, + "learning_rate": 0.00019549452657348663, + "loss": 2.3638, + "step": 808 + }, + { + "epoch": 0.97, + "learning_rate": 0.00019548331341908947, + "loss": 2.1567, + "step": 809 + }, + { + "epoch": 0.97, + "learning_rate": 0.00019547208665085457, + "loss": 1.9697, + "step": 810 + }, + { + "epoch": 0.97, + "learning_rate": 0.00019546084627038268, + "loss": 1.9006, + "step": 811 + }, + { + "epoch": 0.97, + "learning_rate": 0.0001954495922792764, + "loss": 2.304, + "step": 812 + }, + { + "epoch": 0.98, + "learning_rate": 0.0001954383246791403, + "loss": 2.0494, + "step": 813 + }, + { + "epoch": 0.98, + "learning_rate": 0.00019542704347158093, + "loss": 1.8562, + "step": 814 + }, + { + "epoch": 0.98, + "learning_rate": 0.00019541574865820672, + "loss": 2.1041, + "step": 815 + }, + { + "epoch": 0.98, + "learning_rate": 0.00019540444024062804, + "loss": 2.22, + "step": 816 + }, + { + "epoch": 0.98, + "learning_rate": 0.00019539311822045727, + "loss": 1.9925, + "step": 817 + }, + { + "epoch": 0.98, + "learning_rate": 0.00019538178259930869, + "loss": 2.3213, + "step": 818 + }, + { + "epoch": 0.98, + "learning_rate": 0.00019537043337879845, + "loss": 2.0319, + "step": 819 + }, + { + "epoch": 0.98, + "learning_rate": 0.00019535907056054475, + "loss": 1.8578, + "step": 820 + }, + { + "epoch": 0.99, + "learning_rate": 0.00019534769414616764, + "loss": 1.4115, + "step": 821 + }, + { + "epoch": 0.99, + "learning_rate": 0.0001953363041372892, + "loss": 2.0731, + "step": 822 + }, + { + "epoch": 0.99, + "learning_rate": 0.00019532490053553335, + "loss": 2.0605, + "step": 823 + }, + { + "epoch": 0.99, + "learning_rate": 0.00019531348334252607, + "loss": 1.9044, + "step": 824 + }, + { + "epoch": 0.99, + "learning_rate": 0.0001953020525598951, + "loss": 1.7405, + "step": 825 + }, + { + "epoch": 0.99, + "learning_rate": 0.0001952906081892703, + "loss": 1.898, + "step": 826 + }, + { + "epoch": 0.99, + "learning_rate": 0.00019527915023228332, + "loss": 1.9696, + "step": 827 + }, + { + "epoch": 0.99, + "learning_rate": 0.00019526767869056788, + "loss": 2.0469, + "step": 828 + }, + { + "epoch": 1.0, + "learning_rate": 0.00019525619356575952, + "loss": 2.0307, + "step": 829 + }, + { + "epoch": 1.0, + "learning_rate": 0.00019524469485949583, + "loss": 2.002, + "step": 830 + }, + { + "epoch": 1.0, + "learning_rate": 0.00019523318257341622, + "loss": 1.9438, + "step": 831 + }, + { + "epoch": 1.0, + "learning_rate": 0.00019522165670916207, + "loss": 1.535, + "step": 832 + }, + { + "epoch": 1.0, + "learning_rate": 0.0001952101172683768, + "loss": 1.7505, + "step": 833 + }, + { + "epoch": 1.0, + "learning_rate": 0.00019519856425270562, + "loss": 2.2248, + "step": 834 + }, + { + "epoch": 1.0, + "learning_rate": 0.00019518699766379576, + "loss": 2.0669, + "step": 835 + }, + { + "epoch": 1.0, + "learning_rate": 0.00019517541750329635, + "loss": 2.0268, + "step": 836 + }, + { + "epoch": 1.0, + "eval_loss": 1.9969017505645752, + "eval_runtime": 283.3157, + "eval_samples_per_second": 0.727, + "eval_steps_per_second": 0.727, + "step": 836 + }, + { + "epoch": 1.0, + "learning_rate": 0.00019516382377285848, + "loss": 1.6712, + "step": 837 + }, + { + "epoch": 1.01, + "learning_rate": 0.0001951522164741352, + "loss": 2.1558, + "step": 838 + }, + { + "epoch": 1.01, + "learning_rate": 0.00019514059560878138, + "loss": 2.1599, + "step": 839 + }, + { + "epoch": 1.01, + "learning_rate": 0.00019512896117845392, + "loss": 1.8762, + "step": 840 + }, + { + "epoch": 1.01, + "learning_rate": 0.00019511731318481168, + "loss": 2.0189, + "step": 841 + }, + { + "epoch": 1.01, + "learning_rate": 0.00019510565162951537, + "loss": 1.9364, + "step": 842 + }, + { + "epoch": 1.01, + "learning_rate": 0.00019509397651422769, + "loss": 1.7319, + "step": 843 + }, + { + "epoch": 1.01, + "learning_rate": 0.00019508228784061326, + "loss": 1.9424, + "step": 844 + }, + { + "epoch": 1.01, + "learning_rate": 0.0001950705856103386, + "loss": 2.277, + "step": 845 + }, + { + "epoch": 1.02, + "learning_rate": 0.00019505886982507225, + "loss": 1.6511, + "step": 846 + }, + { + "epoch": 1.0, + "learning_rate": 0.0001950471404864846, + "loss": 1.9056, + "step": 847 + }, + { + "epoch": 1.0, + "learning_rate": 0.00019503539759624798, + "loss": 1.5105, + "step": 848 + }, + { + "epoch": 1.0, + "learning_rate": 0.0001950236411560367, + "loss": 1.9469, + "step": 849 + }, + { + "epoch": 1.0, + "learning_rate": 0.00019501187116752693, + "loss": 1.5012, + "step": 850 + }, + { + "epoch": 1.01, + "learning_rate": 0.00019500008763239683, + "loss": 1.7086, + "step": 851 + }, + { + "epoch": 1.01, + "learning_rate": 0.00019498829055232647, + "loss": 1.5586, + "step": 852 + }, + { + "epoch": 1.01, + "learning_rate": 0.00019497647992899788, + "loss": 1.5573, + "step": 853 + }, + { + "epoch": 1.01, + "learning_rate": 0.000194964655764095, + "loss": 2.0757, + "step": 854 + }, + { + "epoch": 1.01, + "learning_rate": 0.00019495281805930367, + "loss": 1.5478, + "step": 855 + }, + { + "epoch": 1.01, + "learning_rate": 0.00019494096681631172, + "loss": 1.7068, + "step": 856 + }, + { + "epoch": 1.01, + "learning_rate": 0.00019492910203680884, + "loss": 1.6759, + "step": 857 + }, + { + "epoch": 1.01, + "learning_rate": 0.0001949172237224867, + "loss": 1.4621, + "step": 858 + }, + { + "epoch": 1.02, + "learning_rate": 0.00019490533187503892, + "loss": 1.5359, + "step": 859 + }, + { + "epoch": 1.02, + "learning_rate": 0.000194893426496161, + "loss": 1.9365, + "step": 860 + }, + { + "epoch": 1.02, + "learning_rate": 0.00019488150758755035, + "loss": 1.7089, + "step": 861 + }, + { + "epoch": 1.02, + "learning_rate": 0.00019486957515090641, + "loss": 1.4924, + "step": 862 + }, + { + "epoch": 1.02, + "learning_rate": 0.00019485762918793046, + "loss": 1.387, + "step": 863 + }, + { + "epoch": 1.02, + "learning_rate": 0.0001948456697003257, + "loss": 1.631, + "step": 864 + }, + { + "epoch": 1.02, + "learning_rate": 0.00019483369668979732, + "loss": 1.7953, + "step": 865 + }, + { + "epoch": 1.02, + "learning_rate": 0.00019482171015805245, + "loss": 1.7552, + "step": 866 + }, + { + "epoch": 1.03, + "learning_rate": 0.00019480971010680002, + "loss": 1.8313, + "step": 867 + }, + { + "epoch": 1.03, + "learning_rate": 0.00019479769653775106, + "loss": 1.593, + "step": 868 + }, + { + "epoch": 1.03, + "learning_rate": 0.00019478566945261837, + "loss": 1.9506, + "step": 869 + }, + { + "epoch": 1.03, + "learning_rate": 0.00019477362885311682, + "loss": 1.9598, + "step": 870 + }, + { + "epoch": 1.03, + "learning_rate": 0.0001947615747409631, + "loss": 1.7324, + "step": 871 + }, + { + "epoch": 1.03, + "learning_rate": 0.00019474950711787585, + "loss": 2.1208, + "step": 872 + }, + { + "epoch": 1.03, + "learning_rate": 0.0001947374259855757, + "loss": 1.4111, + "step": 873 + }, + { + "epoch": 1.03, + "learning_rate": 0.00019472533134578507, + "loss": 1.6696, + "step": 874 + }, + { + "epoch": 1.03, + "learning_rate": 0.00019471322320022849, + "loss": 1.6999, + "step": 875 + }, + { + "epoch": 1.04, + "learning_rate": 0.00019470110155063225, + "loss": 2.1287, + "step": 876 + }, + { + "epoch": 1.04, + "learning_rate": 0.00019468896639872468, + "loss": 1.874, + "step": 877 + }, + { + "epoch": 1.04, + "learning_rate": 0.00019467681774623592, + "loss": 1.7149, + "step": 878 + }, + { + "epoch": 1.04, + "learning_rate": 0.00019466465559489816, + "loss": 1.9563, + "step": 879 + }, + { + "epoch": 1.04, + "learning_rate": 0.00019465247994644545, + "loss": 1.3504, + "step": 880 + }, + { + "epoch": 1.04, + "learning_rate": 0.00019464029080261378, + "loss": 1.6176, + "step": 881 + }, + { + "epoch": 1.04, + "learning_rate": 0.00019462808816514103, + "loss": 1.7577, + "step": 882 + }, + { + "epoch": 1.04, + "learning_rate": 0.00019461587203576706, + "loss": 1.8054, + "step": 883 + }, + { + "epoch": 1.05, + "learning_rate": 0.00019460364241623358, + "loss": 2.0246, + "step": 884 + }, + { + "epoch": 1.05, + "learning_rate": 0.00019459139930828428, + "loss": 1.7645, + "step": 885 + }, + { + "epoch": 1.05, + "learning_rate": 0.0001945791427136648, + "loss": 1.9225, + "step": 886 + }, + { + "epoch": 1.05, + "learning_rate": 0.00019456687263412262, + "loss": 1.8967, + "step": 887 + }, + { + "epoch": 1.05, + "learning_rate": 0.0001945545890714072, + "loss": 1.5287, + "step": 888 + }, + { + "epoch": 1.05, + "learning_rate": 0.0001945422920272699, + "loss": 1.5033, + "step": 889 + }, + { + "epoch": 1.05, + "learning_rate": 0.00019452998150346401, + "loss": 2.0148, + "step": 890 + }, + { + "epoch": 1.05, + "learning_rate": 0.0001945176575017448, + "loss": 1.3706, + "step": 891 + }, + { + "epoch": 1.06, + "learning_rate": 0.0001945053200238693, + "loss": 1.7603, + "step": 892 + }, + { + "epoch": 1.06, + "learning_rate": 0.00019449296907159667, + "loss": 1.9884, + "step": 893 + }, + { + "epoch": 1.06, + "learning_rate": 0.00019448060464668783, + "loss": 1.6133, + "step": 894 + }, + { + "epoch": 1.06, + "learning_rate": 0.00019446822675090565, + "loss": 1.7885, + "step": 895 + }, + { + "epoch": 1.06, + "learning_rate": 0.00019445583538601498, + "loss": 1.8573, + "step": 896 + }, + { + "epoch": 1.06, + "learning_rate": 0.0001944434305537826, + "loss": 1.7241, + "step": 897 + }, + { + "epoch": 1.06, + "learning_rate": 0.0001944310122559771, + "loss": 1.8942, + "step": 898 + }, + { + "epoch": 1.06, + "learning_rate": 0.0001944185804943691, + "loss": 1.7541, + "step": 899 + }, + { + "epoch": 1.06, + "learning_rate": 0.00019440613527073105, + "loss": 1.9608, + "step": 900 + }, + { + "epoch": 1.07, + "learning_rate": 0.00019439367658683745, + "loss": 2.0969, + "step": 901 + }, + { + "epoch": 1.07, + "learning_rate": 0.00019438120444446457, + "loss": 2.2589, + "step": 902 + }, + { + "epoch": 1.07, + "learning_rate": 0.0001943687188453907, + "loss": 1.7335, + "step": 903 + }, + { + "epoch": 1.07, + "learning_rate": 0.00019435621979139596, + "loss": 1.8663, + "step": 904 + }, + { + "epoch": 1.07, + "learning_rate": 0.00019434370728426252, + "loss": 1.5627, + "step": 905 + }, + { + "epoch": 1.07, + "learning_rate": 0.0001943311813257743, + "loss": 1.6101, + "step": 906 + }, + { + "epoch": 1.07, + "learning_rate": 0.00019431864191771732, + "loss": 1.9661, + "step": 907 + }, + { + "epoch": 1.07, + "learning_rate": 0.0001943060890618794, + "loss": 1.6487, + "step": 908 + }, + { + "epoch": 1.08, + "learning_rate": 0.00019429352276005026, + "loss": 2.1282, + "step": 909 + }, + { + "epoch": 1.08, + "learning_rate": 0.00019428094301402162, + "loss": 1.6944, + "step": 910 + }, + { + "epoch": 1.08, + "learning_rate": 0.00019426834982558705, + "loss": 1.2433, + "step": 911 + }, + { + "epoch": 1.08, + "learning_rate": 0.00019425574319654213, + "loss": 1.5735, + "step": 912 + }, + { + "epoch": 1.08, + "learning_rate": 0.00019424312312868417, + "loss": 1.6499, + "step": 913 + }, + { + "epoch": 1.08, + "learning_rate": 0.00019423048962381265, + "loss": 1.8366, + "step": 914 + }, + { + "epoch": 1.08, + "learning_rate": 0.00019421784268372876, + "loss": 1.906, + "step": 915 + }, + { + "epoch": 1.08, + "learning_rate": 0.00019420518231023568, + "loss": 1.5976, + "step": 916 + }, + { + "epoch": 1.09, + "learning_rate": 0.0001941925085051385, + "loss": 1.6722, + "step": 917 + }, + { + "epoch": 1.09, + "learning_rate": 0.00019417982127024422, + "loss": 1.8832, + "step": 918 + }, + { + "epoch": 1.09, + "learning_rate": 0.00019416712060736183, + "loss": 1.8865, + "step": 919 + }, + { + "epoch": 1.09, + "learning_rate": 0.00019415440651830208, + "loss": 1.6627, + "step": 920 + }, + { + "epoch": 1.09, + "learning_rate": 0.0001941416790048778, + "loss": 1.3598, + "step": 921 + }, + { + "epoch": 1.09, + "learning_rate": 0.00019412893806890357, + "loss": 2.0506, + "step": 922 + }, + { + "epoch": 1.09, + "learning_rate": 0.00019411618371219605, + "loss": 1.9794, + "step": 923 + }, + { + "epoch": 1.09, + "learning_rate": 0.0001941034159365737, + "loss": 1.7851, + "step": 924 + }, + { + "epoch": 1.09, + "learning_rate": 0.0001940906347438569, + "loss": 1.8312, + "step": 925 + }, + { + "epoch": 1.1, + "learning_rate": 0.00019407784013586804, + "loss": 1.5167, + "step": 926 + }, + { + "epoch": 1.1, + "learning_rate": 0.00019406503211443128, + "loss": 1.5725, + "step": 927 + }, + { + "epoch": 1.1, + "learning_rate": 0.00019405221068137277, + "loss": 1.8857, + "step": 928 + }, + { + "epoch": 1.1, + "learning_rate": 0.00019403937583852061, + "loss": 1.741, + "step": 929 + }, + { + "epoch": 1.1, + "learning_rate": 0.00019402652758770475, + "loss": 1.6748, + "step": 930 + }, + { + "epoch": 1.1, + "learning_rate": 0.00019401366593075706, + "loss": 1.7285, + "step": 931 + }, + { + "epoch": 1.1, + "learning_rate": 0.00019400079086951135, + "loss": 1.7545, + "step": 932 + }, + { + "epoch": 1.1, + "learning_rate": 0.00019398790240580333, + "loss": 1.4491, + "step": 933 + }, + { + "epoch": 1.11, + "learning_rate": 0.00019397500054147058, + "loss": 1.3359, + "step": 934 + }, + { + "epoch": 1.11, + "learning_rate": 0.00019396208527835263, + "loss": 1.9567, + "step": 935 + }, + { + "epoch": 1.11, + "learning_rate": 0.0001939491566182909, + "loss": 2.0011, + "step": 936 + }, + { + "epoch": 1.11, + "learning_rate": 0.00019393621456312881, + "loss": 1.9076, + "step": 937 + }, + { + "epoch": 1.11, + "learning_rate": 0.00019392325911471155, + "loss": 1.5388, + "step": 938 + }, + { + "epoch": 1.11, + "learning_rate": 0.00019391029027488629, + "loss": 1.2337, + "step": 939 + }, + { + "epoch": 1.11, + "learning_rate": 0.00019389730804550211, + "loss": 1.5752, + "step": 940 + }, + { + "epoch": 1.11, + "learning_rate": 0.00019388431242840998, + "loss": 1.9131, + "step": 941 + }, + { + "epoch": 1.12, + "learning_rate": 0.00019387130342546284, + "loss": 1.4177, + "step": 942 + }, + { + "epoch": 1.12, + "learning_rate": 0.00019385828103851544, + "loss": 1.5865, + "step": 943 + }, + { + "epoch": 1.12, + "learning_rate": 0.0001938452452694245, + "loss": 1.6335, + "step": 944 + }, + { + "epoch": 1.12, + "learning_rate": 0.00019383219612004865, + "loss": 1.8599, + "step": 945 + }, + { + "epoch": 1.12, + "learning_rate": 0.00019381913359224842, + "loss": 1.3035, + "step": 946 + }, + { + "epoch": 1.12, + "learning_rate": 0.00019380605768788621, + "loss": 1.7586, + "step": 947 + }, + { + "epoch": 1.12, + "learning_rate": 0.0001937929684088264, + "loss": 1.7334, + "step": 948 + }, + { + "epoch": 1.12, + "learning_rate": 0.00019377986575693518, + "loss": 1.5749, + "step": 949 + }, + { + "epoch": 1.12, + "learning_rate": 0.00019376674973408075, + "loss": 1.874, + "step": 950 + }, + { + "epoch": 1.13, + "learning_rate": 0.00019375362034213314, + "loss": 2.3055, + "step": 951 + }, + { + "epoch": 1.13, + "learning_rate": 0.00019374047758296433, + "loss": 1.5801, + "step": 952 + }, + { + "epoch": 1.13, + "learning_rate": 0.0001937273214584482, + "loss": 1.8788, + "step": 953 + }, + { + "epoch": 1.13, + "learning_rate": 0.00019371415197046052, + "loss": 2.431, + "step": 954 + }, + { + "epoch": 1.13, + "learning_rate": 0.00019370096912087897, + "loss": 1.4963, + "step": 955 + }, + { + "epoch": 1.13, + "learning_rate": 0.0001936877729115831, + "loss": 1.514, + "step": 956 + }, + { + "epoch": 1.13, + "learning_rate": 0.00019367456334445446, + "loss": 1.6099, + "step": 957 + }, + { + "epoch": 1.13, + "learning_rate": 0.00019366134042137642, + "loss": 1.9367, + "step": 958 + }, + { + "epoch": 1.14, + "learning_rate": 0.00019364810414423427, + "loss": 1.7384, + "step": 959 + }, + { + "epoch": 1.14, + "learning_rate": 0.00019363485451491524, + "loss": 1.6166, + "step": 960 + }, + { + "epoch": 1.14, + "learning_rate": 0.00019362159153530844, + "loss": 1.955, + "step": 961 + }, + { + "epoch": 1.14, + "learning_rate": 0.00019360831520730482, + "loss": 1.4189, + "step": 962 + }, + { + "epoch": 1.14, + "learning_rate": 0.00019359502553279736, + "loss": 1.4506, + "step": 963 + }, + { + "epoch": 1.14, + "learning_rate": 0.00019358172251368087, + "loss": 1.7108, + "step": 964 + }, + { + "epoch": 1.14, + "learning_rate": 0.00019356840615185203, + "loss": 1.6641, + "step": 965 + }, + { + "epoch": 1.14, + "learning_rate": 0.00019355507644920952, + "loss": 1.7506, + "step": 966 + }, + { + "epoch": 1.15, + "learning_rate": 0.00019354173340765382, + "loss": 2.0598, + "step": 967 + }, + { + "epoch": 1.15, + "learning_rate": 0.0001935283770290874, + "loss": 1.3494, + "step": 968 + }, + { + "epoch": 1.15, + "learning_rate": 0.00019351500731541453, + "loss": 1.6571, + "step": 969 + }, + { + "epoch": 1.15, + "learning_rate": 0.0001935016242685415, + "loss": 1.6403, + "step": 970 + }, + { + "epoch": 1.15, + "learning_rate": 0.00019348822789037637, + "loss": 1.7555, + "step": 971 + }, + { + "epoch": 1.15, + "learning_rate": 0.00019347481818282925, + "loss": 2.1451, + "step": 972 + }, + { + "epoch": 1.15, + "learning_rate": 0.000193461395147812, + "loss": 1.4522, + "step": 973 + }, + { + "epoch": 1.15, + "learning_rate": 0.0001934479587872385, + "loss": 1.7147, + "step": 974 + }, + { + "epoch": 1.15, + "learning_rate": 0.0001934345091030245, + "loss": 1.3909, + "step": 975 + }, + { + "epoch": 1.16, + "learning_rate": 0.00019342104609708756, + "loss": 1.8104, + "step": 976 + }, + { + "epoch": 1.16, + "learning_rate": 0.00019340756977134728, + "loss": 1.5221, + "step": 977 + }, + { + "epoch": 1.16, + "learning_rate": 0.000193394080127725, + "loss": 1.9447, + "step": 978 + }, + { + "epoch": 1.16, + "learning_rate": 0.0001933805771681442, + "loss": 1.5742, + "step": 979 + }, + { + "epoch": 1.16, + "learning_rate": 0.00019336706089452996, + "loss": 1.5312, + "step": 980 + }, + { + "epoch": 1.16, + "learning_rate": 0.00019335353130880948, + "loss": 1.4304, + "step": 981 + }, + { + "epoch": 1.16, + "learning_rate": 0.00019333998841291177, + "loss": 1.8379, + "step": 982 + }, + { + "epoch": 1.16, + "learning_rate": 0.00019332643220876773, + "loss": 1.877, + "step": 983 + }, + { + "epoch": 1.17, + "learning_rate": 0.0001933128626983102, + "loss": 1.9627, + "step": 984 + }, + { + "epoch": 1.17, + "learning_rate": 0.0001932992798834739, + "loss": 1.7857, + "step": 985 + }, + { + "epoch": 1.17, + "learning_rate": 0.00019328568376619543, + "loss": 1.3189, + "step": 986 + }, + { + "epoch": 1.17, + "learning_rate": 0.00019327207434841333, + "loss": 1.9588, + "step": 987 + }, + { + "epoch": 1.17, + "learning_rate": 0.00019325845163206795, + "loss": 1.3132, + "step": 988 + }, + { + "epoch": 1.17, + "learning_rate": 0.00019324481561910163, + "loss": 1.6304, + "step": 989 + }, + { + "epoch": 1.17, + "learning_rate": 0.0001932311663114586, + "loss": 1.8322, + "step": 990 + }, + { + "epoch": 1.17, + "learning_rate": 0.00019321750371108486, + "loss": 1.4192, + "step": 991 + }, + { + "epoch": 1.18, + "learning_rate": 0.0001932038278199285, + "loss": 1.3915, + "step": 992 + }, + { + "epoch": 1.18, + "learning_rate": 0.00019319013863993933, + "loss": 1.8433, + "step": 993 + }, + { + "epoch": 1.18, + "learning_rate": 0.0001931764361730692, + "loss": 2.1459, + "step": 994 + }, + { + "epoch": 1.18, + "learning_rate": 0.0001931627204212717, + "loss": 1.9799, + "step": 995 + }, + { + "epoch": 1.18, + "learning_rate": 0.00019314899138650243, + "loss": 1.855, + "step": 996 + }, + { + "epoch": 1.18, + "learning_rate": 0.00019313524907071887, + "loss": 1.4763, + "step": 997 + }, + { + "epoch": 1.18, + "learning_rate": 0.00019312149347588037, + "loss": 2.0128, + "step": 998 + }, + { + "epoch": 1.18, + "learning_rate": 0.00019310772460394814, + "loss": 1.6964, + "step": 999 + }, + { + "epoch": 1.18, + "learning_rate": 0.0001930939424568854, + "loss": 1.5864, + "step": 1000 + }, + { + "epoch": 1.19, + "learning_rate": 0.00019308014703665712, + "loss": 1.8437, + "step": 1001 + }, + { + "epoch": 1.19, + "learning_rate": 0.00019306633834523024, + "loss": 2.1677, + "step": 1002 + }, + { + "epoch": 1.19, + "learning_rate": 0.00019305251638457356, + "loss": 1.8872, + "step": 1003 + }, + { + "epoch": 1.19, + "learning_rate": 0.0001930386811566578, + "loss": 1.7312, + "step": 1004 + }, + { + "epoch": 1.19, + "learning_rate": 0.0001930248326634556, + "loss": 1.6772, + "step": 1005 + }, + { + "epoch": 1.19, + "learning_rate": 0.00019301097090694143, + "loss": 1.9666, + "step": 1006 + }, + { + "epoch": 1.19, + "learning_rate": 0.00019299709588909165, + "loss": 1.8946, + "step": 1007 + }, + { + "epoch": 1.19, + "learning_rate": 0.00019298320761188453, + "loss": 2.1784, + "step": 1008 + }, + { + "epoch": 1.2, + "learning_rate": 0.0001929693060773003, + "loss": 2.0249, + "step": 1009 + }, + { + "epoch": 1.2, + "learning_rate": 0.00019295539128732093, + "loss": 1.717, + "step": 1010 + }, + { + "epoch": 1.2, + "learning_rate": 0.00019294146324393046, + "loss": 1.8671, + "step": 1011 + }, + { + "epoch": 1.2, + "learning_rate": 0.00019292752194911464, + "loss": 1.8388, + "step": 1012 + }, + { + "epoch": 1.2, + "learning_rate": 0.00019291356740486123, + "loss": 1.9111, + "step": 1013 + }, + { + "epoch": 1.2, + "learning_rate": 0.00019289959961315986, + "loss": 1.5287, + "step": 1014 + }, + { + "epoch": 1.2, + "learning_rate": 0.000192885618576002, + "loss": 1.5669, + "step": 1015 + }, + { + "epoch": 1.2, + "learning_rate": 0.00019287162429538105, + "loss": 1.9095, + "step": 1016 + }, + { + "epoch": 1.21, + "learning_rate": 0.00019285761677329232, + "loss": 1.9133, + "step": 1017 + }, + { + "epoch": 1.21, + "learning_rate": 0.00019284359601173294, + "loss": 2.1099, + "step": 1018 + }, + { + "epoch": 1.21, + "learning_rate": 0.000192829562012702, + "loss": 1.6303, + "step": 1019 + }, + { + "epoch": 1.21, + "learning_rate": 0.00019281551477820036, + "loss": 1.5907, + "step": 1020 + }, + { + "epoch": 1.21, + "learning_rate": 0.00019280145431023097, + "loss": 1.4897, + "step": 1021 + }, + { + "epoch": 1.21, + "learning_rate": 0.00019278738061079845, + "loss": 1.7414, + "step": 1022 + }, + { + "epoch": 1.21, + "learning_rate": 0.00019277329368190942, + "loss": 1.816, + "step": 1023 + }, + { + "epoch": 1.21, + "learning_rate": 0.00019275919352557241, + "loss": 1.5033, + "step": 1024 + }, + { + "epoch": 1.21, + "learning_rate": 0.00019274508014379777, + "loss": 1.7923, + "step": 1025 + }, + { + "epoch": 1.22, + "learning_rate": 0.00019273095353859775, + "loss": 1.3094, + "step": 1026 + }, + { + "epoch": 1.22, + "learning_rate": 0.00019271681371198652, + "loss": 1.7689, + "step": 1027 + }, + { + "epoch": 1.22, + "learning_rate": 0.0001927026606659801, + "loss": 1.8019, + "step": 1028 + }, + { + "epoch": 1.22, + "learning_rate": 0.00019268849440259639, + "loss": 1.8818, + "step": 1029 + }, + { + "epoch": 1.22, + "learning_rate": 0.00019267431492385521, + "loss": 1.7442, + "step": 1030 + }, + { + "epoch": 1.22, + "learning_rate": 0.00019266012223177824, + "loss": 2.045, + "step": 1031 + }, + { + "epoch": 1.22, + "learning_rate": 0.00019264591632838903, + "loss": 1.7842, + "step": 1032 + }, + { + "epoch": 1.22, + "learning_rate": 0.00019263169721571308, + "loss": 1.5289, + "step": 1033 + }, + { + "epoch": 1.23, + "learning_rate": 0.00019261746489577765, + "loss": 1.6013, + "step": 1034 + }, + { + "epoch": 1.23, + "learning_rate": 0.00019260321937061202, + "loss": 1.7912, + "step": 1035 + }, + { + "epoch": 1.23, + "learning_rate": 0.0001925889606422473, + "loss": 1.7573, + "step": 1036 + }, + { + "epoch": 1.23, + "learning_rate": 0.0001925746887127164, + "loss": 1.7368, + "step": 1037 + }, + { + "epoch": 1.23, + "learning_rate": 0.00019256040358405424, + "loss": 1.7497, + "step": 1038 + }, + { + "epoch": 1.23, + "learning_rate": 0.00019254610525829758, + "loss": 2.0042, + "step": 1039 + }, + { + "epoch": 1.23, + "learning_rate": 0.00019253179373748504, + "loss": 2.0732, + "step": 1040 + }, + { + "epoch": 1.23, + "learning_rate": 0.00019251746902365708, + "loss": 1.8878, + "step": 1041 + }, + { + "epoch": 1.24, + "learning_rate": 0.00019250313111885618, + "loss": 1.9404, + "step": 1042 + }, + { + "epoch": 1.24, + "learning_rate": 0.00019248878002512654, + "loss": 1.5535, + "step": 1043 + }, + { + "epoch": 1.24, + "learning_rate": 0.00019247441574451432, + "loss": 1.9344, + "step": 1044 + }, + { + "epoch": 1.24, + "learning_rate": 0.0001924600382790676, + "loss": 1.9696, + "step": 1045 + }, + { + "epoch": 1.24, + "eval_loss": 2.064669609069824, + "eval_runtime": 283.003, + "eval_samples_per_second": 0.728, + "eval_steps_per_second": 0.728, + "step": 1045 + }, + { + "epoch": 1.24, + "learning_rate": 0.00019244564763083624, + "loss": 1.4577, + "step": 1046 + }, + { + "epoch": 1.24, + "learning_rate": 0.00019243124380187204, + "loss": 2.1324, + "step": 1047 + }, + { + "epoch": 1.24, + "learning_rate": 0.00019241682679422873, + "loss": 1.4713, + "step": 1048 + }, + { + "epoch": 1.24, + "learning_rate": 0.00019240239660996177, + "loss": 1.7455, + "step": 1049 + }, + { + "epoch": 1.24, + "learning_rate": 0.0001923879532511287, + "loss": 1.5372, + "step": 1050 + }, + { + "epoch": 1.25, + "learning_rate": 0.00019237349671978872, + "loss": 2.0984, + "step": 1051 + }, + { + "epoch": 1.25, + "learning_rate": 0.0001923590270180031, + "loss": 1.5023, + "step": 1052 + }, + { + "epoch": 1.25, + "learning_rate": 0.0001923445441478348, + "loss": 2.0826, + "step": 1053 + }, + { + "epoch": 1.25, + "learning_rate": 0.00019233004811134886, + "loss": 1.7448, + "step": 1054 + }, + { + "epoch": 1.25, + "learning_rate": 0.00019231553891061208, + "loss": 2.0249, + "step": 1055 + }, + { + "epoch": 1.25, + "learning_rate": 0.00019230101654769312, + "loss": 1.6144, + "step": 1056 + }, + { + "epoch": 1.25, + "learning_rate": 0.0001922864810246626, + "loss": 1.9193, + "step": 1057 + }, + { + "epoch": 1.25, + "learning_rate": 0.00019227193234359292, + "loss": 2.0057, + "step": 1058 + }, + { + "epoch": 1.26, + "learning_rate": 0.00019225737050655842, + "loss": 1.9493, + "step": 1059 + }, + { + "epoch": 1.26, + "learning_rate": 0.00019224279551563532, + "loss": 1.9545, + "step": 1060 + }, + { + "epoch": 1.26, + "learning_rate": 0.0001922282073729017, + "loss": 1.8983, + "step": 1061 + }, + { + "epoch": 1.26, + "learning_rate": 0.00019221360608043746, + "loss": 1.9414, + "step": 1062 + }, + { + "epoch": 1.26, + "learning_rate": 0.00019219899164032447, + "loss": 1.8471, + "step": 1063 + }, + { + "epoch": 1.26, + "learning_rate": 0.0001921843640546464, + "loss": 1.7568, + "step": 1064 + }, + { + "epoch": 1.26, + "learning_rate": 0.00019216972332548887, + "loss": 2.0737, + "step": 1065 + }, + { + "epoch": 1.26, + "learning_rate": 0.0001921550694549393, + "loss": 1.6109, + "step": 1066 + }, + { + "epoch": 1.27, + "learning_rate": 0.000192140402445087, + "loss": 1.6684, + "step": 1067 + }, + { + "epoch": 1.27, + "learning_rate": 0.0001921257222980232, + "loss": 1.5101, + "step": 1068 + }, + { + "epoch": 1.27, + "learning_rate": 0.00019211102901584094, + "loss": 1.5262, + "step": 1069 + }, + { + "epoch": 1.27, + "learning_rate": 0.0001920963226006352, + "loss": 1.9757, + "step": 1070 + }, + { + "epoch": 1.27, + "learning_rate": 0.00019208160305450272, + "loss": 2.038, + "step": 1071 + }, + { + "epoch": 1.27, + "learning_rate": 0.00019206687037954224, + "loss": 1.4755, + "step": 1072 + }, + { + "epoch": 1.27, + "learning_rate": 0.00019205212457785434, + "loss": 1.7406, + "step": 1073 + }, + { + "epoch": 1.27, + "learning_rate": 0.00019203736565154137, + "loss": 1.9564, + "step": 1074 + }, + { + "epoch": 1.27, + "learning_rate": 0.0001920225936027077, + "loss": 1.823, + "step": 1075 + }, + { + "epoch": 1.28, + "learning_rate": 0.0001920078084334595, + "loss": 1.8275, + "step": 1076 + }, + { + "epoch": 1.28, + "learning_rate": 0.0001919930101459048, + "loss": 1.7106, + "step": 1077 + }, + { + "epoch": 1.28, + "learning_rate": 0.00019197819874215347, + "loss": 1.5958, + "step": 1078 + }, + { + "epoch": 1.28, + "learning_rate": 0.00019196337422431735, + "loss": 2.1478, + "step": 1079 + }, + { + "epoch": 1.28, + "learning_rate": 0.0001919485365945101, + "loss": 1.7238, + "step": 1080 + }, + { + "epoch": 1.28, + "learning_rate": 0.00019193368585484718, + "loss": 2.0758, + "step": 1081 + }, + { + "epoch": 1.28, + "learning_rate": 0.000191918822007446, + "loss": 1.8403, + "step": 1082 + }, + { + "epoch": 1.28, + "learning_rate": 0.00019190394505442585, + "loss": 1.8286, + "step": 1083 + }, + { + "epoch": 1.29, + "learning_rate": 0.00019188905499790789, + "loss": 1.6992, + "step": 1084 + }, + { + "epoch": 1.29, + "learning_rate": 0.00019187415184001503, + "loss": 1.8512, + "step": 1085 + }, + { + "epoch": 1.29, + "learning_rate": 0.0001918592355828722, + "loss": 1.8236, + "step": 1086 + }, + { + "epoch": 1.29, + "learning_rate": 0.0001918443062286061, + "loss": 1.6173, + "step": 1087 + }, + { + "epoch": 1.29, + "learning_rate": 0.00019182936377934535, + "loss": 1.8593, + "step": 1088 + }, + { + "epoch": 1.29, + "learning_rate": 0.0001918144082372204, + "loss": 1.8184, + "step": 1089 + }, + { + "epoch": 1.29, + "learning_rate": 0.00019179943960436358, + "loss": 1.9655, + "step": 1090 + }, + { + "epoch": 1.29, + "learning_rate": 0.00019178445788290915, + "loss": 1.5858, + "step": 1091 + }, + { + "epoch": 1.3, + "learning_rate": 0.00019176946307499312, + "loss": 1.8359, + "step": 1092 + }, + { + "epoch": 1.3, + "learning_rate": 0.0001917544551827534, + "loss": 1.4354, + "step": 1093 + }, + { + "epoch": 1.3, + "learning_rate": 0.00019173943420832984, + "loss": 1.4312, + "step": 1094 + }, + { + "epoch": 1.3, + "learning_rate": 0.0001917244001538641, + "loss": 2.0024, + "step": 1095 + }, + { + "epoch": 1.3, + "learning_rate": 0.00019170935302149965, + "loss": 1.5994, + "step": 1096 + }, + { + "epoch": 1.3, + "learning_rate": 0.00019169429281338195, + "loss": 2.05, + "step": 1097 + }, + { + "epoch": 1.3, + "learning_rate": 0.00019167921953165825, + "loss": 1.8746, + "step": 1098 + }, + { + "epoch": 1.3, + "learning_rate": 0.00019166413317847763, + "loss": 2.0071, + "step": 1099 + }, + { + "epoch": 1.3, + "learning_rate": 0.00019164903375599112, + "loss": 2.0331, + "step": 1100 + }, + { + "epoch": 1.31, + "learning_rate": 0.00019163392126635154, + "loss": 1.3587, + "step": 1101 + }, + { + "epoch": 1.31, + "learning_rate": 0.00019161879571171362, + "loss": 1.6144, + "step": 1102 + }, + { + "epoch": 1.31, + "learning_rate": 0.00019160365709423388, + "loss": 1.4845, + "step": 1103 + }, + { + "epoch": 1.31, + "learning_rate": 0.00019158850541607083, + "loss": 1.4511, + "step": 1104 + }, + { + "epoch": 1.31, + "learning_rate": 0.00019157334067938474, + "loss": 1.8015, + "step": 1105 + }, + { + "epoch": 1.31, + "learning_rate": 0.00019155816288633776, + "loss": 1.5029, + "step": 1106 + }, + { + "epoch": 1.31, + "learning_rate": 0.00019154297203909394, + "loss": 1.7102, + "step": 1107 + }, + { + "epoch": 1.31, + "learning_rate": 0.00019152776813981912, + "loss": 1.6661, + "step": 1108 + }, + { + "epoch": 1.32, + "learning_rate": 0.0001915125511906811, + "loss": 1.5872, + "step": 1109 + }, + { + "epoch": 1.32, + "learning_rate": 0.00019149732119384943, + "loss": 1.7868, + "step": 1110 + }, + { + "epoch": 1.32, + "learning_rate": 0.0001914820781514956, + "loss": 1.6365, + "step": 1111 + }, + { + "epoch": 1.32, + "learning_rate": 0.0001914668220657929, + "loss": 2.3434, + "step": 1112 + }, + { + "epoch": 1.32, + "learning_rate": 0.0001914515529389166, + "loss": 1.6458, + "step": 1113 + }, + { + "epoch": 1.32, + "learning_rate": 0.0001914362707730437, + "loss": 1.7061, + "step": 1114 + }, + { + "epoch": 1.32, + "learning_rate": 0.00019142097557035308, + "loss": 1.8606, + "step": 1115 + }, + { + "epoch": 1.32, + "learning_rate": 0.00019140566733302552, + "loss": 1.9415, + "step": 1116 + }, + { + "epoch": 1.33, + "learning_rate": 0.00019139034606324362, + "loss": 1.7411, + "step": 1117 + }, + { + "epoch": 1.33, + "learning_rate": 0.00019137501176319193, + "loss": 1.9404, + "step": 1118 + }, + { + "epoch": 1.33, + "learning_rate": 0.0001913596644350567, + "loss": 1.802, + "step": 1119 + }, + { + "epoch": 1.33, + "learning_rate": 0.00019134430408102615, + "loss": 1.2244, + "step": 1120 + }, + { + "epoch": 1.33, + "learning_rate": 0.00019132893070329036, + "loss": 1.902, + "step": 1121 + }, + { + "epoch": 1.33, + "learning_rate": 0.0001913135443040412, + "loss": 1.4578, + "step": 1122 + }, + { + "epoch": 1.33, + "learning_rate": 0.00019129814488547247, + "loss": 1.6816, + "step": 1123 + }, + { + "epoch": 1.33, + "learning_rate": 0.0001912827324497798, + "loss": 1.7293, + "step": 1124 + }, + { + "epoch": 1.33, + "learning_rate": 0.00019126730699916061, + "loss": 1.6344, + "step": 1125 + }, + { + "epoch": 1.34, + "learning_rate": 0.0001912518685358143, + "loss": 1.6819, + "step": 1126 + }, + { + "epoch": 1.34, + "learning_rate": 0.00019123641706194199, + "loss": 1.6761, + "step": 1127 + }, + { + "epoch": 1.34, + "learning_rate": 0.00019122095257974677, + "loss": 1.9222, + "step": 1128 + }, + { + "epoch": 1.34, + "learning_rate": 0.00019120547509143354, + "loss": 1.6117, + "step": 1129 + }, + { + "epoch": 1.34, + "learning_rate": 0.00019118998459920902, + "loss": 1.688, + "step": 1130 + }, + { + "epoch": 1.34, + "learning_rate": 0.00019117448110528184, + "loss": 1.8383, + "step": 1131 + }, + { + "epoch": 1.34, + "learning_rate": 0.00019115896461186245, + "loss": 1.5225, + "step": 1132 + }, + { + "epoch": 1.34, + "learning_rate": 0.00019114343512116318, + "loss": 2.0376, + "step": 1133 + }, + { + "epoch": 1.35, + "learning_rate": 0.00019112789263539813, + "loss": 1.5632, + "step": 1134 + }, + { + "epoch": 1.35, + "learning_rate": 0.00019111233715678343, + "loss": 1.7049, + "step": 1135 + }, + { + "epoch": 1.35, + "learning_rate": 0.0001910967686875369, + "loss": 1.4992, + "step": 1136 + }, + { + "epoch": 1.35, + "learning_rate": 0.00019108118722987826, + "loss": 1.7949, + "step": 1137 + }, + { + "epoch": 1.35, + "learning_rate": 0.00019106559278602903, + "loss": 1.4688, + "step": 1138 + }, + { + "epoch": 1.35, + "learning_rate": 0.00019104998535821274, + "loss": 1.4031, + "step": 1139 + }, + { + "epoch": 1.35, + "learning_rate": 0.0001910343649486546, + "loss": 2.1757, + "step": 1140 + }, + { + "epoch": 1.35, + "learning_rate": 0.00019101873155958179, + "loss": 1.622, + "step": 1141 + }, + { + "epoch": 1.36, + "learning_rate": 0.00019100308519322322, + "loss": 1.9441, + "step": 1142 + }, + { + "epoch": 1.36, + "learning_rate": 0.0001909874258518098, + "loss": 1.8065, + "step": 1143 + }, + { + "epoch": 1.36, + "learning_rate": 0.00019097175353757417, + "loss": 1.8348, + "step": 1144 + }, + { + "epoch": 1.36, + "learning_rate": 0.00019095606825275083, + "loss": 2.0519, + "step": 1145 + }, + { + "epoch": 1.36, + "learning_rate": 0.00019094036999957624, + "loss": 1.9172, + "step": 1146 + }, + { + "epoch": 1.36, + "learning_rate": 0.00019092465878028854, + "loss": 1.9961, + "step": 1147 + }, + { + "epoch": 1.36, + "learning_rate": 0.00019090893459712787, + "loss": 2.1239, + "step": 1148 + }, + { + "epoch": 1.36, + "learning_rate": 0.00019089319745233611, + "loss": 1.3481, + "step": 1149 + }, + { + "epoch": 1.36, + "learning_rate": 0.00019087744734815708, + "loss": 1.5035, + "step": 1150 + }, + { + "epoch": 1.37, + "learning_rate": 0.00019086168428683638, + "loss": 1.818, + "step": 1151 + }, + { + "epoch": 1.37, + "learning_rate": 0.00019084590827062145, + "loss": 2.0481, + "step": 1152 + }, + { + "epoch": 1.37, + "learning_rate": 0.00019083011930176165, + "loss": 1.4444, + "step": 1153 + }, + { + "epoch": 1.37, + "learning_rate": 0.00019081431738250814, + "loss": 1.6059, + "step": 1154 + }, + { + "epoch": 1.37, + "learning_rate": 0.0001907985025151139, + "loss": 2.0284, + "step": 1155 + }, + { + "epoch": 1.37, + "learning_rate": 0.0001907826747018338, + "loss": 1.8603, + "step": 1156 + }, + { + "epoch": 1.37, + "learning_rate": 0.00019076683394492455, + "loss": 1.7189, + "step": 1157 + }, + { + "epoch": 1.37, + "learning_rate": 0.00019075098024664468, + "loss": 1.7497, + "step": 1158 + }, + { + "epoch": 1.38, + "learning_rate": 0.00019073511360925458, + "loss": 1.7489, + "step": 1159 + }, + { + "epoch": 1.38, + "learning_rate": 0.0001907192340350165, + "loss": 1.6059, + "step": 1160 + }, + { + "epoch": 1.38, + "learning_rate": 0.00019070334152619453, + "loss": 1.4407, + "step": 1161 + }, + { + "epoch": 1.38, + "learning_rate": 0.00019068743608505455, + "loss": 1.7025, + "step": 1162 + }, + { + "epoch": 1.38, + "learning_rate": 0.00019067151771386438, + "loss": 1.7921, + "step": 1163 + }, + { + "epoch": 1.38, + "learning_rate": 0.0001906555864148936, + "loss": 1.6147, + "step": 1164 + }, + { + "epoch": 1.38, + "learning_rate": 0.0001906396421904137, + "loss": 1.6192, + "step": 1165 + }, + { + "epoch": 1.38, + "learning_rate": 0.00019062368504269795, + "loss": 1.4341, + "step": 1166 + }, + { + "epoch": 1.39, + "learning_rate": 0.00019060771497402147, + "loss": 1.3054, + "step": 1167 + }, + { + "epoch": 1.39, + "learning_rate": 0.0001905917319866613, + "loss": 2.041, + "step": 1168 + }, + { + "epoch": 1.39, + "learning_rate": 0.00019057573608289623, + "loss": 2.004, + "step": 1169 + }, + { + "epoch": 1.39, + "learning_rate": 0.00019055972726500695, + "loss": 1.4002, + "step": 1170 + }, + { + "epoch": 1.39, + "learning_rate": 0.00019054370553527595, + "loss": 1.5554, + "step": 1171 + }, + { + "epoch": 1.39, + "learning_rate": 0.00019052767089598754, + "loss": 1.9783, + "step": 1172 + }, + { + "epoch": 1.39, + "learning_rate": 0.000190511623349428, + "loss": 1.7443, + "step": 1173 + }, + { + "epoch": 1.39, + "learning_rate": 0.00019049556289788528, + "loss": 1.6089, + "step": 1174 + }, + { + "epoch": 1.39, + "learning_rate": 0.0001904794895436493, + "loss": 1.8784, + "step": 1175 + }, + { + "epoch": 1.4, + "learning_rate": 0.0001904634032890117, + "loss": 2.0985, + "step": 1176 + }, + { + "epoch": 1.4, + "learning_rate": 0.0001904473041362661, + "loss": 1.811, + "step": 1177 + }, + { + "epoch": 1.4, + "learning_rate": 0.00019043119208770793, + "loss": 1.407, + "step": 1178 + }, + { + "epoch": 1.4, + "learning_rate": 0.0001904150671456343, + "loss": 1.7269, + "step": 1179 + }, + { + "epoch": 1.4, + "learning_rate": 0.00019039892931234435, + "loss": 1.8374, + "step": 1180 + }, + { + "epoch": 1.4, + "learning_rate": 0.00019038277859013896, + "loss": 1.583, + "step": 1181 + }, + { + "epoch": 1.4, + "learning_rate": 0.00019036661498132086, + "loss": 1.6407, + "step": 1182 + }, + { + "epoch": 1.4, + "learning_rate": 0.00019035043848819464, + "loss": 2.0828, + "step": 1183 + }, + { + "epoch": 1.41, + "learning_rate": 0.00019033424911306672, + "loss": 1.7067, + "step": 1184 + }, + { + "epoch": 1.41, + "learning_rate": 0.00019031804685824534, + "loss": 1.55, + "step": 1185 + }, + { + "epoch": 1.41, + "learning_rate": 0.0001903018317260406, + "loss": 1.7573, + "step": 1186 + }, + { + "epoch": 1.41, + "learning_rate": 0.00019028560371876446, + "loss": 1.5666, + "step": 1187 + }, + { + "epoch": 1.41, + "learning_rate": 0.0001902693628387306, + "loss": 1.5192, + "step": 1188 + }, + { + "epoch": 1.41, + "learning_rate": 0.00019025310908825466, + "loss": 2.0093, + "step": 1189 + }, + { + "epoch": 1.41, + "learning_rate": 0.00019023684246965406, + "loss": 1.8414, + "step": 1190 + }, + { + "epoch": 1.41, + "learning_rate": 0.00019022056298524808, + "loss": 1.3696, + "step": 1191 + }, + { + "epoch": 1.42, + "learning_rate": 0.00019020427063735782, + "loss": 1.6336, + "step": 1192 + }, + { + "epoch": 1.42, + "learning_rate": 0.00019018796542830617, + "loss": 1.8528, + "step": 1193 + }, + { + "epoch": 1.42, + "learning_rate": 0.00019017164736041795, + "loss": 2.0523, + "step": 1194 + }, + { + "epoch": 1.42, + "learning_rate": 0.00019015531643601973, + "loss": 1.7526, + "step": 1195 + }, + { + "epoch": 1.42, + "learning_rate": 0.00019013897265743998, + "loss": 1.8391, + "step": 1196 + }, + { + "epoch": 1.42, + "learning_rate": 0.00019012261602700892, + "loss": 1.4257, + "step": 1197 + }, + { + "epoch": 1.42, + "learning_rate": 0.00019010624654705867, + "loss": 2.0911, + "step": 1198 + }, + { + "epoch": 1.42, + "learning_rate": 0.0001900898642199232, + "loss": 1.7578, + "step": 1199 + }, + { + "epoch": 1.42, + "learning_rate": 0.00019007346904793818, + "loss": 1.9003, + "step": 1200 + }, + { + "epoch": 1.43, + "learning_rate": 0.0001900570610334413, + "loss": 1.3918, + "step": 1201 + }, + { + "epoch": 1.43, + "learning_rate": 0.0001900406401787719, + "loss": 2.0365, + "step": 1202 + }, + { + "epoch": 1.43, + "learning_rate": 0.00019002420648627131, + "loss": 1.5184, + "step": 1203 + }, + { + "epoch": 1.43, + "learning_rate": 0.00019000775995828254, + "loss": 1.6412, + "step": 1204 + }, + { + "epoch": 1.43, + "learning_rate": 0.00018999130059715058, + "loss": 1.5031, + "step": 1205 + }, + { + "epoch": 1.43, + "learning_rate": 0.00018997482840522217, + "loss": 1.4421, + "step": 1206 + }, + { + "epoch": 1.43, + "learning_rate": 0.00018995834338484584, + "loss": 1.9431, + "step": 1207 + }, + { + "epoch": 1.43, + "learning_rate": 0.000189941845538372, + "loss": 1.8141, + "step": 1208 + }, + { + "epoch": 1.44, + "learning_rate": 0.0001899253348681529, + "loss": 1.7289, + "step": 1209 + }, + { + "epoch": 1.44, + "learning_rate": 0.00018990881137654258, + "loss": 1.7217, + "step": 1210 + }, + { + "epoch": 1.44, + "learning_rate": 0.000189892275065897, + "loss": 2.3727, + "step": 1211 + }, + { + "epoch": 1.44, + "learning_rate": 0.00018987572593857381, + "loss": 1.4833, + "step": 1212 + }, + { + "epoch": 1.44, + "learning_rate": 0.00018985916399693256, + "loss": 2.13, + "step": 1213 + }, + { + "epoch": 1.44, + "learning_rate": 0.00018984258924333464, + "loss": 1.875, + "step": 1214 + }, + { + "epoch": 1.44, + "learning_rate": 0.00018982600168014323, + "loss": 1.783, + "step": 1215 + }, + { + "epoch": 1.44, + "learning_rate": 0.00018980940130972337, + "loss": 1.6815, + "step": 1216 + }, + { + "epoch": 1.45, + "learning_rate": 0.0001897927881344419, + "loss": 2.049, + "step": 1217 + }, + { + "epoch": 1.45, + "learning_rate": 0.00018977616215666752, + "loss": 1.918, + "step": 1218 + }, + { + "epoch": 1.45, + "learning_rate": 0.0001897595233787707, + "loss": 1.5824, + "step": 1219 + }, + { + "epoch": 1.45, + "learning_rate": 0.00018974287180312377, + "loss": 1.7473, + "step": 1220 + }, + { + "epoch": 1.45, + "learning_rate": 0.00018972620743210093, + "loss": 1.6915, + "step": 1221 + }, + { + "epoch": 1.45, + "learning_rate": 0.0001897095302680781, + "loss": 1.7633, + "step": 1222 + }, + { + "epoch": 1.45, + "learning_rate": 0.00018969284031343308, + "loss": 1.6921, + "step": 1223 + }, + { + "epoch": 1.45, + "learning_rate": 0.00018967613757054554, + "loss": 1.5433, + "step": 1224 + }, + { + "epoch": 1.45, + "learning_rate": 0.00018965942204179686, + "loss": 1.9389, + "step": 1225 + }, + { + "epoch": 1.46, + "learning_rate": 0.00018964269372957038, + "loss": 1.5625, + "step": 1226 + }, + { + "epoch": 1.46, + "learning_rate": 0.00018962595263625115, + "loss": 1.4835, + "step": 1227 + }, + { + "epoch": 1.46, + "learning_rate": 0.00018960919876422611, + "loss": 1.8479, + "step": 1228 + }, + { + "epoch": 1.46, + "learning_rate": 0.00018959243211588397, + "loss": 1.7861, + "step": 1229 + }, + { + "epoch": 1.46, + "learning_rate": 0.00018957565269361531, + "loss": 1.867, + "step": 1230 + }, + { + "epoch": 1.46, + "learning_rate": 0.00018955886049981245, + "loss": 1.9383, + "step": 1231 + }, + { + "epoch": 1.46, + "learning_rate": 0.0001895420555368697, + "loss": 1.755, + "step": 1232 + }, + { + "epoch": 1.46, + "learning_rate": 0.000189525237807183, + "loss": 1.5166, + "step": 1233 + }, + { + "epoch": 1.47, + "learning_rate": 0.00018950840731315024, + "loss": 1.8629, + "step": 1234 + }, + { + "epoch": 1.47, + "learning_rate": 0.000189491564057171, + "loss": 1.6845, + "step": 1235 + }, + { + "epoch": 1.47, + "learning_rate": 0.00018947470804164685, + "loss": 1.4748, + "step": 1236 + }, + { + "epoch": 1.47, + "learning_rate": 0.00018945783926898105, + "loss": 1.8907, + "step": 1237 + }, + { + "epoch": 1.47, + "learning_rate": 0.00018944095774157873, + "loss": 1.5758, + "step": 1238 + }, + { + "epoch": 1.47, + "learning_rate": 0.00018942406346184683, + "loss": 1.6367, + "step": 1239 + }, + { + "epoch": 1.47, + "learning_rate": 0.00018940715643219407, + "loss": 1.7285, + "step": 1240 + }, + { + "epoch": 1.47, + "learning_rate": 0.00018939023665503108, + "loss": 1.5714, + "step": 1241 + }, + { + "epoch": 1.48, + "learning_rate": 0.0001893733041327702, + "loss": 1.9308, + "step": 1242 + }, + { + "epoch": 1.48, + "learning_rate": 0.00018935635886782568, + "loss": 1.9153, + "step": 1243 + }, + { + "epoch": 1.48, + "learning_rate": 0.00018933940086261351, + "loss": 1.8009, + "step": 1244 + }, + { + "epoch": 1.48, + "learning_rate": 0.00018932243011955154, + "loss": 1.7392, + "step": 1245 + }, + { + "epoch": 1.48, + "learning_rate": 0.00018930544664105944, + "loss": 1.821, + "step": 1246 + }, + { + "epoch": 1.48, + "learning_rate": 0.0001892884504295587, + "loss": 1.475, + "step": 1247 + }, + { + "epoch": 1.48, + "learning_rate": 0.00018927144148747255, + "loss": 1.8937, + "step": 1248 + }, + { + "epoch": 1.48, + "learning_rate": 0.00018925441981722618, + "loss": 1.6958, + "step": 1249 + }, + { + "epoch": 1.48, + "learning_rate": 0.00018923738542124644, + "loss": 1.6836, + "step": 1250 + }, + { + "epoch": 1.49, + "learning_rate": 0.00018922033830196208, + "loss": 2.0213, + "step": 1251 + }, + { + "epoch": 1.49, + "learning_rate": 0.00018920327846180365, + "loss": 1.9572, + "step": 1252 + }, + { + "epoch": 1.49, + "learning_rate": 0.00018918620590320352, + "loss": 1.9449, + "step": 1253 + }, + { + "epoch": 1.49, + "learning_rate": 0.00018916912062859583, + "loss": 1.7297, + "step": 1254 + }, + { + "epoch": 1.49, + "eval_loss": 2.0551259517669678, + "eval_runtime": 283.8338, + "eval_samples_per_second": 0.726, + "eval_steps_per_second": 0.726, + "step": 1254 + }, + { + "epoch": 1.49, + "learning_rate": 0.00018915202264041664, + "loss": 1.8158, + "step": 1255 + }, + { + "epoch": 1.49, + "learning_rate": 0.0001891349119411037, + "loss": 1.921, + "step": 1256 + }, + { + "epoch": 1.49, + "learning_rate": 0.00018911778853309658, + "loss": 1.5726, + "step": 1257 + }, + { + "epoch": 1.49, + "learning_rate": 0.0001891006524188368, + "loss": 1.6641, + "step": 1258 + }, + { + "epoch": 1.5, + "learning_rate": 0.00018908350360076752, + "loss": 1.5841, + "step": 1259 + }, + { + "epoch": 1.5, + "learning_rate": 0.00018906634208133385, + "loss": 1.8567, + "step": 1260 + }, + { + "epoch": 1.5, + "learning_rate": 0.00018904916786298257, + "loss": 1.5584, + "step": 1261 + }, + { + "epoch": 1.5, + "learning_rate": 0.00018903198094816242, + "loss": 1.6615, + "step": 1262 + }, + { + "epoch": 1.5, + "learning_rate": 0.00018901478133932385, + "loss": 1.7477, + "step": 1263 + }, + { + "epoch": 1.5, + "learning_rate": 0.00018899756903891914, + "loss": 1.3796, + "step": 1264 + }, + { + "epoch": 1.5, + "learning_rate": 0.00018898034404940238, + "loss": 1.7991, + "step": 1265 + }, + { + "epoch": 1.5, + "learning_rate": 0.00018896310637322953, + "loss": 1.4944, + "step": 1266 + }, + { + "epoch": 1.51, + "learning_rate": 0.00018894585601285827, + "loss": 1.5719, + "step": 1267 + }, + { + "epoch": 1.51, + "learning_rate": 0.00018892859297074812, + "loss": 1.5495, + "step": 1268 + }, + { + "epoch": 1.51, + "learning_rate": 0.00018891131724936043, + "loss": 1.7611, + "step": 1269 + }, + { + "epoch": 1.51, + "learning_rate": 0.00018889402885115833, + "loss": 1.5991, + "step": 1270 + }, + { + "epoch": 1.51, + "learning_rate": 0.00018887672777860676, + "loss": 1.8849, + "step": 1271 + }, + { + "epoch": 1.51, + "learning_rate": 0.0001888594140341725, + "loss": 1.6136, + "step": 1272 + }, + { + "epoch": 1.51, + "learning_rate": 0.0001888420876203241, + "loss": 1.8288, + "step": 1273 + }, + { + "epoch": 1.51, + "learning_rate": 0.0001888247485395319, + "loss": 1.6625, + "step": 1274 + }, + { + "epoch": 1.52, + "learning_rate": 0.00018880739679426816, + "loss": 1.49, + "step": 1275 + }, + { + "epoch": 1.52, + "learning_rate": 0.00018879003238700675, + "loss": 1.874, + "step": 1276 + }, + { + "epoch": 1.52, + "learning_rate": 0.00018877265532022352, + "loss": 1.751, + "step": 1277 + }, + { + "epoch": 1.52, + "learning_rate": 0.00018875526559639604, + "loss": 1.9882, + "step": 1278 + }, + { + "epoch": 1.52, + "learning_rate": 0.00018873786321800374, + "loss": 1.5214, + "step": 1279 + }, + { + "epoch": 1.52, + "learning_rate": 0.0001887204481875278, + "loss": 1.741, + "step": 1280 + }, + { + "epoch": 1.52, + "learning_rate": 0.00018870302050745118, + "loss": 1.7798, + "step": 1281 + }, + { + "epoch": 1.52, + "learning_rate": 0.00018868558018025878, + "loss": 1.9258, + "step": 1282 + }, + { + "epoch": 1.52, + "learning_rate": 0.0001886681272084371, + "loss": 1.9096, + "step": 1283 + }, + { + "epoch": 1.53, + "learning_rate": 0.00018865066159447466, + "loss": 1.6729, + "step": 1284 + }, + { + "epoch": 1.53, + "learning_rate": 0.00018863318334086157, + "loss": 1.6239, + "step": 1285 + }, + { + "epoch": 1.53, + "learning_rate": 0.00018861569245008994, + "loss": 1.9857, + "step": 1286 + }, + { + "epoch": 1.53, + "learning_rate": 0.00018859818892465354, + "loss": 1.9905, + "step": 1287 + }, + { + "epoch": 1.53, + "learning_rate": 0.000188580672767048, + "loss": 2.0073, + "step": 1288 + }, + { + "epoch": 1.53, + "learning_rate": 0.00018856314397977075, + "loss": 1.7109, + "step": 1289 + }, + { + "epoch": 1.53, + "learning_rate": 0.000188545602565321, + "loss": 1.3727, + "step": 1290 + }, + { + "epoch": 1.53, + "learning_rate": 0.00018852804852619975, + "loss": 1.7045, + "step": 1291 + }, + { + "epoch": 1.54, + "learning_rate": 0.00018851048186490992, + "loss": 1.9042, + "step": 1292 + }, + { + "epoch": 1.54, + "learning_rate": 0.00018849290258395602, + "loss": 1.7174, + "step": 1293 + }, + { + "epoch": 1.54, + "learning_rate": 0.00018847531068584452, + "loss": 1.6502, + "step": 1294 + }, + { + "epoch": 1.54, + "learning_rate": 0.00018845770617308366, + "loss": 1.8582, + "step": 1295 + }, + { + "epoch": 1.54, + "learning_rate": 0.0001884400890481834, + "loss": 1.4846, + "step": 1296 + }, + { + "epoch": 1.54, + "learning_rate": 0.00018842245931365562, + "loss": 1.5428, + "step": 1297 + }, + { + "epoch": 1.54, + "learning_rate": 0.00018840481697201392, + "loss": 1.7266, + "step": 1298 + }, + { + "epoch": 1.54, + "learning_rate": 0.0001883871620257737, + "loss": 1.9324, + "step": 1299 + }, + { + "epoch": 1.55, + "learning_rate": 0.00018836949447745215, + "loss": 1.577, + "step": 1300 + }, + { + "epoch": 1.55, + "learning_rate": 0.0001883518143295683, + "loss": 1.6388, + "step": 1301 + }, + { + "epoch": 1.55, + "learning_rate": 0.00018833412158464298, + "loss": 1.9201, + "step": 1302 + }, + { + "epoch": 1.55, + "learning_rate": 0.00018831641624519877, + "loss": 1.6478, + "step": 1303 + }, + { + "epoch": 1.55, + "learning_rate": 0.00018829869831376005, + "loss": 1.6826, + "step": 1304 + }, + { + "epoch": 1.55, + "learning_rate": 0.00018828096779285303, + "loss": 1.8513, + "step": 1305 + }, + { + "epoch": 1.55, + "learning_rate": 0.00018826322468500566, + "loss": 1.571, + "step": 1306 + }, + { + "epoch": 1.55, + "learning_rate": 0.00018824546899274777, + "loss": 1.1602, + "step": 1307 + }, + { + "epoch": 1.55, + "learning_rate": 0.0001882277007186109, + "loss": 1.9998, + "step": 1308 + }, + { + "epoch": 1.56, + "learning_rate": 0.0001882099198651284, + "loss": 1.7034, + "step": 1309 + }, + { + "epoch": 1.56, + "learning_rate": 0.0001881921264348355, + "loss": 1.4031, + "step": 1310 + }, + { + "epoch": 1.56, + "learning_rate": 0.00018817432043026911, + "loss": 1.8413, + "step": 1311 + }, + { + "epoch": 1.56, + "learning_rate": 0.00018815650185396797, + "loss": 1.6606, + "step": 1312 + }, + { + "epoch": 1.56, + "learning_rate": 0.00018813867070847264, + "loss": 1.5792, + "step": 1313 + }, + { + "epoch": 1.56, + "learning_rate": 0.00018812082699632546, + "loss": 1.4525, + "step": 1314 + }, + { + "epoch": 1.56, + "learning_rate": 0.00018810297072007054, + "loss": 1.4906, + "step": 1315 + }, + { + "epoch": 1.56, + "learning_rate": 0.00018808510188225377, + "loss": 1.6284, + "step": 1316 + }, + { + "epoch": 1.57, + "learning_rate": 0.0001880672204854229, + "loss": 1.7281, + "step": 1317 + }, + { + "epoch": 1.57, + "learning_rate": 0.0001880493265321274, + "loss": 1.5345, + "step": 1318 + }, + { + "epoch": 1.57, + "learning_rate": 0.00018803142002491856, + "loss": 2.0933, + "step": 1319 + }, + { + "epoch": 1.57, + "learning_rate": 0.00018801350096634946, + "loss": 1.9372, + "step": 1320 + }, + { + "epoch": 1.57, + "learning_rate": 0.000187995569358975, + "loss": 1.7151, + "step": 1321 + }, + { + "epoch": 1.57, + "learning_rate": 0.00018797762520535177, + "loss": 1.4823, + "step": 1322 + }, + { + "epoch": 1.57, + "learning_rate": 0.0001879596685080383, + "loss": 2.0495, + "step": 1323 + }, + { + "epoch": 1.57, + "learning_rate": 0.00018794169926959474, + "loss": 2.2966, + "step": 1324 + }, + { + "epoch": 1.58, + "learning_rate": 0.00018792371749258314, + "loss": 1.7868, + "step": 1325 + }, + { + "epoch": 1.58, + "learning_rate": 0.00018790572317956735, + "loss": 1.9403, + "step": 1326 + }, + { + "epoch": 1.58, + "learning_rate": 0.00018788771633311292, + "loss": 1.6687, + "step": 1327 + }, + { + "epoch": 1.58, + "learning_rate": 0.00018786969695578723, + "loss": 1.8422, + "step": 1328 + }, + { + "epoch": 1.58, + "learning_rate": 0.00018785166505015948, + "loss": 1.5916, + "step": 1329 + }, + { + "epoch": 1.58, + "learning_rate": 0.00018783362061880062, + "loss": 1.9119, + "step": 1330 + }, + { + "epoch": 1.58, + "learning_rate": 0.00018781556366428336, + "loss": 1.4903, + "step": 1331 + }, + { + "epoch": 1.58, + "learning_rate": 0.00018779749418918227, + "loss": 1.9497, + "step": 1332 + }, + { + "epoch": 1.58, + "learning_rate": 0.00018777941219607364, + "loss": 1.9462, + "step": 1333 + }, + { + "epoch": 1.59, + "learning_rate": 0.00018776131768753556, + "loss": 2.0474, + "step": 1334 + }, + { + "epoch": 1.59, + "learning_rate": 0.00018774321066614795, + "loss": 1.4474, + "step": 1335 + }, + { + "epoch": 1.59, + "learning_rate": 0.00018772509113449245, + "loss": 1.8315, + "step": 1336 + }, + { + "epoch": 1.59, + "learning_rate": 0.00018770695909515247, + "loss": 1.7684, + "step": 1337 + }, + { + "epoch": 1.59, + "learning_rate": 0.00018768881455071332, + "loss": 1.2675, + "step": 1338 + }, + { + "epoch": 1.59, + "learning_rate": 0.000187670657503762, + "loss": 1.8226, + "step": 1339 + }, + { + "epoch": 1.59, + "learning_rate": 0.00018765248795688726, + "loss": 2.2112, + "step": 1340 + }, + { + "epoch": 1.59, + "learning_rate": 0.0001876343059126797, + "loss": 1.3627, + "step": 1341 + }, + { + "epoch": 1.6, + "learning_rate": 0.00018761611137373173, + "loss": 2.1488, + "step": 1342 + }, + { + "epoch": 1.6, + "learning_rate": 0.00018759790434263744, + "loss": 1.9842, + "step": 1343 + }, + { + "epoch": 1.6, + "learning_rate": 0.00018757968482199276, + "loss": 1.9775, + "step": 1344 + }, + { + "epoch": 1.6, + "learning_rate": 0.00018756145281439545, + "loss": 1.6835, + "step": 1345 + }, + { + "epoch": 1.6, + "learning_rate": 0.0001875432083224449, + "loss": 1.5272, + "step": 1346 + }, + { + "epoch": 1.6, + "learning_rate": 0.0001875249513487425, + "loss": 1.7539, + "step": 1347 + }, + { + "epoch": 1.6, + "learning_rate": 0.00018750668189589117, + "loss": 1.874, + "step": 1348 + }, + { + "epoch": 1.6, + "learning_rate": 0.00018748839996649583, + "loss": 1.5858, + "step": 1349 + }, + { + "epoch": 1.61, + "learning_rate": 0.00018747010556316305, + "loss": 1.9298, + "step": 1350 + }, + { + "epoch": 1.61, + "learning_rate": 0.0001874517986885012, + "loss": 1.5079, + "step": 1351 + }, + { + "epoch": 1.61, + "learning_rate": 0.00018743347934512046, + "loss": 1.884, + "step": 1352 + }, + { + "epoch": 1.61, + "learning_rate": 0.00018741514753563277, + "loss": 1.7978, + "step": 1353 + }, + { + "epoch": 1.61, + "learning_rate": 0.0001873968032626518, + "loss": 1.7735, + "step": 1354 + }, + { + "epoch": 1.61, + "learning_rate": 0.00018737844652879312, + "loss": 1.7227, + "step": 1355 + }, + { + "epoch": 1.61, + "learning_rate": 0.00018736007733667393, + "loss": 1.8458, + "step": 1356 + }, + { + "epoch": 1.61, + "learning_rate": 0.00018734169568891334, + "loss": 1.3268, + "step": 1357 + }, + { + "epoch": 1.61, + "learning_rate": 0.0001873233015881321, + "loss": 1.3782, + "step": 1358 + }, + { + "epoch": 1.62, + "learning_rate": 0.00018730489503695287, + "loss": 1.9614, + "step": 1359 + }, + { + "epoch": 1.62, + "learning_rate": 0.00018728647603800003, + "loss": 1.7755, + "step": 1360 + }, + { + "epoch": 1.62, + "learning_rate": 0.00018726804459389963, + "loss": 1.7961, + "step": 1361 + }, + { + "epoch": 1.62, + "learning_rate": 0.00018724960070727972, + "loss": 1.7158, + "step": 1362 + }, + { + "epoch": 1.62, + "learning_rate": 0.0001872311443807699, + "loss": 1.6303, + "step": 1363 + }, + { + "epoch": 1.62, + "learning_rate": 0.0001872126756170017, + "loss": 1.8734, + "step": 1364 + }, + { + "epoch": 1.62, + "learning_rate": 0.00018719419441860834, + "loss": 1.5143, + "step": 1365 + }, + { + "epoch": 1.62, + "learning_rate": 0.0001871757007882248, + "loss": 1.498, + "step": 1366 + }, + { + "epoch": 1.63, + "learning_rate": 0.0001871571947284879, + "loss": 1.0886, + "step": 1367 + }, + { + "epoch": 1.63, + "learning_rate": 0.00018713867624203621, + "loss": 1.6633, + "step": 1368 + }, + { + "epoch": 1.63, + "learning_rate": 0.00018712014533151008, + "loss": 1.8895, + "step": 1369 + }, + { + "epoch": 1.63, + "learning_rate": 0.00018710160199955156, + "loss": 1.4178, + "step": 1370 + }, + { + "epoch": 1.63, + "learning_rate": 0.00018708304624880456, + "loss": 1.6814, + "step": 1371 + }, + { + "epoch": 1.63, + "learning_rate": 0.0001870644780819147, + "loss": 1.8671, + "step": 1372 + }, + { + "epoch": 1.63, + "learning_rate": 0.00018704589750152944, + "loss": 1.4786, + "step": 1373 + }, + { + "epoch": 1.63, + "learning_rate": 0.00018702730451029796, + "loss": 1.8622, + "step": 1374 + }, + { + "epoch": 1.64, + "learning_rate": 0.00018700869911087115, + "loss": 1.8891, + "step": 1375 + }, + { + "epoch": 1.64, + "learning_rate": 0.0001869900813059018, + "loss": 2.0493, + "step": 1376 + }, + { + "epoch": 1.64, + "learning_rate": 0.00018697145109804436, + "loss": 1.7238, + "step": 1377 + }, + { + "epoch": 1.64, + "learning_rate": 0.00018695280848995513, + "loss": 1.7826, + "step": 1378 + }, + { + "epoch": 1.64, + "learning_rate": 0.0001869341534842921, + "loss": 1.8557, + "step": 1379 + }, + { + "epoch": 1.64, + "learning_rate": 0.0001869154860837151, + "loss": 1.7492, + "step": 1380 + }, + { + "epoch": 1.64, + "learning_rate": 0.0001868968062908857, + "loss": 1.7441, + "step": 1381 + }, + { + "epoch": 1.64, + "learning_rate": 0.0001868781141084672, + "loss": 1.8322, + "step": 1382 + }, + { + "epoch": 1.64, + "learning_rate": 0.0001868594095391247, + "loss": 1.8177, + "step": 1383 + }, + { + "epoch": 1.65, + "learning_rate": 0.00018684069258552508, + "loss": 2.0001, + "step": 1384 + }, + { + "epoch": 1.65, + "learning_rate": 0.00018682196325033696, + "loss": 1.5046, + "step": 1385 + }, + { + "epoch": 1.65, + "learning_rate": 0.00018680322153623075, + "loss": 1.6789, + "step": 1386 + }, + { + "epoch": 1.65, + "learning_rate": 0.0001867844674458786, + "loss": 1.6951, + "step": 1387 + }, + { + "epoch": 1.65, + "learning_rate": 0.00018676570098195443, + "loss": 2.0334, + "step": 1388 + }, + { + "epoch": 1.65, + "learning_rate": 0.00018674692214713388, + "loss": 1.7833, + "step": 1389 + }, + { + "epoch": 1.65, + "learning_rate": 0.0001867281309440945, + "loss": 1.82, + "step": 1390 + }, + { + "epoch": 1.65, + "learning_rate": 0.00018670932737551547, + "loss": 1.8155, + "step": 1391 + }, + { + "epoch": 1.66, + "learning_rate": 0.00018669051144407775, + "loss": 1.7912, + "step": 1392 + }, + { + "epoch": 1.66, + "learning_rate": 0.00018667168315246406, + "loss": 1.5816, + "step": 1393 + }, + { + "epoch": 1.66, + "learning_rate": 0.00018665284250335895, + "loss": 1.7521, + "step": 1394 + }, + { + "epoch": 1.66, + "learning_rate": 0.00018663398949944865, + "loss": 1.4287, + "step": 1395 + }, + { + "epoch": 1.66, + "learning_rate": 0.00018661512414342127, + "loss": 1.6026, + "step": 1396 + }, + { + "epoch": 1.66, + "learning_rate": 0.00018659624643796647, + "loss": 1.6953, + "step": 1397 + }, + { + "epoch": 1.66, + "learning_rate": 0.00018657735638577587, + "loss": 1.8515, + "step": 1398 + }, + { + "epoch": 1.66, + "learning_rate": 0.00018655845398954276, + "loss": 2.0384, + "step": 1399 + }, + { + "epoch": 1.67, + "learning_rate": 0.00018653953925196225, + "loss": 1.5458, + "step": 1400 + }, + { + "epoch": 1.67, + "learning_rate": 0.00018652061217573114, + "loss": 1.7166, + "step": 1401 + }, + { + "epoch": 1.67, + "learning_rate": 0.000186501672763548, + "loss": 1.5653, + "step": 1402 + }, + { + "epoch": 1.67, + "learning_rate": 0.00018648272101811318, + "loss": 2.0928, + "step": 1403 + }, + { + "epoch": 1.67, + "learning_rate": 0.00018646375694212884, + "loss": 1.605, + "step": 1404 + }, + { + "epoch": 1.67, + "learning_rate": 0.00018644478053829878, + "loss": 1.4734, + "step": 1405 + }, + { + "epoch": 1.67, + "learning_rate": 0.00018642579180932865, + "loss": 2.0578, + "step": 1406 + }, + { + "epoch": 1.67, + "learning_rate": 0.00018640679075792582, + "loss": 1.9823, + "step": 1407 + }, + { + "epoch": 1.67, + "learning_rate": 0.00018638777738679943, + "loss": 2.0551, + "step": 1408 + }, + { + "epoch": 1.68, + "learning_rate": 0.00018636875169866036, + "loss": 1.6315, + "step": 1409 + }, + { + "epoch": 1.68, + "learning_rate": 0.0001863497136962213, + "loss": 1.8965, + "step": 1410 + }, + { + "epoch": 1.68, + "learning_rate": 0.0001863306633821966, + "loss": 1.3584, + "step": 1411 + }, + { + "epoch": 1.68, + "learning_rate": 0.00018631160075930245, + "loss": 1.9673, + "step": 1412 + }, + { + "epoch": 1.68, + "learning_rate": 0.00018629252583025676, + "loss": 1.5277, + "step": 1413 + }, + { + "epoch": 1.68, + "learning_rate": 0.0001862734385977792, + "loss": 1.6788, + "step": 1414 + }, + { + "epoch": 1.68, + "learning_rate": 0.00018625433906459116, + "loss": 1.432, + "step": 1415 + }, + { + "epoch": 1.68, + "learning_rate": 0.00018623522723341588, + "loss": 1.8102, + "step": 1416 + }, + { + "epoch": 1.69, + "learning_rate": 0.00018621610310697823, + "loss": 1.6713, + "step": 1417 + }, + { + "epoch": 1.69, + "learning_rate": 0.00018619696668800492, + "loss": 1.6989, + "step": 1418 + }, + { + "epoch": 1.69, + "learning_rate": 0.0001861778179792244, + "loss": 1.7645, + "step": 1419 + }, + { + "epoch": 1.69, + "learning_rate": 0.00018615865698336684, + "loss": 1.594, + "step": 1420 + }, + { + "epoch": 1.69, + "learning_rate": 0.00018613948370316415, + "loss": 1.8751, + "step": 1421 + }, + { + "epoch": 1.69, + "learning_rate": 0.00018612029814135014, + "loss": 1.64, + "step": 1422 + }, + { + "epoch": 1.69, + "learning_rate": 0.00018610110030066007, + "loss": 1.5066, + "step": 1423 + }, + { + "epoch": 1.69, + "learning_rate": 0.0001860818901838313, + "loss": 1.9817, + "step": 1424 + }, + { + "epoch": 1.7, + "learning_rate": 0.00018606266779360266, + "loss": 2.056, + "step": 1425 + }, + { + "epoch": 1.7, + "learning_rate": 0.0001860434331327149, + "loss": 1.6997, + "step": 1426 + }, + { + "epoch": 1.7, + "learning_rate": 0.00018602418620391044, + "loss": 1.5573, + "step": 1427 + }, + { + "epoch": 1.7, + "learning_rate": 0.0001860049270099335, + "loss": 1.8427, + "step": 1428 + }, + { + "epoch": 1.7, + "learning_rate": 0.00018598565555353, + "loss": 2.012, + "step": 1429 + }, + { + "epoch": 1.7, + "learning_rate": 0.00018596637183744763, + "loss": 1.7976, + "step": 1430 + }, + { + "epoch": 1.7, + "learning_rate": 0.00018594707586443585, + "loss": 1.4, + "step": 1431 + }, + { + "epoch": 1.7, + "learning_rate": 0.0001859277676372458, + "loss": 1.8717, + "step": 1432 + }, + { + "epoch": 1.7, + "learning_rate": 0.00018590844715863045, + "loss": 1.4311, + "step": 1433 + }, + { + "epoch": 1.71, + "learning_rate": 0.00018588911443134448, + "loss": 1.5903, + "step": 1434 + }, + { + "epoch": 1.71, + "learning_rate": 0.00018586976945814425, + "loss": 2.0898, + "step": 1435 + }, + { + "epoch": 1.71, + "learning_rate": 0.00018585041224178803, + "loss": 1.5302, + "step": 1436 + }, + { + "epoch": 1.71, + "learning_rate": 0.00018583104278503568, + "loss": 1.9582, + "step": 1437 + }, + { + "epoch": 1.71, + "learning_rate": 0.00018581166109064886, + "loss": 1.5264, + "step": 1438 + }, + { + "epoch": 1.71, + "learning_rate": 0.00018579226716139096, + "loss": 1.6551, + "step": 1439 + }, + { + "epoch": 1.71, + "learning_rate": 0.00018577286100002723, + "loss": 1.7774, + "step": 1440 + }, + { + "epoch": 1.71, + "learning_rate": 0.00018575344260932444, + "loss": 1.8316, + "step": 1441 + }, + { + "epoch": 1.72, + "learning_rate": 0.0001857340119920513, + "loss": 1.3916, + "step": 1442 + }, + { + "epoch": 1.72, + "learning_rate": 0.00018571456915097818, + "loss": 1.6728, + "step": 1443 + }, + { + "epoch": 1.72, + "learning_rate": 0.0001856951140888772, + "loss": 1.7247, + "step": 1444 + }, + { + "epoch": 1.72, + "learning_rate": 0.00018567564680852224, + "loss": 1.4539, + "step": 1445 + }, + { + "epoch": 1.72, + "learning_rate": 0.00018565616731268888, + "loss": 1.613, + "step": 1446 + }, + { + "epoch": 1.72, + "learning_rate": 0.0001856366756041545, + "loss": 1.757, + "step": 1447 + }, + { + "epoch": 1.72, + "learning_rate": 0.00018561717168569816, + "loss": 1.6903, + "step": 1448 + }, + { + "epoch": 1.72, + "learning_rate": 0.00018559765556010072, + "loss": 1.7322, + "step": 1449 + }, + { + "epoch": 1.73, + "learning_rate": 0.00018557812723014476, + "loss": 1.5627, + "step": 1450 + }, + { + "epoch": 1.73, + "learning_rate": 0.00018555858669861458, + "loss": 1.8751, + "step": 1451 + }, + { + "epoch": 1.73, + "learning_rate": 0.00018553903396829625, + "loss": 1.2721, + "step": 1452 + }, + { + "epoch": 1.73, + "learning_rate": 0.00018551946904197752, + "loss": 1.8167, + "step": 1453 + }, + { + "epoch": 1.73, + "learning_rate": 0.00018549989192244797, + "loss": 1.6602, + "step": 1454 + }, + { + "epoch": 1.73, + "learning_rate": 0.00018548030261249885, + "loss": 1.9053, + "step": 1455 + }, + { + "epoch": 1.73, + "learning_rate": 0.00018546070111492315, + "loss": 1.7721, + "step": 1456 + }, + { + "epoch": 1.73, + "learning_rate": 0.00018544108743251566, + "loss": 2.1421, + "step": 1457 + }, + { + "epoch": 1.73, + "learning_rate": 0.00018542146156807284, + "loss": 1.5076, + "step": 1458 + }, + { + "epoch": 1.74, + "learning_rate": 0.00018540182352439288, + "loss": 1.9039, + "step": 1459 + }, + { + "epoch": 1.74, + "learning_rate": 0.00018538217330427582, + "loss": 1.9777, + "step": 1460 + }, + { + "epoch": 1.74, + "learning_rate": 0.00018536251091052323, + "loss": 1.5702, + "step": 1461 + }, + { + "epoch": 1.74, + "learning_rate": 0.00018534283634593862, + "loss": 1.851, + "step": 1462 + }, + { + "epoch": 1.74, + "learning_rate": 0.00018532314961332717, + "loss": 1.5337, + "step": 1463 + }, + { + "epoch": 1.74, + "eval_loss": 2.068387508392334, + "eval_runtime": 283.4638, + "eval_samples_per_second": 0.727, + "eval_steps_per_second": 0.727, + "step": 1463 + }, + { + "epoch": 1.74, + "learning_rate": 0.00018530345071549574, + "loss": 1.7553, + "step": 1464 + }, + { + "epoch": 1.74, + "learning_rate": 0.00018528373965525296, + "loss": 1.4175, + "step": 1465 + }, + { + "epoch": 1.74, + "learning_rate": 0.00018526401643540922, + "loss": 1.7216, + "step": 1466 + }, + { + "epoch": 1.75, + "learning_rate": 0.00018524428105877664, + "loss": 1.6415, + "step": 1467 + }, + { + "epoch": 1.75, + "learning_rate": 0.00018522453352816896, + "loss": 1.7284, + "step": 1468 + }, + { + "epoch": 1.75, + "learning_rate": 0.00018520477384640187, + "loss": 1.8314, + "step": 1469 + }, + { + "epoch": 1.75, + "learning_rate": 0.00018518500201629258, + "loss": 1.8341, + "step": 1470 + }, + { + "epoch": 1.75, + "learning_rate": 0.00018516521804066015, + "loss": 1.4129, + "step": 1471 + }, + { + "epoch": 1.75, + "learning_rate": 0.00018514542192232537, + "loss": 1.4671, + "step": 1472 + }, + { + "epoch": 1.75, + "learning_rate": 0.00018512561366411067, + "loss": 1.6665, + "step": 1473 + }, + { + "epoch": 1.75, + "learning_rate": 0.00018510579326884034, + "loss": 1.5722, + "step": 1474 + }, + { + "epoch": 1.76, + "learning_rate": 0.0001850859607393403, + "loss": 1.9348, + "step": 1475 + }, + { + "epoch": 1.76, + "learning_rate": 0.0001850661160784383, + "loss": 1.5404, + "step": 1476 + }, + { + "epoch": 1.76, + "learning_rate": 0.00018504625928896363, + "loss": 1.4769, + "step": 1477 + }, + { + "epoch": 1.76, + "learning_rate": 0.00018502639037374757, + "loss": 1.4149, + "step": 1478 + }, + { + "epoch": 1.76, + "learning_rate": 0.0001850065093356229, + "loss": 1.958, + "step": 1479 + }, + { + "epoch": 1.76, + "learning_rate": 0.00018498661617742426, + "loss": 1.8319, + "step": 1480 + }, + { + "epoch": 1.76, + "learning_rate": 0.00018496671090198797, + "loss": 1.5948, + "step": 1481 + }, + { + "epoch": 1.76, + "learning_rate": 0.0001849467935121521, + "loss": 1.8469, + "step": 1482 + }, + { + "epoch": 1.76, + "learning_rate": 0.00018492686401075644, + "loss": 1.6798, + "step": 1483 + }, + { + "epoch": 1.77, + "learning_rate": 0.0001849069224006425, + "loss": 1.8197, + "step": 1484 + }, + { + "epoch": 1.77, + "learning_rate": 0.0001848869686846535, + "loss": 1.6613, + "step": 1485 + }, + { + "epoch": 1.77, + "learning_rate": 0.0001848670028656344, + "loss": 1.7322, + "step": 1486 + }, + { + "epoch": 1.77, + "learning_rate": 0.00018484702494643188, + "loss": 2.0493, + "step": 1487 + }, + { + "epoch": 1.77, + "learning_rate": 0.00018482703492989444, + "loss": 1.7182, + "step": 1488 + }, + { + "epoch": 1.77, + "learning_rate": 0.00018480703281887215, + "loss": 1.689, + "step": 1489 + }, + { + "epoch": 1.77, + "learning_rate": 0.00018478701861621686, + "loss": 1.9477, + "step": 1490 + }, + { + "epoch": 1.77, + "learning_rate": 0.0001847669923247822, + "loss": 1.8171, + "step": 1491 + }, + { + "epoch": 1.78, + "learning_rate": 0.00018474695394742345, + "loss": 1.7337, + "step": 1492 + }, + { + "epoch": 1.78, + "learning_rate": 0.0001847269034869977, + "loss": 1.6983, + "step": 1493 + }, + { + "epoch": 1.78, + "learning_rate": 0.0001847068409463636, + "loss": 1.6445, + "step": 1494 + }, + { + "epoch": 1.78, + "learning_rate": 0.0001846867663283818, + "loss": 1.9965, + "step": 1495 + }, + { + "epoch": 1.78, + "learning_rate": 0.0001846666796359143, + "loss": 1.6775, + "step": 1496 + }, + { + "epoch": 1.78, + "learning_rate": 0.0001846465808718252, + "loss": 1.8117, + "step": 1497 + }, + { + "epoch": 1.78, + "learning_rate": 0.00018462647003898006, + "loss": 1.8803, + "step": 1498 + }, + { + "epoch": 1.78, + "learning_rate": 0.00018460634714024624, + "loss": 1.3045, + "step": 1499 + }, + { + "epoch": 1.79, + "learning_rate": 0.00018458621217849286, + "loss": 1.7768, + "step": 1500 + }, + { + "epoch": 1.79, + "learning_rate": 0.00018456606515659073, + "loss": 2.0641, + "step": 1501 + }, + { + "epoch": 1.79, + "learning_rate": 0.0001845459060774123, + "loss": 1.3804, + "step": 1502 + }, + { + "epoch": 1.79, + "learning_rate": 0.00018452573494383192, + "loss": 1.6271, + "step": 1503 + }, + { + "epoch": 1.79, + "learning_rate": 0.00018450555175872547, + "loss": 1.8525, + "step": 1504 + }, + { + "epoch": 1.79, + "learning_rate": 0.00018448535652497073, + "loss": 1.5303, + "step": 1505 + }, + { + "epoch": 1.79, + "learning_rate": 0.000184465149245447, + "loss": 2.0368, + "step": 1506 + }, + { + "epoch": 1.79, + "learning_rate": 0.00018444492992303544, + "loss": 1.9951, + "step": 1507 + }, + { + "epoch": 1.79, + "learning_rate": 0.0001844246985606189, + "loss": 1.8715, + "step": 1508 + }, + { + "epoch": 1.8, + "learning_rate": 0.00018440445516108186, + "loss": 1.7373, + "step": 1509 + }, + { + "epoch": 1.8, + "learning_rate": 0.00018438419972731067, + "loss": 1.7667, + "step": 1510 + }, + { + "epoch": 1.8, + "learning_rate": 0.00018436393226219327, + "loss": 1.5134, + "step": 1511 + }, + { + "epoch": 1.8, + "learning_rate": 0.00018434365276861938, + "loss": 1.3891, + "step": 1512 + }, + { + "epoch": 1.8, + "learning_rate": 0.0001843233612494804, + "loss": 1.7066, + "step": 1513 + }, + { + "epoch": 1.8, + "learning_rate": 0.00018430305770766948, + "loss": 1.6366, + "step": 1514 + }, + { + "epoch": 1.8, + "learning_rate": 0.0001842827421460814, + "loss": 1.7838, + "step": 1515 + }, + { + "epoch": 1.8, + "learning_rate": 0.0001842624145676128, + "loss": 1.7884, + "step": 1516 + }, + { + "epoch": 1.81, + "learning_rate": 0.0001842420749751619, + "loss": 1.8428, + "step": 1517 + }, + { + "epoch": 1.81, + "learning_rate": 0.00018422172337162867, + "loss": 1.4987, + "step": 1518 + }, + { + "epoch": 1.81, + "learning_rate": 0.00018420135975991483, + "loss": 1.7576, + "step": 1519 + }, + { + "epoch": 1.81, + "learning_rate": 0.0001841809841429238, + "loss": 1.8522, + "step": 1520 + }, + { + "epoch": 1.81, + "learning_rate": 0.00018416059652356066, + "loss": 1.9308, + "step": 1521 + }, + { + "epoch": 1.81, + "learning_rate": 0.00018414019690473227, + "loss": 1.4658, + "step": 1522 + }, + { + "epoch": 1.81, + "learning_rate": 0.00018411978528934717, + "loss": 1.7072, + "step": 1523 + }, + { + "epoch": 1.81, + "learning_rate": 0.0001840993616803156, + "loss": 1.736, + "step": 1524 + }, + { + "epoch": 1.82, + "learning_rate": 0.0001840789260805495, + "loss": 1.7712, + "step": 1525 + }, + { + "epoch": 1.82, + "learning_rate": 0.0001840584784929626, + "loss": 1.2231, + "step": 1526 + }, + { + "epoch": 1.82, + "learning_rate": 0.00018403801892047023, + "loss": 1.8421, + "step": 1527 + }, + { + "epoch": 1.82, + "learning_rate": 0.00018401754736598947, + "loss": 1.2689, + "step": 1528 + }, + { + "epoch": 1.82, + "learning_rate": 0.00018399706383243918, + "loss": 1.8062, + "step": 1529 + }, + { + "epoch": 1.82, + "learning_rate": 0.0001839765683227398, + "loss": 1.6846, + "step": 1530 + }, + { + "epoch": 1.82, + "learning_rate": 0.0001839560608398136, + "loss": 1.8201, + "step": 1531 + }, + { + "epoch": 1.82, + "learning_rate": 0.00018393554138658441, + "loss": 1.6958, + "step": 1532 + }, + { + "epoch": 1.82, + "learning_rate": 0.00018391500996597796, + "loss": 1.8487, + "step": 1533 + }, + { + "epoch": 1.83, + "learning_rate": 0.0001838944665809215, + "loss": 1.9788, + "step": 1534 + }, + { + "epoch": 1.83, + "learning_rate": 0.00018387391123434412, + "loss": 1.6002, + "step": 1535 + }, + { + "epoch": 1.83, + "learning_rate": 0.00018385334392917658, + "loss": 1.3859, + "step": 1536 + }, + { + "epoch": 1.83, + "learning_rate": 0.00018383276466835127, + "loss": 2.0743, + "step": 1537 + }, + { + "epoch": 1.83, + "learning_rate": 0.00018381217345480235, + "loss": 1.8357, + "step": 1538 + }, + { + "epoch": 1.83, + "learning_rate": 0.00018379157029146573, + "loss": 1.7002, + "step": 1539 + }, + { + "epoch": 1.83, + "learning_rate": 0.00018377095518127897, + "loss": 1.3058, + "step": 1540 + }, + { + "epoch": 1.83, + "learning_rate": 0.00018375032812718124, + "loss": 1.8745, + "step": 1541 + }, + { + "epoch": 1.84, + "learning_rate": 0.00018372968913211364, + "loss": 1.7847, + "step": 1542 + }, + { + "epoch": 1.84, + "learning_rate": 0.00018370903819901874, + "loss": 1.8156, + "step": 1543 + }, + { + "epoch": 1.84, + "learning_rate": 0.00018368837533084095, + "loss": 2.0152, + "step": 1544 + }, + { + "epoch": 1.84, + "learning_rate": 0.00018366770053052634, + "loss": 1.5656, + "step": 1545 + }, + { + "epoch": 1.84, + "learning_rate": 0.00018364701380102266, + "loss": 1.5753, + "step": 1546 + }, + { + "epoch": 1.84, + "learning_rate": 0.00018362631514527947, + "loss": 1.3938, + "step": 1547 + }, + { + "epoch": 1.84, + "learning_rate": 0.00018360560456624788, + "loss": 1.9599, + "step": 1548 + }, + { + "epoch": 1.84, + "learning_rate": 0.00018358488206688075, + "loss": 1.8641, + "step": 1549 + }, + { + "epoch": 1.85, + "learning_rate": 0.00018356414765013267, + "loss": 1.8428, + "step": 1550 + }, + { + "epoch": 1.85, + "learning_rate": 0.00018354340131895998, + "loss": 1.6016, + "step": 1551 + }, + { + "epoch": 1.85, + "learning_rate": 0.00018352264307632056, + "loss": 1.5768, + "step": 1552 + }, + { + "epoch": 1.85, + "learning_rate": 0.00018350187292517415, + "loss": 1.5369, + "step": 1553 + }, + { + "epoch": 1.85, + "learning_rate": 0.0001834810908684821, + "loss": 1.9717, + "step": 1554 + }, + { + "epoch": 1.85, + "learning_rate": 0.00018346029690920746, + "loss": 1.943, + "step": 1555 + }, + { + "epoch": 1.85, + "learning_rate": 0.00018343949105031505, + "loss": 1.8166, + "step": 1556 + }, + { + "epoch": 1.85, + "learning_rate": 0.00018341867329477125, + "loss": 1.7149, + "step": 1557 + }, + { + "epoch": 1.85, + "learning_rate": 0.00018339784364554426, + "loss": 1.4657, + "step": 1558 + }, + { + "epoch": 1.86, + "learning_rate": 0.00018337700210560397, + "loss": 1.8693, + "step": 1559 + }, + { + "epoch": 1.86, + "learning_rate": 0.00018335614867792183, + "loss": 1.7656, + "step": 1560 + }, + { + "epoch": 1.86, + "learning_rate": 0.0001833352833654712, + "loss": 1.5123, + "step": 1561 + }, + { + "epoch": 1.86, + "learning_rate": 0.00018331440617122696, + "loss": 1.7884, + "step": 1562 + }, + { + "epoch": 1.86, + "learning_rate": 0.0001832935170981657, + "loss": 1.7309, + "step": 1563 + }, + { + "epoch": 1.86, + "learning_rate": 0.00018327261614926583, + "loss": 1.9628, + "step": 1564 + }, + { + "epoch": 1.86, + "learning_rate": 0.00018325170332750732, + "loss": 1.6409, + "step": 1565 + }, + { + "epoch": 1.86, + "learning_rate": 0.0001832307786358719, + "loss": 1.6093, + "step": 1566 + }, + { + "epoch": 1.87, + "learning_rate": 0.00018320984207734298, + "loss": 1.6111, + "step": 1567 + }, + { + "epoch": 1.87, + "learning_rate": 0.00018318889365490565, + "loss": 2.0085, + "step": 1568 + }, + { + "epoch": 1.87, + "learning_rate": 0.00018316793337154664, + "loss": 2.079, + "step": 1569 + }, + { + "epoch": 1.87, + "learning_rate": 0.00018314696123025454, + "loss": 1.5466, + "step": 1570 + }, + { + "epoch": 1.87, + "learning_rate": 0.00018312597723401942, + "loss": 2.0825, + "step": 1571 + }, + { + "epoch": 1.87, + "learning_rate": 0.0001831049813858332, + "loss": 1.9748, + "step": 1572 + }, + { + "epoch": 1.87, + "learning_rate": 0.00018308397368868945, + "loss": 1.6529, + "step": 1573 + }, + { + "epoch": 1.87, + "learning_rate": 0.00018306295414558335, + "loss": 1.7119, + "step": 1574 + }, + { + "epoch": 1.88, + "learning_rate": 0.00018304192275951184, + "loss": 1.8812, + "step": 1575 + }, + { + "epoch": 1.88, + "learning_rate": 0.00018302087953347352, + "loss": 1.8676, + "step": 1576 + }, + { + "epoch": 1.88, + "learning_rate": 0.00018299982447046877, + "loss": 1.879, + "step": 1577 + }, + { + "epoch": 1.88, + "learning_rate": 0.00018297875757349952, + "loss": 1.6282, + "step": 1578 + }, + { + "epoch": 1.88, + "learning_rate": 0.00018295767884556947, + "loss": 1.735, + "step": 1579 + }, + { + "epoch": 1.88, + "learning_rate": 0.00018293658828968397, + "loss": 1.5796, + "step": 1580 + }, + { + "epoch": 1.88, + "learning_rate": 0.00018291548590885007, + "loss": 1.8258, + "step": 1581 + }, + { + "epoch": 1.88, + "learning_rate": 0.00018289437170607658, + "loss": 1.7531, + "step": 1582 + }, + { + "epoch": 1.88, + "learning_rate": 0.00018287324568437381, + "loss": 1.6265, + "step": 1583 + }, + { + "epoch": 1.89, + "learning_rate": 0.00018285210784675394, + "loss": 1.7997, + "step": 1584 + }, + { + "epoch": 1.89, + "learning_rate": 0.00018283095819623078, + "loss": 1.955, + "step": 1585 + }, + { + "epoch": 1.89, + "learning_rate": 0.00018280979673581977, + "loss": 1.6542, + "step": 1586 + }, + { + "epoch": 1.89, + "learning_rate": 0.00018278862346853808, + "loss": 1.7634, + "step": 1587 + }, + { + "epoch": 1.89, + "learning_rate": 0.00018276743839740458, + "loss": 2.0077, + "step": 1588 + }, + { + "epoch": 1.89, + "learning_rate": 0.00018274624152543977, + "loss": 2.0254, + "step": 1589 + }, + { + "epoch": 1.89, + "learning_rate": 0.00018272503285566587, + "loss": 1.4464, + "step": 1590 + }, + { + "epoch": 1.89, + "learning_rate": 0.00018270381239110677, + "loss": 1.8643, + "step": 1591 + }, + { + "epoch": 1.9, + "learning_rate": 0.00018268258013478804, + "loss": 1.3278, + "step": 1592 + }, + { + "epoch": 1.9, + "learning_rate": 0.00018266133608973696, + "loss": 1.744, + "step": 1593 + }, + { + "epoch": 1.9, + "learning_rate": 0.00018264008025898248, + "loss": 1.5079, + "step": 1594 + }, + { + "epoch": 1.9, + "learning_rate": 0.00018261881264555516, + "loss": 1.9655, + "step": 1595 + }, + { + "epoch": 1.9, + "learning_rate": 0.0001825975332524873, + "loss": 2.0557, + "step": 1596 + }, + { + "epoch": 1.9, + "learning_rate": 0.000182576242082813, + "loss": 1.7174, + "step": 1597 + }, + { + "epoch": 1.9, + "learning_rate": 0.00018255493913956774, + "loss": 1.449, + "step": 1598 + }, + { + "epoch": 1.9, + "learning_rate": 0.00018253362442578896, + "loss": 1.9058, + "step": 1599 + }, + { + "epoch": 1.91, + "learning_rate": 0.00018251229794451567, + "loss": 1.3482, + "step": 1600 + }, + { + "epoch": 1.91, + "learning_rate": 0.00018249095969878853, + "loss": 1.7906, + "step": 1601 + }, + { + "epoch": 1.91, + "learning_rate": 0.00018246960969164994, + "loss": 1.6177, + "step": 1602 + }, + { + "epoch": 1.91, + "learning_rate": 0.00018244824792614393, + "loss": 1.5786, + "step": 1603 + }, + { + "epoch": 1.91, + "learning_rate": 0.00018242687440531618, + "loss": 1.6451, + "step": 1604 + }, + { + "epoch": 1.91, + "learning_rate": 0.00018240548913221416, + "loss": 1.3695, + "step": 1605 + }, + { + "epoch": 1.91, + "learning_rate": 0.0001823840921098869, + "loss": 1.6648, + "step": 1606 + }, + { + "epoch": 1.91, + "learning_rate": 0.00018236268334138515, + "loss": 2.1548, + "step": 1607 + }, + { + "epoch": 1.91, + "learning_rate": 0.00018234126282976133, + "loss": 1.6153, + "step": 1608 + }, + { + "epoch": 1.92, + "learning_rate": 0.0001823198305780696, + "loss": 1.741, + "step": 1609 + }, + { + "epoch": 1.92, + "learning_rate": 0.00018229838658936564, + "loss": 1.7827, + "step": 1610 + }, + { + "epoch": 1.92, + "learning_rate": 0.00018227693086670697, + "loss": 1.7343, + "step": 1611 + }, + { + "epoch": 1.92, + "learning_rate": 0.00018225546341315261, + "loss": 1.8149, + "step": 1612 + }, + { + "epoch": 1.92, + "learning_rate": 0.0001822339842317635, + "loss": 1.5497, + "step": 1613 + }, + { + "epoch": 1.92, + "learning_rate": 0.00018221249332560198, + "loss": 1.7659, + "step": 1614 + }, + { + "epoch": 1.92, + "learning_rate": 0.0001821909906977322, + "loss": 1.8992, + "step": 1615 + }, + { + "epoch": 1.92, + "learning_rate": 0.00018216947635122, + "loss": 1.8682, + "step": 1616 + }, + { + "epoch": 1.93, + "learning_rate": 0.00018214795028913288, + "loss": 1.9774, + "step": 1617 + }, + { + "epoch": 1.93, + "learning_rate": 0.0001821264125145399, + "loss": 1.9441, + "step": 1618 + }, + { + "epoch": 1.93, + "learning_rate": 0.00018210486303051195, + "loss": 2.0314, + "step": 1619 + }, + { + "epoch": 1.93, + "learning_rate": 0.0001820833018401215, + "loss": 1.8234, + "step": 1620 + }, + { + "epoch": 1.93, + "learning_rate": 0.00018206172894644272, + "loss": 1.9478, + "step": 1621 + }, + { + "epoch": 1.93, + "learning_rate": 0.00018204014435255135, + "loss": 1.7894, + "step": 1622 + }, + { + "epoch": 1.93, + "learning_rate": 0.000182018548061525, + "loss": 1.5469, + "step": 1623 + }, + { + "epoch": 1.93, + "learning_rate": 0.00018199694007644277, + "loss": 1.9419, + "step": 1624 + }, + { + "epoch": 1.94, + "learning_rate": 0.00018197532040038547, + "loss": 1.6686, + "step": 1625 + }, + { + "epoch": 1.94, + "learning_rate": 0.00018195368903643563, + "loss": 2.2525, + "step": 1626 + }, + { + "epoch": 1.94, + "learning_rate": 0.00018193204598767744, + "loss": 1.8076, + "step": 1627 + }, + { + "epoch": 1.94, + "learning_rate": 0.00018191039125719662, + "loss": 1.976, + "step": 1628 + }, + { + "epoch": 1.94, + "learning_rate": 0.00018188872484808076, + "loss": 1.6896, + "step": 1629 + }, + { + "epoch": 1.94, + "learning_rate": 0.00018186704676341898, + "loss": 1.6784, + "step": 1630 + }, + { + "epoch": 1.94, + "learning_rate": 0.00018184535700630213, + "loss": 1.9634, + "step": 1631 + }, + { + "epoch": 1.94, + "learning_rate": 0.00018182365557982264, + "loss": 1.7406, + "step": 1632 + }, + { + "epoch": 1.94, + "learning_rate": 0.00018180194248707473, + "loss": 1.7492, + "step": 1633 + }, + { + "epoch": 1.95, + "learning_rate": 0.00018178021773115414, + "loss": 1.7731, + "step": 1634 + }, + { + "epoch": 1.95, + "learning_rate": 0.00018175848131515837, + "loss": 1.6232, + "step": 1635 + }, + { + "epoch": 1.95, + "learning_rate": 0.0001817367332421866, + "loss": 1.7488, + "step": 1636 + }, + { + "epoch": 1.95, + "learning_rate": 0.0001817149735153396, + "loss": 1.3398, + "step": 1637 + }, + { + "epoch": 1.95, + "learning_rate": 0.00018169320213771983, + "loss": 1.4521, + "step": 1638 + }, + { + "epoch": 1.95, + "learning_rate": 0.00018167141911243145, + "loss": 1.6311, + "step": 1639 + }, + { + "epoch": 1.95, + "learning_rate": 0.00018164962444258014, + "loss": 1.8911, + "step": 1640 + }, + { + "epoch": 1.95, + "learning_rate": 0.00018162781813127346, + "loss": 1.9879, + "step": 1641 + }, + { + "epoch": 1.96, + "learning_rate": 0.0001816060001816205, + "loss": 1.5637, + "step": 1642 + }, + { + "epoch": 1.96, + "learning_rate": 0.00018158417059673196, + "loss": 1.7461, + "step": 1643 + }, + { + "epoch": 1.96, + "learning_rate": 0.0001815623293797203, + "loss": 1.6671, + "step": 1644 + }, + { + "epoch": 1.96, + "learning_rate": 0.0001815404765336996, + "loss": 1.2124, + "step": 1645 + }, + { + "epoch": 1.96, + "learning_rate": 0.0001815186120617856, + "loss": 1.6402, + "step": 1646 + }, + { + "epoch": 1.96, + "learning_rate": 0.0001814967359670957, + "loss": 1.8837, + "step": 1647 + }, + { + "epoch": 1.96, + "learning_rate": 0.00018147484825274893, + "loss": 1.8027, + "step": 1648 + }, + { + "epoch": 1.96, + "learning_rate": 0.00018145294892186605, + "loss": 1.7684, + "step": 1649 + }, + { + "epoch": 1.97, + "learning_rate": 0.0001814310379775694, + "loss": 1.8274, + "step": 1650 + }, + { + "epoch": 1.97, + "learning_rate": 0.000181409115422983, + "loss": 1.8292, + "step": 1651 + }, + { + "epoch": 1.97, + "learning_rate": 0.00018138718126123248, + "loss": 1.3492, + "step": 1652 + }, + { + "epoch": 1.97, + "learning_rate": 0.00018136523549544523, + "loss": 1.509, + "step": 1653 + }, + { + "epoch": 1.97, + "learning_rate": 0.00018134327812875024, + "loss": 1.7415, + "step": 1654 + }, + { + "epoch": 1.97, + "learning_rate": 0.00018132130916427816, + "loss": 1.5223, + "step": 1655 + }, + { + "epoch": 1.97, + "learning_rate": 0.00018129932860516126, + "loss": 1.9294, + "step": 1656 + }, + { + "epoch": 1.97, + "learning_rate": 0.00018127733645453348, + "loss": 2.0716, + "step": 1657 + }, + { + "epoch": 1.97, + "learning_rate": 0.00018125533271553043, + "loss": 1.57, + "step": 1658 + }, + { + "epoch": 1.98, + "learning_rate": 0.00018123331739128938, + "loss": 2.2132, + "step": 1659 + }, + { + "epoch": 1.98, + "learning_rate": 0.00018121129048494922, + "loss": 1.9006, + "step": 1660 + }, + { + "epoch": 1.98, + "learning_rate": 0.00018118925199965048, + "loss": 1.9319, + "step": 1661 + }, + { + "epoch": 1.98, + "learning_rate": 0.00018116720193853543, + "loss": 1.8103, + "step": 1662 + }, + { + "epoch": 1.98, + "learning_rate": 0.00018114514030474787, + "loss": 1.7028, + "step": 1663 + }, + { + "epoch": 1.98, + "learning_rate": 0.00018112306710143334, + "loss": 1.802, + "step": 1664 + }, + { + "epoch": 1.98, + "learning_rate": 0.000181100982331739, + "loss": 1.6835, + "step": 1665 + }, + { + "epoch": 1.98, + "learning_rate": 0.0001810788859988136, + "loss": 1.7223, + "step": 1666 + }, + { + "epoch": 1.99, + "learning_rate": 0.0001810567781058077, + "loss": 1.5829, + "step": 1667 + }, + { + "epoch": 1.99, + "learning_rate": 0.00018103465865587333, + "loss": 1.9863, + "step": 1668 + }, + { + "epoch": 1.99, + "learning_rate": 0.0001810125276521642, + "loss": 1.6398, + "step": 1669 + }, + { + "epoch": 1.99, + "learning_rate": 0.00018099038509783582, + "loss": 1.9261, + "step": 1670 + }, + { + "epoch": 1.99, + "learning_rate": 0.00018096823099604517, + "loss": 1.8882, + "step": 1671 + }, + { + "epoch": 1.99, + "learning_rate": 0.00018094606534995093, + "loss": 1.6716, + "step": 1672 + }, + { + "epoch": 1.99, + "eval_loss": 2.075261354446411, + "eval_runtime": 283.9438, + "eval_samples_per_second": 0.725, + "eval_steps_per_second": 0.725, + "step": 1672 + }, + { + "epoch": 1.99, + "learning_rate": 0.00018092388816271345, + "loss": 1.6688, + "step": 1673 + }, + { + "epoch": 1.99, + "learning_rate": 0.00018090169943749476, + "loss": 1.9127, + "step": 1674 + }, + { + "epoch": 2.0, + "learning_rate": 0.0001808794991774584, + "loss": 1.7214, + "step": 1675 + }, + { + "epoch": 2.0, + "learning_rate": 0.00018085728738576973, + "loss": 1.785, + "step": 1676 + }, + { + "epoch": 2.0, + "learning_rate": 0.00018083506406559561, + "loss": 1.5287, + "step": 1677 + }, + { + "epoch": 2.0, + "learning_rate": 0.00018081282922010464, + "loss": 1.9012, + "step": 1678 + }, + { + "epoch": 2.0, + "learning_rate": 0.00018079058285246698, + "loss": 1.3094, + "step": 1679 + }, + { + "epoch": 2.0, + "learning_rate": 0.0001807683249658545, + "loss": 1.818, + "step": 1680 + }, + { + "epoch": 2.0, + "learning_rate": 0.0001807460555634407, + "loss": 1.9389, + "step": 1681 + }, + { + "epoch": 2.0, + "learning_rate": 0.0001807237746484007, + "loss": 1.4334, + "step": 1682 + }, + { + "epoch": 2.0, + "learning_rate": 0.00018070148222391126, + "loss": 1.5422, + "step": 1683 + }, + { + "epoch": 2.01, + "learning_rate": 0.0001806791782931508, + "loss": 1.7899, + "step": 1684 + }, + { + "epoch": 2.01, + "learning_rate": 0.0001806568628592994, + "loss": 1.6106, + "step": 1685 + }, + { + "epoch": 2.01, + "learning_rate": 0.00018063453592553872, + "loss": 1.9807, + "step": 1686 + }, + { + "epoch": 2.01, + "learning_rate": 0.0001806121974950521, + "loss": 1.1762, + "step": 1687 + }, + { + "epoch": 2.01, + "learning_rate": 0.00018058984757102456, + "loss": 1.8338, + "step": 1688 + }, + { + "epoch": 2.01, + "learning_rate": 0.0001805674861566426, + "loss": 1.5556, + "step": 1689 + }, + { + "epoch": 2.0, + "learning_rate": 0.0001805451132550946, + "loss": 0.87, + "step": 1690 + }, + { + "epoch": 2.0, + "learning_rate": 0.00018052272886957038, + "loss": 1.0386, + "step": 1691 + }, + { + "epoch": 2.0, + "learning_rate": 0.0001805003330032615, + "loss": 0.8153, + "step": 1692 + }, + { + "epoch": 2.0, + "learning_rate": 0.00018047792565936102, + "loss": 1.1745, + "step": 1693 + }, + { + "epoch": 2.01, + "learning_rate": 0.00018045550684106388, + "loss": 1.1584, + "step": 1694 + }, + { + "epoch": 2.01, + "learning_rate": 0.00018043307655156644, + "loss": 1.0742, + "step": 1695 + }, + { + "epoch": 2.01, + "learning_rate": 0.00018041063479406675, + "loss": 1.0537, + "step": 1696 + }, + { + "epoch": 2.01, + "learning_rate": 0.0001803881815717646, + "loss": 1.0239, + "step": 1697 + }, + { + "epoch": 2.01, + "learning_rate": 0.0001803657168878612, + "loss": 0.9182, + "step": 1698 + }, + { + "epoch": 2.01, + "learning_rate": 0.00018034324074555965, + "loss": 1.1856, + "step": 1699 + }, + { + "epoch": 2.01, + "learning_rate": 0.00018032075314806448, + "loss": 1.3285, + "step": 1700 + }, + { + "epoch": 2.01, + "learning_rate": 0.00018029825409858198, + "loss": 1.2912, + "step": 1701 + }, + { + "epoch": 2.02, + "learning_rate": 0.00018027574360032, + "loss": 1.3666, + "step": 1702 + }, + { + "epoch": 2.02, + "learning_rate": 0.00018025322165648807, + "loss": 0.9621, + "step": 1703 + }, + { + "epoch": 2.02, + "learning_rate": 0.00018023068827029723, + "loss": 0.8484, + "step": 1704 + }, + { + "epoch": 2.02, + "learning_rate": 0.00018020814344496037, + "loss": 1.2236, + "step": 1705 + }, + { + "epoch": 2.02, + "learning_rate": 0.00018018558718369186, + "loss": 0.8155, + "step": 1706 + }, + { + "epoch": 2.02, + "learning_rate": 0.0001801630194897077, + "loss": 1.2047, + "step": 1707 + }, + { + "epoch": 2.02, + "learning_rate": 0.00018014044036622555, + "loss": 1.0269, + "step": 1708 + }, + { + "epoch": 2.02, + "learning_rate": 0.00018011784981646474, + "loss": 1.0536, + "step": 1709 + }, + { + "epoch": 2.03, + "learning_rate": 0.00018009524784364615, + "loss": 1.0516, + "step": 1710 + }, + { + "epoch": 2.03, + "learning_rate": 0.00018007263445099235, + "loss": 0.9087, + "step": 1711 + }, + { + "epoch": 2.03, + "learning_rate": 0.0001800500096417275, + "loss": 1.3057, + "step": 1712 + }, + { + "epoch": 2.03, + "learning_rate": 0.00018002737341907743, + "loss": 0.8791, + "step": 1713 + }, + { + "epoch": 2.03, + "learning_rate": 0.00018000472578626956, + "loss": 1.1667, + "step": 1714 + }, + { + "epoch": 2.03, + "learning_rate": 0.00017998206674653294, + "loss": 1.1026, + "step": 1715 + }, + { + "epoch": 2.03, + "learning_rate": 0.00017995939630309826, + "loss": 1.3228, + "step": 1716 + }, + { + "epoch": 2.03, + "learning_rate": 0.0001799367144591978, + "loss": 0.9173, + "step": 1717 + }, + { + "epoch": 2.03, + "learning_rate": 0.00017991402121806557, + "loss": 1.0067, + "step": 1718 + }, + { + "epoch": 2.04, + "learning_rate": 0.0001798913165829371, + "loss": 1.0256, + "step": 1719 + }, + { + "epoch": 2.04, + "learning_rate": 0.00017986860055704953, + "loss": 0.7645, + "step": 1720 + }, + { + "epoch": 2.04, + "learning_rate": 0.0001798458731436417, + "loss": 1.0567, + "step": 1721 + }, + { + "epoch": 2.04, + "learning_rate": 0.00017982313434595406, + "loss": 0.7465, + "step": 1722 + }, + { + "epoch": 2.04, + "learning_rate": 0.00017980038416722863, + "loss": 1.3268, + "step": 1723 + }, + { + "epoch": 2.04, + "learning_rate": 0.00017977762261070916, + "loss": 0.9917, + "step": 1724 + }, + { + "epoch": 2.04, + "learning_rate": 0.00017975484967964087, + "loss": 0.8592, + "step": 1725 + }, + { + "epoch": 2.04, + "learning_rate": 0.00017973206537727073, + "loss": 1.43, + "step": 1726 + }, + { + "epoch": 2.05, + "learning_rate": 0.00017970926970684725, + "loss": 1.3679, + "step": 1727 + }, + { + "epoch": 2.05, + "learning_rate": 0.00017968646267162063, + "loss": 1.2959, + "step": 1728 + }, + { + "epoch": 2.05, + "learning_rate": 0.00017966364427484267, + "loss": 1.0674, + "step": 1729 + }, + { + "epoch": 2.05, + "learning_rate": 0.00017964081451976672, + "loss": 1.1153, + "step": 1730 + }, + { + "epoch": 2.05, + "learning_rate": 0.00017961797340964783, + "loss": 1.0586, + "step": 1731 + }, + { + "epoch": 2.05, + "learning_rate": 0.00017959512094774266, + "loss": 1.2388, + "step": 1732 + }, + { + "epoch": 2.05, + "learning_rate": 0.00017957225713730949, + "loss": 1.257, + "step": 1733 + }, + { + "epoch": 2.05, + "learning_rate": 0.0001795493819816081, + "loss": 1.099, + "step": 1734 + }, + { + "epoch": 2.06, + "learning_rate": 0.0001795264954839001, + "loss": 0.9532, + "step": 1735 + }, + { + "epoch": 2.06, + "learning_rate": 0.00017950359764744859, + "loss": 1.2553, + "step": 1736 + }, + { + "epoch": 2.06, + "learning_rate": 0.00017948068847551825, + "loss": 0.9973, + "step": 1737 + }, + { + "epoch": 2.06, + "learning_rate": 0.00017945776797137543, + "loss": 1.0637, + "step": 1738 + }, + { + "epoch": 2.06, + "learning_rate": 0.00017943483613828815, + "loss": 1.1815, + "step": 1739 + }, + { + "epoch": 2.06, + "learning_rate": 0.00017941189297952597, + "loss": 0.8378, + "step": 1740 + }, + { + "epoch": 2.06, + "learning_rate": 0.00017938893849836002, + "loss": 0.9375, + "step": 1741 + }, + { + "epoch": 2.06, + "learning_rate": 0.00017936597269806322, + "loss": 0.9653, + "step": 1742 + }, + { + "epoch": 2.06, + "learning_rate": 0.0001793429955819099, + "loss": 1.221, + "step": 1743 + }, + { + "epoch": 2.07, + "learning_rate": 0.00017932000715317612, + "loss": 1.041, + "step": 1744 + }, + { + "epoch": 2.07, + "learning_rate": 0.00017929700741513955, + "loss": 1.0724, + "step": 1745 + }, + { + "epoch": 2.07, + "learning_rate": 0.00017927399637107945, + "loss": 1.1102, + "step": 1746 + }, + { + "epoch": 2.07, + "learning_rate": 0.00017925097402427667, + "loss": 0.8542, + "step": 1747 + }, + { + "epoch": 2.07, + "learning_rate": 0.0001792279403780137, + "loss": 1.2339, + "step": 1748 + }, + { + "epoch": 2.07, + "learning_rate": 0.00017920489543557465, + "loss": 0.8671, + "step": 1749 + }, + { + "epoch": 2.07, + "learning_rate": 0.0001791818392002452, + "loss": 0.9779, + "step": 1750 + }, + { + "epoch": 2.07, + "learning_rate": 0.0001791587716753127, + "loss": 1.1242, + "step": 1751 + }, + { + "epoch": 2.08, + "learning_rate": 0.00017913569286406603, + "loss": 0.9043, + "step": 1752 + }, + { + "epoch": 2.08, + "learning_rate": 0.0001791126027697958, + "loss": 0.7996, + "step": 1753 + }, + { + "epoch": 2.08, + "learning_rate": 0.00017908950139579406, + "loss": 0.8602, + "step": 1754 + }, + { + "epoch": 2.08, + "learning_rate": 0.00017906638874535462, + "loss": 1.0161, + "step": 1755 + }, + { + "epoch": 2.08, + "learning_rate": 0.00017904326482177284, + "loss": 0.8226, + "step": 1756 + }, + { + "epoch": 2.08, + "learning_rate": 0.00017902012962834566, + "loss": 1.3885, + "step": 1757 + }, + { + "epoch": 2.08, + "learning_rate": 0.0001789969831683717, + "loss": 1.2158, + "step": 1758 + }, + { + "epoch": 2.08, + "learning_rate": 0.00017897382544515108, + "loss": 1.3261, + "step": 1759 + }, + { + "epoch": 2.09, + "learning_rate": 0.00017895065646198567, + "loss": 1.2144, + "step": 1760 + }, + { + "epoch": 2.09, + "learning_rate": 0.00017892747622217875, + "loss": 0.9881, + "step": 1761 + }, + { + "epoch": 2.09, + "learning_rate": 0.0001789042847290354, + "loss": 1.0342, + "step": 1762 + }, + { + "epoch": 2.09, + "learning_rate": 0.00017888108198586217, + "loss": 0.7883, + "step": 1763 + }, + { + "epoch": 2.09, + "learning_rate": 0.00017885786799596732, + "loss": 0.9006, + "step": 1764 + }, + { + "epoch": 2.09, + "learning_rate": 0.00017883464276266064, + "loss": 1.3695, + "step": 1765 + }, + { + "epoch": 2.09, + "learning_rate": 0.0001788114062892535, + "loss": 1.0303, + "step": 1766 + }, + { + "epoch": 2.09, + "learning_rate": 0.00017878815857905897, + "loss": 1.3816, + "step": 1767 + }, + { + "epoch": 2.09, + "learning_rate": 0.0001787648996353916, + "loss": 0.8684, + "step": 1768 + }, + { + "epoch": 2.1, + "learning_rate": 0.00017874162946156772, + "loss": 1.1157, + "step": 1769 + }, + { + "epoch": 2.1, + "learning_rate": 0.00017871834806090501, + "loss": 1.0087, + "step": 1770 + }, + { + "epoch": 2.1, + "learning_rate": 0.000178695055436723, + "loss": 0.7173, + "step": 1771 + }, + { + "epoch": 2.1, + "learning_rate": 0.00017867175159234265, + "loss": 1.4784, + "step": 1772 + }, + { + "epoch": 2.1, + "learning_rate": 0.00017864843653108662, + "loss": 1.1401, + "step": 1773 + }, + { + "epoch": 2.1, + "learning_rate": 0.0001786251102562791, + "loss": 1.0952, + "step": 1774 + }, + { + "epoch": 2.1, + "learning_rate": 0.0001786017727712459, + "loss": 0.9443, + "step": 1775 + }, + { + "epoch": 2.1, + "learning_rate": 0.00017857842407931445, + "loss": 1.0682, + "step": 1776 + }, + { + "epoch": 2.11, + "learning_rate": 0.0001785550641838138, + "loss": 0.9402, + "step": 1777 + }, + { + "epoch": 2.11, + "learning_rate": 0.00017853169308807448, + "loss": 1.0576, + "step": 1778 + }, + { + "epoch": 2.11, + "learning_rate": 0.0001785083107954288, + "loss": 1.1425, + "step": 1779 + }, + { + "epoch": 2.11, + "learning_rate": 0.00017848491730921046, + "loss": 1.1402, + "step": 1780 + }, + { + "epoch": 2.11, + "learning_rate": 0.00017846151263275494, + "loss": 1.4482, + "step": 1781 + }, + { + "epoch": 2.11, + "learning_rate": 0.00017843809676939922, + "loss": 0.7765, + "step": 1782 + }, + { + "epoch": 2.11, + "learning_rate": 0.00017841466972248188, + "loss": 1.1478, + "step": 1783 + }, + { + "epoch": 2.11, + "learning_rate": 0.0001783912314953431, + "loss": 1.1876, + "step": 1784 + }, + { + "epoch": 2.12, + "learning_rate": 0.00017836778209132464, + "loss": 1.2036, + "step": 1785 + }, + { + "epoch": 2.12, + "learning_rate": 0.0001783443215137699, + "loss": 1.0297, + "step": 1786 + }, + { + "epoch": 2.12, + "learning_rate": 0.0001783208497660239, + "loss": 0.8186, + "step": 1787 + }, + { + "epoch": 2.12, + "learning_rate": 0.00017829736685143308, + "loss": 0.7258, + "step": 1788 + }, + { + "epoch": 2.12, + "learning_rate": 0.00017827387277334568, + "loss": 0.8072, + "step": 1789 + }, + { + "epoch": 2.12, + "learning_rate": 0.00017825036753511144, + "loss": 1.0474, + "step": 1790 + }, + { + "epoch": 2.12, + "learning_rate": 0.00017822685114008167, + "loss": 1.2141, + "step": 1791 + }, + { + "epoch": 2.12, + "learning_rate": 0.00017820332359160928, + "loss": 1.1443, + "step": 1792 + }, + { + "epoch": 2.12, + "learning_rate": 0.0001781797848930488, + "loss": 0.9864, + "step": 1793 + }, + { + "epoch": 2.13, + "learning_rate": 0.00017815623504775636, + "loss": 1.2998, + "step": 1794 + }, + { + "epoch": 2.13, + "learning_rate": 0.0001781326740590896, + "loss": 1.0672, + "step": 1795 + }, + { + "epoch": 2.13, + "learning_rate": 0.00017810910193040785, + "loss": 0.9152, + "step": 1796 + }, + { + "epoch": 2.13, + "learning_rate": 0.000178085518665072, + "loss": 1.2555, + "step": 1797 + }, + { + "epoch": 2.13, + "learning_rate": 0.00017806192426644444, + "loss": 1.2085, + "step": 1798 + }, + { + "epoch": 2.13, + "learning_rate": 0.00017803831873788926, + "loss": 1.6205, + "step": 1799 + }, + { + "epoch": 2.13, + "learning_rate": 0.0001780147020827721, + "loss": 1.3382, + "step": 1800 + }, + { + "epoch": 2.13, + "learning_rate": 0.00017799107430446016, + "loss": 1.3309, + "step": 1801 + }, + { + "epoch": 2.14, + "learning_rate": 0.00017796743540632223, + "loss": 1.2556, + "step": 1802 + }, + { + "epoch": 2.14, + "learning_rate": 0.00017794378539172877, + "loss": 0.829, + "step": 1803 + }, + { + "epoch": 2.14, + "learning_rate": 0.00017792012426405166, + "loss": 1.1711, + "step": 1804 + }, + { + "epoch": 2.14, + "learning_rate": 0.00017789645202666456, + "loss": 1.0128, + "step": 1805 + }, + { + "epoch": 2.14, + "learning_rate": 0.00017787276868294253, + "loss": 1.2074, + "step": 1806 + }, + { + "epoch": 2.14, + "learning_rate": 0.00017784907423626237, + "loss": 1.0996, + "step": 1807 + }, + { + "epoch": 2.14, + "learning_rate": 0.0001778253686900023, + "loss": 0.9608, + "step": 1808 + }, + { + "epoch": 2.14, + "learning_rate": 0.0001778016520475423, + "loss": 0.827, + "step": 1809 + }, + { + "epoch": 2.15, + "learning_rate": 0.00017777792431226383, + "loss": 1.2365, + "step": 1810 + }, + { + "epoch": 2.15, + "learning_rate": 0.00017775418548754993, + "loss": 1.0276, + "step": 1811 + }, + { + "epoch": 2.15, + "learning_rate": 0.0001777304355767852, + "loss": 0.8178, + "step": 1812 + }, + { + "epoch": 2.15, + "learning_rate": 0.0001777066745833559, + "loss": 1.1297, + "step": 1813 + }, + { + "epoch": 2.15, + "learning_rate": 0.00017768290251064987, + "loss": 1.1737, + "step": 1814 + }, + { + "epoch": 2.15, + "learning_rate": 0.00017765911936205644, + "loss": 1.1606, + "step": 1815 + }, + { + "epoch": 2.15, + "learning_rate": 0.00017763532514096658, + "loss": 1.2605, + "step": 1816 + }, + { + "epoch": 2.15, + "learning_rate": 0.0001776115198507728, + "loss": 1.2271, + "step": 1817 + }, + { + "epoch": 2.15, + "learning_rate": 0.00017758770349486923, + "loss": 0.9407, + "step": 1818 + }, + { + "epoch": 2.16, + "learning_rate": 0.0001775638760766516, + "loss": 1.0273, + "step": 1819 + }, + { + "epoch": 2.16, + "learning_rate": 0.00017754003759951715, + "loss": 1.0746, + "step": 1820 + }, + { + "epoch": 2.16, + "learning_rate": 0.00017751618806686472, + "loss": 1.0091, + "step": 1821 + }, + { + "epoch": 2.16, + "learning_rate": 0.00017749232748209473, + "loss": 0.997, + "step": 1822 + }, + { + "epoch": 2.16, + "learning_rate": 0.0001774684558486092, + "loss": 1.4814, + "step": 1823 + }, + { + "epoch": 2.16, + "learning_rate": 0.00017744457316981168, + "loss": 1.1407, + "step": 1824 + }, + { + "epoch": 2.16, + "learning_rate": 0.00017742067944910737, + "loss": 0.9824, + "step": 1825 + }, + { + "epoch": 2.16, + "learning_rate": 0.00017739677468990293, + "loss": 1.2603, + "step": 1826 + }, + { + "epoch": 2.17, + "learning_rate": 0.00017737285889560668, + "loss": 1.3721, + "step": 1827 + }, + { + "epoch": 2.17, + "learning_rate": 0.00017734893206962853, + "loss": 1.1186, + "step": 1828 + }, + { + "epoch": 2.17, + "learning_rate": 0.00017732499421537984, + "loss": 0.7693, + "step": 1829 + }, + { + "epoch": 2.17, + "learning_rate": 0.0001773010453362737, + "loss": 1.0449, + "step": 1830 + }, + { + "epoch": 2.17, + "learning_rate": 0.00017727708543572467, + "loss": 0.9331, + "step": 1831 + }, + { + "epoch": 2.17, + "learning_rate": 0.0001772531145171489, + "loss": 0.739, + "step": 1832 + }, + { + "epoch": 2.17, + "learning_rate": 0.00017722913258396417, + "loss": 0.9076, + "step": 1833 + }, + { + "epoch": 2.17, + "learning_rate": 0.00017720513963958968, + "loss": 1.3464, + "step": 1834 + }, + { + "epoch": 2.18, + "learning_rate": 0.00017718113568744638, + "loss": 0.8858, + "step": 1835 + }, + { + "epoch": 2.18, + "learning_rate": 0.00017715712073095672, + "loss": 1.3204, + "step": 1836 + }, + { + "epoch": 2.18, + "learning_rate": 0.00017713309477354467, + "loss": 1.0538, + "step": 1837 + }, + { + "epoch": 2.18, + "learning_rate": 0.0001771090578186358, + "loss": 1.44, + "step": 1838 + }, + { + "epoch": 2.18, + "learning_rate": 0.0001770850098696573, + "loss": 1.0167, + "step": 1839 + }, + { + "epoch": 2.18, + "learning_rate": 0.00017706095093003785, + "loss": 0.9724, + "step": 1840 + }, + { + "epoch": 2.18, + "learning_rate": 0.00017703688100320774, + "loss": 0.8055, + "step": 1841 + }, + { + "epoch": 2.18, + "learning_rate": 0.0001770128000925988, + "loss": 0.7363, + "step": 1842 + }, + { + "epoch": 2.18, + "learning_rate": 0.00017698870820164446, + "loss": 1.1329, + "step": 1843 + }, + { + "epoch": 2.19, + "learning_rate": 0.00017696460533377968, + "loss": 0.9487, + "step": 1844 + }, + { + "epoch": 2.19, + "learning_rate": 0.00017694049149244104, + "loss": 1.2571, + "step": 1845 + }, + { + "epoch": 2.19, + "learning_rate": 0.0001769163666810666, + "loss": 0.9148, + "step": 1846 + }, + { + "epoch": 2.19, + "learning_rate": 0.00017689223090309607, + "loss": 1.4676, + "step": 1847 + }, + { + "epoch": 2.19, + "learning_rate": 0.00017686808416197072, + "loss": 0.9395, + "step": 1848 + }, + { + "epoch": 2.19, + "learning_rate": 0.00017684392646113325, + "loss": 0.9632, + "step": 1849 + }, + { + "epoch": 2.19, + "learning_rate": 0.00017681975780402807, + "loss": 1.0037, + "step": 1850 + }, + { + "epoch": 2.19, + "learning_rate": 0.0001767955781941011, + "loss": 0.9557, + "step": 1851 + }, + { + "epoch": 2.2, + "learning_rate": 0.00017677138763479985, + "loss": 1.2799, + "step": 1852 + }, + { + "epoch": 2.2, + "learning_rate": 0.00017674718612957336, + "loss": 0.8461, + "step": 1853 + }, + { + "epoch": 2.2, + "learning_rate": 0.0001767229736818722, + "loss": 1.2762, + "step": 1854 + }, + { + "epoch": 2.2, + "learning_rate": 0.00017669875029514856, + "loss": 1.4801, + "step": 1855 + }, + { + "epoch": 2.2, + "learning_rate": 0.00017667451597285617, + "loss": 0.9849, + "step": 1856 + }, + { + "epoch": 2.2, + "learning_rate": 0.0001766502707184503, + "loss": 1.0875, + "step": 1857 + }, + { + "epoch": 2.2, + "learning_rate": 0.00017662601453538783, + "loss": 0.8346, + "step": 1858 + }, + { + "epoch": 2.2, + "learning_rate": 0.0001766017474271271, + "loss": 1.1933, + "step": 1859 + }, + { + "epoch": 2.21, + "learning_rate": 0.00017657746939712815, + "loss": 0.8789, + "step": 1860 + }, + { + "epoch": 2.21, + "learning_rate": 0.00017655318044885245, + "loss": 1.0091, + "step": 1861 + }, + { + "epoch": 2.21, + "learning_rate": 0.0001765288805857631, + "loss": 0.7371, + "step": 1862 + }, + { + "epoch": 2.21, + "learning_rate": 0.00017650456981132466, + "loss": 0.8131, + "step": 1863 + }, + { + "epoch": 2.21, + "learning_rate": 0.00017648024812900342, + "loss": 1.0795, + "step": 1864 + }, + { + "epoch": 2.21, + "learning_rate": 0.000176455915542267, + "loss": 0.9882, + "step": 1865 + }, + { + "epoch": 2.21, + "learning_rate": 0.00017643157205458483, + "loss": 1.212, + "step": 1866 + }, + { + "epoch": 2.21, + "learning_rate": 0.00017640721766942768, + "loss": 1.4755, + "step": 1867 + }, + { + "epoch": 2.21, + "learning_rate": 0.00017638285239026798, + "loss": 1.0391, + "step": 1868 + }, + { + "epoch": 2.22, + "learning_rate": 0.00017635847622057965, + "loss": 1.2568, + "step": 1869 + }, + { + "epoch": 2.22, + "learning_rate": 0.00017633408916383826, + "loss": 1.2138, + "step": 1870 + }, + { + "epoch": 2.22, + "learning_rate": 0.0001763096912235208, + "loss": 1.196, + "step": 1871 + }, + { + "epoch": 2.22, + "learning_rate": 0.00017628528240310596, + "loss": 1.1476, + "step": 1872 + }, + { + "epoch": 2.22, + "learning_rate": 0.00017626086270607384, + "loss": 1.1421, + "step": 1873 + }, + { + "epoch": 2.22, + "learning_rate": 0.00017623643213590619, + "loss": 1.0711, + "step": 1874 + }, + { + "epoch": 2.22, + "learning_rate": 0.0001762119906960863, + "loss": 0.8842, + "step": 1875 + }, + { + "epoch": 2.22, + "learning_rate": 0.00017618753839009893, + "loss": 0.798, + "step": 1876 + }, + { + "epoch": 2.23, + "learning_rate": 0.0001761630752214305, + "loss": 0.8591, + "step": 1877 + }, + { + "epoch": 2.23, + "learning_rate": 0.00017613860119356883, + "loss": 0.7646, + "step": 1878 + }, + { + "epoch": 2.23, + "learning_rate": 0.0001761141163100035, + "loss": 1.4113, + "step": 1879 + }, + { + "epoch": 2.23, + "learning_rate": 0.00017608962057422549, + "loss": 0.8605, + "step": 1880 + }, + { + "epoch": 2.23, + "learning_rate": 0.00017606511398972731, + "loss": 0.6179, + "step": 1881 + }, + { + "epoch": 2.23, + "eval_loss": 2.3971996307373047, + "eval_runtime": 283.7444, + "eval_samples_per_second": 0.726, + "eval_steps_per_second": 0.726, + "step": 1881 + }, + { + "epoch": 2.23, + "learning_rate": 0.0001760405965600031, + "loss": 0.8651, + "step": 1882 + }, + { + "epoch": 2.23, + "learning_rate": 0.0001760160682885485, + "loss": 1.3178, + "step": 1883 + }, + { + "epoch": 2.23, + "learning_rate": 0.00017599152917886071, + "loss": 0.9233, + "step": 1884 + }, + { + "epoch": 2.24, + "learning_rate": 0.00017596697923443847, + "loss": 0.9126, + "step": 1885 + }, + { + "epoch": 2.24, + "learning_rate": 0.0001759424184587821, + "loss": 0.9749, + "step": 1886 + }, + { + "epoch": 2.24, + "learning_rate": 0.00017591784685539334, + "loss": 1.1929, + "step": 1887 + }, + { + "epoch": 2.24, + "learning_rate": 0.00017589326442777565, + "loss": 1.2026, + "step": 1888 + }, + { + "epoch": 2.24, + "learning_rate": 0.00017586867117943392, + "loss": 1.1162, + "step": 1889 + }, + { + "epoch": 2.24, + "learning_rate": 0.00017584406711387463, + "loss": 0.9818, + "step": 1890 + }, + { + "epoch": 2.24, + "learning_rate": 0.0001758194522346057, + "loss": 0.9802, + "step": 1891 + }, + { + "epoch": 2.24, + "learning_rate": 0.0001757948265451368, + "loss": 0.8963, + "step": 1892 + }, + { + "epoch": 2.24, + "learning_rate": 0.00017577019004897897, + "loss": 1.0359, + "step": 1893 + }, + { + "epoch": 2.25, + "learning_rate": 0.00017574554274964478, + "loss": 1.0788, + "step": 1894 + }, + { + "epoch": 2.25, + "learning_rate": 0.00017572088465064848, + "loss": 0.9415, + "step": 1895 + }, + { + "epoch": 2.25, + "learning_rate": 0.0001756962157555057, + "loss": 1.0944, + "step": 1896 + }, + { + "epoch": 2.25, + "learning_rate": 0.00017567153606773373, + "loss": 1.357, + "step": 1897 + }, + { + "epoch": 2.25, + "learning_rate": 0.00017564684559085136, + "loss": 1.0108, + "step": 1898 + }, + { + "epoch": 2.25, + "learning_rate": 0.0001756221443283789, + "loss": 0.5337, + "step": 1899 + }, + { + "epoch": 2.25, + "learning_rate": 0.0001755974322838382, + "loss": 1.4234, + "step": 1900 + }, + { + "epoch": 2.25, + "learning_rate": 0.0001755727094607527, + "loss": 0.9083, + "step": 1901 + }, + { + "epoch": 2.26, + "learning_rate": 0.00017554797586264727, + "loss": 0.9199, + "step": 1902 + }, + { + "epoch": 2.26, + "learning_rate": 0.00017552323149304844, + "loss": 1.1885, + "step": 1903 + }, + { + "epoch": 2.26, + "learning_rate": 0.0001754984763554842, + "loss": 1.276, + "step": 1904 + }, + { + "epoch": 2.26, + "learning_rate": 0.0001754737104534841, + "loss": 0.8882, + "step": 1905 + }, + { + "epoch": 2.26, + "learning_rate": 0.00017544893379057918, + "loss": 0.993, + "step": 1906 + }, + { + "epoch": 2.26, + "learning_rate": 0.0001754241463703021, + "loss": 1.261, + "step": 1907 + }, + { + "epoch": 2.26, + "learning_rate": 0.00017539934819618696, + "loss": 0.9877, + "step": 1908 + }, + { + "epoch": 2.26, + "learning_rate": 0.00017537453927176947, + "loss": 0.9991, + "step": 1909 + }, + { + "epoch": 2.27, + "learning_rate": 0.00017534971960058685, + "loss": 1.2012, + "step": 1910 + }, + { + "epoch": 2.27, + "learning_rate": 0.0001753248891861778, + "loss": 0.864, + "step": 1911 + }, + { + "epoch": 2.27, + "learning_rate": 0.00017530004803208263, + "loss": 1.0382, + "step": 1912 + }, + { + "epoch": 2.27, + "learning_rate": 0.00017527519614184316, + "loss": 1.068, + "step": 1913 + }, + { + "epoch": 2.27, + "learning_rate": 0.00017525033351900268, + "loss": 0.8687, + "step": 1914 + }, + { + "epoch": 2.27, + "learning_rate": 0.0001752254601671061, + "loss": 1.1174, + "step": 1915 + }, + { + "epoch": 2.27, + "learning_rate": 0.0001752005760896998, + "loss": 1.269, + "step": 1916 + }, + { + "epoch": 2.27, + "learning_rate": 0.0001751756812903317, + "loss": 0.7387, + "step": 1917 + }, + { + "epoch": 2.27, + "learning_rate": 0.0001751507757725513, + "loss": 0.8484, + "step": 1918 + }, + { + "epoch": 2.28, + "learning_rate": 0.0001751258595399095, + "loss": 1.0092, + "step": 1919 + }, + { + "epoch": 2.28, + "learning_rate": 0.00017510093259595885, + "loss": 1.0145, + "step": 1920 + }, + { + "epoch": 2.28, + "learning_rate": 0.00017507599494425344, + "loss": 1.2969, + "step": 1921 + }, + { + "epoch": 2.28, + "learning_rate": 0.00017505104658834875, + "loss": 0.7925, + "step": 1922 + }, + { + "epoch": 2.28, + "learning_rate": 0.00017502608753180196, + "loss": 0.8974, + "step": 1923 + }, + { + "epoch": 2.28, + "learning_rate": 0.00017500111777817164, + "loss": 0.764, + "step": 1924 + }, + { + "epoch": 2.28, + "learning_rate": 0.0001749761373310179, + "loss": 1.1057, + "step": 1925 + }, + { + "epoch": 2.28, + "learning_rate": 0.00017495114619390246, + "loss": 0.8092, + "step": 1926 + }, + { + "epoch": 2.29, + "learning_rate": 0.00017492614437038845, + "loss": 0.9553, + "step": 1927 + }, + { + "epoch": 2.29, + "learning_rate": 0.00017490113186404067, + "loss": 1.0278, + "step": 1928 + }, + { + "epoch": 2.29, + "learning_rate": 0.0001748761086784253, + "loss": 1.2152, + "step": 1929 + }, + { + "epoch": 2.29, + "learning_rate": 0.00017485107481711012, + "loss": 1.5154, + "step": 1930 + }, + { + "epoch": 2.29, + "learning_rate": 0.0001748260302836644, + "loss": 1.1973, + "step": 1931 + }, + { + "epoch": 2.29, + "learning_rate": 0.00017480097508165896, + "loss": 0.9429, + "step": 1932 + }, + { + "epoch": 2.29, + "learning_rate": 0.0001747759092146661, + "loss": 1.5453, + "step": 1933 + }, + { + "epoch": 2.29, + "learning_rate": 0.0001747508326862597, + "loss": 1.1691, + "step": 1934 + }, + { + "epoch": 2.3, + "learning_rate": 0.00017472574550001508, + "loss": 1.2094, + "step": 1935 + }, + { + "epoch": 2.3, + "learning_rate": 0.00017470064765950918, + "loss": 1.0777, + "step": 1936 + }, + { + "epoch": 2.3, + "learning_rate": 0.00017467553916832035, + "loss": 1.0883, + "step": 1937 + }, + { + "epoch": 2.3, + "learning_rate": 0.00017465042003002857, + "loss": 0.9297, + "step": 1938 + }, + { + "epoch": 2.3, + "learning_rate": 0.00017462529024821522, + "loss": 0.7814, + "step": 1939 + }, + { + "epoch": 2.3, + "learning_rate": 0.00017460014982646334, + "loss": 1.3645, + "step": 1940 + }, + { + "epoch": 2.3, + "learning_rate": 0.0001745749987683573, + "loss": 1.0604, + "step": 1941 + }, + { + "epoch": 2.3, + "learning_rate": 0.00017454983707748317, + "loss": 0.9416, + "step": 1942 + }, + { + "epoch": 2.3, + "learning_rate": 0.00017452466475742845, + "loss": 1.4187, + "step": 1943 + }, + { + "epoch": 2.31, + "learning_rate": 0.00017449948181178215, + "loss": 1.1619, + "step": 1944 + }, + { + "epoch": 2.31, + "learning_rate": 0.00017447428824413482, + "loss": 1.1381, + "step": 1945 + }, + { + "epoch": 2.31, + "learning_rate": 0.00017444908405807845, + "loss": 1.2304, + "step": 1946 + }, + { + "epoch": 2.31, + "learning_rate": 0.0001744238692572067, + "loss": 1.2149, + "step": 1947 + }, + { + "epoch": 2.31, + "learning_rate": 0.00017439864384511463, + "loss": 0.8172, + "step": 1948 + }, + { + "epoch": 2.31, + "learning_rate": 0.00017437340782539877, + "loss": 1.0783, + "step": 1949 + }, + { + "epoch": 2.31, + "learning_rate": 0.00017434816120165728, + "loss": 1.0661, + "step": 1950 + }, + { + "epoch": 2.31, + "learning_rate": 0.00017432290397748982, + "loss": 1.1959, + "step": 1951 + }, + { + "epoch": 2.32, + "learning_rate": 0.0001742976361564974, + "loss": 1.0581, + "step": 1952 + }, + { + "epoch": 2.32, + "learning_rate": 0.00017427235774228274, + "loss": 0.8948, + "step": 1953 + }, + { + "epoch": 2.32, + "learning_rate": 0.00017424706873845, + "loss": 1.2565, + "step": 1954 + }, + { + "epoch": 2.32, + "learning_rate": 0.00017422176914860476, + "loss": 0.9237, + "step": 1955 + }, + { + "epoch": 2.32, + "learning_rate": 0.00017419645897635432, + "loss": 1.219, + "step": 1956 + }, + { + "epoch": 2.32, + "learning_rate": 0.00017417113822530727, + "loss": 1.4606, + "step": 1957 + }, + { + "epoch": 2.32, + "learning_rate": 0.00017414580689907377, + "loss": 0.714, + "step": 1958 + }, + { + "epoch": 2.32, + "learning_rate": 0.0001741204650012656, + "loss": 1.2223, + "step": 1959 + }, + { + "epoch": 2.33, + "learning_rate": 0.00017409511253549593, + "loss": 0.9828, + "step": 1960 + }, + { + "epoch": 2.33, + "learning_rate": 0.00017406974950537942, + "loss": 0.9954, + "step": 1961 + }, + { + "epoch": 2.33, + "learning_rate": 0.00017404437591453235, + "loss": 1.0307, + "step": 1962 + }, + { + "epoch": 2.33, + "learning_rate": 0.0001740189917665724, + "loss": 0.9331, + "step": 1963 + }, + { + "epoch": 2.33, + "learning_rate": 0.0001739935970651188, + "loss": 1.3517, + "step": 1964 + }, + { + "epoch": 2.33, + "learning_rate": 0.00017396819181379232, + "loss": 1.2024, + "step": 1965 + }, + { + "epoch": 2.33, + "learning_rate": 0.0001739427760162151, + "loss": 0.9696, + "step": 1966 + }, + { + "epoch": 2.33, + "learning_rate": 0.00017391734967601102, + "loss": 1.1559, + "step": 1967 + }, + { + "epoch": 2.33, + "learning_rate": 0.0001738919127968052, + "loss": 1.3104, + "step": 1968 + }, + { + "epoch": 2.34, + "learning_rate": 0.00017386646538222443, + "loss": 0.9073, + "step": 1969 + }, + { + "epoch": 2.34, + "learning_rate": 0.00017384100743589697, + "loss": 1.0539, + "step": 1970 + }, + { + "epoch": 2.34, + "learning_rate": 0.00017381553896145255, + "loss": 0.9873, + "step": 1971 + }, + { + "epoch": 2.34, + "learning_rate": 0.0001737900599625224, + "loss": 0.9466, + "step": 1972 + }, + { + "epoch": 2.34, + "learning_rate": 0.0001737645704427393, + "loss": 1.0639, + "step": 1973 + }, + { + "epoch": 2.34, + "learning_rate": 0.0001737390704057375, + "loss": 0.5843, + "step": 1974 + }, + { + "epoch": 2.34, + "learning_rate": 0.00017371355985515275, + "loss": 1.1318, + "step": 1975 + }, + { + "epoch": 2.34, + "learning_rate": 0.00017368803879462227, + "loss": 1.0116, + "step": 1976 + }, + { + "epoch": 2.35, + "learning_rate": 0.0001736625072277848, + "loss": 0.8845, + "step": 1977 + }, + { + "epoch": 2.35, + "learning_rate": 0.00017363696515828062, + "loss": 0.8081, + "step": 1978 + }, + { + "epoch": 2.35, + "learning_rate": 0.00017361141258975148, + "loss": 0.8795, + "step": 1979 + }, + { + "epoch": 2.35, + "learning_rate": 0.0001735858495258406, + "loss": 0.9725, + "step": 1980 + }, + { + "epoch": 2.35, + "learning_rate": 0.0001735602759701927, + "loss": 1.0164, + "step": 1981 + }, + { + "epoch": 2.35, + "learning_rate": 0.00017353469192645405, + "loss": 1.2937, + "step": 1982 + }, + { + "epoch": 2.35, + "learning_rate": 0.0001735090973982723, + "loss": 1.0842, + "step": 1983 + }, + { + "epoch": 2.35, + "learning_rate": 0.00017348349238929678, + "loss": 1.0043, + "step": 1984 + }, + { + "epoch": 2.36, + "learning_rate": 0.00017345787690317815, + "loss": 1.1302, + "step": 1985 + }, + { + "epoch": 2.36, + "learning_rate": 0.00017343225094356855, + "loss": 1.195, + "step": 1986 + }, + { + "epoch": 2.36, + "learning_rate": 0.00017340661451412183, + "loss": 1.1449, + "step": 1987 + }, + { + "epoch": 2.36, + "learning_rate": 0.00017338096761849309, + "loss": 1.2244, + "step": 1988 + }, + { + "epoch": 2.36, + "learning_rate": 0.00017335531026033897, + "loss": 0.9273, + "step": 1989 + }, + { + "epoch": 2.36, + "learning_rate": 0.00017332964244331776, + "loss": 1.0448, + "step": 1990 + }, + { + "epoch": 2.36, + "learning_rate": 0.00017330396417108908, + "loss": 1.0074, + "step": 1991 + }, + { + "epoch": 2.36, + "learning_rate": 0.00017327827544731412, + "loss": 0.9284, + "step": 1992 + }, + { + "epoch": 2.36, + "learning_rate": 0.0001732525762756555, + "loss": 1.0307, + "step": 1993 + }, + { + "epoch": 2.37, + "learning_rate": 0.00017322686665977737, + "loss": 1.1526, + "step": 1994 + }, + { + "epoch": 2.37, + "learning_rate": 0.00017320114660334535, + "loss": 0.819, + "step": 1995 + }, + { + "epoch": 2.37, + "learning_rate": 0.00017317541611002656, + "loss": 1.1029, + "step": 1996 + }, + { + "epoch": 2.37, + "learning_rate": 0.00017314967518348962, + "loss": 1.2471, + "step": 1997 + }, + { + "epoch": 2.37, + "learning_rate": 0.00017312392382740462, + "loss": 1.0156, + "step": 1998 + }, + { + "epoch": 2.37, + "learning_rate": 0.00017309816204544317, + "loss": 1.1843, + "step": 1999 + }, + { + "epoch": 2.37, + "learning_rate": 0.00017307238984127832, + "loss": 1.1588, + "step": 2000 + }, + { + "epoch": 2.37, + "learning_rate": 0.00017304660721858457, + "loss": 1.0157, + "step": 2001 + }, + { + "epoch": 2.38, + "learning_rate": 0.000173020814181038, + "loss": 1.0563, + "step": 2002 + }, + { + "epoch": 2.38, + "learning_rate": 0.00017299501073231622, + "loss": 1.1883, + "step": 2003 + }, + { + "epoch": 2.38, + "learning_rate": 0.00017296919687609808, + "loss": 0.9404, + "step": 2004 + }, + { + "epoch": 2.38, + "learning_rate": 0.00017294337261606417, + "loss": 1.2495, + "step": 2005 + }, + { + "epoch": 2.38, + "learning_rate": 0.00017291753795589643, + "loss": 1.0074, + "step": 2006 + }, + { + "epoch": 2.38, + "learning_rate": 0.00017289169289927837, + "loss": 1.1411, + "step": 2007 + }, + { + "epoch": 2.38, + "learning_rate": 0.00017286583744989488, + "loss": 0.9942, + "step": 2008 + }, + { + "epoch": 2.38, + "learning_rate": 0.00017283997161143239, + "loss": 0.952, + "step": 2009 + }, + { + "epoch": 2.39, + "learning_rate": 0.00017281409538757883, + "loss": 1.2966, + "step": 2010 + }, + { + "epoch": 2.39, + "learning_rate": 0.00017278820878202357, + "loss": 1.0836, + "step": 2011 + }, + { + "epoch": 2.39, + "learning_rate": 0.0001727623117984575, + "loss": 1.0984, + "step": 2012 + }, + { + "epoch": 2.39, + "learning_rate": 0.0001727364044405729, + "loss": 0.8822, + "step": 2013 + }, + { + "epoch": 2.39, + "learning_rate": 0.00017271048671206366, + "loss": 1.2014, + "step": 2014 + }, + { + "epoch": 2.39, + "learning_rate": 0.00017268455861662503, + "loss": 1.1779, + "step": 2015 + }, + { + "epoch": 2.39, + "learning_rate": 0.00017265862015795384, + "loss": 0.9966, + "step": 2016 + }, + { + "epoch": 2.39, + "learning_rate": 0.00017263267133974832, + "loss": 0.9536, + "step": 2017 + }, + { + "epoch": 2.39, + "learning_rate": 0.00017260671216570822, + "loss": 0.811, + "step": 2018 + }, + { + "epoch": 2.4, + "learning_rate": 0.00017258074263953472, + "loss": 0.8241, + "step": 2019 + }, + { + "epoch": 2.4, + "learning_rate": 0.00017255476276493056, + "loss": 1.1263, + "step": 2020 + }, + { + "epoch": 2.4, + "learning_rate": 0.00017252877254559986, + "loss": 0.995, + "step": 2021 + }, + { + "epoch": 2.4, + "learning_rate": 0.0001725027719852483, + "loss": 1.1481, + "step": 2022 + }, + { + "epoch": 2.4, + "learning_rate": 0.0001724767610875829, + "loss": 1.129, + "step": 2023 + }, + { + "epoch": 2.4, + "learning_rate": 0.00017245073985631238, + "loss": 0.5928, + "step": 2024 + }, + { + "epoch": 2.4, + "learning_rate": 0.00017242470829514672, + "loss": 0.8326, + "step": 2025 + }, + { + "epoch": 2.4, + "learning_rate": 0.00017239866640779745, + "loss": 1.1092, + "step": 2026 + }, + { + "epoch": 2.41, + "learning_rate": 0.00017237261419797756, + "loss": 1.5015, + "step": 2027 + }, + { + "epoch": 2.41, + "learning_rate": 0.0001723465516694016, + "loss": 0.9775, + "step": 2028 + }, + { + "epoch": 2.41, + "learning_rate": 0.00017232047882578548, + "loss": 0.9348, + "step": 2029 + }, + { + "epoch": 2.41, + "learning_rate": 0.0001722943956708466, + "loss": 0.6199, + "step": 2030 + }, + { + "epoch": 2.41, + "learning_rate": 0.00017226830220830384, + "loss": 1.1485, + "step": 2031 + }, + { + "epoch": 2.41, + "learning_rate": 0.00017224219844187764, + "loss": 1.1195, + "step": 2032 + }, + { + "epoch": 2.41, + "learning_rate": 0.00017221608437528973, + "loss": 1.0528, + "step": 2033 + }, + { + "epoch": 2.41, + "learning_rate": 0.00017218996001226345, + "loss": 1.1058, + "step": 2034 + }, + { + "epoch": 2.42, + "learning_rate": 0.00017216382535652355, + "loss": 1.1451, + "step": 2035 + }, + { + "epoch": 2.42, + "learning_rate": 0.0001721376804117963, + "loss": 1.2251, + "step": 2036 + }, + { + "epoch": 2.42, + "learning_rate": 0.00017211152518180936, + "loss": 1.0708, + "step": 2037 + }, + { + "epoch": 2.42, + "learning_rate": 0.00017208535967029188, + "loss": 1.0746, + "step": 2038 + }, + { + "epoch": 2.42, + "learning_rate": 0.00017205918388097456, + "loss": 1.3262, + "step": 2039 + }, + { + "epoch": 2.42, + "learning_rate": 0.00017203299781758943, + "loss": 0.7619, + "step": 2040 + }, + { + "epoch": 2.42, + "learning_rate": 0.00017200680148387007, + "loss": 1.01, + "step": 2041 + }, + { + "epoch": 2.42, + "learning_rate": 0.0001719805948835515, + "loss": 1.1651, + "step": 2042 + }, + { + "epoch": 2.42, + "learning_rate": 0.00017195437802037026, + "loss": 1.4671, + "step": 2043 + }, + { + "epoch": 2.43, + "learning_rate": 0.00017192815089806424, + "loss": 0.9857, + "step": 2044 + }, + { + "epoch": 2.43, + "learning_rate": 0.0001719019135203729, + "loss": 1.2613, + "step": 2045 + }, + { + "epoch": 2.43, + "learning_rate": 0.00017187566589103704, + "loss": 1.4386, + "step": 2046 + }, + { + "epoch": 2.43, + "learning_rate": 0.0001718494080137991, + "loss": 1.0965, + "step": 2047 + }, + { + "epoch": 2.43, + "learning_rate": 0.00017182313989240285, + "loss": 0.752, + "step": 2048 + }, + { + "epoch": 2.43, + "learning_rate": 0.00017179686153059352, + "loss": 0.9126, + "step": 2049 + }, + { + "epoch": 2.43, + "learning_rate": 0.00017177057293211784, + "loss": 1.5075, + "step": 2050 + }, + { + "epoch": 2.43, + "learning_rate": 0.000171744274100724, + "loss": 1.0407, + "step": 2051 + }, + { + "epoch": 2.44, + "learning_rate": 0.00017171796504016166, + "loss": 0.8263, + "step": 2052 + }, + { + "epoch": 2.44, + "learning_rate": 0.0001716916457541819, + "loss": 0.9453, + "step": 2053 + }, + { + "epoch": 2.44, + "learning_rate": 0.00017166531624653722, + "loss": 0.9777, + "step": 2054 + }, + { + "epoch": 2.44, + "learning_rate": 0.00017163897652098172, + "loss": 1.2129, + "step": 2055 + }, + { + "epoch": 2.44, + "learning_rate": 0.00017161262658127086, + "loss": 1.3642, + "step": 2056 + }, + { + "epoch": 2.44, + "learning_rate": 0.00017158626643116152, + "loss": 0.6798, + "step": 2057 + }, + { + "epoch": 2.44, + "learning_rate": 0.00017155989607441213, + "loss": 0.874, + "step": 2058 + }, + { + "epoch": 2.44, + "learning_rate": 0.00017153351551478247, + "loss": 1.0636, + "step": 2059 + }, + { + "epoch": 2.45, + "learning_rate": 0.0001715071247560339, + "loss": 1.0563, + "step": 2060 + }, + { + "epoch": 2.45, + "learning_rate": 0.0001714807238019291, + "loss": 1.1984, + "step": 2061 + }, + { + "epoch": 2.45, + "learning_rate": 0.00017145431265623234, + "loss": 0.9444, + "step": 2062 + }, + { + "epoch": 2.45, + "learning_rate": 0.0001714278913227092, + "loss": 0.7809, + "step": 2063 + }, + { + "epoch": 2.45, + "learning_rate": 0.00017140145980512684, + "loss": 1.649, + "step": 2064 + }, + { + "epoch": 2.45, + "learning_rate": 0.0001713750181072538, + "loss": 1.0956, + "step": 2065 + }, + { + "epoch": 2.45, + "learning_rate": 0.0001713485662328601, + "loss": 1.2845, + "step": 2066 + }, + { + "epoch": 2.45, + "learning_rate": 0.00017132210418571714, + "loss": 1.0484, + "step": 2067 + }, + { + "epoch": 2.45, + "learning_rate": 0.00017129563196959793, + "loss": 1.0291, + "step": 2068 + }, + { + "epoch": 2.46, + "learning_rate": 0.00017126914958827679, + "loss": 1.1226, + "step": 2069 + }, + { + "epoch": 2.46, + "learning_rate": 0.0001712426570455295, + "loss": 1.0119, + "step": 2070 + }, + { + "epoch": 2.46, + "learning_rate": 0.00017121615434513332, + "loss": 1.1663, + "step": 2071 + }, + { + "epoch": 2.46, + "learning_rate": 0.000171189641490867, + "loss": 1.1353, + "step": 2072 + }, + { + "epoch": 2.46, + "learning_rate": 0.00017116311848651064, + "loss": 1.0761, + "step": 2073 + }, + { + "epoch": 2.46, + "learning_rate": 0.00017113658533584594, + "loss": 1.1978, + "step": 2074 + }, + { + "epoch": 2.46, + "learning_rate": 0.00017111004204265582, + "loss": 1.3881, + "step": 2075 + }, + { + "epoch": 2.46, + "learning_rate": 0.00017108348861072484, + "loss": 1.3945, + "step": 2076 + }, + { + "epoch": 2.47, + "learning_rate": 0.00017105692504383897, + "loss": 1.3796, + "step": 2077 + }, + { + "epoch": 2.47, + "learning_rate": 0.00017103035134578555, + "loss": 1.1721, + "step": 2078 + }, + { + "epoch": 2.47, + "learning_rate": 0.0001710037675203534, + "loss": 1.0061, + "step": 2079 + }, + { + "epoch": 2.47, + "learning_rate": 0.00017097717357133284, + "loss": 1.2456, + "step": 2080 + }, + { + "epoch": 2.47, + "learning_rate": 0.00017095056950251555, + "loss": 0.788, + "step": 2081 + }, + { + "epoch": 2.47, + "learning_rate": 0.0001709239553176947, + "loss": 1.16, + "step": 2082 + }, + { + "epoch": 2.47, + "learning_rate": 0.0001708973310206649, + "loss": 1.0498, + "step": 2083 + }, + { + "epoch": 2.47, + "learning_rate": 0.00017087069661522218, + "loss": 0.8993, + "step": 2084 + }, + { + "epoch": 2.48, + "learning_rate": 0.00017084405210516406, + "loss": 1.2088, + "step": 2085 + }, + { + "epoch": 2.48, + "learning_rate": 0.0001708173974942894, + "loss": 1.0897, + "step": 2086 + }, + { + "epoch": 2.48, + "learning_rate": 0.00017079073278639863, + "loss": 1.2718, + "step": 2087 + }, + { + "epoch": 2.48, + "learning_rate": 0.00017076405798529355, + "loss": 1.2325, + "step": 2088 + }, + { + "epoch": 2.48, + "learning_rate": 0.00017073737309477736, + "loss": 1.0555, + "step": 2089 + }, + { + "epoch": 2.48, + "learning_rate": 0.00017071067811865476, + "loss": 1.1428, + "step": 2090 + }, + { + "epoch": 2.48, + "eval_loss": 2.3191208839416504, + "eval_runtime": 284.1375, + "eval_samples_per_second": 0.725, + "eval_steps_per_second": 0.725, + "step": 2090 + }, + { + "epoch": 2.48, + "learning_rate": 0.0001706839730607319, + "loss": 1.0908, + "step": 2091 + }, + { + "epoch": 2.48, + "learning_rate": 0.0001706572579248163, + "loss": 1.2092, + "step": 2092 + }, + { + "epoch": 2.48, + "learning_rate": 0.000170630532714717, + "loss": 1.1735, + "step": 2093 + }, + { + "epoch": 2.49, + "learning_rate": 0.0001706037974342444, + "loss": 1.2716, + "step": 2094 + }, + { + "epoch": 2.49, + "learning_rate": 0.00017057705208721035, + "loss": 1.0095, + "step": 2095 + }, + { + "epoch": 2.49, + "learning_rate": 0.0001705502966774282, + "loss": 1.3059, + "step": 2096 + }, + { + "epoch": 2.49, + "learning_rate": 0.00017052353120871266, + "loss": 0.8269, + "step": 2097 + }, + { + "epoch": 2.49, + "learning_rate": 0.0001704967556848799, + "loss": 1.0615, + "step": 2098 + }, + { + "epoch": 2.49, + "learning_rate": 0.00017046997010974755, + "loss": 1.2709, + "step": 2099 + }, + { + "epoch": 2.49, + "learning_rate": 0.00017044317448713461, + "loss": 1.1633, + "step": 2100 + }, + { + "epoch": 2.49, + "learning_rate": 0.00017041636882086158, + "loss": 0.9273, + "step": 2101 + }, + { + "epoch": 2.5, + "learning_rate": 0.00017038955311475038, + "loss": 1.3117, + "step": 2102 + }, + { + "epoch": 2.5, + "learning_rate": 0.0001703627273726243, + "loss": 0.8883, + "step": 2103 + }, + { + "epoch": 2.5, + "learning_rate": 0.00017033589159830815, + "loss": 1.1371, + "step": 2104 + }, + { + "epoch": 2.5, + "learning_rate": 0.00017030904579562806, + "loss": 1.5402, + "step": 2105 + }, + { + "epoch": 2.5, + "learning_rate": 0.00017028218996841172, + "loss": 0.9156, + "step": 2106 + }, + { + "epoch": 2.5, + "learning_rate": 0.00017025532412048817, + "loss": 1.0962, + "step": 2107 + }, + { + "epoch": 2.5, + "learning_rate": 0.0001702284482556879, + "loss": 0.9402, + "step": 2108 + }, + { + "epoch": 2.5, + "learning_rate": 0.00017020156237784279, + "loss": 0.8146, + "step": 2109 + }, + { + "epoch": 2.51, + "learning_rate": 0.0001701746664907862, + "loss": 1.1718, + "step": 2110 + }, + { + "epoch": 2.51, + "learning_rate": 0.00017014776059835288, + "loss": 1.0618, + "step": 2111 + }, + { + "epoch": 2.51, + "learning_rate": 0.00017012084470437907, + "loss": 1.4796, + "step": 2112 + }, + { + "epoch": 2.51, + "learning_rate": 0.00017009391881270237, + "loss": 0.8402, + "step": 2113 + }, + { + "epoch": 2.51, + "learning_rate": 0.00017006698292716178, + "loss": 1.1641, + "step": 2114 + }, + { + "epoch": 2.51, + "learning_rate": 0.0001700400370515978, + "loss": 1.241, + "step": 2115 + }, + { + "epoch": 2.51, + "learning_rate": 0.00017001308118985237, + "loss": 0.8683, + "step": 2116 + }, + { + "epoch": 2.51, + "learning_rate": 0.00016998611534576873, + "loss": 1.2697, + "step": 2117 + }, + { + "epoch": 2.52, + "learning_rate": 0.00016995913952319168, + "loss": 0.9233, + "step": 2118 + }, + { + "epoch": 2.52, + "learning_rate": 0.00016993215372596737, + "loss": 1.2472, + "step": 2119 + }, + { + "epoch": 2.52, + "learning_rate": 0.00016990515795794334, + "loss": 1.2541, + "step": 2120 + }, + { + "epoch": 2.52, + "learning_rate": 0.00016987815222296865, + "loss": 1.0016, + "step": 2121 + }, + { + "epoch": 2.52, + "learning_rate": 0.00016985113652489374, + "loss": 1.0678, + "step": 2122 + }, + { + "epoch": 2.52, + "learning_rate": 0.00016982411086757037, + "loss": 1.6066, + "step": 2123 + }, + { + "epoch": 2.52, + "learning_rate": 0.00016979707525485192, + "loss": 1.229, + "step": 2124 + }, + { + "epoch": 2.52, + "learning_rate": 0.00016977002969059302, + "loss": 0.752, + "step": 2125 + }, + { + "epoch": 2.52, + "learning_rate": 0.00016974297417864977, + "loss": 0.8752, + "step": 2126 + }, + { + "epoch": 2.53, + "learning_rate": 0.0001697159087228797, + "loss": 0.8896, + "step": 2127 + }, + { + "epoch": 2.53, + "learning_rate": 0.00016968883332714186, + "loss": 0.9657, + "step": 2128 + }, + { + "epoch": 2.53, + "learning_rate": 0.0001696617479952964, + "loss": 1.3657, + "step": 2129 + }, + { + "epoch": 2.53, + "learning_rate": 0.0001696346527312053, + "loss": 0.9876, + "step": 2130 + }, + { + "epoch": 2.53, + "learning_rate": 0.00016960754753873162, + "loss": 1.0165, + "step": 2131 + }, + { + "epoch": 2.53, + "learning_rate": 0.00016958043242174003, + "loss": 1.625, + "step": 2132 + }, + { + "epoch": 2.53, + "learning_rate": 0.00016955330738409655, + "loss": 1.5502, + "step": 2133 + }, + { + "epoch": 2.53, + "learning_rate": 0.00016952617242966864, + "loss": 1.0793, + "step": 2134 + }, + { + "epoch": 2.54, + "learning_rate": 0.00016949902756232507, + "loss": 1.4425, + "step": 2135 + }, + { + "epoch": 2.54, + "learning_rate": 0.00016947187278593622, + "loss": 1.3124, + "step": 2136 + }, + { + "epoch": 2.54, + "learning_rate": 0.00016944470810437365, + "loss": 0.927, + "step": 2137 + }, + { + "epoch": 2.54, + "learning_rate": 0.00016941753352151055, + "loss": 1.1911, + "step": 2138 + }, + { + "epoch": 2.54, + "learning_rate": 0.00016939034904122138, + "loss": 1.0768, + "step": 2139 + }, + { + "epoch": 2.54, + "learning_rate": 0.00016936315466738205, + "loss": 1.1277, + "step": 2140 + }, + { + "epoch": 2.54, + "learning_rate": 0.00016933595040386984, + "loss": 0.812, + "step": 2141 + }, + { + "epoch": 2.54, + "learning_rate": 0.0001693087362545636, + "loss": 0.8299, + "step": 2142 + }, + { + "epoch": 2.55, + "learning_rate": 0.00016928151222334338, + "loss": 1.1125, + "step": 2143 + }, + { + "epoch": 2.55, + "learning_rate": 0.00016925427831409077, + "loss": 1.1835, + "step": 2144 + }, + { + "epoch": 2.55, + "learning_rate": 0.00016922703453068873, + "loss": 1.2007, + "step": 2145 + }, + { + "epoch": 2.55, + "learning_rate": 0.00016919978087702163, + "loss": 0.8524, + "step": 2146 + }, + { + "epoch": 2.55, + "learning_rate": 0.00016917251735697523, + "loss": 0.9497, + "step": 2147 + }, + { + "epoch": 2.55, + "learning_rate": 0.00016914524397443673, + "loss": 1.1004, + "step": 2148 + }, + { + "epoch": 2.55, + "learning_rate": 0.00016911796073329466, + "loss": 0.8347, + "step": 2149 + }, + { + "epoch": 2.55, + "learning_rate": 0.00016909066763743912, + "loss": 0.9492, + "step": 2150 + }, + { + "epoch": 2.55, + "learning_rate": 0.00016906336469076148, + "loss": 1.1406, + "step": 2151 + }, + { + "epoch": 2.56, + "learning_rate": 0.00016903605189715447, + "loss": 1.0137, + "step": 2152 + }, + { + "epoch": 2.56, + "learning_rate": 0.0001690087292605124, + "loss": 1.0624, + "step": 2153 + }, + { + "epoch": 2.56, + "learning_rate": 0.00016898139678473076, + "loss": 1.1767, + "step": 2154 + }, + { + "epoch": 2.56, + "learning_rate": 0.0001689540544737067, + "loss": 1.4184, + "step": 2155 + }, + { + "epoch": 2.56, + "learning_rate": 0.00016892670233133856, + "loss": 0.957, + "step": 2156 + }, + { + "epoch": 2.56, + "learning_rate": 0.00016889934036152618, + "loss": 1.0399, + "step": 2157 + }, + { + "epoch": 2.56, + "learning_rate": 0.00016887196856817073, + "loss": 1.2009, + "step": 2158 + }, + { + "epoch": 2.56, + "learning_rate": 0.00016884458695517495, + "loss": 1.3977, + "step": 2159 + }, + { + "epoch": 2.57, + "learning_rate": 0.00016881719552644273, + "loss": 1.1328, + "step": 2160 + }, + { + "epoch": 2.57, + "learning_rate": 0.00016878979428587955, + "loss": 1.5007, + "step": 2161 + }, + { + "epoch": 2.57, + "learning_rate": 0.00016876238323739221, + "loss": 1.1248, + "step": 2162 + }, + { + "epoch": 2.57, + "learning_rate": 0.00016873496238488899, + "loss": 1.0358, + "step": 2163 + }, + { + "epoch": 2.57, + "learning_rate": 0.00016870753173227945, + "loss": 1.2961, + "step": 2164 + }, + { + "epoch": 2.57, + "learning_rate": 0.00016868009128347459, + "loss": 0.9435, + "step": 2165 + }, + { + "epoch": 2.57, + "learning_rate": 0.00016865264104238683, + "loss": 0.9642, + "step": 2166 + }, + { + "epoch": 2.57, + "learning_rate": 0.00016862518101293, + "loss": 1.0169, + "step": 2167 + }, + { + "epoch": 2.58, + "learning_rate": 0.00016859771119901929, + "loss": 1.0904, + "step": 2168 + }, + { + "epoch": 2.58, + "learning_rate": 0.0001685702316045713, + "loss": 1.3178, + "step": 2169 + }, + { + "epoch": 2.58, + "learning_rate": 0.00016854274223350397, + "loss": 1.1395, + "step": 2170 + }, + { + "epoch": 2.58, + "learning_rate": 0.00016851524308973678, + "loss": 1.1207, + "step": 2171 + }, + { + "epoch": 2.58, + "learning_rate": 0.00016848773417719044, + "loss": 1.3544, + "step": 2172 + }, + { + "epoch": 2.58, + "learning_rate": 0.00016846021549978715, + "loss": 1.3503, + "step": 2173 + }, + { + "epoch": 2.58, + "learning_rate": 0.00016843268706145042, + "loss": 1.4276, + "step": 2174 + }, + { + "epoch": 2.58, + "learning_rate": 0.00016840514886610529, + "loss": 0.9888, + "step": 2175 + }, + { + "epoch": 2.58, + "learning_rate": 0.00016837760091767802, + "loss": 1.0913, + "step": 2176 + }, + { + "epoch": 2.59, + "learning_rate": 0.0001683500432200964, + "loss": 1.4781, + "step": 2177 + }, + { + "epoch": 2.59, + "learning_rate": 0.00016832247577728955, + "loss": 1.2657, + "step": 2178 + }, + { + "epoch": 2.59, + "learning_rate": 0.000168294898593188, + "loss": 0.9206, + "step": 2179 + }, + { + "epoch": 2.59, + "learning_rate": 0.0001682673116717236, + "loss": 0.9218, + "step": 2180 + }, + { + "epoch": 2.59, + "learning_rate": 0.0001682397150168297, + "loss": 1.2719, + "step": 2181 + }, + { + "epoch": 2.59, + "learning_rate": 0.00016821210863244096, + "loss": 0.984, + "step": 2182 + }, + { + "epoch": 2.59, + "learning_rate": 0.00016818449252249345, + "loss": 1.4641, + "step": 2183 + }, + { + "epoch": 2.59, + "learning_rate": 0.0001681568666909246, + "loss": 1.2571, + "step": 2184 + }, + { + "epoch": 2.6, + "learning_rate": 0.00016812923114167328, + "loss": 1.2025, + "step": 2185 + }, + { + "epoch": 2.6, + "learning_rate": 0.00016810158587867973, + "loss": 0.9621, + "step": 2186 + }, + { + "epoch": 2.6, + "learning_rate": 0.00016807393090588553, + "loss": 1.0016, + "step": 2187 + }, + { + "epoch": 2.6, + "learning_rate": 0.00016804626622723368, + "loss": 1.031, + "step": 2188 + }, + { + "epoch": 2.6, + "learning_rate": 0.00016801859184666857, + "loss": 0.7573, + "step": 2189 + }, + { + "epoch": 2.6, + "learning_rate": 0.00016799090776813597, + "loss": 1.2694, + "step": 2190 + }, + { + "epoch": 2.6, + "learning_rate": 0.000167963213995583, + "loss": 1.196, + "step": 2191 + }, + { + "epoch": 2.6, + "learning_rate": 0.00016793551053295822, + "loss": 0.8754, + "step": 2192 + }, + { + "epoch": 2.61, + "learning_rate": 0.00016790779738421152, + "loss": 1.1743, + "step": 2193 + }, + { + "epoch": 2.61, + "learning_rate": 0.0001678800745532942, + "loss": 1.0921, + "step": 2194 + }, + { + "epoch": 2.61, + "learning_rate": 0.00016785234204415888, + "loss": 0.8778, + "step": 2195 + }, + { + "epoch": 2.61, + "learning_rate": 0.0001678245998607597, + "loss": 1.0528, + "step": 2196 + }, + { + "epoch": 2.61, + "learning_rate": 0.00016779684800705203, + "loss": 1.0255, + "step": 2197 + }, + { + "epoch": 2.61, + "learning_rate": 0.0001677690864869927, + "loss": 0.6344, + "step": 2198 + }, + { + "epoch": 2.61, + "learning_rate": 0.00016774131530453992, + "loss": 0.8691, + "step": 2199 + }, + { + "epoch": 2.61, + "learning_rate": 0.00016771353446365318, + "loss": 1.2061, + "step": 2200 + }, + { + "epoch": 2.61, + "learning_rate": 0.0001676857439682935, + "loss": 1.1759, + "step": 2201 + }, + { + "epoch": 2.62, + "learning_rate": 0.00016765794382242314, + "loss": 1.1118, + "step": 2202 + }, + { + "epoch": 2.62, + "learning_rate": 0.00016763013403000584, + "loss": 1.3005, + "step": 2203 + }, + { + "epoch": 2.62, + "learning_rate": 0.00016760231459500666, + "loss": 1.0415, + "step": 2204 + }, + { + "epoch": 2.62, + "learning_rate": 0.000167574485521392, + "loss": 0.824, + "step": 2205 + }, + { + "epoch": 2.62, + "learning_rate": 0.00016754664681312975, + "loss": 0.6682, + "step": 2206 + }, + { + "epoch": 2.62, + "learning_rate": 0.00016751879847418905, + "loss": 1.9204, + "step": 2207 + }, + { + "epoch": 2.62, + "learning_rate": 0.00016749094050854047, + "loss": 0.9931, + "step": 2208 + }, + { + "epoch": 2.62, + "learning_rate": 0.00016746307292015602, + "loss": 0.8898, + "step": 2209 + }, + { + "epoch": 2.63, + "learning_rate": 0.00016743519571300888, + "loss": 1.3337, + "step": 2210 + }, + { + "epoch": 2.63, + "learning_rate": 0.00016740730889107383, + "loss": 1.2947, + "step": 2211 + }, + { + "epoch": 2.63, + "learning_rate": 0.0001673794124583269, + "loss": 1.1882, + "step": 2212 + }, + { + "epoch": 2.63, + "learning_rate": 0.0001673515064187455, + "loss": 1.5408, + "step": 2213 + }, + { + "epoch": 2.63, + "learning_rate": 0.00016732359077630847, + "loss": 1.1273, + "step": 2214 + }, + { + "epoch": 2.63, + "learning_rate": 0.0001672956655349959, + "loss": 0.8954, + "step": 2215 + }, + { + "epoch": 2.63, + "learning_rate": 0.00016726773069878934, + "loss": 1.1747, + "step": 2216 + }, + { + "epoch": 2.63, + "learning_rate": 0.00016723978627167173, + "loss": 0.807, + "step": 2217 + }, + { + "epoch": 2.64, + "learning_rate": 0.00016721183225762727, + "loss": 1.2512, + "step": 2218 + }, + { + "epoch": 2.64, + "learning_rate": 0.00016718386866064166, + "loss": 1.0796, + "step": 2219 + }, + { + "epoch": 2.64, + "learning_rate": 0.00016715589548470185, + "loss": 1.0905, + "step": 2220 + }, + { + "epoch": 2.64, + "learning_rate": 0.00016712791273379622, + "loss": 1.3779, + "step": 2221 + }, + { + "epoch": 2.64, + "learning_rate": 0.00016709992041191452, + "loss": 1.2015, + "step": 2222 + }, + { + "epoch": 2.64, + "learning_rate": 0.00016707191852304782, + "loss": 0.8612, + "step": 2223 + }, + { + "epoch": 2.64, + "learning_rate": 0.0001670439070711886, + "loss": 1.1819, + "step": 2224 + }, + { + "epoch": 2.64, + "learning_rate": 0.00016701588606033064, + "loss": 1.2715, + "step": 2225 + }, + { + "epoch": 2.64, + "learning_rate": 0.0001669878554944692, + "loss": 1.3681, + "step": 2226 + }, + { + "epoch": 2.65, + "learning_rate": 0.00016695981537760072, + "loss": 1.1254, + "step": 2227 + }, + { + "epoch": 2.65, + "learning_rate": 0.0001669317657137232, + "loss": 0.9476, + "step": 2228 + }, + { + "epoch": 2.65, + "learning_rate": 0.0001669037065068359, + "loss": 1.235, + "step": 2229 + }, + { + "epoch": 2.65, + "learning_rate": 0.00016687563776093941, + "loss": 0.7356, + "step": 2230 + }, + { + "epoch": 2.65, + "learning_rate": 0.00016684755948003573, + "loss": 0.7901, + "step": 2231 + }, + { + "epoch": 2.65, + "learning_rate": 0.00016681947166812824, + "loss": 1.317, + "step": 2232 + }, + { + "epoch": 2.65, + "learning_rate": 0.00016679137432922163, + "loss": 0.8832, + "step": 2233 + }, + { + "epoch": 2.65, + "learning_rate": 0.00016676326746732195, + "loss": 1.2776, + "step": 2234 + }, + { + "epoch": 2.66, + "learning_rate": 0.00016673515108643665, + "loss": 1.0435, + "step": 2235 + }, + { + "epoch": 2.66, + "learning_rate": 0.0001667070251905745, + "loss": 1.0957, + "step": 2236 + }, + { + "epoch": 2.66, + "learning_rate": 0.00016667888978374567, + "loss": 1.0862, + "step": 2237 + }, + { + "epoch": 2.66, + "learning_rate": 0.00016665074486996165, + "loss": 1.1112, + "step": 2238 + }, + { + "epoch": 2.66, + "learning_rate": 0.0001666225904532352, + "loss": 1.3633, + "step": 2239 + }, + { + "epoch": 2.66, + "learning_rate": 0.00016659442653758064, + "loss": 1.444, + "step": 2240 + }, + { + "epoch": 2.66, + "learning_rate": 0.00016656625312701348, + "loss": 0.8248, + "step": 2241 + }, + { + "epoch": 2.66, + "learning_rate": 0.00016653807022555067, + "loss": 1.2522, + "step": 2242 + }, + { + "epoch": 2.67, + "learning_rate": 0.0001665098778372104, + "loss": 1.2107, + "step": 2243 + }, + { + "epoch": 2.67, + "learning_rate": 0.0001664816759660124, + "loss": 1.0813, + "step": 2244 + }, + { + "epoch": 2.67, + "learning_rate": 0.00016645346461597753, + "loss": 1.1136, + "step": 2245 + }, + { + "epoch": 2.67, + "learning_rate": 0.00016642524379112817, + "loss": 1.1003, + "step": 2246 + }, + { + "epoch": 2.67, + "learning_rate": 0.000166397013495488, + "loss": 1.0635, + "step": 2247 + }, + { + "epoch": 2.67, + "learning_rate": 0.00016636877373308204, + "loss": 1.0575, + "step": 2248 + }, + { + "epoch": 2.67, + "learning_rate": 0.00016634052450793663, + "loss": 0.7693, + "step": 2249 + }, + { + "epoch": 2.67, + "learning_rate": 0.00016631226582407952, + "loss": 1.5965, + "step": 2250 + }, + { + "epoch": 2.67, + "learning_rate": 0.0001662839976855398, + "loss": 1.0989, + "step": 2251 + }, + { + "epoch": 2.68, + "learning_rate": 0.00016625572009634787, + "loss": 0.9198, + "step": 2252 + }, + { + "epoch": 2.68, + "learning_rate": 0.00016622743306053548, + "loss": 1.0896, + "step": 2253 + }, + { + "epoch": 2.68, + "learning_rate": 0.00016619913658213578, + "loss": 1.015, + "step": 2254 + }, + { + "epoch": 2.68, + "learning_rate": 0.0001661708306651832, + "loss": 0.8572, + "step": 2255 + }, + { + "epoch": 2.68, + "learning_rate": 0.00016614251531371353, + "loss": 1.1508, + "step": 2256 + }, + { + "epoch": 2.68, + "learning_rate": 0.000166114190531764, + "loss": 1.1852, + "step": 2257 + }, + { + "epoch": 2.68, + "learning_rate": 0.00016608585632337306, + "loss": 0.932, + "step": 2258 + }, + { + "epoch": 2.68, + "learning_rate": 0.00016605751269258053, + "loss": 1.2542, + "step": 2259 + }, + { + "epoch": 2.69, + "learning_rate": 0.00016602915964342757, + "loss": 0.943, + "step": 2260 + }, + { + "epoch": 2.69, + "learning_rate": 0.00016600079717995678, + "loss": 1.2438, + "step": 2261 + }, + { + "epoch": 2.69, + "learning_rate": 0.00016597242530621203, + "loss": 0.9928, + "step": 2262 + }, + { + "epoch": 2.69, + "learning_rate": 0.00016594404402623845, + "loss": 0.9516, + "step": 2263 + }, + { + "epoch": 2.69, + "learning_rate": 0.00016591565334408265, + "loss": 1.1689, + "step": 2264 + }, + { + "epoch": 2.69, + "learning_rate": 0.0001658872532637925, + "loss": 1.3155, + "step": 2265 + }, + { + "epoch": 2.69, + "learning_rate": 0.00016585884378941725, + "loss": 1.1596, + "step": 2266 + }, + { + "epoch": 2.69, + "learning_rate": 0.00016583042492500746, + "loss": 0.9956, + "step": 2267 + }, + { + "epoch": 2.7, + "learning_rate": 0.00016580199667461508, + "loss": 0.9289, + "step": 2268 + }, + { + "epoch": 2.7, + "learning_rate": 0.00016577355904229325, + "loss": 1.3225, + "step": 2269 + }, + { + "epoch": 2.7, + "learning_rate": 0.00016574511203209667, + "loss": 1.0384, + "step": 2270 + }, + { + "epoch": 2.7, + "learning_rate": 0.0001657166556480812, + "loss": 0.697, + "step": 2271 + }, + { + "epoch": 2.7, + "learning_rate": 0.00016568818989430416, + "loss": 0.7702, + "step": 2272 + }, + { + "epoch": 2.7, + "learning_rate": 0.00016565971477482404, + "loss": 1.1041, + "step": 2273 + }, + { + "epoch": 2.7, + "learning_rate": 0.00016563123029370093, + "loss": 1.0462, + "step": 2274 + }, + { + "epoch": 2.7, + "learning_rate": 0.0001656027364549959, + "loss": 1.0797, + "step": 2275 + }, + { + "epoch": 2.7, + "learning_rate": 0.0001655742332627717, + "loss": 1.3301, + "step": 2276 + }, + { + "epoch": 2.71, + "learning_rate": 0.0001655457207210922, + "loss": 1.0467, + "step": 2277 + }, + { + "epoch": 2.71, + "learning_rate": 0.00016551719883402271, + "loss": 0.9432, + "step": 2278 + }, + { + "epoch": 2.71, + "learning_rate": 0.00016548866760562978, + "loss": 1.1808, + "step": 2279 + }, + { + "epoch": 2.71, + "learning_rate": 0.00016546012703998138, + "loss": 1.1094, + "step": 2280 + }, + { + "epoch": 2.71, + "learning_rate": 0.00016543157714114673, + "loss": 1.3914, + "step": 2281 + }, + { + "epoch": 2.71, + "learning_rate": 0.00016540301791319645, + "loss": 1.0402, + "step": 2282 + }, + { + "epoch": 2.71, + "learning_rate": 0.00016537444936020246, + "loss": 0.9815, + "step": 2283 + }, + { + "epoch": 2.71, + "learning_rate": 0.000165345871486238, + "loss": 0.9722, + "step": 2284 + }, + { + "epoch": 2.72, + "learning_rate": 0.00016531728429537766, + "loss": 0.919, + "step": 2285 + }, + { + "epoch": 2.72, + "learning_rate": 0.00016528868779169738, + "loss": 1.1242, + "step": 2286 + }, + { + "epoch": 2.72, + "learning_rate": 0.00016526008197927436, + "loss": 1.1794, + "step": 2287 + }, + { + "epoch": 2.72, + "learning_rate": 0.00016523146686218718, + "loss": 1.434, + "step": 2288 + }, + { + "epoch": 2.72, + "learning_rate": 0.00016520284244451574, + "loss": 0.8463, + "step": 2289 + }, + { + "epoch": 2.72, + "learning_rate": 0.00016517420873034123, + "loss": 1.1736, + "step": 2290 + }, + { + "epoch": 2.72, + "learning_rate": 0.0001651455657237462, + "loss": 1.0431, + "step": 2291 + }, + { + "epoch": 2.72, + "learning_rate": 0.00016511691342881453, + "loss": 1.2796, + "step": 2292 + }, + { + "epoch": 2.73, + "learning_rate": 0.0001650882518496314, + "loss": 1.0578, + "step": 2293 + }, + { + "epoch": 2.73, + "learning_rate": 0.00016505958099028334, + "loss": 1.3914, + "step": 2294 + }, + { + "epoch": 2.73, + "learning_rate": 0.0001650309008548582, + "loss": 1.0046, + "step": 2295 + }, + { + "epoch": 2.73, + "learning_rate": 0.0001650022114474451, + "loss": 1.0246, + "step": 2296 + }, + { + "epoch": 2.73, + "learning_rate": 0.00016497351277213458, + "loss": 1.2789, + "step": 2297 + }, + { + "epoch": 2.73, + "learning_rate": 0.00016494480483301836, + "loss": 1.0036, + "step": 2298 + }, + { + "epoch": 2.73, + "learning_rate": 0.00016491608763418968, + "loss": 0.886, + "step": 2299 + }, + { + "epoch": 2.73, + "eval_loss": 2.3017475605010986, + "eval_runtime": 283.8846, + "eval_samples_per_second": 0.726, + "eval_steps_per_second": 0.726, + "step": 2299 + }, + { + "epoch": 2.73, + "learning_rate": 0.0001648873611797429, + "loss": 1.3953, + "step": 2300 + }, + { + "epoch": 2.73, + "learning_rate": 0.0001648586254737738, + "loss": 0.6972, + "step": 2301 + }, + { + "epoch": 2.74, + "learning_rate": 0.00016482988052037947, + "loss": 1.2311, + "step": 2302 + }, + { + "epoch": 2.74, + "learning_rate": 0.00016480112632365833, + "loss": 1.327, + "step": 2303 + }, + { + "epoch": 2.74, + "learning_rate": 0.0001647723628877101, + "loss": 0.9534, + "step": 2304 + }, + { + "epoch": 2.74, + "learning_rate": 0.0001647435902166358, + "loss": 0.9164, + "step": 2305 + }, + { + "epoch": 2.74, + "learning_rate": 0.0001647148083145378, + "loss": 1.1038, + "step": 2306 + }, + { + "epoch": 2.74, + "learning_rate": 0.00016468601718551976, + "loss": 1.0444, + "step": 2307 + }, + { + "epoch": 2.74, + "learning_rate": 0.00016465721683368666, + "loss": 1.2635, + "step": 2308 + }, + { + "epoch": 2.74, + "learning_rate": 0.00016462840726314486, + "loss": 1.1647, + "step": 2309 + }, + { + "epoch": 2.75, + "learning_rate": 0.00016459958847800187, + "loss": 1.3617, + "step": 2310 + }, + { + "epoch": 2.75, + "learning_rate": 0.00016457076048236675, + "loss": 1.2355, + "step": 2311 + }, + { + "epoch": 2.75, + "learning_rate": 0.00016454192328034962, + "loss": 0.9989, + "step": 2312 + }, + { + "epoch": 2.75, + "learning_rate": 0.00016451307687606213, + "loss": 1.1218, + "step": 2313 + }, + { + "epoch": 2.75, + "learning_rate": 0.00016448422127361706, + "loss": 0.8967, + "step": 2314 + }, + { + "epoch": 2.75, + "learning_rate": 0.0001644553564771287, + "loss": 1.159, + "step": 2315 + }, + { + "epoch": 2.75, + "learning_rate": 0.0001644264824907124, + "loss": 1.5901, + "step": 2316 + }, + { + "epoch": 2.75, + "learning_rate": 0.0001643975993184851, + "loss": 0.979, + "step": 2317 + }, + { + "epoch": 2.76, + "learning_rate": 0.00016436870696456482, + "loss": 0.8561, + "step": 2318 + }, + { + "epoch": 2.76, + "learning_rate": 0.00016433980543307107, + "loss": 0.9485, + "step": 2319 + }, + { + "epoch": 2.76, + "learning_rate": 0.00016431089472812444, + "loss": 0.7736, + "step": 2320 + }, + { + "epoch": 2.76, + "learning_rate": 0.00016428197485384707, + "loss": 1.2546, + "step": 2321 + }, + { + "epoch": 2.76, + "learning_rate": 0.00016425304581436226, + "loss": 0.9534, + "step": 2322 + }, + { + "epoch": 2.76, + "learning_rate": 0.0001642241076137947, + "loss": 0.8182, + "step": 2323 + }, + { + "epoch": 2.76, + "learning_rate": 0.0001641951602562703, + "loss": 1.1107, + "step": 2324 + }, + { + "epoch": 2.76, + "learning_rate": 0.0001641662037459164, + "loss": 1.0628, + "step": 2325 + }, + { + "epoch": 2.76, + "learning_rate": 0.00016413723808686147, + "loss": 1.6261, + "step": 2326 + }, + { + "epoch": 2.77, + "learning_rate": 0.0001641082632832354, + "loss": 1.0286, + "step": 2327 + }, + { + "epoch": 2.77, + "learning_rate": 0.0001640792793391694, + "loss": 0.5732, + "step": 2328 + }, + { + "epoch": 2.77, + "learning_rate": 0.00016405028625879594, + "loss": 1.0932, + "step": 2329 + }, + { + "epoch": 2.77, + "learning_rate": 0.00016402128404624882, + "loss": 1.2585, + "step": 2330 + }, + { + "epoch": 2.77, + "learning_rate": 0.00016399227270566308, + "loss": 0.8788, + "step": 2331 + }, + { + "epoch": 2.77, + "learning_rate": 0.0001639632522411751, + "loss": 1.1397, + "step": 2332 + }, + { + "epoch": 2.77, + "learning_rate": 0.00016393422265692262, + "loss": 1.3517, + "step": 2333 + }, + { + "epoch": 2.77, + "learning_rate": 0.0001639051839570446, + "loss": 1.1346, + "step": 2334 + }, + { + "epoch": 2.78, + "learning_rate": 0.00016387613614568126, + "loss": 0.9594, + "step": 2335 + }, + { + "epoch": 2.78, + "learning_rate": 0.0001638470792269743, + "loss": 1.0674, + "step": 2336 + }, + { + "epoch": 2.78, + "learning_rate": 0.00016381801320506653, + "loss": 0.9123, + "step": 2337 + }, + { + "epoch": 2.78, + "learning_rate": 0.00016378893808410215, + "loss": 1.1909, + "step": 2338 + }, + { + "epoch": 2.78, + "learning_rate": 0.00016375985386822664, + "loss": 1.0474, + "step": 2339 + }, + { + "epoch": 2.78, + "learning_rate": 0.00016373076056158675, + "loss": 0.8844, + "step": 2340 + }, + { + "epoch": 2.78, + "learning_rate": 0.0001637016581683306, + "loss": 1.1606, + "step": 2341 + }, + { + "epoch": 2.78, + "learning_rate": 0.00016367254669260749, + "loss": 0.6206, + "step": 2342 + }, + { + "epoch": 2.79, + "learning_rate": 0.00016364342613856816, + "loss": 0.7225, + "step": 2343 + }, + { + "epoch": 2.79, + "learning_rate": 0.00016361429651036446, + "loss": 1.1782, + "step": 2344 + }, + { + "epoch": 2.79, + "learning_rate": 0.00016358515781214977, + "loss": 1.0911, + "step": 2345 + }, + { + "epoch": 2.79, + "learning_rate": 0.00016355601004807856, + "loss": 1.2727, + "step": 2346 + }, + { + "epoch": 2.79, + "learning_rate": 0.00016352685322230663, + "loss": 0.8294, + "step": 2347 + }, + { + "epoch": 2.79, + "learning_rate": 0.00016349768733899117, + "loss": 1.1661, + "step": 2348 + }, + { + "epoch": 2.79, + "learning_rate": 0.00016346851240229057, + "loss": 0.8267, + "step": 2349 + }, + { + "epoch": 2.79, + "learning_rate": 0.00016343932841636456, + "loss": 1.2873, + "step": 2350 + }, + { + "epoch": 2.79, + "learning_rate": 0.00016341013538537412, + "loss": 1.2459, + "step": 2351 + }, + { + "epoch": 2.8, + "learning_rate": 0.00016338093331348156, + "loss": 0.8939, + "step": 2352 + }, + { + "epoch": 2.8, + "learning_rate": 0.00016335172220485042, + "loss": 1.024, + "step": 2353 + }, + { + "epoch": 2.8, + "learning_rate": 0.0001633225020636456, + "loss": 0.9981, + "step": 2354 + }, + { + "epoch": 2.8, + "learning_rate": 0.00016329327289403325, + "loss": 1.331, + "step": 2355 + }, + { + "epoch": 2.8, + "learning_rate": 0.00016326403470018084, + "loss": 0.7446, + "step": 2356 + }, + { + "epoch": 2.8, + "learning_rate": 0.00016323478748625703, + "loss": 1.1931, + "step": 2357 + }, + { + "epoch": 2.8, + "learning_rate": 0.00016320553125643187, + "loss": 1.1287, + "step": 2358 + }, + { + "epoch": 2.8, + "learning_rate": 0.00016317626601487667, + "loss": 1.109, + "step": 2359 + }, + { + "epoch": 2.81, + "learning_rate": 0.00016314699176576402, + "loss": 0.9946, + "step": 2360 + }, + { + "epoch": 2.81, + "learning_rate": 0.00016311770851326778, + "loss": 0.8347, + "step": 2361 + }, + { + "epoch": 2.81, + "learning_rate": 0.00016308841626156307, + "loss": 0.9214, + "step": 2362 + }, + { + "epoch": 2.81, + "learning_rate": 0.0001630591150148264, + "loss": 0.5907, + "step": 2363 + }, + { + "epoch": 2.81, + "learning_rate": 0.00016302980477723539, + "loss": 1.2412, + "step": 2364 + }, + { + "epoch": 2.81, + "learning_rate": 0.00016300048555296915, + "loss": 1.2908, + "step": 2365 + }, + { + "epoch": 2.81, + "learning_rate": 0.00016297115734620788, + "loss": 1.2345, + "step": 2366 + }, + { + "epoch": 2.81, + "learning_rate": 0.00016294182016113315, + "loss": 1.0418, + "step": 2367 + }, + { + "epoch": 2.82, + "learning_rate": 0.00016291247400192785, + "loss": 1.1457, + "step": 2368 + }, + { + "epoch": 2.82, + "learning_rate": 0.00016288311887277608, + "loss": 1.2529, + "step": 2369 + }, + { + "epoch": 2.82, + "learning_rate": 0.00016285375477786322, + "loss": 1.0013, + "step": 2370 + }, + { + "epoch": 2.82, + "learning_rate": 0.00016282438172137597, + "loss": 0.943, + "step": 2371 + }, + { + "epoch": 2.82, + "learning_rate": 0.00016279499970750226, + "loss": 0.7009, + "step": 2372 + }, + { + "epoch": 2.82, + "learning_rate": 0.00016276560874043137, + "loss": 0.9408, + "step": 2373 + }, + { + "epoch": 2.82, + "learning_rate": 0.0001627362088243538, + "loss": 1.1788, + "step": 2374 + }, + { + "epoch": 2.82, + "learning_rate": 0.0001627067999634613, + "loss": 0.8106, + "step": 2375 + }, + { + "epoch": 2.82, + "learning_rate": 0.00016267738216194696, + "loss": 1.1695, + "step": 2376 + }, + { + "epoch": 2.83, + "learning_rate": 0.0001626479554240051, + "loss": 0.9209, + "step": 2377 + }, + { + "epoch": 2.83, + "learning_rate": 0.00016261851975383137, + "loss": 0.9911, + "step": 2378 + }, + { + "epoch": 2.83, + "learning_rate": 0.00016258907515562262, + "loss": 1.3819, + "step": 2379 + }, + { + "epoch": 2.83, + "learning_rate": 0.000162559621633577, + "loss": 0.8926, + "step": 2380 + }, + { + "epoch": 2.83, + "learning_rate": 0.000162530159191894, + "loss": 1.0896, + "step": 2381 + }, + { + "epoch": 2.83, + "learning_rate": 0.00016250068783477424, + "loss": 0.8403, + "step": 2382 + }, + { + "epoch": 2.83, + "learning_rate": 0.00016247120756641972, + "loss": 0.7976, + "step": 2383 + }, + { + "epoch": 2.83, + "learning_rate": 0.0001624417183910337, + "loss": 0.8881, + "step": 2384 + }, + { + "epoch": 2.84, + "learning_rate": 0.0001624122203128207, + "loss": 0.8302, + "step": 2385 + }, + { + "epoch": 2.84, + "learning_rate": 0.0001623827133359865, + "loss": 1.3312, + "step": 2386 + }, + { + "epoch": 2.84, + "learning_rate": 0.0001623531974647381, + "loss": 1.003, + "step": 2387 + }, + { + "epoch": 2.84, + "learning_rate": 0.0001623236727032839, + "loss": 0.9487, + "step": 2388 + }, + { + "epoch": 2.84, + "learning_rate": 0.00016229413905583342, + "loss": 1.2259, + "step": 2389 + }, + { + "epoch": 2.84, + "learning_rate": 0.00016226459652659753, + "loss": 0.9327, + "step": 2390 + }, + { + "epoch": 2.84, + "learning_rate": 0.00016223504511978838, + "loss": 0.7336, + "step": 2391 + }, + { + "epoch": 2.84, + "learning_rate": 0.00016220548483961934, + "loss": 1.0454, + "step": 2392 + }, + { + "epoch": 2.85, + "learning_rate": 0.00016217591569030505, + "loss": 1.3371, + "step": 2393 + }, + { + "epoch": 2.85, + "learning_rate": 0.00016214633767606143, + "loss": 1.0814, + "step": 2394 + }, + { + "epoch": 2.85, + "learning_rate": 0.00016211675080110566, + "loss": 1.2274, + "step": 2395 + }, + { + "epoch": 2.85, + "learning_rate": 0.0001620871550696562, + "loss": 0.9775, + "step": 2396 + }, + { + "epoch": 2.85, + "learning_rate": 0.00016205755048593273, + "loss": 1.0323, + "step": 2397 + }, + { + "epoch": 2.85, + "learning_rate": 0.00016202793705415622, + "loss": 1.5101, + "step": 2398 + }, + { + "epoch": 2.85, + "learning_rate": 0.00016199831477854893, + "loss": 0.8118, + "step": 2399 + }, + { + "epoch": 2.85, + "learning_rate": 0.0001619686836633343, + "loss": 1.0233, + "step": 2400 + }, + { + "epoch": 2.85, + "learning_rate": 0.00016193904371273715, + "loss": 0.9038, + "step": 2401 + }, + { + "epoch": 2.86, + "learning_rate": 0.00016190939493098344, + "loss": 0.875, + "step": 2402 + }, + { + "epoch": 2.86, + "learning_rate": 0.00016187973732230038, + "loss": 1.3274, + "step": 2403 + }, + { + "epoch": 2.86, + "learning_rate": 0.00016185007089091665, + "loss": 1.081, + "step": 2404 + }, + { + "epoch": 2.86, + "learning_rate": 0.00016182039564106192, + "loss": 1.0841, + "step": 2405 + }, + { + "epoch": 2.86, + "learning_rate": 0.00016179071157696728, + "loss": 1.3208, + "step": 2406 + }, + { + "epoch": 2.86, + "learning_rate": 0.000161761018702865, + "loss": 1.1854, + "step": 2407 + }, + { + "epoch": 2.86, + "learning_rate": 0.0001617313170229887, + "loss": 1.0651, + "step": 2408 + }, + { + "epoch": 2.86, + "learning_rate": 0.0001617016065415731, + "loss": 1.1398, + "step": 2409 + }, + { + "epoch": 2.87, + "learning_rate": 0.00016167188726285434, + "loss": 1.2778, + "step": 2410 + }, + { + "epoch": 2.87, + "learning_rate": 0.00016164215919106968, + "loss": 1.6758, + "step": 2411 + }, + { + "epoch": 2.87, + "learning_rate": 0.0001616124223304577, + "loss": 0.8341, + "step": 2412 + }, + { + "epoch": 2.87, + "learning_rate": 0.00016158267668525832, + "loss": 0.9513, + "step": 2413 + }, + { + "epoch": 2.87, + "learning_rate": 0.00016155292225971253, + "loss": 0.9617, + "step": 2414 + }, + { + "epoch": 2.87, + "learning_rate": 0.00016152315905806268, + "loss": 0.8664, + "step": 2415 + }, + { + "epoch": 2.87, + "learning_rate": 0.00016149338708455237, + "loss": 1.331, + "step": 2416 + }, + { + "epoch": 2.87, + "learning_rate": 0.00016146360634342643, + "loss": 1.4212, + "step": 2417 + }, + { + "epoch": 2.88, + "learning_rate": 0.00016143381683893094, + "loss": 1.2126, + "step": 2418 + }, + { + "epoch": 2.88, + "learning_rate": 0.00016140401857531322, + "loss": 0.934, + "step": 2419 + }, + { + "epoch": 2.88, + "learning_rate": 0.00016137421155682183, + "loss": 1.2417, + "step": 2420 + }, + { + "epoch": 2.88, + "learning_rate": 0.0001613443957877067, + "loss": 1.637, + "step": 2421 + }, + { + "epoch": 2.88, + "learning_rate": 0.00016131457127221881, + "loss": 1.1456, + "step": 2422 + }, + { + "epoch": 2.88, + "learning_rate": 0.00016128473801461053, + "loss": 0.9402, + "step": 2423 + }, + { + "epoch": 2.88, + "learning_rate": 0.0001612548960191354, + "loss": 1.3797, + "step": 2424 + }, + { + "epoch": 2.88, + "learning_rate": 0.0001612250452900483, + "loss": 0.8191, + "step": 2425 + }, + { + "epoch": 2.88, + "learning_rate": 0.0001611951858316052, + "loss": 1.1725, + "step": 2426 + }, + { + "epoch": 2.89, + "learning_rate": 0.00016116531764806346, + "loss": 1.5701, + "step": 2427 + }, + { + "epoch": 2.89, + "learning_rate": 0.00016113544074368164, + "loss": 1.0591, + "step": 2428 + }, + { + "epoch": 2.89, + "learning_rate": 0.00016110555512271953, + "loss": 1.03, + "step": 2429 + }, + { + "epoch": 2.89, + "learning_rate": 0.0001610756607894382, + "loss": 1.1829, + "step": 2430 + }, + { + "epoch": 2.89, + "learning_rate": 0.00016104575774809985, + "loss": 1.2222, + "step": 2431 + }, + { + "epoch": 2.89, + "learning_rate": 0.00016101584600296804, + "loss": 1.1537, + "step": 2432 + }, + { + "epoch": 2.89, + "learning_rate": 0.00016098592555830753, + "loss": 1.0973, + "step": 2433 + }, + { + "epoch": 2.89, + "learning_rate": 0.00016095599641838436, + "loss": 1.0793, + "step": 2434 + }, + { + "epoch": 2.9, + "learning_rate": 0.00016092605858746573, + "loss": 1.3484, + "step": 2435 + }, + { + "epoch": 2.9, + "learning_rate": 0.0001608961120698201, + "loss": 1.1689, + "step": 2436 + }, + { + "epoch": 2.9, + "learning_rate": 0.00016086615686971726, + "loss": 1.0864, + "step": 2437 + }, + { + "epoch": 2.9, + "learning_rate": 0.00016083619299142813, + "loss": 1.2451, + "step": 2438 + }, + { + "epoch": 2.9, + "learning_rate": 0.0001608062204392249, + "loss": 0.9593, + "step": 2439 + }, + { + "epoch": 2.9, + "learning_rate": 0.00016077623921738102, + "loss": 0.9816, + "step": 2440 + }, + { + "epoch": 2.9, + "learning_rate": 0.00016074624933017112, + "loss": 1.0845, + "step": 2441 + }, + { + "epoch": 2.9, + "learning_rate": 0.00016071625078187114, + "loss": 0.9875, + "step": 2442 + }, + { + "epoch": 2.91, + "learning_rate": 0.0001606862435767582, + "loss": 0.8758, + "step": 2443 + }, + { + "epoch": 2.91, + "learning_rate": 0.00016065622771911067, + "loss": 0.9499, + "step": 2444 + }, + { + "epoch": 2.91, + "learning_rate": 0.00016062620321320823, + "loss": 1.1133, + "step": 2445 + }, + { + "epoch": 2.91, + "learning_rate": 0.0001605961700633316, + "loss": 0.7228, + "step": 2446 + }, + { + "epoch": 2.91, + "learning_rate": 0.00016056612827376293, + "loss": 1.2297, + "step": 2447 + }, + { + "epoch": 2.91, + "learning_rate": 0.0001605360778487855, + "loss": 1.0251, + "step": 2448 + }, + { + "epoch": 2.91, + "learning_rate": 0.00016050601879268386, + "loss": 0.8097, + "step": 2449 + }, + { + "epoch": 2.91, + "learning_rate": 0.00016047595110974376, + "loss": 0.9872, + "step": 2450 + }, + { + "epoch": 2.91, + "learning_rate": 0.0001604458748042522, + "loss": 1.1119, + "step": 2451 + }, + { + "epoch": 2.92, + "learning_rate": 0.0001604157898804974, + "loss": 0.8256, + "step": 2452 + }, + { + "epoch": 2.92, + "learning_rate": 0.00016038569634276882, + "loss": 0.9036, + "step": 2453 + }, + { + "epoch": 2.92, + "learning_rate": 0.00016035559419535716, + "loss": 1.1173, + "step": 2454 + }, + { + "epoch": 2.92, + "learning_rate": 0.00016032548344255428, + "loss": 1.3173, + "step": 2455 + }, + { + "epoch": 2.92, + "learning_rate": 0.00016029536408865337, + "loss": 0.717, + "step": 2456 + }, + { + "epoch": 2.92, + "learning_rate": 0.00016026523613794878, + "loss": 0.9806, + "step": 2457 + }, + { + "epoch": 2.92, + "learning_rate": 0.00016023509959473605, + "loss": 1.1509, + "step": 2458 + }, + { + "epoch": 2.92, + "learning_rate": 0.00016020495446331207, + "loss": 1.0454, + "step": 2459 + }, + { + "epoch": 2.93, + "learning_rate": 0.0001601748007479748, + "loss": 1.183, + "step": 2460 + }, + { + "epoch": 2.93, + "learning_rate": 0.0001601446384530236, + "loss": 1.2611, + "step": 2461 + }, + { + "epoch": 2.93, + "learning_rate": 0.00016011446758275888, + "loss": 1.0377, + "step": 2462 + }, + { + "epoch": 2.93, + "learning_rate": 0.00016008428814148236, + "loss": 1.2111, + "step": 2463 + }, + { + "epoch": 2.93, + "learning_rate": 0.00016005410013349698, + "loss": 1.0952, + "step": 2464 + }, + { + "epoch": 2.93, + "learning_rate": 0.00016002390356310685, + "loss": 0.7589, + "step": 2465 + }, + { + "epoch": 2.93, + "learning_rate": 0.00015999369843461742, + "loss": 0.8543, + "step": 2466 + }, + { + "epoch": 2.93, + "learning_rate": 0.00015996348475233525, + "loss": 1.1509, + "step": 2467 + }, + { + "epoch": 2.94, + "learning_rate": 0.0001599332625205681, + "loss": 1.287, + "step": 2468 + }, + { + "epoch": 2.94, + "learning_rate": 0.00015990303174362512, + "loss": 1.0401, + "step": 2469 + }, + { + "epoch": 2.94, + "learning_rate": 0.0001598727924258164, + "loss": 1.0247, + "step": 2470 + }, + { + "epoch": 2.94, + "learning_rate": 0.00015984254457145354, + "loss": 1.1537, + "step": 2471 + }, + { + "epoch": 2.94, + "learning_rate": 0.00015981228818484917, + "loss": 0.9606, + "step": 2472 + }, + { + "epoch": 2.94, + "learning_rate": 0.0001597820232703172, + "loss": 0.8709, + "step": 2473 + }, + { + "epoch": 2.94, + "learning_rate": 0.00015975174983217275, + "loss": 1.2827, + "step": 2474 + }, + { + "epoch": 2.94, + "learning_rate": 0.00015972146787473213, + "loss": 0.8057, + "step": 2475 + }, + { + "epoch": 2.94, + "learning_rate": 0.0001596911774023129, + "loss": 1.0857, + "step": 2476 + }, + { + "epoch": 2.95, + "learning_rate": 0.00015966087841923386, + "loss": 1.1731, + "step": 2477 + }, + { + "epoch": 2.95, + "learning_rate": 0.0001596305709298149, + "loss": 0.8871, + "step": 2478 + }, + { + "epoch": 2.95, + "learning_rate": 0.00015960025493837727, + "loss": 1.0671, + "step": 2479 + }, + { + "epoch": 2.95, + "learning_rate": 0.00015956993044924334, + "loss": 1.3735, + "step": 2480 + }, + { + "epoch": 2.95, + "learning_rate": 0.00015953959746673675, + "loss": 1.4655, + "step": 2481 + }, + { + "epoch": 2.95, + "learning_rate": 0.00015950925599518228, + "loss": 1.3975, + "step": 2482 + }, + { + "epoch": 2.95, + "learning_rate": 0.00015947890603890602, + "loss": 0.9468, + "step": 2483 + }, + { + "epoch": 2.95, + "learning_rate": 0.0001594485476022352, + "loss": 0.9976, + "step": 2484 + }, + { + "epoch": 2.96, + "learning_rate": 0.00015941818068949818, + "loss": 0.6732, + "step": 2485 + }, + { + "epoch": 2.96, + "learning_rate": 0.00015938780530502474, + "loss": 0.9848, + "step": 2486 + }, + { + "epoch": 2.96, + "learning_rate": 0.00015935742145314568, + "loss": 1.2441, + "step": 2487 + }, + { + "epoch": 2.96, + "learning_rate": 0.0001593270291381931, + "loss": 0.9631, + "step": 2488 + }, + { + "epoch": 2.96, + "learning_rate": 0.00015929662836450029, + "loss": 0.8868, + "step": 2489 + }, + { + "epoch": 2.96, + "learning_rate": 0.0001592662191364017, + "loss": 0.9063, + "step": 2490 + }, + { + "epoch": 2.96, + "learning_rate": 0.00015923580145823303, + "loss": 0.6886, + "step": 2491 + }, + { + "epoch": 2.96, + "learning_rate": 0.0001592053753343312, + "loss": 1.0702, + "step": 2492 + }, + { + "epoch": 2.97, + "learning_rate": 0.0001591749407690343, + "loss": 1.3879, + "step": 2493 + }, + { + "epoch": 2.97, + "learning_rate": 0.00015914449776668167, + "loss": 1.1048, + "step": 2494 + }, + { + "epoch": 2.97, + "learning_rate": 0.0001591140463316137, + "loss": 0.9921, + "step": 2495 + }, + { + "epoch": 2.97, + "learning_rate": 0.00015908358646817225, + "loss": 1.3042, + "step": 2496 + }, + { + "epoch": 2.97, + "learning_rate": 0.00015905311818070015, + "loss": 0.8413, + "step": 2497 + }, + { + "epoch": 2.97, + "learning_rate": 0.00015902264147354153, + "loss": 1.5201, + "step": 2498 + }, + { + "epoch": 2.97, + "learning_rate": 0.0001589921563510417, + "loss": 1.0727, + "step": 2499 + } + ], + "logging_steps": 1, + "max_steps": 8330, + "num_input_tokens_seen": 0, + "num_train_epochs": 10, + "save_steps": 833, + "total_flos": 8.759182245299749e+18, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-2499/trainer_state.json:com.dropbox.attrs b/checkpoint-2499/trainer_state.json:com.dropbox.attrs new file mode 100644 index 0000000000000000000000000000000000000000..9fd6611e12b41e1b2fd70f17d37311c319364183 Binary files /dev/null and b/checkpoint-2499/trainer_state.json:com.dropbox.attrs differ diff --git a/checkpoint-2499/training_args.bin b/checkpoint-2499/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..b74ebd11d7429fe3b4fc4524a3b2d80be486b207 --- /dev/null +++ b/checkpoint-2499/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:008c2f6eb84a5df4b149629ed295f775de2745857ece42b151bce88afb911869 +size 4859 diff --git a/checkpoint-2499/training_args.bin:com.dropbox.attrs b/checkpoint-2499/training_args.bin:com.dropbox.attrs new file mode 100644 index 0000000000000000000000000000000000000000..f78d4c43df3553079fd444cc09a35dcab0da0dc0 Binary files /dev/null and b/checkpoint-2499/training_args.bin:com.dropbox.attrs differ diff --git a/checkpoint-3332/README.md b/checkpoint-3332/README.md new file mode 100644 index 0000000000000000000000000000000000000000..bd5a5d669d6a6bdd984240b8e8bb0a3445b36cda --- /dev/null +++ b/checkpoint-3332/README.md @@ -0,0 +1,218 @@ +--- +library_name: peft +base_model: mistralai/Mixtral-8x7B-v0.1 +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] + + +## Training procedure + +The following `bitsandbytes` quantization config was used during training: +- quant_method: bitsandbytes +- load_in_8bit: False +- load_in_4bit: True +- llm_int8_threshold: 6.0 +- llm_int8_skip_modules: None +- llm_int8_enable_fp32_cpu_offload: False +- llm_int8_has_fp16_weight: False +- bnb_4bit_quant_type: nf4 +- bnb_4bit_use_double_quant: True +- bnb_4bit_compute_dtype: bfloat16 + +### Framework versions + +- PEFT 0.7.0 \ No newline at end of file diff --git a/checkpoint-3332/README.md:com.dropbox.attrs b/checkpoint-3332/README.md:com.dropbox.attrs new file mode 100644 index 0000000000000000000000000000000000000000..d4ea5a4819f5b88611f492ccaa1560c047b15425 Binary files /dev/null and b/checkpoint-3332/README.md:com.dropbox.attrs differ diff --git a/checkpoint-3332/adapter_config.json b/checkpoint-3332/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..1c60bdd91f1a6b73161ce005f7160d2490fd5c8a --- /dev/null +++ b/checkpoint-3332/adapter_config.json @@ -0,0 +1,32 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "mistralai/Mixtral-8x7B-v0.1", + "bias": "none", + "fan_in_fan_out": null, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 64, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "k_proj", + "w1", + "gate", + "w2", + "q_proj", + "w3", + "o_proj", + "v_proj" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-3332/adapter_config.json:com.dropbox.attrs b/checkpoint-3332/adapter_config.json:com.dropbox.attrs new file mode 100644 index 0000000000000000000000000000000000000000..491ddfd74c4651b57be163d90b5d5352e977ffd2 Binary files /dev/null and b/checkpoint-3332/adapter_config.json:com.dropbox.attrs differ diff --git a/checkpoint-3332/adapter_model.safetensors b/checkpoint-3332/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e7e28c34c5f984931729b039b7badc160f79d9f5 --- /dev/null +++ b/checkpoint-3332/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6aba448d9e0bde1a7588f5911a70400a25fc5b580ca9a19bdc61064c309d44e2 +size 3875879784 diff --git a/checkpoint-3332/adapter_model.safetensors:com.dropbox.attrs b/checkpoint-3332/adapter_model.safetensors:com.dropbox.attrs new file mode 100644 index 0000000000000000000000000000000000000000..0e564adbb59a865a9bb1c636791481c45b2e6521 Binary files /dev/null and b/checkpoint-3332/adapter_model.safetensors:com.dropbox.attrs differ diff --git a/checkpoint-3332/optimizer.pt b/checkpoint-3332/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..c4640a9051bb43e1ff546e09f47249ab93e14bb2 --- /dev/null +++ b/checkpoint-3332/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:572a2148943aa0b9915e0c66ed54c6810942cd70ca870d8c80bd91a56f3b3062 +size 1943844127 diff --git a/checkpoint-3332/optimizer.pt:com.dropbox.attrs b/checkpoint-3332/optimizer.pt:com.dropbox.attrs new file mode 100644 index 0000000000000000000000000000000000000000..64d60e0cfadd824cf25a2fd9b8b963758c5c8c78 Binary files /dev/null and b/checkpoint-3332/optimizer.pt:com.dropbox.attrs differ diff --git a/checkpoint-3332/rng_state.pth b/checkpoint-3332/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..01ccf8262ab7e072475fa1f343d8ce0c70048143 --- /dev/null +++ b/checkpoint-3332/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7f56fbf03556b83b300734506a4b44924fcfbbd1dc974be3566502841913603d +size 14575 diff --git a/checkpoint-3332/rng_state.pth:com.dropbox.attrs b/checkpoint-3332/rng_state.pth:com.dropbox.attrs new file mode 100644 index 0000000000000000000000000000000000000000..93e60fd0b2e9606daa22118664c6d887be4ad152 Binary files /dev/null and b/checkpoint-3332/rng_state.pth:com.dropbox.attrs differ diff --git a/checkpoint-3332/scheduler.pt b/checkpoint-3332/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..3cda8916d448a3fc3573118ac5e6f0861698b1b3 --- /dev/null +++ b/checkpoint-3332/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:45f027f63a5866ebd7ac82e11e9e9f21867f7cc8ddb480ab63bacaf52ce8549a +size 627 diff --git a/checkpoint-3332/scheduler.pt:com.dropbox.attrs b/checkpoint-3332/scheduler.pt:com.dropbox.attrs new file mode 100644 index 0000000000000000000000000000000000000000..1f55f785aeb799daa383b2a9b6714f842d8fabc6 Binary files /dev/null and b/checkpoint-3332/scheduler.pt:com.dropbox.attrs differ diff --git a/checkpoint-3332/trainer_state.json b/checkpoint-3332/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..94c9a64b7097278604fe9790073231fc78896c8a --- /dev/null +++ b/checkpoint-3332/trainer_state.json @@ -0,0 +1,20141 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 3.956782713085234, + "eval_steps": 209, + "global_step": 3332, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 2e-05, + "loss": 2.1426, + "step": 1 + }, + { + "epoch": 0.0, + "eval_loss": 2.071432113647461, + "eval_runtime": 279.6718, + "eval_samples_per_second": 0.737, + "eval_steps_per_second": 0.737, + "step": 1 + }, + { + "epoch": 0.0, + "learning_rate": 4e-05, + "loss": 2.4033, + "step": 2 + }, + { + "epoch": 0.0, + "learning_rate": 6e-05, + "loss": 2.1893, + "step": 3 + }, + { + "epoch": 0.0, + "learning_rate": 8e-05, + "loss": 2.3226, + "step": 4 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001, + "loss": 2.2485, + "step": 5 + }, + { + "epoch": 0.01, + "learning_rate": 0.00012, + "loss": 1.9704, + "step": 6 + }, + { + "epoch": 0.01, + "learning_rate": 0.00014, + "loss": 1.6929, + "step": 7 + }, + { + "epoch": 0.01, + "learning_rate": 0.00016, + "loss": 2.2957, + "step": 8 + }, + { + "epoch": 0.01, + "learning_rate": 0.00018, + "loss": 1.9907, + "step": 9 + }, + { + "epoch": 0.01, + "learning_rate": 0.0002, + "loss": 2.1295, + "step": 10 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999999287109068, + "loss": 2.2249, + "step": 11 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999997148436365, + "loss": 2.1733, + "step": 12 + }, + { + "epoch": 0.02, + "learning_rate": 0.000199999935839822, + "loss": 2.1404, + "step": 13 + }, + { + "epoch": 0.02, + "learning_rate": 0.00019999988593747084, + "loss": 2.0236, + "step": 14 + }, + { + "epoch": 0.02, + "learning_rate": 0.00019999982177731722, + "loss": 1.9639, + "step": 15 + }, + { + "epoch": 0.02, + "learning_rate": 0.00019999974335937034, + "loss": 1.692, + "step": 16 + }, + { + "epoch": 0.02, + "learning_rate": 0.00019999965068364137, + "loss": 2.3609, + "step": 17 + }, + { + "epoch": 0.02, + "learning_rate": 0.00019999954375014348, + "loss": 2.3553, + "step": 18 + }, + { + "epoch": 0.02, + "learning_rate": 0.00019999942255889198, + "loss": 1.5733, + "step": 19 + }, + { + "epoch": 0.02, + "learning_rate": 0.00019999928710990412, + "loss": 1.7505, + "step": 20 + }, + { + "epoch": 0.03, + "learning_rate": 0.00019999913740319922, + "loss": 2.3068, + "step": 21 + }, + { + "epoch": 0.03, + "learning_rate": 0.00019999897343879862, + "loss": 1.8371, + "step": 22 + }, + { + "epoch": 0.03, + "learning_rate": 0.0001999987952167257, + "loss": 1.9852, + "step": 23 + }, + { + "epoch": 0.03, + "learning_rate": 0.00019999860273700585, + "loss": 1.9625, + "step": 24 + }, + { + "epoch": 0.03, + "learning_rate": 0.00019999839599966655, + "loss": 2.1089, + "step": 25 + }, + { + "epoch": 0.03, + "learning_rate": 0.00019999817500473724, + "loss": 2.1086, + "step": 26 + }, + { + "epoch": 0.03, + "learning_rate": 0.00019999793975224945, + "loss": 2.0284, + "step": 27 + }, + { + "epoch": 0.03, + "learning_rate": 0.00019999769024223673, + "loss": 2.3641, + "step": 28 + }, + { + "epoch": 0.03, + "learning_rate": 0.00019999742647473464, + "loss": 1.963, + "step": 29 + }, + { + "epoch": 0.04, + "learning_rate": 0.00019999714844978078, + "loss": 2.0635, + "step": 30 + }, + { + "epoch": 0.04, + "learning_rate": 0.0001999968561674148, + "loss": 1.9304, + "step": 31 + }, + { + "epoch": 0.04, + "learning_rate": 0.00019999654962767839, + "loss": 1.4124, + "step": 32 + }, + { + "epoch": 0.04, + "learning_rate": 0.00019999622883061518, + "loss": 2.1444, + "step": 33 + }, + { + "epoch": 0.04, + "learning_rate": 0.00019999589377627102, + "loss": 1.6477, + "step": 34 + }, + { + "epoch": 0.04, + "learning_rate": 0.0001999955444646936, + "loss": 2.2601, + "step": 35 + }, + { + "epoch": 0.04, + "learning_rate": 0.00019999518089593282, + "loss": 1.6256, + "step": 36 + }, + { + "epoch": 0.04, + "learning_rate": 0.0001999948030700404, + "loss": 1.9155, + "step": 37 + }, + { + "epoch": 0.05, + "learning_rate": 0.00019999441098707025, + "loss": 2.1408, + "step": 38 + }, + { + "epoch": 0.05, + "learning_rate": 0.00019999400464707832, + "loss": 2.104, + "step": 39 + }, + { + "epoch": 0.05, + "learning_rate": 0.0001999935840501225, + "loss": 1.9841, + "step": 40 + }, + { + "epoch": 0.05, + "learning_rate": 0.00019999314919626272, + "loss": 1.5924, + "step": 41 + }, + { + "epoch": 0.05, + "learning_rate": 0.00019999270008556108, + "loss": 1.9956, + "step": 42 + }, + { + "epoch": 0.05, + "learning_rate": 0.00019999223671808154, + "loss": 1.4673, + "step": 43 + }, + { + "epoch": 0.05, + "learning_rate": 0.00019999175909389018, + "loss": 2.1595, + "step": 44 + }, + { + "epoch": 0.05, + "learning_rate": 0.00019999126721305513, + "loss": 1.8439, + "step": 45 + }, + { + "epoch": 0.06, + "learning_rate": 0.00019999076107564648, + "loss": 1.9961, + "step": 46 + }, + { + "epoch": 0.06, + "learning_rate": 0.00019999024068173638, + "loss": 2.1504, + "step": 47 + }, + { + "epoch": 0.06, + "learning_rate": 0.00019998970603139912, + "loss": 2.2907, + "step": 48 + }, + { + "epoch": 0.06, + "learning_rate": 0.0001999891571247108, + "loss": 1.5709, + "step": 49 + }, + { + "epoch": 0.06, + "learning_rate": 0.0001999885939617498, + "loss": 2.4504, + "step": 50 + }, + { + "epoch": 0.06, + "learning_rate": 0.00019998801654259632, + "loss": 2.3787, + "step": 51 + }, + { + "epoch": 0.06, + "learning_rate": 0.0001999874248673328, + "loss": 2.0434, + "step": 52 + }, + { + "epoch": 0.06, + "learning_rate": 0.00019998681893604347, + "loss": 2.1671, + "step": 53 + }, + { + "epoch": 0.06, + "learning_rate": 0.0001999861987488148, + "loss": 1.7432, + "step": 54 + }, + { + "epoch": 0.07, + "learning_rate": 0.00019998556430573521, + "loss": 1.7737, + "step": 55 + }, + { + "epoch": 0.07, + "learning_rate": 0.00019998491560689513, + "loss": 2.0122, + "step": 56 + }, + { + "epoch": 0.07, + "learning_rate": 0.0001999842526523871, + "loss": 1.7545, + "step": 57 + }, + { + "epoch": 0.07, + "learning_rate": 0.00019998357544230558, + "loss": 2.201, + "step": 58 + }, + { + "epoch": 0.07, + "learning_rate": 0.00019998288397674716, + "loss": 2.0396, + "step": 59 + }, + { + "epoch": 0.07, + "learning_rate": 0.0001999821782558104, + "loss": 1.9275, + "step": 60 + }, + { + "epoch": 0.07, + "learning_rate": 0.00019998145827959598, + "loss": 1.7797, + "step": 61 + }, + { + "epoch": 0.07, + "learning_rate": 0.0001999807240482065, + "loss": 2.1463, + "step": 62 + }, + { + "epoch": 0.08, + "learning_rate": 0.00019997997556174665, + "loss": 1.935, + "step": 63 + }, + { + "epoch": 0.08, + "learning_rate": 0.0001999792128203232, + "loss": 2.1182, + "step": 64 + }, + { + "epoch": 0.08, + "learning_rate": 0.0001999784358240448, + "loss": 2.2297, + "step": 65 + }, + { + "epoch": 0.08, + "learning_rate": 0.00019997764457302234, + "loss": 2.1052, + "step": 66 + }, + { + "epoch": 0.08, + "learning_rate": 0.0001999768390673686, + "loss": 2.0777, + "step": 67 + }, + { + "epoch": 0.08, + "learning_rate": 0.00019997601930719835, + "loss": 2.1419, + "step": 68 + }, + { + "epoch": 0.08, + "learning_rate": 0.0001999751852926286, + "loss": 2.2586, + "step": 69 + }, + { + "epoch": 0.08, + "learning_rate": 0.00019997433702377817, + "loss": 1.9089, + "step": 70 + }, + { + "epoch": 0.09, + "learning_rate": 0.00019997347450076801, + "loss": 2.0587, + "step": 71 + }, + { + "epoch": 0.09, + "learning_rate": 0.00019997259772372116, + "loss": 2.4143, + "step": 72 + }, + { + "epoch": 0.09, + "learning_rate": 0.00019997170669276256, + "loss": 1.947, + "step": 73 + }, + { + "epoch": 0.09, + "learning_rate": 0.00019997080140801932, + "loss": 2.008, + "step": 74 + }, + { + "epoch": 0.09, + "learning_rate": 0.00019996988186962041, + "loss": 2.4912, + "step": 75 + }, + { + "epoch": 0.09, + "learning_rate": 0.00019996894807769707, + "loss": 2.0279, + "step": 76 + }, + { + "epoch": 0.09, + "learning_rate": 0.00019996800003238232, + "loss": 1.9914, + "step": 77 + }, + { + "epoch": 0.09, + "learning_rate": 0.0001999670377338114, + "loss": 1.9091, + "step": 78 + }, + { + "epoch": 0.09, + "learning_rate": 0.00019996606118212148, + "loss": 1.8038, + "step": 79 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019996507037745183, + "loss": 2.3573, + "step": 80 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019996406531994364, + "loss": 2.3204, + "step": 81 + }, + { + "epoch": 0.1, + "learning_rate": 0.0001999630460097403, + "loss": 2.1619, + "step": 82 + }, + { + "epoch": 0.1, + "learning_rate": 0.0001999620124469871, + "loss": 1.9977, + "step": 83 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019996096463183142, + "loss": 2.195, + "step": 84 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019995990256442263, + "loss": 1.9909, + "step": 85 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019995882624491217, + "loss": 2.2001, + "step": 86 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019995773567345354, + "loss": 1.5795, + "step": 87 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019995663085020212, + "loss": 2.174, + "step": 88 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019995551177531557, + "loss": 1.9605, + "step": 89 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019995437844895334, + "loss": 2.1768, + "step": 90 + }, + { + "epoch": 0.11, + "learning_rate": 0.0001999532308712771, + "loss": 1.6906, + "step": 91 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019995206904245037, + "loss": 2.1029, + "step": 92 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019995089296263893, + "loss": 2.0652, + "step": 93 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019994970263201035, + "loss": 2.1733, + "step": 94 + }, + { + "epoch": 0.11, + "learning_rate": 0.0001999484980507344, + "loss": 1.9413, + "step": 95 + }, + { + "epoch": 0.12, + "learning_rate": 0.0001999472792189828, + "loss": 1.9538, + "step": 96 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019994604613692935, + "loss": 2.4158, + "step": 97 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019994479880474988, + "loss": 1.8964, + "step": 98 + }, + { + "epoch": 0.12, + "learning_rate": 0.0001999435372226222, + "loss": 2.3135, + "step": 99 + }, + { + "epoch": 0.12, + "learning_rate": 0.0001999422613907262, + "loss": 2.127, + "step": 100 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019994097130924374, + "loss": 1.9954, + "step": 101 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019993966697835883, + "loss": 2.1363, + "step": 102 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019993834839825738, + "loss": 1.7779, + "step": 103 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019993701556912742, + "loss": 2.0923, + "step": 104 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019993566849115898, + "loss": 1.9183, + "step": 105 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019993430716454413, + "loss": 1.7894, + "step": 106 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019993293158947694, + "loss": 2.0094, + "step": 107 + }, + { + "epoch": 0.13, + "learning_rate": 0.0001999315417661536, + "loss": 2.1469, + "step": 108 + }, + { + "epoch": 0.13, + "learning_rate": 0.0001999301376947722, + "loss": 1.6924, + "step": 109 + }, + { + "epoch": 0.13, + "learning_rate": 0.0001999287193755329, + "loss": 2.1794, + "step": 110 + }, + { + "epoch": 0.13, + "learning_rate": 0.000199927286808638, + "loss": 2.1338, + "step": 111 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019992583999429178, + "loss": 1.9988, + "step": 112 + }, + { + "epoch": 0.14, + "learning_rate": 0.0001999243789327004, + "loss": 2.0735, + "step": 113 + }, + { + "epoch": 0.14, + "learning_rate": 0.0001999229036240723, + "loss": 2.0521, + "step": 114 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019992141406861776, + "loss": 1.9441, + "step": 115 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019991991026654918, + "loss": 2.1244, + "step": 116 + }, + { + "epoch": 0.14, + "learning_rate": 0.0001999183922180809, + "loss": 1.7937, + "step": 117 + }, + { + "epoch": 0.14, + "learning_rate": 0.0001999168599234295, + "loss": 2.2603, + "step": 118 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019991531338281332, + "loss": 2.1846, + "step": 119 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019991375259645293, + "loss": 2.3241, + "step": 120 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019991217756457085, + "loss": 2.0926, + "step": 121 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019991058828739165, + "loss": 2.0092, + "step": 122 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019990898476514193, + "loss": 1.8076, + "step": 123 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019990736699805029, + "loss": 2.0369, + "step": 124 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019990573498634742, + "loss": 2.0488, + "step": 125 + }, + { + "epoch": 0.15, + "learning_rate": 0.000199904088730266, + "loss": 2.1534, + "step": 126 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019990242823004074, + "loss": 2.1406, + "step": 127 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019990075348590839, + "loss": 1.9379, + "step": 128 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019989906449810775, + "loss": 1.9781, + "step": 129 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019989736126687963, + "loss": 1.973, + "step": 130 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019989564379246683, + "loss": 1.6825, + "step": 131 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019989391207511428, + "loss": 2.0843, + "step": 132 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019989216611506887, + "loss": 1.8547, + "step": 133 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019989040591257952, + "loss": 1.7626, + "step": 134 + }, + { + "epoch": 0.16, + "learning_rate": 0.0001998886314678972, + "loss": 2.0531, + "step": 135 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019988684278127497, + "loss": 2.0031, + "step": 136 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019988503985296773, + "loss": 1.9342, + "step": 137 + }, + { + "epoch": 0.17, + "learning_rate": 0.00019988322268323268, + "loss": 2.3297, + "step": 138 + }, + { + "epoch": 0.17, + "learning_rate": 0.00019988139127232878, + "loss": 2.3401, + "step": 139 + }, + { + "epoch": 0.17, + "learning_rate": 0.00019987954562051725, + "loss": 1.8983, + "step": 140 + }, + { + "epoch": 0.17, + "learning_rate": 0.0001998776857280612, + "loss": 2.0621, + "step": 141 + }, + { + "epoch": 0.17, + "learning_rate": 0.00019987581159522578, + "loss": 2.0574, + "step": 142 + }, + { + "epoch": 0.17, + "learning_rate": 0.00019987392322227824, + "loss": 1.9516, + "step": 143 + }, + { + "epoch": 0.17, + "learning_rate": 0.00019987202060948783, + "loss": 2.1402, + "step": 144 + }, + { + "epoch": 0.17, + "learning_rate": 0.00019987010375712577, + "loss": 1.8903, + "step": 145 + }, + { + "epoch": 0.18, + "learning_rate": 0.00019986817266546539, + "loss": 1.8248, + "step": 146 + }, + { + "epoch": 0.18, + "learning_rate": 0.00019986622733478204, + "loss": 1.9877, + "step": 147 + }, + { + "epoch": 0.18, + "learning_rate": 0.00019986426776535306, + "loss": 1.6272, + "step": 148 + }, + { + "epoch": 0.18, + "learning_rate": 0.00019986229395745785, + "loss": 1.8605, + "step": 149 + }, + { + "epoch": 0.18, + "learning_rate": 0.00019986030591137783, + "loss": 1.6848, + "step": 150 + }, + { + "epoch": 0.18, + "learning_rate": 0.00019985830362739647, + "loss": 2.1922, + "step": 151 + }, + { + "epoch": 0.18, + "learning_rate": 0.0001998562871057992, + "loss": 2.0238, + "step": 152 + }, + { + "epoch": 0.18, + "learning_rate": 0.0001998542563468736, + "loss": 2.2246, + "step": 153 + }, + { + "epoch": 0.18, + "learning_rate": 0.00019985221135090914, + "loss": 1.9438, + "step": 154 + }, + { + "epoch": 0.19, + "learning_rate": 0.00019985015211819744, + "loss": 2.2136, + "step": 155 + }, + { + "epoch": 0.19, + "learning_rate": 0.0001998480786490321, + "loss": 2.4563, + "step": 156 + }, + { + "epoch": 0.19, + "learning_rate": 0.00019984599094370874, + "loss": 2.2138, + "step": 157 + }, + { + "epoch": 0.19, + "learning_rate": 0.00019984388900252503, + "loss": 2.2679, + "step": 158 + }, + { + "epoch": 0.19, + "learning_rate": 0.00019984177282578064, + "loss": 1.9537, + "step": 159 + }, + { + "epoch": 0.19, + "learning_rate": 0.0001998396424137773, + "loss": 2.0803, + "step": 160 + }, + { + "epoch": 0.19, + "learning_rate": 0.0001998374977668188, + "loss": 2.0282, + "step": 161 + }, + { + "epoch": 0.19, + "learning_rate": 0.00019983533888521087, + "loss": 2.0157, + "step": 162 + }, + { + "epoch": 0.2, + "learning_rate": 0.0001998331657692613, + "loss": 1.7837, + "step": 163 + }, + { + "epoch": 0.2, + "learning_rate": 0.00019983097841928, + "loss": 2.1556, + "step": 164 + }, + { + "epoch": 0.2, + "learning_rate": 0.00019982877683557879, + "loss": 2.1447, + "step": 165 + }, + { + "epoch": 0.2, + "learning_rate": 0.00019982656101847162, + "loss": 2.4139, + "step": 166 + }, + { + "epoch": 0.2, + "learning_rate": 0.0001998243309682743, + "loss": 1.6788, + "step": 167 + }, + { + "epoch": 0.2, + "learning_rate": 0.00019982208668530493, + "loss": 1.9008, + "step": 168 + }, + { + "epoch": 0.2, + "learning_rate": 0.0001998198281698834, + "loss": 2.173, + "step": 169 + }, + { + "epoch": 0.2, + "learning_rate": 0.00019981755542233177, + "loss": 2.1837, + "step": 170 + }, + { + "epoch": 0.21, + "learning_rate": 0.00019981526844297404, + "loss": 2.0639, + "step": 171 + }, + { + "epoch": 0.21, + "learning_rate": 0.00019981296723213632, + "loss": 2.3864, + "step": 172 + }, + { + "epoch": 0.21, + "learning_rate": 0.00019981065179014673, + "loss": 1.923, + "step": 173 + }, + { + "epoch": 0.21, + "learning_rate": 0.00019980832211733535, + "loss": 1.9192, + "step": 174 + }, + { + "epoch": 0.21, + "learning_rate": 0.00019980597821403438, + "loss": 2.0335, + "step": 175 + }, + { + "epoch": 0.21, + "learning_rate": 0.000199803620080578, + "loss": 1.8172, + "step": 176 + }, + { + "epoch": 0.21, + "learning_rate": 0.0001998012477173024, + "loss": 2.0294, + "step": 177 + }, + { + "epoch": 0.21, + "learning_rate": 0.00019979886112454586, + "loss": 2.2889, + "step": 178 + }, + { + "epoch": 0.21, + "learning_rate": 0.00019979646030264867, + "loss": 1.8498, + "step": 179 + }, + { + "epoch": 0.22, + "learning_rate": 0.0001997940452519531, + "loss": 2.0797, + "step": 180 + }, + { + "epoch": 0.22, + "learning_rate": 0.0001997916159728035, + "loss": 2.2356, + "step": 181 + }, + { + "epoch": 0.22, + "learning_rate": 0.0001997891724655462, + "loss": 2.1187, + "step": 182 + }, + { + "epoch": 0.22, + "learning_rate": 0.00019978671473052964, + "loss": 1.9301, + "step": 183 + }, + { + "epoch": 0.22, + "learning_rate": 0.00019978424276810423, + "loss": 1.8582, + "step": 184 + }, + { + "epoch": 0.22, + "learning_rate": 0.0001997817565786224, + "loss": 2.144, + "step": 185 + }, + { + "epoch": 0.22, + "learning_rate": 0.00019977925616243862, + "loss": 2.0595, + "step": 186 + }, + { + "epoch": 0.22, + "learning_rate": 0.00019977674151990945, + "loss": 1.9104, + "step": 187 + }, + { + "epoch": 0.23, + "learning_rate": 0.00019977421265139332, + "loss": 1.9727, + "step": 188 + }, + { + "epoch": 0.23, + "learning_rate": 0.00019977166955725088, + "loss": 1.8727, + "step": 189 + }, + { + "epoch": 0.23, + "learning_rate": 0.0001997691122378447, + "loss": 2.0611, + "step": 190 + }, + { + "epoch": 0.23, + "learning_rate": 0.0001997665406935394, + "loss": 2.0745, + "step": 191 + }, + { + "epoch": 0.23, + "learning_rate": 0.0001997639549247016, + "loss": 1.9974, + "step": 192 + }, + { + "epoch": 0.23, + "learning_rate": 0.00019976135493169996, + "loss": 1.9856, + "step": 193 + }, + { + "epoch": 0.23, + "learning_rate": 0.00019975874071490526, + "loss": 1.778, + "step": 194 + }, + { + "epoch": 0.23, + "learning_rate": 0.00019975611227469016, + "loss": 1.8347, + "step": 195 + }, + { + "epoch": 0.24, + "learning_rate": 0.0001997534696114294, + "loss": 1.5555, + "step": 196 + }, + { + "epoch": 0.24, + "learning_rate": 0.00019975081272549989, + "loss": 1.5625, + "step": 197 + }, + { + "epoch": 0.24, + "learning_rate": 0.00019974814161728032, + "loss": 1.9997, + "step": 198 + }, + { + "epoch": 0.24, + "learning_rate": 0.00019974545628715157, + "loss": 1.9523, + "step": 199 + }, + { + "epoch": 0.24, + "learning_rate": 0.00019974275673549654, + "loss": 2.1557, + "step": 200 + }, + { + "epoch": 0.24, + "learning_rate": 0.00019974004296270006, + "loss": 1.8306, + "step": 201 + }, + { + "epoch": 0.24, + "learning_rate": 0.00019973731496914914, + "loss": 2.0051, + "step": 202 + }, + { + "epoch": 0.24, + "learning_rate": 0.00019973457275523264, + "loss": 2.201, + "step": 203 + }, + { + "epoch": 0.24, + "learning_rate": 0.0001997318163213416, + "loss": 2.2446, + "step": 204 + }, + { + "epoch": 0.25, + "learning_rate": 0.00019972904566786903, + "loss": 2.1172, + "step": 205 + }, + { + "epoch": 0.25, + "learning_rate": 0.00019972626079520995, + "loss": 1.9849, + "step": 206 + }, + { + "epoch": 0.25, + "learning_rate": 0.00019972346170376142, + "loss": 1.9774, + "step": 207 + }, + { + "epoch": 0.25, + "learning_rate": 0.0001997206483939225, + "loss": 1.7625, + "step": 208 + }, + { + "epoch": 0.25, + "learning_rate": 0.00019971782086609436, + "loss": 2.2346, + "step": 209 + }, + { + "epoch": 0.25, + "eval_loss": 2.00066876411438, + "eval_runtime": 282.7648, + "eval_samples_per_second": 0.729, + "eval_steps_per_second": 0.729, + "step": 209 + }, + { + "epoch": 0.25, + "learning_rate": 0.00019971497912068013, + "loss": 2.4185, + "step": 210 + }, + { + "epoch": 0.25, + "learning_rate": 0.00019971212315808497, + "loss": 1.946, + "step": 211 + }, + { + "epoch": 0.25, + "learning_rate": 0.00019970925297871605, + "loss": 2.0049, + "step": 212 + }, + { + "epoch": 0.26, + "learning_rate": 0.00019970636858298267, + "loss": 1.9545, + "step": 213 + }, + { + "epoch": 0.26, + "learning_rate": 0.00019970346997129598, + "loss": 1.9636, + "step": 214 + }, + { + "epoch": 0.26, + "learning_rate": 0.00019970055714406938, + "loss": 1.9068, + "step": 215 + }, + { + "epoch": 0.26, + "learning_rate": 0.00019969763010171807, + "loss": 1.5749, + "step": 216 + }, + { + "epoch": 0.26, + "learning_rate": 0.00019969468884465942, + "loss": 1.7676, + "step": 217 + }, + { + "epoch": 0.26, + "learning_rate": 0.0001996917333733128, + "loss": 2.0329, + "step": 218 + }, + { + "epoch": 0.26, + "learning_rate": 0.0001996887636880996, + "loss": 1.9307, + "step": 219 + }, + { + "epoch": 0.26, + "learning_rate": 0.00019968577978944323, + "loss": 2.134, + "step": 220 + }, + { + "epoch": 0.27, + "learning_rate": 0.00019968278167776908, + "loss": 2.0911, + "step": 221 + }, + { + "epoch": 0.27, + "learning_rate": 0.00019967976935350467, + "loss": 2.5057, + "step": 222 + }, + { + "epoch": 0.27, + "learning_rate": 0.0001996767428170795, + "loss": 1.9267, + "step": 223 + }, + { + "epoch": 0.27, + "learning_rate": 0.00019967370206892503, + "loss": 2.3569, + "step": 224 + }, + { + "epoch": 0.27, + "learning_rate": 0.00019967064710947488, + "loss": 1.992, + "step": 225 + }, + { + "epoch": 0.27, + "learning_rate": 0.00019966757793916454, + "loss": 2.01, + "step": 226 + }, + { + "epoch": 0.27, + "learning_rate": 0.00019966449455843165, + "loss": 1.8037, + "step": 227 + }, + { + "epoch": 0.27, + "learning_rate": 0.00019966139696771587, + "loss": 2.2498, + "step": 228 + }, + { + "epoch": 0.27, + "learning_rate": 0.00019965828516745876, + "loss": 1.6563, + "step": 229 + }, + { + "epoch": 0.28, + "learning_rate": 0.0001996551591581041, + "loss": 1.979, + "step": 230 + }, + { + "epoch": 0.28, + "learning_rate": 0.0001996520189400975, + "loss": 2.1553, + "step": 231 + }, + { + "epoch": 0.28, + "learning_rate": 0.0001996488645138867, + "loss": 1.8743, + "step": 232 + }, + { + "epoch": 0.28, + "learning_rate": 0.00019964569587992148, + "loss": 2.1907, + "step": 233 + }, + { + "epoch": 0.28, + "learning_rate": 0.00019964251303865362, + "loss": 2.0644, + "step": 234 + }, + { + "epoch": 0.28, + "learning_rate": 0.00019963931599053692, + "loss": 2.1721, + "step": 235 + }, + { + "epoch": 0.28, + "learning_rate": 0.0001996361047360272, + "loss": 2.2267, + "step": 236 + }, + { + "epoch": 0.28, + "learning_rate": 0.0001996328792755823, + "loss": 1.9445, + "step": 237 + }, + { + "epoch": 0.29, + "learning_rate": 0.00019962963960966213, + "loss": 2.2003, + "step": 238 + }, + { + "epoch": 0.29, + "learning_rate": 0.0001996263857387286, + "loss": 2.3114, + "step": 239 + }, + { + "epoch": 0.29, + "learning_rate": 0.0001996231176632456, + "loss": 1.8553, + "step": 240 + }, + { + "epoch": 0.29, + "learning_rate": 0.00019961983538367914, + "loss": 2.1349, + "step": 241 + }, + { + "epoch": 0.29, + "learning_rate": 0.00019961653890049715, + "loss": 1.8784, + "step": 242 + }, + { + "epoch": 0.29, + "learning_rate": 0.0001996132282141697, + "loss": 2.0118, + "step": 243 + }, + { + "epoch": 0.29, + "learning_rate": 0.00019960990332516874, + "loss": 1.9938, + "step": 244 + }, + { + "epoch": 0.29, + "learning_rate": 0.00019960656423396834, + "loss": 2.2582, + "step": 245 + }, + { + "epoch": 0.3, + "learning_rate": 0.00019960321094104465, + "loss": 2.1807, + "step": 246 + }, + { + "epoch": 0.3, + "learning_rate": 0.00019959984344687578, + "loss": 1.9084, + "step": 247 + }, + { + "epoch": 0.3, + "learning_rate": 0.00019959646175194174, + "loss": 2.2879, + "step": 248 + }, + { + "epoch": 0.3, + "learning_rate": 0.0001995930658567248, + "loss": 1.942, + "step": 249 + }, + { + "epoch": 0.3, + "learning_rate": 0.00019958965576170908, + "loss": 2.1313, + "step": 250 + }, + { + "epoch": 0.3, + "learning_rate": 0.00019958623146738088, + "loss": 2.3202, + "step": 251 + }, + { + "epoch": 0.3, + "learning_rate": 0.0001995827929742283, + "loss": 1.7832, + "step": 252 + }, + { + "epoch": 0.3, + "learning_rate": 0.00019957934028274162, + "loss": 1.7103, + "step": 253 + }, + { + "epoch": 0.3, + "learning_rate": 0.00019957587339341321, + "loss": 1.9912, + "step": 254 + }, + { + "epoch": 0.31, + "learning_rate": 0.0001995723923067373, + "loss": 1.6686, + "step": 255 + }, + { + "epoch": 0.31, + "learning_rate": 0.00019956889702321023, + "loss": 1.966, + "step": 256 + }, + { + "epoch": 0.31, + "learning_rate": 0.00019956538754333034, + "loss": 2.2287, + "step": 257 + }, + { + "epoch": 0.31, + "learning_rate": 0.00019956186386759804, + "loss": 1.4866, + "step": 258 + }, + { + "epoch": 0.31, + "learning_rate": 0.0001995583259965157, + "loss": 1.9599, + "step": 259 + }, + { + "epoch": 0.31, + "learning_rate": 0.00019955477393058773, + "loss": 1.9273, + "step": 260 + }, + { + "epoch": 0.31, + "learning_rate": 0.0001995512076703206, + "loss": 1.847, + "step": 261 + }, + { + "epoch": 0.31, + "learning_rate": 0.00019954762721622279, + "loss": 2.0535, + "step": 262 + }, + { + "epoch": 0.32, + "learning_rate": 0.0001995440325688048, + "loss": 2.4403, + "step": 263 + }, + { + "epoch": 0.32, + "learning_rate": 0.00019954042372857908, + "loss": 1.8712, + "step": 264 + }, + { + "epoch": 0.32, + "learning_rate": 0.00019953680069606026, + "loss": 2.1837, + "step": 265 + }, + { + "epoch": 0.32, + "learning_rate": 0.00019953316347176488, + "loss": 2.0398, + "step": 266 + }, + { + "epoch": 0.32, + "learning_rate": 0.0001995295120562115, + "loss": 2.1135, + "step": 267 + }, + { + "epoch": 0.32, + "learning_rate": 0.00019952584644992075, + "loss": 2.0358, + "step": 268 + }, + { + "epoch": 0.32, + "learning_rate": 0.00019952216665341526, + "loss": 2.3282, + "step": 269 + }, + { + "epoch": 0.32, + "learning_rate": 0.0001995184726672197, + "loss": 1.9741, + "step": 270 + }, + { + "epoch": 0.33, + "learning_rate": 0.00019951476449186074, + "loss": 1.7523, + "step": 271 + }, + { + "epoch": 0.33, + "learning_rate": 0.00019951104212786712, + "loss": 2.1509, + "step": 272 + }, + { + "epoch": 0.33, + "learning_rate": 0.0001995073055757695, + "loss": 2.0865, + "step": 273 + }, + { + "epoch": 0.33, + "learning_rate": 0.00019950355483610067, + "loss": 1.8972, + "step": 274 + }, + { + "epoch": 0.33, + "learning_rate": 0.00019949978990939542, + "loss": 2.4693, + "step": 275 + }, + { + "epoch": 0.33, + "learning_rate": 0.0001994960107961905, + "loss": 1.9307, + "step": 276 + }, + { + "epoch": 0.33, + "learning_rate": 0.0001994922174970248, + "loss": 2.0097, + "step": 277 + }, + { + "epoch": 0.33, + "learning_rate": 0.0001994884100124391, + "loss": 1.6561, + "step": 278 + }, + { + "epoch": 0.33, + "learning_rate": 0.0001994845883429763, + "loss": 2.3069, + "step": 279 + }, + { + "epoch": 0.34, + "learning_rate": 0.00019948075248918124, + "loss": 2.0134, + "step": 280 + }, + { + "epoch": 0.34, + "learning_rate": 0.00019947690245160091, + "loss": 2.1061, + "step": 281 + }, + { + "epoch": 0.34, + "learning_rate": 0.00019947303823078416, + "loss": 2.0855, + "step": 282 + }, + { + "epoch": 0.34, + "learning_rate": 0.00019946915982728197, + "loss": 1.5672, + "step": 283 + }, + { + "epoch": 0.34, + "learning_rate": 0.0001994652672416473, + "loss": 1.7289, + "step": 284 + }, + { + "epoch": 0.34, + "learning_rate": 0.00019946136047443522, + "loss": 1.9013, + "step": 285 + }, + { + "epoch": 0.34, + "learning_rate": 0.00019945743952620268, + "loss": 2.3105, + "step": 286 + }, + { + "epoch": 0.34, + "learning_rate": 0.00019945350439750872, + "loss": 2.341, + "step": 287 + }, + { + "epoch": 0.35, + "learning_rate": 0.00019944955508891443, + "loss": 1.88, + "step": 288 + }, + { + "epoch": 0.35, + "learning_rate": 0.0001994455916009829, + "loss": 1.913, + "step": 289 + }, + { + "epoch": 0.35, + "learning_rate": 0.00019944161393427922, + "loss": 1.9513, + "step": 290 + }, + { + "epoch": 0.35, + "learning_rate": 0.00019943762208937053, + "loss": 2.3331, + "step": 291 + }, + { + "epoch": 0.35, + "learning_rate": 0.00019943361606682597, + "loss": 2.3024, + "step": 292 + }, + { + "epoch": 0.35, + "learning_rate": 0.00019942959586721672, + "loss": 2.2222, + "step": 293 + }, + { + "epoch": 0.35, + "learning_rate": 0.00019942556149111598, + "loss": 2.1003, + "step": 294 + }, + { + "epoch": 0.35, + "learning_rate": 0.0001994215129390989, + "loss": 1.9038, + "step": 295 + }, + { + "epoch": 0.36, + "learning_rate": 0.00019941745021174282, + "loss": 1.6068, + "step": 296 + }, + { + "epoch": 0.36, + "learning_rate": 0.00019941337330962693, + "loss": 1.8894, + "step": 297 + }, + { + "epoch": 0.36, + "learning_rate": 0.00019940928223333252, + "loss": 2.3158, + "step": 298 + }, + { + "epoch": 0.36, + "learning_rate": 0.0001994051769834429, + "loss": 2.1015, + "step": 299 + }, + { + "epoch": 0.36, + "learning_rate": 0.00019940105756054337, + "loss": 2.1519, + "step": 300 + }, + { + "epoch": 0.36, + "learning_rate": 0.00019939692396522127, + "loss": 1.7233, + "step": 301 + }, + { + "epoch": 0.36, + "learning_rate": 0.00019939277619806598, + "loss": 1.85, + "step": 302 + }, + { + "epoch": 0.36, + "learning_rate": 0.00019938861425966887, + "loss": 2.2368, + "step": 303 + }, + { + "epoch": 0.36, + "learning_rate": 0.00019938443815062335, + "loss": 1.765, + "step": 304 + }, + { + "epoch": 0.37, + "learning_rate": 0.0001993802478715248, + "loss": 1.6333, + "step": 305 + }, + { + "epoch": 0.37, + "learning_rate": 0.00019937604342297073, + "loss": 2.191, + "step": 306 + }, + { + "epoch": 0.37, + "learning_rate": 0.00019937182480556055, + "loss": 2.2402, + "step": 307 + }, + { + "epoch": 0.37, + "learning_rate": 0.00019936759201989577, + "loss": 2.0568, + "step": 308 + }, + { + "epoch": 0.37, + "learning_rate": 0.0001993633450665799, + "loss": 2.4314, + "step": 309 + }, + { + "epoch": 0.37, + "learning_rate": 0.00019935908394621844, + "loss": 2.0556, + "step": 310 + }, + { + "epoch": 0.37, + "learning_rate": 0.00019935480865941894, + "loss": 2.0988, + "step": 311 + }, + { + "epoch": 0.37, + "learning_rate": 0.00019935051920679094, + "loss": 2.0964, + "step": 312 + }, + { + "epoch": 0.38, + "learning_rate": 0.00019934621558894607, + "loss": 1.9365, + "step": 313 + }, + { + "epoch": 0.38, + "learning_rate": 0.0001993418978064979, + "loss": 1.6224, + "step": 314 + }, + { + "epoch": 0.38, + "learning_rate": 0.00019933756586006202, + "loss": 2.144, + "step": 315 + }, + { + "epoch": 0.38, + "learning_rate": 0.00019933321975025616, + "loss": 2.2899, + "step": 316 + }, + { + "epoch": 0.38, + "learning_rate": 0.00019932885947769992, + "loss": 1.8865, + "step": 317 + }, + { + "epoch": 0.38, + "learning_rate": 0.000199324485043015, + "loss": 2.3996, + "step": 318 + }, + { + "epoch": 0.38, + "learning_rate": 0.0001993200964468251, + "loss": 1.3858, + "step": 319 + }, + { + "epoch": 0.38, + "learning_rate": 0.00019931569368975588, + "loss": 2.2231, + "step": 320 + }, + { + "epoch": 0.39, + "learning_rate": 0.00019931127677243516, + "loss": 2.0537, + "step": 321 + }, + { + "epoch": 0.39, + "learning_rate": 0.00019930684569549264, + "loss": 2.1381, + "step": 322 + }, + { + "epoch": 0.39, + "learning_rate": 0.00019930240045956012, + "loss": 2.0152, + "step": 323 + }, + { + "epoch": 0.39, + "learning_rate": 0.0001992979410652714, + "loss": 2.0293, + "step": 324 + }, + { + "epoch": 0.39, + "learning_rate": 0.00019929346751326228, + "loss": 1.7457, + "step": 325 + }, + { + "epoch": 0.39, + "learning_rate": 0.00019928897980417057, + "loss": 1.987, + "step": 326 + }, + { + "epoch": 0.39, + "learning_rate": 0.00019928447793863616, + "loss": 2.2451, + "step": 327 + }, + { + "epoch": 0.39, + "learning_rate": 0.00019927996191730093, + "loss": 2.3312, + "step": 328 + }, + { + "epoch": 0.39, + "learning_rate": 0.0001992754317408087, + "loss": 1.8771, + "step": 329 + }, + { + "epoch": 0.4, + "learning_rate": 0.0001992708874098054, + "loss": 1.833, + "step": 330 + }, + { + "epoch": 0.4, + "learning_rate": 0.00019926632892493896, + "loss": 1.9343, + "step": 331 + }, + { + "epoch": 0.4, + "learning_rate": 0.00019926175628685937, + "loss": 2.2328, + "step": 332 + }, + { + "epoch": 0.4, + "learning_rate": 0.0001992571694962185, + "loss": 1.9916, + "step": 333 + }, + { + "epoch": 0.4, + "learning_rate": 0.0001992525685536704, + "loss": 1.9497, + "step": 334 + }, + { + "epoch": 0.4, + "learning_rate": 0.000199247953459871, + "loss": 2.029, + "step": 335 + }, + { + "epoch": 0.4, + "learning_rate": 0.00019924332421547835, + "loss": 2.0326, + "step": 336 + }, + { + "epoch": 0.4, + "learning_rate": 0.0001992386808211525, + "loss": 2.6406, + "step": 337 + }, + { + "epoch": 0.41, + "learning_rate": 0.00019923402327755546, + "loss": 2.3811, + "step": 338 + }, + { + "epoch": 0.41, + "learning_rate": 0.00019922935158535129, + "loss": 1.6143, + "step": 339 + }, + { + "epoch": 0.41, + "learning_rate": 0.00019922466574520608, + "loss": 2.2182, + "step": 340 + }, + { + "epoch": 0.41, + "learning_rate": 0.00019921996575778794, + "loss": 2.218, + "step": 341 + }, + { + "epoch": 0.41, + "learning_rate": 0.000199215251623767, + "loss": 1.8615, + "step": 342 + }, + { + "epoch": 0.41, + "learning_rate": 0.00019921052334381534, + "loss": 2.165, + "step": 343 + }, + { + "epoch": 0.41, + "learning_rate": 0.00019920578091860716, + "loss": 2.1627, + "step": 344 + }, + { + "epoch": 0.41, + "learning_rate": 0.0001992010243488186, + "loss": 2.154, + "step": 345 + }, + { + "epoch": 0.42, + "learning_rate": 0.00019919625363512786, + "loss": 1.5966, + "step": 346 + }, + { + "epoch": 0.42, + "learning_rate": 0.00019919146877821512, + "loss": 2.0903, + "step": 347 + }, + { + "epoch": 0.42, + "learning_rate": 0.0001991866697787626, + "loss": 2.2322, + "step": 348 + }, + { + "epoch": 0.42, + "learning_rate": 0.00019918185663745456, + "loss": 1.9319, + "step": 349 + }, + { + "epoch": 0.42, + "learning_rate": 0.00019917702935497725, + "loss": 2.1367, + "step": 350 + }, + { + "epoch": 0.42, + "learning_rate": 0.00019917218793201886, + "loss": 2.1767, + "step": 351 + }, + { + "epoch": 0.42, + "learning_rate": 0.00019916733236926976, + "loss": 2.1009, + "step": 352 + }, + { + "epoch": 0.42, + "learning_rate": 0.0001991624626674222, + "loss": 2.1286, + "step": 353 + }, + { + "epoch": 0.42, + "learning_rate": 0.0001991575788271705, + "loss": 2.181, + "step": 354 + }, + { + "epoch": 0.43, + "learning_rate": 0.00019915268084921101, + "loss": 2.12, + "step": 355 + }, + { + "epoch": 0.43, + "learning_rate": 0.00019914776873424206, + "loss": 1.9895, + "step": 356 + }, + { + "epoch": 0.43, + "learning_rate": 0.000199142842482964, + "loss": 1.9285, + "step": 357 + }, + { + "epoch": 0.43, + "learning_rate": 0.0001991379020960792, + "loss": 2.2376, + "step": 358 + }, + { + "epoch": 0.43, + "learning_rate": 0.0001991329475742921, + "loss": 2.1274, + "step": 359 + }, + { + "epoch": 0.43, + "learning_rate": 0.00019912797891830908, + "loss": 2.0043, + "step": 360 + }, + { + "epoch": 0.43, + "learning_rate": 0.00019912299612883852, + "loss": 2.022, + "step": 361 + }, + { + "epoch": 0.43, + "learning_rate": 0.00019911799920659093, + "loss": 1.7343, + "step": 362 + }, + { + "epoch": 0.44, + "learning_rate": 0.0001991129881522787, + "loss": 2.0621, + "step": 363 + }, + { + "epoch": 0.44, + "learning_rate": 0.00019910796296661632, + "loss": 1.5116, + "step": 364 + }, + { + "epoch": 0.44, + "learning_rate": 0.0001991029236503203, + "loss": 2.0485, + "step": 365 + }, + { + "epoch": 0.44, + "learning_rate": 0.00019909787020410907, + "loss": 1.971, + "step": 366 + }, + { + "epoch": 0.44, + "learning_rate": 0.00019909280262870324, + "loss": 1.9724, + "step": 367 + }, + { + "epoch": 0.44, + "learning_rate": 0.00019908772092482524, + "loss": 1.318, + "step": 368 + }, + { + "epoch": 0.44, + "learning_rate": 0.00019908262509319964, + "loss": 2.0539, + "step": 369 + }, + { + "epoch": 0.44, + "learning_rate": 0.00019907751513455302, + "loss": 2.1097, + "step": 370 + }, + { + "epoch": 0.45, + "learning_rate": 0.00019907239104961392, + "loss": 2.0632, + "step": 371 + }, + { + "epoch": 0.45, + "learning_rate": 0.00019906725283911296, + "loss": 2.1897, + "step": 372 + }, + { + "epoch": 0.45, + "learning_rate": 0.00019906210050378266, + "loss": 2.2002, + "step": 373 + }, + { + "epoch": 0.45, + "learning_rate": 0.00019905693404435773, + "loss": 1.9005, + "step": 374 + }, + { + "epoch": 0.45, + "learning_rate": 0.00019905175346157474, + "loss": 1.9873, + "step": 375 + }, + { + "epoch": 0.45, + "learning_rate": 0.00019904655875617233, + "loss": 1.7215, + "step": 376 + }, + { + "epoch": 0.45, + "learning_rate": 0.00019904134992889113, + "loss": 2.0434, + "step": 377 + }, + { + "epoch": 0.45, + "learning_rate": 0.00019903612698047383, + "loss": 2.4223, + "step": 378 + }, + { + "epoch": 0.45, + "learning_rate": 0.00019903088991166513, + "loss": 2.0837, + "step": 379 + }, + { + "epoch": 0.46, + "learning_rate": 0.00019902563872321172, + "loss": 2.2389, + "step": 380 + }, + { + "epoch": 0.46, + "learning_rate": 0.00019902037341586225, + "loss": 1.7205, + "step": 381 + }, + { + "epoch": 0.46, + "learning_rate": 0.0001990150939903675, + "loss": 1.9577, + "step": 382 + }, + { + "epoch": 0.46, + "learning_rate": 0.00019900980044748015, + "loss": 1.8778, + "step": 383 + }, + { + "epoch": 0.46, + "learning_rate": 0.000199004492787955, + "loss": 2.2213, + "step": 384 + }, + { + "epoch": 0.46, + "learning_rate": 0.00019899917101254874, + "loss": 2.0927, + "step": 385 + }, + { + "epoch": 0.46, + "learning_rate": 0.00019899383512202019, + "loss": 2.2921, + "step": 386 + }, + { + "epoch": 0.46, + "learning_rate": 0.0001989884851171301, + "loss": 2.2983, + "step": 387 + }, + { + "epoch": 0.47, + "learning_rate": 0.0001989831209986413, + "loss": 1.8052, + "step": 388 + }, + { + "epoch": 0.47, + "learning_rate": 0.00019897774276731857, + "loss": 1.7741, + "step": 389 + }, + { + "epoch": 0.47, + "learning_rate": 0.00019897235042392873, + "loss": 1.779, + "step": 390 + }, + { + "epoch": 0.47, + "learning_rate": 0.00019896694396924063, + "loss": 1.6924, + "step": 391 + }, + { + "epoch": 0.47, + "learning_rate": 0.00019896152340402509, + "loss": 2.036, + "step": 392 + }, + { + "epoch": 0.47, + "learning_rate": 0.00019895608872905494, + "loss": 2.04, + "step": 393 + }, + { + "epoch": 0.47, + "learning_rate": 0.0001989506399451051, + "loss": 2.1702, + "step": 394 + }, + { + "epoch": 0.47, + "learning_rate": 0.00019894517705295245, + "loss": 1.9429, + "step": 395 + }, + { + "epoch": 0.48, + "learning_rate": 0.00019893970005337584, + "loss": 2.0528, + "step": 396 + }, + { + "epoch": 0.48, + "learning_rate": 0.00019893420894715618, + "loss": 1.7906, + "step": 397 + }, + { + "epoch": 0.48, + "learning_rate": 0.0001989287037350764, + "loss": 2.3494, + "step": 398 + }, + { + "epoch": 0.48, + "learning_rate": 0.00019892318441792138, + "loss": 1.7415, + "step": 399 + }, + { + "epoch": 0.48, + "learning_rate": 0.0001989176509964781, + "loss": 2.0184, + "step": 400 + }, + { + "epoch": 0.48, + "learning_rate": 0.0001989121034715355, + "loss": 1.9277, + "step": 401 + }, + { + "epoch": 0.48, + "learning_rate": 0.0001989065418438845, + "loss": 2.2168, + "step": 402 + }, + { + "epoch": 0.48, + "learning_rate": 0.00019890096611431814, + "loss": 2.6114, + "step": 403 + }, + { + "epoch": 0.48, + "learning_rate": 0.00019889537628363133, + "loss": 2.0713, + "step": 404 + }, + { + "epoch": 0.49, + "learning_rate": 0.00019888977235262104, + "loss": 2.2966, + "step": 405 + }, + { + "epoch": 0.49, + "learning_rate": 0.00019888415432208636, + "loss": 2.5206, + "step": 406 + }, + { + "epoch": 0.49, + "learning_rate": 0.00019887852219282822, + "loss": 2.4503, + "step": 407 + }, + { + "epoch": 0.49, + "learning_rate": 0.00019887287596564966, + "loss": 2.102, + "step": 408 + }, + { + "epoch": 0.49, + "learning_rate": 0.00019886721564135572, + "loss": 2.3275, + "step": 409 + }, + { + "epoch": 0.49, + "learning_rate": 0.00019886154122075343, + "loss": 2.0481, + "step": 410 + }, + { + "epoch": 0.49, + "learning_rate": 0.00019885585270465182, + "loss": 1.8395, + "step": 411 + }, + { + "epoch": 0.49, + "learning_rate": 0.00019885015009386202, + "loss": 2.3535, + "step": 412 + }, + { + "epoch": 0.5, + "learning_rate": 0.000198844433389197, + "loss": 2.0147, + "step": 413 + }, + { + "epoch": 0.5, + "learning_rate": 0.0001988387025914719, + "loss": 2.1919, + "step": 414 + }, + { + "epoch": 0.5, + "learning_rate": 0.0001988329577015038, + "loss": 2.156, + "step": 415 + }, + { + "epoch": 0.5, + "learning_rate": 0.00019882719872011176, + "loss": 2.2672, + "step": 416 + }, + { + "epoch": 0.5, + "learning_rate": 0.00019882142564811694, + "loss": 2.3242, + "step": 417 + }, + { + "epoch": 0.5, + "learning_rate": 0.0001988156384863424, + "loss": 2.0259, + "step": 418 + }, + { + "epoch": 0.5, + "eval_loss": 1.9941134452819824, + "eval_runtime": 282.533, + "eval_samples_per_second": 0.729, + "eval_steps_per_second": 0.729, + "step": 418 + }, + { + "epoch": 0.5, + "learning_rate": 0.00019880983723561332, + "loss": 1.7039, + "step": 419 + }, + { + "epoch": 0.5, + "learning_rate": 0.00019880402189675678, + "loss": 2.1007, + "step": 420 + }, + { + "epoch": 0.51, + "learning_rate": 0.00019879819247060193, + "loss": 2.2297, + "step": 421 + }, + { + "epoch": 0.51, + "learning_rate": 0.00019879234895797996, + "loss": 1.6166, + "step": 422 + }, + { + "epoch": 0.51, + "learning_rate": 0.000198786491359724, + "loss": 2.408, + "step": 423 + }, + { + "epoch": 0.51, + "learning_rate": 0.00019878061967666915, + "loss": 1.686, + "step": 424 + }, + { + "epoch": 0.51, + "learning_rate": 0.0001987747339096527, + "loss": 2.0492, + "step": 425 + }, + { + "epoch": 0.51, + "learning_rate": 0.00019876883405951377, + "loss": 2.2179, + "step": 426 + }, + { + "epoch": 0.51, + "learning_rate": 0.00019876292012709356, + "loss": 1.8812, + "step": 427 + }, + { + "epoch": 0.51, + "learning_rate": 0.00019875699211323528, + "loss": 2.2888, + "step": 428 + }, + { + "epoch": 0.52, + "learning_rate": 0.00019875105001878409, + "loss": 2.0561, + "step": 429 + }, + { + "epoch": 0.52, + "learning_rate": 0.00019874509384458725, + "loss": 1.9299, + "step": 430 + }, + { + "epoch": 0.52, + "learning_rate": 0.00019873912359149397, + "loss": 2.1999, + "step": 431 + }, + { + "epoch": 0.52, + "learning_rate": 0.00019873313926035548, + "loss": 1.8509, + "step": 432 + }, + { + "epoch": 0.52, + "learning_rate": 0.00019872714085202503, + "loss": 1.8281, + "step": 433 + }, + { + "epoch": 0.52, + "learning_rate": 0.0001987211283673578, + "loss": 1.8359, + "step": 434 + }, + { + "epoch": 0.52, + "learning_rate": 0.0001987151018072111, + "loss": 2.2844, + "step": 435 + }, + { + "epoch": 0.52, + "learning_rate": 0.00019870906117244416, + "loss": 1.9397, + "step": 436 + }, + { + "epoch": 0.52, + "learning_rate": 0.00019870300646391824, + "loss": 2.302, + "step": 437 + }, + { + "epoch": 0.53, + "learning_rate": 0.00019869693768249661, + "loss": 2.1176, + "step": 438 + }, + { + "epoch": 0.53, + "learning_rate": 0.00019869085482904458, + "loss": 2.1909, + "step": 439 + }, + { + "epoch": 0.53, + "learning_rate": 0.0001986847579044294, + "loss": 2.2382, + "step": 440 + }, + { + "epoch": 0.53, + "learning_rate": 0.00019867864690952035, + "loss": 2.0988, + "step": 441 + }, + { + "epoch": 0.53, + "learning_rate": 0.00019867252184518878, + "loss": 2.2136, + "step": 442 + }, + { + "epoch": 0.53, + "learning_rate": 0.0001986663827123079, + "loss": 1.9324, + "step": 443 + }, + { + "epoch": 0.53, + "learning_rate": 0.00019866022951175308, + "loss": 2.1274, + "step": 444 + }, + { + "epoch": 0.53, + "learning_rate": 0.00019865406224440165, + "loss": 1.8625, + "step": 445 + }, + { + "epoch": 0.54, + "learning_rate": 0.00019864788091113287, + "loss": 2.0009, + "step": 446 + }, + { + "epoch": 0.54, + "learning_rate": 0.0001986416855128281, + "loss": 2.2245, + "step": 447 + }, + { + "epoch": 0.54, + "learning_rate": 0.00019863547605037063, + "loss": 2.0654, + "step": 448 + }, + { + "epoch": 0.54, + "learning_rate": 0.00019862925252464586, + "loss": 1.4339, + "step": 449 + }, + { + "epoch": 0.54, + "learning_rate": 0.00019862301493654108, + "loss": 2.1347, + "step": 450 + }, + { + "epoch": 0.54, + "learning_rate": 0.00019861676328694562, + "loss": 1.7029, + "step": 451 + }, + { + "epoch": 0.54, + "learning_rate": 0.00019861049757675088, + "loss": 2.0081, + "step": 452 + }, + { + "epoch": 0.54, + "learning_rate": 0.00019860421780685018, + "loss": 1.9994, + "step": 453 + }, + { + "epoch": 0.55, + "learning_rate": 0.0001985979239781389, + "loss": 1.9325, + "step": 454 + }, + { + "epoch": 0.55, + "learning_rate": 0.00019859161609151436, + "loss": 1.8502, + "step": 455 + }, + { + "epoch": 0.55, + "learning_rate": 0.000198585294147876, + "loss": 2.3779, + "step": 456 + }, + { + "epoch": 0.55, + "learning_rate": 0.00019857895814812509, + "loss": 2.0303, + "step": 457 + }, + { + "epoch": 0.55, + "learning_rate": 0.0001985726080931651, + "loss": 1.9898, + "step": 458 + }, + { + "epoch": 0.55, + "learning_rate": 0.00019856624398390137, + "loss": 1.7648, + "step": 459 + }, + { + "epoch": 0.55, + "learning_rate": 0.00019855986582124126, + "loss": 1.7822, + "step": 460 + }, + { + "epoch": 0.55, + "learning_rate": 0.0001985534736060942, + "loss": 1.9219, + "step": 461 + }, + { + "epoch": 0.55, + "learning_rate": 0.00019854706733937155, + "loss": 2.1789, + "step": 462 + }, + { + "epoch": 0.56, + "learning_rate": 0.00019854064702198675, + "loss": 1.9091, + "step": 463 + }, + { + "epoch": 0.56, + "learning_rate": 0.00019853421265485514, + "loss": 1.9941, + "step": 464 + }, + { + "epoch": 0.56, + "learning_rate": 0.0001985277642388941, + "loss": 1.904, + "step": 465 + }, + { + "epoch": 0.56, + "learning_rate": 0.00019852130177502316, + "loss": 1.6299, + "step": 466 + }, + { + "epoch": 0.56, + "learning_rate": 0.0001985148252641636, + "loss": 1.7712, + "step": 467 + }, + { + "epoch": 0.56, + "learning_rate": 0.00019850833470723886, + "loss": 1.6825, + "step": 468 + }, + { + "epoch": 0.56, + "learning_rate": 0.0001985018301051744, + "loss": 1.7408, + "step": 469 + }, + { + "epoch": 0.56, + "learning_rate": 0.00019849531145889758, + "loss": 2.0622, + "step": 470 + }, + { + "epoch": 0.57, + "learning_rate": 0.00019848877876933784, + "loss": 1.5699, + "step": 471 + }, + { + "epoch": 0.57, + "learning_rate": 0.0001984822320374266, + "loss": 2.0253, + "step": 472 + }, + { + "epoch": 0.57, + "learning_rate": 0.00019847567126409724, + "loss": 2.2186, + "step": 473 + }, + { + "epoch": 0.57, + "learning_rate": 0.00019846909645028523, + "loss": 2.0872, + "step": 474 + }, + { + "epoch": 0.57, + "learning_rate": 0.000198462507596928, + "loss": 1.9362, + "step": 475 + }, + { + "epoch": 0.57, + "learning_rate": 0.00019845590470496497, + "loss": 2.4109, + "step": 476 + }, + { + "epoch": 0.57, + "learning_rate": 0.00019844928777533753, + "loss": 2.2626, + "step": 477 + }, + { + "epoch": 0.57, + "learning_rate": 0.00019844265680898918, + "loss": 2.0874, + "step": 478 + }, + { + "epoch": 0.58, + "learning_rate": 0.0001984360118068653, + "loss": 2.1606, + "step": 479 + }, + { + "epoch": 0.58, + "learning_rate": 0.0001984293527699133, + "loss": 2.063, + "step": 480 + }, + { + "epoch": 0.58, + "learning_rate": 0.00019842267969908265, + "loss": 1.9065, + "step": 481 + }, + { + "epoch": 0.58, + "learning_rate": 0.0001984159925953248, + "loss": 1.9511, + "step": 482 + }, + { + "epoch": 0.58, + "learning_rate": 0.00019840929145959317, + "loss": 2.056, + "step": 483 + }, + { + "epoch": 0.58, + "learning_rate": 0.00019840257629284317, + "loss": 2.2353, + "step": 484 + }, + { + "epoch": 0.58, + "learning_rate": 0.00019839584709603226, + "loss": 1.9401, + "step": 485 + }, + { + "epoch": 0.58, + "learning_rate": 0.0001983891038701199, + "loss": 1.9648, + "step": 486 + }, + { + "epoch": 0.58, + "learning_rate": 0.00019838234661606748, + "loss": 1.753, + "step": 487 + }, + { + "epoch": 0.59, + "learning_rate": 0.00019837557533483846, + "loss": 1.7805, + "step": 488 + }, + { + "epoch": 0.59, + "learning_rate": 0.00019836879002739827, + "loss": 2.192, + "step": 489 + }, + { + "epoch": 0.59, + "learning_rate": 0.00019836199069471437, + "loss": 1.9112, + "step": 490 + }, + { + "epoch": 0.59, + "learning_rate": 0.00019835517733775615, + "loss": 2.0119, + "step": 491 + }, + { + "epoch": 0.59, + "learning_rate": 0.0001983483499574951, + "loss": 1.8932, + "step": 492 + }, + { + "epoch": 0.59, + "learning_rate": 0.00019834150855490464, + "loss": 1.5968, + "step": 493 + }, + { + "epoch": 0.59, + "learning_rate": 0.00019833465313096017, + "loss": 2.1493, + "step": 494 + }, + { + "epoch": 0.59, + "learning_rate": 0.00019832778368663917, + "loss": 1.8863, + "step": 495 + }, + { + "epoch": 0.6, + "learning_rate": 0.000198320900222921, + "loss": 2.2134, + "step": 496 + }, + { + "epoch": 0.6, + "learning_rate": 0.00019831400274078717, + "loss": 2.2831, + "step": 497 + }, + { + "epoch": 0.6, + "learning_rate": 0.00019830709124122112, + "loss": 2.0266, + "step": 498 + }, + { + "epoch": 0.6, + "learning_rate": 0.0001983001657252082, + "loss": 2.3392, + "step": 499 + }, + { + "epoch": 0.6, + "learning_rate": 0.00019829322619373588, + "loss": 1.8426, + "step": 500 + }, + { + "epoch": 0.6, + "learning_rate": 0.00019828627264779363, + "loss": 2.0742, + "step": 501 + }, + { + "epoch": 0.6, + "learning_rate": 0.0001982793050883728, + "loss": 1.9578, + "step": 502 + }, + { + "epoch": 0.6, + "learning_rate": 0.00019827232351646686, + "loss": 2.0863, + "step": 503 + }, + { + "epoch": 0.61, + "learning_rate": 0.0001982653279330712, + "loss": 2.2881, + "step": 504 + }, + { + "epoch": 0.61, + "learning_rate": 0.00019825831833918323, + "loss": 1.8869, + "step": 505 + }, + { + "epoch": 0.61, + "learning_rate": 0.0001982512947358024, + "loss": 1.8997, + "step": 506 + }, + { + "epoch": 0.61, + "learning_rate": 0.00019824425712393012, + "loss": 1.8945, + "step": 507 + }, + { + "epoch": 0.61, + "learning_rate": 0.00019823720550456977, + "loss": 1.9496, + "step": 508 + }, + { + "epoch": 0.61, + "learning_rate": 0.0001982301398787268, + "loss": 2.1066, + "step": 509 + }, + { + "epoch": 0.61, + "learning_rate": 0.00019822306024740852, + "loss": 1.958, + "step": 510 + }, + { + "epoch": 0.61, + "learning_rate": 0.00019821596661162447, + "loss": 2.1112, + "step": 511 + }, + { + "epoch": 0.61, + "learning_rate": 0.00019820885897238596, + "loss": 2.1012, + "step": 512 + }, + { + "epoch": 0.62, + "learning_rate": 0.0001982017373307064, + "loss": 2.2623, + "step": 513 + }, + { + "epoch": 0.62, + "learning_rate": 0.00019819460168760117, + "loss": 2.5058, + "step": 514 + }, + { + "epoch": 0.62, + "learning_rate": 0.0001981874520440877, + "loss": 2.1367, + "step": 515 + }, + { + "epoch": 0.62, + "learning_rate": 0.00019818028840118532, + "loss": 2.2743, + "step": 516 + }, + { + "epoch": 0.62, + "learning_rate": 0.00019817311075991543, + "loss": 1.5517, + "step": 517 + }, + { + "epoch": 0.62, + "learning_rate": 0.0001981659191213014, + "loss": 1.9569, + "step": 518 + }, + { + "epoch": 0.62, + "learning_rate": 0.00019815871348636863, + "loss": 2.0566, + "step": 519 + }, + { + "epoch": 0.62, + "learning_rate": 0.00019815149385614444, + "loss": 1.8859, + "step": 520 + }, + { + "epoch": 0.63, + "learning_rate": 0.00019814426023165825, + "loss": 2.0298, + "step": 521 + }, + { + "epoch": 0.63, + "learning_rate": 0.00019813701261394136, + "loss": 2.0614, + "step": 522 + }, + { + "epoch": 0.63, + "learning_rate": 0.00019812975100402715, + "loss": 2.221, + "step": 523 + }, + { + "epoch": 0.63, + "learning_rate": 0.00019812247540295096, + "loss": 2.1255, + "step": 524 + }, + { + "epoch": 0.63, + "learning_rate": 0.00019811518581175014, + "loss": 2.1885, + "step": 525 + }, + { + "epoch": 0.63, + "learning_rate": 0.000198107882231464, + "loss": 2.3918, + "step": 526 + }, + { + "epoch": 0.63, + "learning_rate": 0.00019810056466313392, + "loss": 2.2759, + "step": 527 + }, + { + "epoch": 0.63, + "learning_rate": 0.00019809323310780318, + "loss": 1.9727, + "step": 528 + }, + { + "epoch": 0.64, + "learning_rate": 0.0001980858875665171, + "loss": 2.0417, + "step": 529 + }, + { + "epoch": 0.64, + "learning_rate": 0.00019807852804032305, + "loss": 1.645, + "step": 530 + }, + { + "epoch": 0.64, + "learning_rate": 0.0001980711545302703, + "loss": 1.7943, + "step": 531 + }, + { + "epoch": 0.64, + "learning_rate": 0.00019806376703741015, + "loss": 1.8844, + "step": 532 + }, + { + "epoch": 0.64, + "learning_rate": 0.00019805636556279588, + "loss": 2.1128, + "step": 533 + }, + { + "epoch": 0.64, + "learning_rate": 0.0001980489501074828, + "loss": 2.0272, + "step": 534 + }, + { + "epoch": 0.64, + "learning_rate": 0.00019804152067252816, + "loss": 2.0916, + "step": 535 + }, + { + "epoch": 0.64, + "learning_rate": 0.00019803407725899131, + "loss": 1.7287, + "step": 536 + }, + { + "epoch": 0.64, + "learning_rate": 0.00019802661986793342, + "loss": 2.0667, + "step": 537 + }, + { + "epoch": 0.65, + "learning_rate": 0.00019801914850041784, + "loss": 2.4016, + "step": 538 + }, + { + "epoch": 0.65, + "learning_rate": 0.00019801166315750978, + "loss": 1.8557, + "step": 539 + }, + { + "epoch": 0.65, + "learning_rate": 0.0001980041638402765, + "loss": 1.8072, + "step": 540 + }, + { + "epoch": 0.65, + "learning_rate": 0.00019799665054978722, + "loss": 2.2252, + "step": 541 + }, + { + "epoch": 0.65, + "learning_rate": 0.00019798912328711322, + "loss": 2.1377, + "step": 542 + }, + { + "epoch": 0.65, + "learning_rate": 0.00019798158205332764, + "loss": 2.0306, + "step": 543 + }, + { + "epoch": 0.65, + "learning_rate": 0.00019797402684950576, + "loss": 1.7428, + "step": 544 + }, + { + "epoch": 0.65, + "learning_rate": 0.00019796645767672477, + "loss": 2.0843, + "step": 545 + }, + { + "epoch": 0.66, + "learning_rate": 0.00019795887453606388, + "loss": 1.9175, + "step": 546 + }, + { + "epoch": 0.66, + "learning_rate": 0.00019795127742860423, + "loss": 1.6673, + "step": 547 + }, + { + "epoch": 0.66, + "learning_rate": 0.0001979436663554291, + "loss": 1.5553, + "step": 548 + }, + { + "epoch": 0.66, + "learning_rate": 0.00019793604131762357, + "loss": 1.604, + "step": 549 + }, + { + "epoch": 0.66, + "learning_rate": 0.00019792840231627482, + "loss": 2.023, + "step": 550 + }, + { + "epoch": 0.66, + "learning_rate": 0.00019792074935247206, + "loss": 1.8399, + "step": 551 + }, + { + "epoch": 0.66, + "learning_rate": 0.00019791308242730638, + "loss": 1.8579, + "step": 552 + }, + { + "epoch": 0.66, + "learning_rate": 0.00019790540154187094, + "loss": 2.2135, + "step": 553 + }, + { + "epoch": 0.67, + "learning_rate": 0.00019789770669726087, + "loss": 1.7894, + "step": 554 + }, + { + "epoch": 0.67, + "learning_rate": 0.00019788999789457326, + "loss": 2.1723, + "step": 555 + }, + { + "epoch": 0.67, + "learning_rate": 0.00019788227513490723, + "loss": 2.0881, + "step": 556 + }, + { + "epoch": 0.67, + "learning_rate": 0.00019787453841936393, + "loss": 1.7181, + "step": 557 + }, + { + "epoch": 0.67, + "learning_rate": 0.00019786678774904638, + "loss": 1.8725, + "step": 558 + }, + { + "epoch": 0.67, + "learning_rate": 0.00019785902312505964, + "loss": 2.0544, + "step": 559 + }, + { + "epoch": 0.67, + "learning_rate": 0.00019785124454851084, + "loss": 1.7503, + "step": 560 + }, + { + "epoch": 0.67, + "learning_rate": 0.000197843452020509, + "loss": 2.01, + "step": 561 + }, + { + "epoch": 0.67, + "learning_rate": 0.00019783564554216518, + "loss": 1.748, + "step": 562 + }, + { + "epoch": 0.68, + "learning_rate": 0.0001978278251145924, + "loss": 2.0866, + "step": 563 + }, + { + "epoch": 0.68, + "learning_rate": 0.0001978199907389057, + "loss": 1.6046, + "step": 564 + }, + { + "epoch": 0.68, + "learning_rate": 0.00019781214241622208, + "loss": 1.9222, + "step": 565 + }, + { + "epoch": 0.68, + "learning_rate": 0.00019780428014766051, + "loss": 2.2003, + "step": 566 + }, + { + "epoch": 0.68, + "learning_rate": 0.00019779640393434206, + "loss": 2.0534, + "step": 567 + }, + { + "epoch": 0.68, + "learning_rate": 0.0001977885137773896, + "loss": 1.8609, + "step": 568 + }, + { + "epoch": 0.68, + "learning_rate": 0.00019778060967792817, + "loss": 2.0666, + "step": 569 + }, + { + "epoch": 0.68, + "learning_rate": 0.00019777269163708468, + "loss": 1.9512, + "step": 570 + }, + { + "epoch": 0.69, + "learning_rate": 0.00019776475965598814, + "loss": 1.8349, + "step": 571 + }, + { + "epoch": 0.69, + "learning_rate": 0.0001977568137357694, + "loss": 2.0507, + "step": 572 + }, + { + "epoch": 0.69, + "learning_rate": 0.00019774885387756138, + "loss": 1.7588, + "step": 573 + }, + { + "epoch": 0.69, + "learning_rate": 0.000197740880082499, + "loss": 2.0981, + "step": 574 + }, + { + "epoch": 0.69, + "learning_rate": 0.00019773289235171918, + "loss": 2.0953, + "step": 575 + }, + { + "epoch": 0.69, + "learning_rate": 0.00019772489068636077, + "loss": 2.0678, + "step": 576 + }, + { + "epoch": 0.69, + "learning_rate": 0.00019771687508756466, + "loss": 2.0136, + "step": 577 + }, + { + "epoch": 0.69, + "learning_rate": 0.0001977088455564736, + "loss": 1.9781, + "step": 578 + }, + { + "epoch": 0.7, + "learning_rate": 0.00019770080209423254, + "loss": 2.2185, + "step": 579 + }, + { + "epoch": 0.7, + "learning_rate": 0.00019769274470198827, + "loss": 1.8076, + "step": 580 + }, + { + "epoch": 0.7, + "learning_rate": 0.00019768467338088957, + "loss": 1.6888, + "step": 581 + }, + { + "epoch": 0.7, + "learning_rate": 0.00019767658813208726, + "loss": 2.1273, + "step": 582 + }, + { + "epoch": 0.7, + "learning_rate": 0.0001976684889567341, + "loss": 2.3232, + "step": 583 + }, + { + "epoch": 0.7, + "learning_rate": 0.00019766037585598487, + "loss": 2.366, + "step": 584 + }, + { + "epoch": 0.7, + "learning_rate": 0.00019765224883099635, + "loss": 1.8939, + "step": 585 + }, + { + "epoch": 0.7, + "learning_rate": 0.00019764410788292722, + "loss": 2.0162, + "step": 586 + }, + { + "epoch": 0.7, + "learning_rate": 0.00019763595301293822, + "loss": 2.2752, + "step": 587 + }, + { + "epoch": 0.71, + "learning_rate": 0.0001976277842221921, + "loss": 1.9461, + "step": 588 + }, + { + "epoch": 0.71, + "learning_rate": 0.0001976196015118535, + "loss": 1.9999, + "step": 589 + }, + { + "epoch": 0.71, + "learning_rate": 0.0001976114048830891, + "loss": 2.0169, + "step": 590 + }, + { + "epoch": 0.71, + "learning_rate": 0.00019760319433706757, + "loss": 2.1838, + "step": 591 + }, + { + "epoch": 0.71, + "learning_rate": 0.00019759496987495955, + "loss": 2.3513, + "step": 592 + }, + { + "epoch": 0.71, + "learning_rate": 0.0001975867314979377, + "loss": 1.9915, + "step": 593 + }, + { + "epoch": 0.71, + "learning_rate": 0.0001975784792071766, + "loss": 2.1973, + "step": 594 + }, + { + "epoch": 0.71, + "learning_rate": 0.00019757021300385286, + "loss": 2.3112, + "step": 595 + }, + { + "epoch": 0.72, + "learning_rate": 0.00019756193288914507, + "loss": 2.0992, + "step": 596 + }, + { + "epoch": 0.72, + "learning_rate": 0.00019755363886423376, + "loss": 2.4266, + "step": 597 + }, + { + "epoch": 0.72, + "learning_rate": 0.00019754533093030148, + "loss": 1.7649, + "step": 598 + }, + { + "epoch": 0.72, + "learning_rate": 0.0001975370090885328, + "loss": 1.7573, + "step": 599 + }, + { + "epoch": 0.72, + "learning_rate": 0.00019752867334011423, + "loss": 1.7949, + "step": 600 + }, + { + "epoch": 0.72, + "learning_rate": 0.0001975203236862342, + "loss": 2.0229, + "step": 601 + }, + { + "epoch": 0.72, + "learning_rate": 0.00019751196012808325, + "loss": 2.0519, + "step": 602 + }, + { + "epoch": 0.72, + "learning_rate": 0.00019750358266685383, + "loss": 2.0829, + "step": 603 + }, + { + "epoch": 0.73, + "learning_rate": 0.00019749519130374038, + "loss": 2.0153, + "step": 604 + }, + { + "epoch": 0.73, + "learning_rate": 0.00019748678603993933, + "loss": 1.8594, + "step": 605 + }, + { + "epoch": 0.73, + "learning_rate": 0.00019747836687664908, + "loss": 2.1385, + "step": 606 + }, + { + "epoch": 0.73, + "learning_rate": 0.00019746993381507003, + "loss": 2.1317, + "step": 607 + }, + { + "epoch": 0.73, + "learning_rate": 0.00019746148685640451, + "loss": 1.1676, + "step": 608 + }, + { + "epoch": 0.73, + "learning_rate": 0.0001974530260018569, + "loss": 2.2856, + "step": 609 + }, + { + "epoch": 0.73, + "learning_rate": 0.0001974445512526336, + "loss": 2.1973, + "step": 610 + }, + { + "epoch": 0.73, + "learning_rate": 0.00019743606260994278, + "loss": 1.6912, + "step": 611 + }, + { + "epoch": 0.73, + "learning_rate": 0.00019742756007499486, + "loss": 1.8091, + "step": 612 + }, + { + "epoch": 0.74, + "learning_rate": 0.00019741904364900208, + "loss": 2.0108, + "step": 613 + }, + { + "epoch": 0.74, + "learning_rate": 0.00019741051333317867, + "loss": 2.1061, + "step": 614 + }, + { + "epoch": 0.74, + "learning_rate": 0.00019740196912874087, + "loss": 1.8934, + "step": 615 + }, + { + "epoch": 0.74, + "learning_rate": 0.00019739341103690693, + "loss": 1.8599, + "step": 616 + }, + { + "epoch": 0.74, + "learning_rate": 0.00019738483905889703, + "loss": 2.0025, + "step": 617 + }, + { + "epoch": 0.74, + "learning_rate": 0.00019737625319593335, + "loss": 1.8247, + "step": 618 + }, + { + "epoch": 0.74, + "learning_rate": 0.00019736765344924005, + "loss": 2.222, + "step": 619 + }, + { + "epoch": 0.74, + "learning_rate": 0.00019735903982004324, + "loss": 2.116, + "step": 620 + }, + { + "epoch": 0.75, + "learning_rate": 0.0001973504123095711, + "loss": 1.9183, + "step": 621 + }, + { + "epoch": 0.75, + "learning_rate": 0.0001973417709190536, + "loss": 2.1507, + "step": 622 + }, + { + "epoch": 0.75, + "learning_rate": 0.00019733311564972296, + "loss": 1.7899, + "step": 623 + }, + { + "epoch": 0.75, + "learning_rate": 0.00019732444650281315, + "loss": 2.1005, + "step": 624 + }, + { + "epoch": 0.75, + "learning_rate": 0.0001973157634795602, + "loss": 2.2391, + "step": 625 + }, + { + "epoch": 0.75, + "learning_rate": 0.00019730706658120214, + "loss": 1.9466, + "step": 626 + }, + { + "epoch": 0.75, + "learning_rate": 0.000197298355808979, + "loss": 1.9854, + "step": 627 + }, + { + "epoch": 0.75, + "eval_loss": 1.9957869052886963, + "eval_runtime": 282.5544, + "eval_samples_per_second": 0.729, + "eval_steps_per_second": 0.729, + "step": 627 + }, + { + "epoch": 0.75, + "learning_rate": 0.00019728963116413266, + "loss": 2.1877, + "step": 628 + }, + { + "epoch": 0.76, + "learning_rate": 0.00019728089264790712, + "loss": 2.2194, + "step": 629 + }, + { + "epoch": 0.76, + "learning_rate": 0.00019727214026154827, + "loss": 1.9631, + "step": 630 + }, + { + "epoch": 0.76, + "learning_rate": 0.00019726337400630405, + "loss": 2.3506, + "step": 631 + }, + { + "epoch": 0.76, + "learning_rate": 0.00019725459388342432, + "loss": 2.0543, + "step": 632 + }, + { + "epoch": 0.76, + "learning_rate": 0.0001972457998941609, + "loss": 2.0402, + "step": 633 + }, + { + "epoch": 0.76, + "learning_rate": 0.00019723699203976766, + "loss": 1.9316, + "step": 634 + }, + { + "epoch": 0.76, + "learning_rate": 0.0001972281703215004, + "loss": 2.2024, + "step": 635 + }, + { + "epoch": 0.76, + "learning_rate": 0.00019721933474061692, + "loss": 1.6776, + "step": 636 + }, + { + "epoch": 0.76, + "learning_rate": 0.00019721048529837694, + "loss": 1.9757, + "step": 637 + }, + { + "epoch": 0.77, + "learning_rate": 0.00019720162199604222, + "loss": 1.7631, + "step": 638 + }, + { + "epoch": 0.77, + "learning_rate": 0.00019719274483487648, + "loss": 2.34, + "step": 639 + }, + { + "epoch": 0.77, + "learning_rate": 0.0001971838538161454, + "loss": 1.8469, + "step": 640 + }, + { + "epoch": 0.77, + "learning_rate": 0.00019717494894111662, + "loss": 2.3151, + "step": 641 + }, + { + "epoch": 0.77, + "learning_rate": 0.00019716603021105987, + "loss": 2.0661, + "step": 642 + }, + { + "epoch": 0.77, + "learning_rate": 0.00019715709762724667, + "loss": 2.0408, + "step": 643 + }, + { + "epoch": 0.77, + "learning_rate": 0.00019714815119095062, + "loss": 1.9848, + "step": 644 + }, + { + "epoch": 0.77, + "learning_rate": 0.00019713919090344736, + "loss": 2.3134, + "step": 645 + }, + { + "epoch": 0.78, + "learning_rate": 0.00019713021676601438, + "loss": 2.4947, + "step": 646 + }, + { + "epoch": 0.78, + "learning_rate": 0.0001971212287799312, + "loss": 2.0515, + "step": 647 + }, + { + "epoch": 0.78, + "learning_rate": 0.00019711222694647932, + "loss": 2.6216, + "step": 648 + }, + { + "epoch": 0.78, + "learning_rate": 0.00019710321126694216, + "loss": 1.6517, + "step": 649 + }, + { + "epoch": 0.78, + "learning_rate": 0.0001970941817426052, + "loss": 2.0408, + "step": 650 + }, + { + "epoch": 0.78, + "learning_rate": 0.00019708513837475588, + "loss": 1.8841, + "step": 651 + }, + { + "epoch": 0.78, + "learning_rate": 0.00019707608116468356, + "loss": 2.1966, + "step": 652 + }, + { + "epoch": 0.78, + "learning_rate": 0.00019706701011367955, + "loss": 1.7587, + "step": 653 + }, + { + "epoch": 0.79, + "learning_rate": 0.0001970579252230373, + "loss": 2.2196, + "step": 654 + }, + { + "epoch": 0.79, + "learning_rate": 0.00019704882649405198, + "loss": 1.8146, + "step": 655 + }, + { + "epoch": 0.79, + "learning_rate": 0.00019703971392802098, + "loss": 2.2932, + "step": 656 + }, + { + "epoch": 0.79, + "learning_rate": 0.00019703058752624353, + "loss": 1.923, + "step": 657 + }, + { + "epoch": 0.79, + "learning_rate": 0.0001970214472900208, + "loss": 2.2393, + "step": 658 + }, + { + "epoch": 0.79, + "learning_rate": 0.00019701229322065605, + "loss": 1.7338, + "step": 659 + }, + { + "epoch": 0.79, + "learning_rate": 0.00019700312531945442, + "loss": 1.7859, + "step": 660 + }, + { + "epoch": 0.79, + "learning_rate": 0.00019699394358772306, + "loss": 2.2719, + "step": 661 + }, + { + "epoch": 0.79, + "learning_rate": 0.00019698474802677107, + "loss": 1.576, + "step": 662 + }, + { + "epoch": 0.8, + "learning_rate": 0.00019697553863790956, + "loss": 2.3333, + "step": 663 + }, + { + "epoch": 0.8, + "learning_rate": 0.00019696631542245156, + "loss": 2.3508, + "step": 664 + }, + { + "epoch": 0.8, + "learning_rate": 0.00019695707838171216, + "loss": 2.1876, + "step": 665 + }, + { + "epoch": 0.8, + "learning_rate": 0.00019694782751700828, + "loss": 1.4863, + "step": 666 + }, + { + "epoch": 0.8, + "learning_rate": 0.00019693856282965898, + "loss": 1.8948, + "step": 667 + }, + { + "epoch": 0.8, + "learning_rate": 0.00019692928432098512, + "loss": 1.6867, + "step": 668 + }, + { + "epoch": 0.8, + "learning_rate": 0.00019691999199230963, + "loss": 1.7682, + "step": 669 + }, + { + "epoch": 0.8, + "learning_rate": 0.00019691068584495742, + "loss": 2.0914, + "step": 670 + }, + { + "epoch": 0.81, + "learning_rate": 0.00019690136588025535, + "loss": 2.1413, + "step": 671 + }, + { + "epoch": 0.81, + "learning_rate": 0.00019689203209953223, + "loss": 2.1275, + "step": 672 + }, + { + "epoch": 0.81, + "learning_rate": 0.0001968826845041188, + "loss": 1.9556, + "step": 673 + }, + { + "epoch": 0.81, + "learning_rate": 0.00019687332309534792, + "loss": 2.2209, + "step": 674 + }, + { + "epoch": 0.81, + "learning_rate": 0.00019686394787455424, + "loss": 1.9853, + "step": 675 + }, + { + "epoch": 0.81, + "learning_rate": 0.00019685455884307454, + "loss": 2.0877, + "step": 676 + }, + { + "epoch": 0.81, + "learning_rate": 0.00019684515600224743, + "loss": 2.1607, + "step": 677 + }, + { + "epoch": 0.81, + "learning_rate": 0.00019683573935341358, + "loss": 2.2664, + "step": 678 + }, + { + "epoch": 0.82, + "learning_rate": 0.00019682630889791556, + "loss": 1.8527, + "step": 679 + }, + { + "epoch": 0.82, + "learning_rate": 0.000196816864637098, + "loss": 1.8417, + "step": 680 + }, + { + "epoch": 0.82, + "learning_rate": 0.00019680740657230738, + "loss": 1.9853, + "step": 681 + }, + { + "epoch": 0.82, + "learning_rate": 0.00019679793470489228, + "loss": 1.8419, + "step": 682 + }, + { + "epoch": 0.82, + "learning_rate": 0.00019678844903620317, + "loss": 1.9971, + "step": 683 + }, + { + "epoch": 0.82, + "learning_rate": 0.00019677894956759246, + "loss": 1.9843, + "step": 684 + }, + { + "epoch": 0.82, + "learning_rate": 0.00019676943630041462, + "loss": 2.376, + "step": 685 + }, + { + "epoch": 0.82, + "learning_rate": 0.00019675990923602598, + "loss": 2.1558, + "step": 686 + }, + { + "epoch": 0.82, + "learning_rate": 0.00019675036837578494, + "loss": 1.5752, + "step": 687 + }, + { + "epoch": 0.83, + "learning_rate": 0.0001967408137210518, + "loss": 1.6704, + "step": 688 + }, + { + "epoch": 0.83, + "learning_rate": 0.00019673124527318881, + "loss": 2.1389, + "step": 689 + }, + { + "epoch": 0.83, + "learning_rate": 0.00019672166303356028, + "loss": 2.126, + "step": 690 + }, + { + "epoch": 0.83, + "learning_rate": 0.00019671206700353237, + "loss": 1.9402, + "step": 691 + }, + { + "epoch": 0.83, + "learning_rate": 0.00019670245718447335, + "loss": 1.6701, + "step": 692 + }, + { + "epoch": 0.83, + "learning_rate": 0.00019669283357775328, + "loss": 1.8134, + "step": 693 + }, + { + "epoch": 0.83, + "learning_rate": 0.0001966831961847443, + "loss": 2.1642, + "step": 694 + }, + { + "epoch": 0.83, + "learning_rate": 0.00019667354500682054, + "loss": 1.8455, + "step": 695 + }, + { + "epoch": 0.84, + "learning_rate": 0.000196663880045358, + "loss": 1.9646, + "step": 696 + }, + { + "epoch": 0.84, + "learning_rate": 0.0001966542013017347, + "loss": 1.9855, + "step": 697 + }, + { + "epoch": 0.84, + "learning_rate": 0.00019664450877733062, + "loss": 1.7029, + "step": 698 + }, + { + "epoch": 0.84, + "learning_rate": 0.00019663480247352773, + "loss": 1.9789, + "step": 699 + }, + { + "epoch": 0.84, + "learning_rate": 0.0001966250823917099, + "loss": 1.8751, + "step": 700 + }, + { + "epoch": 0.84, + "learning_rate": 0.00019661534853326301, + "loss": 2.3644, + "step": 701 + }, + { + "epoch": 0.84, + "learning_rate": 0.00019660560089957492, + "loss": 1.8006, + "step": 702 + }, + { + "epoch": 0.84, + "learning_rate": 0.0001965958394920354, + "loss": 2.2799, + "step": 703 + }, + { + "epoch": 0.85, + "learning_rate": 0.00019658606431203622, + "loss": 1.9258, + "step": 704 + }, + { + "epoch": 0.85, + "learning_rate": 0.0001965762753609711, + "loss": 1.9521, + "step": 705 + }, + { + "epoch": 0.85, + "learning_rate": 0.00019656647264023575, + "loss": 1.9675, + "step": 706 + }, + { + "epoch": 0.85, + "learning_rate": 0.00019655665615122783, + "loss": 2.3686, + "step": 707 + }, + { + "epoch": 0.85, + "learning_rate": 0.00019654682589534693, + "loss": 2.1448, + "step": 708 + }, + { + "epoch": 0.85, + "learning_rate": 0.00019653698187399466, + "loss": 2.2475, + "step": 709 + }, + { + "epoch": 0.85, + "learning_rate": 0.0001965271240885745, + "loss": 1.9417, + "step": 710 + }, + { + "epoch": 0.85, + "learning_rate": 0.0001965172525404921, + "loss": 2.154, + "step": 711 + }, + { + "epoch": 0.85, + "learning_rate": 0.00019650736723115475, + "loss": 2.0646, + "step": 712 + }, + { + "epoch": 0.86, + "learning_rate": 0.00019649746816197196, + "loss": 2.235, + "step": 713 + }, + { + "epoch": 0.86, + "learning_rate": 0.00019648755533435518, + "loss": 1.7122, + "step": 714 + }, + { + "epoch": 0.86, + "learning_rate": 0.00019647762874971765, + "loss": 2.0635, + "step": 715 + }, + { + "epoch": 0.86, + "learning_rate": 0.00019646768840947474, + "loss": 1.8904, + "step": 716 + }, + { + "epoch": 0.86, + "learning_rate": 0.00019645773431504373, + "loss": 1.608, + "step": 717 + }, + { + "epoch": 0.86, + "learning_rate": 0.00019644776646784388, + "loss": 2.2307, + "step": 718 + }, + { + "epoch": 0.86, + "learning_rate": 0.0001964377848692963, + "loss": 2.176, + "step": 719 + }, + { + "epoch": 0.86, + "learning_rate": 0.00019642778952082426, + "loss": 2.1984, + "step": 720 + }, + { + "epoch": 0.87, + "learning_rate": 0.0001964177804238528, + "loss": 2.2625, + "step": 721 + }, + { + "epoch": 0.87, + "learning_rate": 0.00019640775757980903, + "loss": 2.3142, + "step": 722 + }, + { + "epoch": 0.87, + "learning_rate": 0.00019639772099012197, + "loss": 2.2366, + "step": 723 + }, + { + "epoch": 0.87, + "learning_rate": 0.00019638767065622266, + "loss": 1.7823, + "step": 724 + }, + { + "epoch": 0.87, + "learning_rate": 0.000196377606579544, + "loss": 2.0677, + "step": 725 + }, + { + "epoch": 0.87, + "learning_rate": 0.00019636752876152095, + "loss": 1.3337, + "step": 726 + }, + { + "epoch": 0.87, + "learning_rate": 0.00019635743720359037, + "loss": 2.055, + "step": 727 + }, + { + "epoch": 0.87, + "learning_rate": 0.0001963473319071911, + "loss": 1.9888, + "step": 728 + }, + { + "epoch": 0.88, + "learning_rate": 0.00019633721287376393, + "loss": 1.9258, + "step": 729 + }, + { + "epoch": 0.88, + "learning_rate": 0.00019632708010475165, + "loss": 2.3768, + "step": 730 + }, + { + "epoch": 0.88, + "learning_rate": 0.0001963169336015989, + "loss": 1.993, + "step": 731 + }, + { + "epoch": 0.88, + "learning_rate": 0.00019630677336575242, + "loss": 2.1989, + "step": 732 + }, + { + "epoch": 0.88, + "learning_rate": 0.0001962965993986608, + "loss": 2.1216, + "step": 733 + }, + { + "epoch": 0.88, + "learning_rate": 0.00019628641170177464, + "loss": 2.2217, + "step": 734 + }, + { + "epoch": 0.88, + "learning_rate": 0.00019627621027654648, + "loss": 1.8809, + "step": 735 + }, + { + "epoch": 0.88, + "learning_rate": 0.00019626599512443077, + "loss": 2.0864, + "step": 736 + }, + { + "epoch": 0.88, + "learning_rate": 0.00019625576624688406, + "loss": 2.0627, + "step": 737 + }, + { + "epoch": 0.89, + "learning_rate": 0.00019624552364536473, + "loss": 2.1347, + "step": 738 + }, + { + "epoch": 0.89, + "learning_rate": 0.00019623526732133315, + "loss": 1.9998, + "step": 739 + }, + { + "epoch": 0.89, + "learning_rate": 0.00019622499727625162, + "loss": 2.1998, + "step": 740 + }, + { + "epoch": 0.89, + "learning_rate": 0.00019621471351158443, + "loss": 1.974, + "step": 741 + }, + { + "epoch": 0.89, + "learning_rate": 0.00019620441602879787, + "loss": 1.9425, + "step": 742 + }, + { + "epoch": 0.89, + "learning_rate": 0.00019619410482936008, + "loss": 2.6227, + "step": 743 + }, + { + "epoch": 0.89, + "learning_rate": 0.00019618377991474124, + "loss": 2.1209, + "step": 744 + }, + { + "epoch": 0.89, + "learning_rate": 0.00019617344128641345, + "loss": 2.0606, + "step": 745 + }, + { + "epoch": 0.9, + "learning_rate": 0.00019616308894585078, + "loss": 2.296, + "step": 746 + }, + { + "epoch": 0.9, + "learning_rate": 0.00019615272289452923, + "loss": 2.0415, + "step": 747 + }, + { + "epoch": 0.9, + "learning_rate": 0.0001961423431339268, + "loss": 1.9516, + "step": 748 + }, + { + "epoch": 0.9, + "learning_rate": 0.0001961319496655234, + "loss": 2.0468, + "step": 749 + }, + { + "epoch": 0.9, + "learning_rate": 0.0001961215424908009, + "loss": 1.877, + "step": 750 + }, + { + "epoch": 0.9, + "learning_rate": 0.0001961111216112432, + "loss": 1.8129, + "step": 751 + }, + { + "epoch": 0.9, + "learning_rate": 0.00019610068702833596, + "loss": 1.9984, + "step": 752 + }, + { + "epoch": 0.9, + "learning_rate": 0.00019609023874356707, + "loss": 1.9013, + "step": 753 + }, + { + "epoch": 0.91, + "learning_rate": 0.00019607977675842615, + "loss": 2.0546, + "step": 754 + }, + { + "epoch": 0.91, + "learning_rate": 0.00019606930107440485, + "loss": 2.2817, + "step": 755 + }, + { + "epoch": 0.91, + "learning_rate": 0.0001960588116929968, + "loss": 2.0578, + "step": 756 + }, + { + "epoch": 0.91, + "learning_rate": 0.00019604830861569755, + "loss": 2.3521, + "step": 757 + }, + { + "epoch": 0.91, + "learning_rate": 0.00019603779184400457, + "loss": 2.0392, + "step": 758 + }, + { + "epoch": 0.91, + "learning_rate": 0.0001960272613794174, + "loss": 1.9863, + "step": 759 + }, + { + "epoch": 0.91, + "learning_rate": 0.00019601671722343738, + "loss": 2.1889, + "step": 760 + }, + { + "epoch": 0.91, + "learning_rate": 0.0001960061593775679, + "loss": 2.0908, + "step": 761 + }, + { + "epoch": 0.91, + "learning_rate": 0.0001959955878433143, + "loss": 1.986, + "step": 762 + }, + { + "epoch": 0.92, + "learning_rate": 0.00019598500262218386, + "loss": 2.0339, + "step": 763 + }, + { + "epoch": 0.92, + "learning_rate": 0.00019597440371568574, + "loss": 2.0958, + "step": 764 + }, + { + "epoch": 0.92, + "learning_rate": 0.0001959637911253312, + "loss": 1.9866, + "step": 765 + }, + { + "epoch": 0.92, + "learning_rate": 0.00019595316485263327, + "loss": 2.2228, + "step": 766 + }, + { + "epoch": 0.92, + "learning_rate": 0.00019594252489910706, + "loss": 1.915, + "step": 767 + }, + { + "epoch": 0.92, + "learning_rate": 0.00019593187126626965, + "loss": 2.0741, + "step": 768 + }, + { + "epoch": 0.92, + "learning_rate": 0.00019592120395563994, + "loss": 2.5346, + "step": 769 + }, + { + "epoch": 0.92, + "learning_rate": 0.00019591052296873888, + "loss": 2.4908, + "step": 770 + }, + { + "epoch": 0.93, + "learning_rate": 0.00019589982830708937, + "loss": 2.1042, + "step": 771 + }, + { + "epoch": 0.93, + "learning_rate": 0.00019588911997221625, + "loss": 1.8676, + "step": 772 + }, + { + "epoch": 0.93, + "learning_rate": 0.0001958783979656462, + "loss": 1.9152, + "step": 773 + }, + { + "epoch": 0.93, + "learning_rate": 0.00019586766228890806, + "loss": 1.7784, + "step": 774 + }, + { + "epoch": 0.93, + "learning_rate": 0.0001958569129435324, + "loss": 2.0784, + "step": 775 + }, + { + "epoch": 0.93, + "learning_rate": 0.0001958461499310519, + "loss": 1.7262, + "step": 776 + }, + { + "epoch": 0.93, + "learning_rate": 0.00019583537325300118, + "loss": 2.4154, + "step": 777 + }, + { + "epoch": 0.93, + "learning_rate": 0.00019582458291091663, + "loss": 2.3185, + "step": 778 + }, + { + "epoch": 0.94, + "learning_rate": 0.00019581377890633684, + "loss": 2.0981, + "step": 779 + }, + { + "epoch": 0.94, + "learning_rate": 0.00019580296124080212, + "loss": 1.8952, + "step": 780 + }, + { + "epoch": 0.94, + "learning_rate": 0.00019579212991585493, + "loss": 1.7208, + "step": 781 + }, + { + "epoch": 0.94, + "learning_rate": 0.00019578128493303955, + "loss": 2.0209, + "step": 782 + }, + { + "epoch": 0.94, + "learning_rate": 0.00019577042629390217, + "loss": 2.1867, + "step": 783 + }, + { + "epoch": 0.94, + "learning_rate": 0.0001957595539999911, + "loss": 2.0805, + "step": 784 + }, + { + "epoch": 0.94, + "learning_rate": 0.00019574866805285645, + "loss": 2.0451, + "step": 785 + }, + { + "epoch": 0.94, + "learning_rate": 0.00019573776845405028, + "loss": 2.2056, + "step": 786 + }, + { + "epoch": 0.94, + "learning_rate": 0.0001957268552051267, + "loss": 2.0773, + "step": 787 + }, + { + "epoch": 0.95, + "learning_rate": 0.00019571592830764165, + "loss": 2.2036, + "step": 788 + }, + { + "epoch": 0.95, + "learning_rate": 0.00019570498776315309, + "loss": 1.7298, + "step": 789 + }, + { + "epoch": 0.95, + "learning_rate": 0.0001956940335732209, + "loss": 1.8931, + "step": 790 + }, + { + "epoch": 0.95, + "learning_rate": 0.0001956830657394069, + "loss": 2.1567, + "step": 791 + }, + { + "epoch": 0.95, + "learning_rate": 0.00019567208426327488, + "loss": 1.9471, + "step": 792 + }, + { + "epoch": 0.95, + "learning_rate": 0.00019566108914639054, + "loss": 1.8916, + "step": 793 + }, + { + "epoch": 0.95, + "learning_rate": 0.00019565008039032158, + "loss": 2.0111, + "step": 794 + }, + { + "epoch": 0.95, + "learning_rate": 0.00019563905799663752, + "loss": 2.1374, + "step": 795 + }, + { + "epoch": 0.96, + "learning_rate": 0.00019562802196691003, + "loss": 2.3083, + "step": 796 + }, + { + "epoch": 0.96, + "learning_rate": 0.00019561697230271254, + "loss": 2.0381, + "step": 797 + }, + { + "epoch": 0.96, + "learning_rate": 0.0001956059090056205, + "loss": 2.1909, + "step": 798 + }, + { + "epoch": 0.96, + "learning_rate": 0.00019559483207721133, + "loss": 1.9893, + "step": 799 + }, + { + "epoch": 0.96, + "learning_rate": 0.0001955837415190643, + "loss": 2.3178, + "step": 800 + }, + { + "epoch": 0.96, + "learning_rate": 0.0001955726373327607, + "loss": 2.0815, + "step": 801 + }, + { + "epoch": 0.96, + "learning_rate": 0.00019556151951988376, + "loss": 1.6012, + "step": 802 + }, + { + "epoch": 0.96, + "learning_rate": 0.00019555038808201865, + "loss": 1.4965, + "step": 803 + }, + { + "epoch": 0.97, + "learning_rate": 0.00019553924302075242, + "loss": 2.3069, + "step": 804 + }, + { + "epoch": 0.97, + "learning_rate": 0.00019552808433767415, + "loss": 2.2388, + "step": 805 + }, + { + "epoch": 0.97, + "learning_rate": 0.00019551691203437482, + "loss": 2.5662, + "step": 806 + }, + { + "epoch": 0.97, + "learning_rate": 0.00019550572611244738, + "loss": 1.9419, + "step": 807 + }, + { + "epoch": 0.97, + "learning_rate": 0.00019549452657348663, + "loss": 2.3638, + "step": 808 + }, + { + "epoch": 0.97, + "learning_rate": 0.00019548331341908947, + "loss": 2.1567, + "step": 809 + }, + { + "epoch": 0.97, + "learning_rate": 0.00019547208665085457, + "loss": 1.9697, + "step": 810 + }, + { + "epoch": 0.97, + "learning_rate": 0.00019546084627038268, + "loss": 1.9006, + "step": 811 + }, + { + "epoch": 0.97, + "learning_rate": 0.0001954495922792764, + "loss": 2.304, + "step": 812 + }, + { + "epoch": 0.98, + "learning_rate": 0.0001954383246791403, + "loss": 2.0494, + "step": 813 + }, + { + "epoch": 0.98, + "learning_rate": 0.00019542704347158093, + "loss": 1.8562, + "step": 814 + }, + { + "epoch": 0.98, + "learning_rate": 0.00019541574865820672, + "loss": 2.1041, + "step": 815 + }, + { + "epoch": 0.98, + "learning_rate": 0.00019540444024062804, + "loss": 2.22, + "step": 816 + }, + { + "epoch": 0.98, + "learning_rate": 0.00019539311822045727, + "loss": 1.9925, + "step": 817 + }, + { + "epoch": 0.98, + "learning_rate": 0.00019538178259930869, + "loss": 2.3213, + "step": 818 + }, + { + "epoch": 0.98, + "learning_rate": 0.00019537043337879845, + "loss": 2.0319, + "step": 819 + }, + { + "epoch": 0.98, + "learning_rate": 0.00019535907056054475, + "loss": 1.8578, + "step": 820 + }, + { + "epoch": 0.99, + "learning_rate": 0.00019534769414616764, + "loss": 1.4115, + "step": 821 + }, + { + "epoch": 0.99, + "learning_rate": 0.0001953363041372892, + "loss": 2.0731, + "step": 822 + }, + { + "epoch": 0.99, + "learning_rate": 0.00019532490053553335, + "loss": 2.0605, + "step": 823 + }, + { + "epoch": 0.99, + "learning_rate": 0.00019531348334252607, + "loss": 1.9044, + "step": 824 + }, + { + "epoch": 0.99, + "learning_rate": 0.0001953020525598951, + "loss": 1.7405, + "step": 825 + }, + { + "epoch": 0.99, + "learning_rate": 0.0001952906081892703, + "loss": 1.898, + "step": 826 + }, + { + "epoch": 0.99, + "learning_rate": 0.00019527915023228332, + "loss": 1.9696, + "step": 827 + }, + { + "epoch": 0.99, + "learning_rate": 0.00019526767869056788, + "loss": 2.0469, + "step": 828 + }, + { + "epoch": 1.0, + "learning_rate": 0.00019525619356575952, + "loss": 2.0307, + "step": 829 + }, + { + "epoch": 1.0, + "learning_rate": 0.00019524469485949583, + "loss": 2.002, + "step": 830 + }, + { + "epoch": 1.0, + "learning_rate": 0.00019523318257341622, + "loss": 1.9438, + "step": 831 + }, + { + "epoch": 1.0, + "learning_rate": 0.00019522165670916207, + "loss": 1.535, + "step": 832 + }, + { + "epoch": 1.0, + "learning_rate": 0.0001952101172683768, + "loss": 1.7505, + "step": 833 + }, + { + "epoch": 1.0, + "learning_rate": 0.00019519856425270562, + "loss": 2.2248, + "step": 834 + }, + { + "epoch": 1.0, + "learning_rate": 0.00019518699766379576, + "loss": 2.0669, + "step": 835 + }, + { + "epoch": 1.0, + "learning_rate": 0.00019517541750329635, + "loss": 2.0268, + "step": 836 + }, + { + "epoch": 1.0, + "eval_loss": 1.9969017505645752, + "eval_runtime": 283.3157, + "eval_samples_per_second": 0.727, + "eval_steps_per_second": 0.727, + "step": 836 + }, + { + "epoch": 1.0, + "learning_rate": 0.00019516382377285848, + "loss": 1.6712, + "step": 837 + }, + { + "epoch": 1.01, + "learning_rate": 0.0001951522164741352, + "loss": 2.1558, + "step": 838 + }, + { + "epoch": 1.01, + "learning_rate": 0.00019514059560878138, + "loss": 2.1599, + "step": 839 + }, + { + "epoch": 1.01, + "learning_rate": 0.00019512896117845392, + "loss": 1.8762, + "step": 840 + }, + { + "epoch": 1.01, + "learning_rate": 0.00019511731318481168, + "loss": 2.0189, + "step": 841 + }, + { + "epoch": 1.01, + "learning_rate": 0.00019510565162951537, + "loss": 1.9364, + "step": 842 + }, + { + "epoch": 1.01, + "learning_rate": 0.00019509397651422769, + "loss": 1.7319, + "step": 843 + }, + { + "epoch": 1.01, + "learning_rate": 0.00019508228784061326, + "loss": 1.9424, + "step": 844 + }, + { + "epoch": 1.01, + "learning_rate": 0.0001950705856103386, + "loss": 2.277, + "step": 845 + }, + { + "epoch": 1.02, + "learning_rate": 0.00019505886982507225, + "loss": 1.6511, + "step": 846 + }, + { + "epoch": 1.0, + "learning_rate": 0.0001950471404864846, + "loss": 1.9056, + "step": 847 + }, + { + "epoch": 1.0, + "learning_rate": 0.00019503539759624798, + "loss": 1.5105, + "step": 848 + }, + { + "epoch": 1.0, + "learning_rate": 0.0001950236411560367, + "loss": 1.9469, + "step": 849 + }, + { + "epoch": 1.0, + "learning_rate": 0.00019501187116752693, + "loss": 1.5012, + "step": 850 + }, + { + "epoch": 1.01, + "learning_rate": 0.00019500008763239683, + "loss": 1.7086, + "step": 851 + }, + { + "epoch": 1.01, + "learning_rate": 0.00019498829055232647, + "loss": 1.5586, + "step": 852 + }, + { + "epoch": 1.01, + "learning_rate": 0.00019497647992899788, + "loss": 1.5573, + "step": 853 + }, + { + "epoch": 1.01, + "learning_rate": 0.000194964655764095, + "loss": 2.0757, + "step": 854 + }, + { + "epoch": 1.01, + "learning_rate": 0.00019495281805930367, + "loss": 1.5478, + "step": 855 + }, + { + "epoch": 1.01, + "learning_rate": 0.00019494096681631172, + "loss": 1.7068, + "step": 856 + }, + { + "epoch": 1.01, + "learning_rate": 0.00019492910203680884, + "loss": 1.6759, + "step": 857 + }, + { + "epoch": 1.01, + "learning_rate": 0.0001949172237224867, + "loss": 1.4621, + "step": 858 + }, + { + "epoch": 1.02, + "learning_rate": 0.00019490533187503892, + "loss": 1.5359, + "step": 859 + }, + { + "epoch": 1.02, + "learning_rate": 0.000194893426496161, + "loss": 1.9365, + "step": 860 + }, + { + "epoch": 1.02, + "learning_rate": 0.00019488150758755035, + "loss": 1.7089, + "step": 861 + }, + { + "epoch": 1.02, + "learning_rate": 0.00019486957515090641, + "loss": 1.4924, + "step": 862 + }, + { + "epoch": 1.02, + "learning_rate": 0.00019485762918793046, + "loss": 1.387, + "step": 863 + }, + { + "epoch": 1.02, + "learning_rate": 0.0001948456697003257, + "loss": 1.631, + "step": 864 + }, + { + "epoch": 1.02, + "learning_rate": 0.00019483369668979732, + "loss": 1.7953, + "step": 865 + }, + { + "epoch": 1.02, + "learning_rate": 0.00019482171015805245, + "loss": 1.7552, + "step": 866 + }, + { + "epoch": 1.03, + "learning_rate": 0.00019480971010680002, + "loss": 1.8313, + "step": 867 + }, + { + "epoch": 1.03, + "learning_rate": 0.00019479769653775106, + "loss": 1.593, + "step": 868 + }, + { + "epoch": 1.03, + "learning_rate": 0.00019478566945261837, + "loss": 1.9506, + "step": 869 + }, + { + "epoch": 1.03, + "learning_rate": 0.00019477362885311682, + "loss": 1.9598, + "step": 870 + }, + { + "epoch": 1.03, + "learning_rate": 0.0001947615747409631, + "loss": 1.7324, + "step": 871 + }, + { + "epoch": 1.03, + "learning_rate": 0.00019474950711787585, + "loss": 2.1208, + "step": 872 + }, + { + "epoch": 1.03, + "learning_rate": 0.0001947374259855757, + "loss": 1.4111, + "step": 873 + }, + { + "epoch": 1.03, + "learning_rate": 0.00019472533134578507, + "loss": 1.6696, + "step": 874 + }, + { + "epoch": 1.03, + "learning_rate": 0.00019471322320022849, + "loss": 1.6999, + "step": 875 + }, + { + "epoch": 1.04, + "learning_rate": 0.00019470110155063225, + "loss": 2.1287, + "step": 876 + }, + { + "epoch": 1.04, + "learning_rate": 0.00019468896639872468, + "loss": 1.874, + "step": 877 + }, + { + "epoch": 1.04, + "learning_rate": 0.00019467681774623592, + "loss": 1.7149, + "step": 878 + }, + { + "epoch": 1.04, + "learning_rate": 0.00019466465559489816, + "loss": 1.9563, + "step": 879 + }, + { + "epoch": 1.04, + "learning_rate": 0.00019465247994644545, + "loss": 1.3504, + "step": 880 + }, + { + "epoch": 1.04, + "learning_rate": 0.00019464029080261378, + "loss": 1.6176, + "step": 881 + }, + { + "epoch": 1.04, + "learning_rate": 0.00019462808816514103, + "loss": 1.7577, + "step": 882 + }, + { + "epoch": 1.04, + "learning_rate": 0.00019461587203576706, + "loss": 1.8054, + "step": 883 + }, + { + "epoch": 1.05, + "learning_rate": 0.00019460364241623358, + "loss": 2.0246, + "step": 884 + }, + { + "epoch": 1.05, + "learning_rate": 0.00019459139930828428, + "loss": 1.7645, + "step": 885 + }, + { + "epoch": 1.05, + "learning_rate": 0.0001945791427136648, + "loss": 1.9225, + "step": 886 + }, + { + "epoch": 1.05, + "learning_rate": 0.00019456687263412262, + "loss": 1.8967, + "step": 887 + }, + { + "epoch": 1.05, + "learning_rate": 0.0001945545890714072, + "loss": 1.5287, + "step": 888 + }, + { + "epoch": 1.05, + "learning_rate": 0.0001945422920272699, + "loss": 1.5033, + "step": 889 + }, + { + "epoch": 1.05, + "learning_rate": 0.00019452998150346401, + "loss": 2.0148, + "step": 890 + }, + { + "epoch": 1.05, + "learning_rate": 0.0001945176575017448, + "loss": 1.3706, + "step": 891 + }, + { + "epoch": 1.06, + "learning_rate": 0.0001945053200238693, + "loss": 1.7603, + "step": 892 + }, + { + "epoch": 1.06, + "learning_rate": 0.00019449296907159667, + "loss": 1.9884, + "step": 893 + }, + { + "epoch": 1.06, + "learning_rate": 0.00019448060464668783, + "loss": 1.6133, + "step": 894 + }, + { + "epoch": 1.06, + "learning_rate": 0.00019446822675090565, + "loss": 1.7885, + "step": 895 + }, + { + "epoch": 1.06, + "learning_rate": 0.00019445583538601498, + "loss": 1.8573, + "step": 896 + }, + { + "epoch": 1.06, + "learning_rate": 0.0001944434305537826, + "loss": 1.7241, + "step": 897 + }, + { + "epoch": 1.06, + "learning_rate": 0.0001944310122559771, + "loss": 1.8942, + "step": 898 + }, + { + "epoch": 1.06, + "learning_rate": 0.0001944185804943691, + "loss": 1.7541, + "step": 899 + }, + { + "epoch": 1.06, + "learning_rate": 0.00019440613527073105, + "loss": 1.9608, + "step": 900 + }, + { + "epoch": 1.07, + "learning_rate": 0.00019439367658683745, + "loss": 2.0969, + "step": 901 + }, + { + "epoch": 1.07, + "learning_rate": 0.00019438120444446457, + "loss": 2.2589, + "step": 902 + }, + { + "epoch": 1.07, + "learning_rate": 0.0001943687188453907, + "loss": 1.7335, + "step": 903 + }, + { + "epoch": 1.07, + "learning_rate": 0.00019435621979139596, + "loss": 1.8663, + "step": 904 + }, + { + "epoch": 1.07, + "learning_rate": 0.00019434370728426252, + "loss": 1.5627, + "step": 905 + }, + { + "epoch": 1.07, + "learning_rate": 0.0001943311813257743, + "loss": 1.6101, + "step": 906 + }, + { + "epoch": 1.07, + "learning_rate": 0.00019431864191771732, + "loss": 1.9661, + "step": 907 + }, + { + "epoch": 1.07, + "learning_rate": 0.0001943060890618794, + "loss": 1.6487, + "step": 908 + }, + { + "epoch": 1.08, + "learning_rate": 0.00019429352276005026, + "loss": 2.1282, + "step": 909 + }, + { + "epoch": 1.08, + "learning_rate": 0.00019428094301402162, + "loss": 1.6944, + "step": 910 + }, + { + "epoch": 1.08, + "learning_rate": 0.00019426834982558705, + "loss": 1.2433, + "step": 911 + }, + { + "epoch": 1.08, + "learning_rate": 0.00019425574319654213, + "loss": 1.5735, + "step": 912 + }, + { + "epoch": 1.08, + "learning_rate": 0.00019424312312868417, + "loss": 1.6499, + "step": 913 + }, + { + "epoch": 1.08, + "learning_rate": 0.00019423048962381265, + "loss": 1.8366, + "step": 914 + }, + { + "epoch": 1.08, + "learning_rate": 0.00019421784268372876, + "loss": 1.906, + "step": 915 + }, + { + "epoch": 1.08, + "learning_rate": 0.00019420518231023568, + "loss": 1.5976, + "step": 916 + }, + { + "epoch": 1.09, + "learning_rate": 0.0001941925085051385, + "loss": 1.6722, + "step": 917 + }, + { + "epoch": 1.09, + "learning_rate": 0.00019417982127024422, + "loss": 1.8832, + "step": 918 + }, + { + "epoch": 1.09, + "learning_rate": 0.00019416712060736183, + "loss": 1.8865, + "step": 919 + }, + { + "epoch": 1.09, + "learning_rate": 0.00019415440651830208, + "loss": 1.6627, + "step": 920 + }, + { + "epoch": 1.09, + "learning_rate": 0.0001941416790048778, + "loss": 1.3598, + "step": 921 + }, + { + "epoch": 1.09, + "learning_rate": 0.00019412893806890357, + "loss": 2.0506, + "step": 922 + }, + { + "epoch": 1.09, + "learning_rate": 0.00019411618371219605, + "loss": 1.9794, + "step": 923 + }, + { + "epoch": 1.09, + "learning_rate": 0.0001941034159365737, + "loss": 1.7851, + "step": 924 + }, + { + "epoch": 1.09, + "learning_rate": 0.0001940906347438569, + "loss": 1.8312, + "step": 925 + }, + { + "epoch": 1.1, + "learning_rate": 0.00019407784013586804, + "loss": 1.5167, + "step": 926 + }, + { + "epoch": 1.1, + "learning_rate": 0.00019406503211443128, + "loss": 1.5725, + "step": 927 + }, + { + "epoch": 1.1, + "learning_rate": 0.00019405221068137277, + "loss": 1.8857, + "step": 928 + }, + { + "epoch": 1.1, + "learning_rate": 0.00019403937583852061, + "loss": 1.741, + "step": 929 + }, + { + "epoch": 1.1, + "learning_rate": 0.00019402652758770475, + "loss": 1.6748, + "step": 930 + }, + { + "epoch": 1.1, + "learning_rate": 0.00019401366593075706, + "loss": 1.7285, + "step": 931 + }, + { + "epoch": 1.1, + "learning_rate": 0.00019400079086951135, + "loss": 1.7545, + "step": 932 + }, + { + "epoch": 1.1, + "learning_rate": 0.00019398790240580333, + "loss": 1.4491, + "step": 933 + }, + { + "epoch": 1.11, + "learning_rate": 0.00019397500054147058, + "loss": 1.3359, + "step": 934 + }, + { + "epoch": 1.11, + "learning_rate": 0.00019396208527835263, + "loss": 1.9567, + "step": 935 + }, + { + "epoch": 1.11, + "learning_rate": 0.0001939491566182909, + "loss": 2.0011, + "step": 936 + }, + { + "epoch": 1.11, + "learning_rate": 0.00019393621456312881, + "loss": 1.9076, + "step": 937 + }, + { + "epoch": 1.11, + "learning_rate": 0.00019392325911471155, + "loss": 1.5388, + "step": 938 + }, + { + "epoch": 1.11, + "learning_rate": 0.00019391029027488629, + "loss": 1.2337, + "step": 939 + }, + { + "epoch": 1.11, + "learning_rate": 0.00019389730804550211, + "loss": 1.5752, + "step": 940 + }, + { + "epoch": 1.11, + "learning_rate": 0.00019388431242840998, + "loss": 1.9131, + "step": 941 + }, + { + "epoch": 1.12, + "learning_rate": 0.00019387130342546284, + "loss": 1.4177, + "step": 942 + }, + { + "epoch": 1.12, + "learning_rate": 0.00019385828103851544, + "loss": 1.5865, + "step": 943 + }, + { + "epoch": 1.12, + "learning_rate": 0.0001938452452694245, + "loss": 1.6335, + "step": 944 + }, + { + "epoch": 1.12, + "learning_rate": 0.00019383219612004865, + "loss": 1.8599, + "step": 945 + }, + { + "epoch": 1.12, + "learning_rate": 0.00019381913359224842, + "loss": 1.3035, + "step": 946 + }, + { + "epoch": 1.12, + "learning_rate": 0.00019380605768788621, + "loss": 1.7586, + "step": 947 + }, + { + "epoch": 1.12, + "learning_rate": 0.0001937929684088264, + "loss": 1.7334, + "step": 948 + }, + { + "epoch": 1.12, + "learning_rate": 0.00019377986575693518, + "loss": 1.5749, + "step": 949 + }, + { + "epoch": 1.12, + "learning_rate": 0.00019376674973408075, + "loss": 1.874, + "step": 950 + }, + { + "epoch": 1.13, + "learning_rate": 0.00019375362034213314, + "loss": 2.3055, + "step": 951 + }, + { + "epoch": 1.13, + "learning_rate": 0.00019374047758296433, + "loss": 1.5801, + "step": 952 + }, + { + "epoch": 1.13, + "learning_rate": 0.0001937273214584482, + "loss": 1.8788, + "step": 953 + }, + { + "epoch": 1.13, + "learning_rate": 0.00019371415197046052, + "loss": 2.431, + "step": 954 + }, + { + "epoch": 1.13, + "learning_rate": 0.00019370096912087897, + "loss": 1.4963, + "step": 955 + }, + { + "epoch": 1.13, + "learning_rate": 0.0001936877729115831, + "loss": 1.514, + "step": 956 + }, + { + "epoch": 1.13, + "learning_rate": 0.00019367456334445446, + "loss": 1.6099, + "step": 957 + }, + { + "epoch": 1.13, + "learning_rate": 0.00019366134042137642, + "loss": 1.9367, + "step": 958 + }, + { + "epoch": 1.14, + "learning_rate": 0.00019364810414423427, + "loss": 1.7384, + "step": 959 + }, + { + "epoch": 1.14, + "learning_rate": 0.00019363485451491524, + "loss": 1.6166, + "step": 960 + }, + { + "epoch": 1.14, + "learning_rate": 0.00019362159153530844, + "loss": 1.955, + "step": 961 + }, + { + "epoch": 1.14, + "learning_rate": 0.00019360831520730482, + "loss": 1.4189, + "step": 962 + }, + { + "epoch": 1.14, + "learning_rate": 0.00019359502553279736, + "loss": 1.4506, + "step": 963 + }, + { + "epoch": 1.14, + "learning_rate": 0.00019358172251368087, + "loss": 1.7108, + "step": 964 + }, + { + "epoch": 1.14, + "learning_rate": 0.00019356840615185203, + "loss": 1.6641, + "step": 965 + }, + { + "epoch": 1.14, + "learning_rate": 0.00019355507644920952, + "loss": 1.7506, + "step": 966 + }, + { + "epoch": 1.15, + "learning_rate": 0.00019354173340765382, + "loss": 2.0598, + "step": 967 + }, + { + "epoch": 1.15, + "learning_rate": 0.0001935283770290874, + "loss": 1.3494, + "step": 968 + }, + { + "epoch": 1.15, + "learning_rate": 0.00019351500731541453, + "loss": 1.6571, + "step": 969 + }, + { + "epoch": 1.15, + "learning_rate": 0.0001935016242685415, + "loss": 1.6403, + "step": 970 + }, + { + "epoch": 1.15, + "learning_rate": 0.00019348822789037637, + "loss": 1.7555, + "step": 971 + }, + { + "epoch": 1.15, + "learning_rate": 0.00019347481818282925, + "loss": 2.1451, + "step": 972 + }, + { + "epoch": 1.15, + "learning_rate": 0.000193461395147812, + "loss": 1.4522, + "step": 973 + }, + { + "epoch": 1.15, + "learning_rate": 0.0001934479587872385, + "loss": 1.7147, + "step": 974 + }, + { + "epoch": 1.15, + "learning_rate": 0.0001934345091030245, + "loss": 1.3909, + "step": 975 + }, + { + "epoch": 1.16, + "learning_rate": 0.00019342104609708756, + "loss": 1.8104, + "step": 976 + }, + { + "epoch": 1.16, + "learning_rate": 0.00019340756977134728, + "loss": 1.5221, + "step": 977 + }, + { + "epoch": 1.16, + "learning_rate": 0.000193394080127725, + "loss": 1.9447, + "step": 978 + }, + { + "epoch": 1.16, + "learning_rate": 0.0001933805771681442, + "loss": 1.5742, + "step": 979 + }, + { + "epoch": 1.16, + "learning_rate": 0.00019336706089452996, + "loss": 1.5312, + "step": 980 + }, + { + "epoch": 1.16, + "learning_rate": 0.00019335353130880948, + "loss": 1.4304, + "step": 981 + }, + { + "epoch": 1.16, + "learning_rate": 0.00019333998841291177, + "loss": 1.8379, + "step": 982 + }, + { + "epoch": 1.16, + "learning_rate": 0.00019332643220876773, + "loss": 1.877, + "step": 983 + }, + { + "epoch": 1.17, + "learning_rate": 0.0001933128626983102, + "loss": 1.9627, + "step": 984 + }, + { + "epoch": 1.17, + "learning_rate": 0.0001932992798834739, + "loss": 1.7857, + "step": 985 + }, + { + "epoch": 1.17, + "learning_rate": 0.00019328568376619543, + "loss": 1.3189, + "step": 986 + }, + { + "epoch": 1.17, + "learning_rate": 0.00019327207434841333, + "loss": 1.9588, + "step": 987 + }, + { + "epoch": 1.17, + "learning_rate": 0.00019325845163206795, + "loss": 1.3132, + "step": 988 + }, + { + "epoch": 1.17, + "learning_rate": 0.00019324481561910163, + "loss": 1.6304, + "step": 989 + }, + { + "epoch": 1.17, + "learning_rate": 0.0001932311663114586, + "loss": 1.8322, + "step": 990 + }, + { + "epoch": 1.17, + "learning_rate": 0.00019321750371108486, + "loss": 1.4192, + "step": 991 + }, + { + "epoch": 1.18, + "learning_rate": 0.0001932038278199285, + "loss": 1.3915, + "step": 992 + }, + { + "epoch": 1.18, + "learning_rate": 0.00019319013863993933, + "loss": 1.8433, + "step": 993 + }, + { + "epoch": 1.18, + "learning_rate": 0.0001931764361730692, + "loss": 2.1459, + "step": 994 + }, + { + "epoch": 1.18, + "learning_rate": 0.0001931627204212717, + "loss": 1.9799, + "step": 995 + }, + { + "epoch": 1.18, + "learning_rate": 0.00019314899138650243, + "loss": 1.855, + "step": 996 + }, + { + "epoch": 1.18, + "learning_rate": 0.00019313524907071887, + "loss": 1.4763, + "step": 997 + }, + { + "epoch": 1.18, + "learning_rate": 0.00019312149347588037, + "loss": 2.0128, + "step": 998 + }, + { + "epoch": 1.18, + "learning_rate": 0.00019310772460394814, + "loss": 1.6964, + "step": 999 + }, + { + "epoch": 1.18, + "learning_rate": 0.0001930939424568854, + "loss": 1.5864, + "step": 1000 + }, + { + "epoch": 1.19, + "learning_rate": 0.00019308014703665712, + "loss": 1.8437, + "step": 1001 + }, + { + "epoch": 1.19, + "learning_rate": 0.00019306633834523024, + "loss": 2.1677, + "step": 1002 + }, + { + "epoch": 1.19, + "learning_rate": 0.00019305251638457356, + "loss": 1.8872, + "step": 1003 + }, + { + "epoch": 1.19, + "learning_rate": 0.0001930386811566578, + "loss": 1.7312, + "step": 1004 + }, + { + "epoch": 1.19, + "learning_rate": 0.0001930248326634556, + "loss": 1.6772, + "step": 1005 + }, + { + "epoch": 1.19, + "learning_rate": 0.00019301097090694143, + "loss": 1.9666, + "step": 1006 + }, + { + "epoch": 1.19, + "learning_rate": 0.00019299709588909165, + "loss": 1.8946, + "step": 1007 + }, + { + "epoch": 1.19, + "learning_rate": 0.00019298320761188453, + "loss": 2.1784, + "step": 1008 + }, + { + "epoch": 1.2, + "learning_rate": 0.0001929693060773003, + "loss": 2.0249, + "step": 1009 + }, + { + "epoch": 1.2, + "learning_rate": 0.00019295539128732093, + "loss": 1.717, + "step": 1010 + }, + { + "epoch": 1.2, + "learning_rate": 0.00019294146324393046, + "loss": 1.8671, + "step": 1011 + }, + { + "epoch": 1.2, + "learning_rate": 0.00019292752194911464, + "loss": 1.8388, + "step": 1012 + }, + { + "epoch": 1.2, + "learning_rate": 0.00019291356740486123, + "loss": 1.9111, + "step": 1013 + }, + { + "epoch": 1.2, + "learning_rate": 0.00019289959961315986, + "loss": 1.5287, + "step": 1014 + }, + { + "epoch": 1.2, + "learning_rate": 0.000192885618576002, + "loss": 1.5669, + "step": 1015 + }, + { + "epoch": 1.2, + "learning_rate": 0.00019287162429538105, + "loss": 1.9095, + "step": 1016 + }, + { + "epoch": 1.21, + "learning_rate": 0.00019285761677329232, + "loss": 1.9133, + "step": 1017 + }, + { + "epoch": 1.21, + "learning_rate": 0.00019284359601173294, + "loss": 2.1099, + "step": 1018 + }, + { + "epoch": 1.21, + "learning_rate": 0.000192829562012702, + "loss": 1.6303, + "step": 1019 + }, + { + "epoch": 1.21, + "learning_rate": 0.00019281551477820036, + "loss": 1.5907, + "step": 1020 + }, + { + "epoch": 1.21, + "learning_rate": 0.00019280145431023097, + "loss": 1.4897, + "step": 1021 + }, + { + "epoch": 1.21, + "learning_rate": 0.00019278738061079845, + "loss": 1.7414, + "step": 1022 + }, + { + "epoch": 1.21, + "learning_rate": 0.00019277329368190942, + "loss": 1.816, + "step": 1023 + }, + { + "epoch": 1.21, + "learning_rate": 0.00019275919352557241, + "loss": 1.5033, + "step": 1024 + }, + { + "epoch": 1.21, + "learning_rate": 0.00019274508014379777, + "loss": 1.7923, + "step": 1025 + }, + { + "epoch": 1.22, + "learning_rate": 0.00019273095353859775, + "loss": 1.3094, + "step": 1026 + }, + { + "epoch": 1.22, + "learning_rate": 0.00019271681371198652, + "loss": 1.7689, + "step": 1027 + }, + { + "epoch": 1.22, + "learning_rate": 0.0001927026606659801, + "loss": 1.8019, + "step": 1028 + }, + { + "epoch": 1.22, + "learning_rate": 0.00019268849440259639, + "loss": 1.8818, + "step": 1029 + }, + { + "epoch": 1.22, + "learning_rate": 0.00019267431492385521, + "loss": 1.7442, + "step": 1030 + }, + { + "epoch": 1.22, + "learning_rate": 0.00019266012223177824, + "loss": 2.045, + "step": 1031 + }, + { + "epoch": 1.22, + "learning_rate": 0.00019264591632838903, + "loss": 1.7842, + "step": 1032 + }, + { + "epoch": 1.22, + "learning_rate": 0.00019263169721571308, + "loss": 1.5289, + "step": 1033 + }, + { + "epoch": 1.23, + "learning_rate": 0.00019261746489577765, + "loss": 1.6013, + "step": 1034 + }, + { + "epoch": 1.23, + "learning_rate": 0.00019260321937061202, + "loss": 1.7912, + "step": 1035 + }, + { + "epoch": 1.23, + "learning_rate": 0.0001925889606422473, + "loss": 1.7573, + "step": 1036 + }, + { + "epoch": 1.23, + "learning_rate": 0.0001925746887127164, + "loss": 1.7368, + "step": 1037 + }, + { + "epoch": 1.23, + "learning_rate": 0.00019256040358405424, + "loss": 1.7497, + "step": 1038 + }, + { + "epoch": 1.23, + "learning_rate": 0.00019254610525829758, + "loss": 2.0042, + "step": 1039 + }, + { + "epoch": 1.23, + "learning_rate": 0.00019253179373748504, + "loss": 2.0732, + "step": 1040 + }, + { + "epoch": 1.23, + "learning_rate": 0.00019251746902365708, + "loss": 1.8878, + "step": 1041 + }, + { + "epoch": 1.24, + "learning_rate": 0.00019250313111885618, + "loss": 1.9404, + "step": 1042 + }, + { + "epoch": 1.24, + "learning_rate": 0.00019248878002512654, + "loss": 1.5535, + "step": 1043 + }, + { + "epoch": 1.24, + "learning_rate": 0.00019247441574451432, + "loss": 1.9344, + "step": 1044 + }, + { + "epoch": 1.24, + "learning_rate": 0.0001924600382790676, + "loss": 1.9696, + "step": 1045 + }, + { + "epoch": 1.24, + "eval_loss": 2.064669609069824, + "eval_runtime": 283.003, + "eval_samples_per_second": 0.728, + "eval_steps_per_second": 0.728, + "step": 1045 + }, + { + "epoch": 1.24, + "learning_rate": 0.00019244564763083624, + "loss": 1.4577, + "step": 1046 + }, + { + "epoch": 1.24, + "learning_rate": 0.00019243124380187204, + "loss": 2.1324, + "step": 1047 + }, + { + "epoch": 1.24, + "learning_rate": 0.00019241682679422873, + "loss": 1.4713, + "step": 1048 + }, + { + "epoch": 1.24, + "learning_rate": 0.00019240239660996177, + "loss": 1.7455, + "step": 1049 + }, + { + "epoch": 1.24, + "learning_rate": 0.0001923879532511287, + "loss": 1.5372, + "step": 1050 + }, + { + "epoch": 1.25, + "learning_rate": 0.00019237349671978872, + "loss": 2.0984, + "step": 1051 + }, + { + "epoch": 1.25, + "learning_rate": 0.0001923590270180031, + "loss": 1.5023, + "step": 1052 + }, + { + "epoch": 1.25, + "learning_rate": 0.0001923445441478348, + "loss": 2.0826, + "step": 1053 + }, + { + "epoch": 1.25, + "learning_rate": 0.00019233004811134886, + "loss": 1.7448, + "step": 1054 + }, + { + "epoch": 1.25, + "learning_rate": 0.00019231553891061208, + "loss": 2.0249, + "step": 1055 + }, + { + "epoch": 1.25, + "learning_rate": 0.00019230101654769312, + "loss": 1.6144, + "step": 1056 + }, + { + "epoch": 1.25, + "learning_rate": 0.0001922864810246626, + "loss": 1.9193, + "step": 1057 + }, + { + "epoch": 1.25, + "learning_rate": 0.00019227193234359292, + "loss": 2.0057, + "step": 1058 + }, + { + "epoch": 1.26, + "learning_rate": 0.00019225737050655842, + "loss": 1.9493, + "step": 1059 + }, + { + "epoch": 1.26, + "learning_rate": 0.00019224279551563532, + "loss": 1.9545, + "step": 1060 + }, + { + "epoch": 1.26, + "learning_rate": 0.0001922282073729017, + "loss": 1.8983, + "step": 1061 + }, + { + "epoch": 1.26, + "learning_rate": 0.00019221360608043746, + "loss": 1.9414, + "step": 1062 + }, + { + "epoch": 1.26, + "learning_rate": 0.00019219899164032447, + "loss": 1.8471, + "step": 1063 + }, + { + "epoch": 1.26, + "learning_rate": 0.0001921843640546464, + "loss": 1.7568, + "step": 1064 + }, + { + "epoch": 1.26, + "learning_rate": 0.00019216972332548887, + "loss": 2.0737, + "step": 1065 + }, + { + "epoch": 1.26, + "learning_rate": 0.0001921550694549393, + "loss": 1.6109, + "step": 1066 + }, + { + "epoch": 1.27, + "learning_rate": 0.000192140402445087, + "loss": 1.6684, + "step": 1067 + }, + { + "epoch": 1.27, + "learning_rate": 0.0001921257222980232, + "loss": 1.5101, + "step": 1068 + }, + { + "epoch": 1.27, + "learning_rate": 0.00019211102901584094, + "loss": 1.5262, + "step": 1069 + }, + { + "epoch": 1.27, + "learning_rate": 0.0001920963226006352, + "loss": 1.9757, + "step": 1070 + }, + { + "epoch": 1.27, + "learning_rate": 0.00019208160305450272, + "loss": 2.038, + "step": 1071 + }, + { + "epoch": 1.27, + "learning_rate": 0.00019206687037954224, + "loss": 1.4755, + "step": 1072 + }, + { + "epoch": 1.27, + "learning_rate": 0.00019205212457785434, + "loss": 1.7406, + "step": 1073 + }, + { + "epoch": 1.27, + "learning_rate": 0.00019203736565154137, + "loss": 1.9564, + "step": 1074 + }, + { + "epoch": 1.27, + "learning_rate": 0.0001920225936027077, + "loss": 1.823, + "step": 1075 + }, + { + "epoch": 1.28, + "learning_rate": 0.0001920078084334595, + "loss": 1.8275, + "step": 1076 + }, + { + "epoch": 1.28, + "learning_rate": 0.0001919930101459048, + "loss": 1.7106, + "step": 1077 + }, + { + "epoch": 1.28, + "learning_rate": 0.00019197819874215347, + "loss": 1.5958, + "step": 1078 + }, + { + "epoch": 1.28, + "learning_rate": 0.00019196337422431735, + "loss": 2.1478, + "step": 1079 + }, + { + "epoch": 1.28, + "learning_rate": 0.0001919485365945101, + "loss": 1.7238, + "step": 1080 + }, + { + "epoch": 1.28, + "learning_rate": 0.00019193368585484718, + "loss": 2.0758, + "step": 1081 + }, + { + "epoch": 1.28, + "learning_rate": 0.000191918822007446, + "loss": 1.8403, + "step": 1082 + }, + { + "epoch": 1.28, + "learning_rate": 0.00019190394505442585, + "loss": 1.8286, + "step": 1083 + }, + { + "epoch": 1.29, + "learning_rate": 0.00019188905499790789, + "loss": 1.6992, + "step": 1084 + }, + { + "epoch": 1.29, + "learning_rate": 0.00019187415184001503, + "loss": 1.8512, + "step": 1085 + }, + { + "epoch": 1.29, + "learning_rate": 0.0001918592355828722, + "loss": 1.8236, + "step": 1086 + }, + { + "epoch": 1.29, + "learning_rate": 0.0001918443062286061, + "loss": 1.6173, + "step": 1087 + }, + { + "epoch": 1.29, + "learning_rate": 0.00019182936377934535, + "loss": 1.8593, + "step": 1088 + }, + { + "epoch": 1.29, + "learning_rate": 0.0001918144082372204, + "loss": 1.8184, + "step": 1089 + }, + { + "epoch": 1.29, + "learning_rate": 0.00019179943960436358, + "loss": 1.9655, + "step": 1090 + }, + { + "epoch": 1.29, + "learning_rate": 0.00019178445788290915, + "loss": 1.5858, + "step": 1091 + }, + { + "epoch": 1.3, + "learning_rate": 0.00019176946307499312, + "loss": 1.8359, + "step": 1092 + }, + { + "epoch": 1.3, + "learning_rate": 0.0001917544551827534, + "loss": 1.4354, + "step": 1093 + }, + { + "epoch": 1.3, + "learning_rate": 0.00019173943420832984, + "loss": 1.4312, + "step": 1094 + }, + { + "epoch": 1.3, + "learning_rate": 0.0001917244001538641, + "loss": 2.0024, + "step": 1095 + }, + { + "epoch": 1.3, + "learning_rate": 0.00019170935302149965, + "loss": 1.5994, + "step": 1096 + }, + { + "epoch": 1.3, + "learning_rate": 0.00019169429281338195, + "loss": 2.05, + "step": 1097 + }, + { + "epoch": 1.3, + "learning_rate": 0.00019167921953165825, + "loss": 1.8746, + "step": 1098 + }, + { + "epoch": 1.3, + "learning_rate": 0.00019166413317847763, + "loss": 2.0071, + "step": 1099 + }, + { + "epoch": 1.3, + "learning_rate": 0.00019164903375599112, + "loss": 2.0331, + "step": 1100 + }, + { + "epoch": 1.31, + "learning_rate": 0.00019163392126635154, + "loss": 1.3587, + "step": 1101 + }, + { + "epoch": 1.31, + "learning_rate": 0.00019161879571171362, + "loss": 1.6144, + "step": 1102 + }, + { + "epoch": 1.31, + "learning_rate": 0.00019160365709423388, + "loss": 1.4845, + "step": 1103 + }, + { + "epoch": 1.31, + "learning_rate": 0.00019158850541607083, + "loss": 1.4511, + "step": 1104 + }, + { + "epoch": 1.31, + "learning_rate": 0.00019157334067938474, + "loss": 1.8015, + "step": 1105 + }, + { + "epoch": 1.31, + "learning_rate": 0.00019155816288633776, + "loss": 1.5029, + "step": 1106 + }, + { + "epoch": 1.31, + "learning_rate": 0.00019154297203909394, + "loss": 1.7102, + "step": 1107 + }, + { + "epoch": 1.31, + "learning_rate": 0.00019152776813981912, + "loss": 1.6661, + "step": 1108 + }, + { + "epoch": 1.32, + "learning_rate": 0.0001915125511906811, + "loss": 1.5872, + "step": 1109 + }, + { + "epoch": 1.32, + "learning_rate": 0.00019149732119384943, + "loss": 1.7868, + "step": 1110 + }, + { + "epoch": 1.32, + "learning_rate": 0.0001914820781514956, + "loss": 1.6365, + "step": 1111 + }, + { + "epoch": 1.32, + "learning_rate": 0.0001914668220657929, + "loss": 2.3434, + "step": 1112 + }, + { + "epoch": 1.32, + "learning_rate": 0.0001914515529389166, + "loss": 1.6458, + "step": 1113 + }, + { + "epoch": 1.32, + "learning_rate": 0.0001914362707730437, + "loss": 1.7061, + "step": 1114 + }, + { + "epoch": 1.32, + "learning_rate": 0.00019142097557035308, + "loss": 1.8606, + "step": 1115 + }, + { + "epoch": 1.32, + "learning_rate": 0.00019140566733302552, + "loss": 1.9415, + "step": 1116 + }, + { + "epoch": 1.33, + "learning_rate": 0.00019139034606324362, + "loss": 1.7411, + "step": 1117 + }, + { + "epoch": 1.33, + "learning_rate": 0.00019137501176319193, + "loss": 1.9404, + "step": 1118 + }, + { + "epoch": 1.33, + "learning_rate": 0.0001913596644350567, + "loss": 1.802, + "step": 1119 + }, + { + "epoch": 1.33, + "learning_rate": 0.00019134430408102615, + "loss": 1.2244, + "step": 1120 + }, + { + "epoch": 1.33, + "learning_rate": 0.00019132893070329036, + "loss": 1.902, + "step": 1121 + }, + { + "epoch": 1.33, + "learning_rate": 0.0001913135443040412, + "loss": 1.4578, + "step": 1122 + }, + { + "epoch": 1.33, + "learning_rate": 0.00019129814488547247, + "loss": 1.6816, + "step": 1123 + }, + { + "epoch": 1.33, + "learning_rate": 0.0001912827324497798, + "loss": 1.7293, + "step": 1124 + }, + { + "epoch": 1.33, + "learning_rate": 0.00019126730699916061, + "loss": 1.6344, + "step": 1125 + }, + { + "epoch": 1.34, + "learning_rate": 0.0001912518685358143, + "loss": 1.6819, + "step": 1126 + }, + { + "epoch": 1.34, + "learning_rate": 0.00019123641706194199, + "loss": 1.6761, + "step": 1127 + }, + { + "epoch": 1.34, + "learning_rate": 0.00019122095257974677, + "loss": 1.9222, + "step": 1128 + }, + { + "epoch": 1.34, + "learning_rate": 0.00019120547509143354, + "loss": 1.6117, + "step": 1129 + }, + { + "epoch": 1.34, + "learning_rate": 0.00019118998459920902, + "loss": 1.688, + "step": 1130 + }, + { + "epoch": 1.34, + "learning_rate": 0.00019117448110528184, + "loss": 1.8383, + "step": 1131 + }, + { + "epoch": 1.34, + "learning_rate": 0.00019115896461186245, + "loss": 1.5225, + "step": 1132 + }, + { + "epoch": 1.34, + "learning_rate": 0.00019114343512116318, + "loss": 2.0376, + "step": 1133 + }, + { + "epoch": 1.35, + "learning_rate": 0.00019112789263539813, + "loss": 1.5632, + "step": 1134 + }, + { + "epoch": 1.35, + "learning_rate": 0.00019111233715678343, + "loss": 1.7049, + "step": 1135 + }, + { + "epoch": 1.35, + "learning_rate": 0.0001910967686875369, + "loss": 1.4992, + "step": 1136 + }, + { + "epoch": 1.35, + "learning_rate": 0.00019108118722987826, + "loss": 1.7949, + "step": 1137 + }, + { + "epoch": 1.35, + "learning_rate": 0.00019106559278602903, + "loss": 1.4688, + "step": 1138 + }, + { + "epoch": 1.35, + "learning_rate": 0.00019104998535821274, + "loss": 1.4031, + "step": 1139 + }, + { + "epoch": 1.35, + "learning_rate": 0.0001910343649486546, + "loss": 2.1757, + "step": 1140 + }, + { + "epoch": 1.35, + "learning_rate": 0.00019101873155958179, + "loss": 1.622, + "step": 1141 + }, + { + "epoch": 1.36, + "learning_rate": 0.00019100308519322322, + "loss": 1.9441, + "step": 1142 + }, + { + "epoch": 1.36, + "learning_rate": 0.0001909874258518098, + "loss": 1.8065, + "step": 1143 + }, + { + "epoch": 1.36, + "learning_rate": 0.00019097175353757417, + "loss": 1.8348, + "step": 1144 + }, + { + "epoch": 1.36, + "learning_rate": 0.00019095606825275083, + "loss": 2.0519, + "step": 1145 + }, + { + "epoch": 1.36, + "learning_rate": 0.00019094036999957624, + "loss": 1.9172, + "step": 1146 + }, + { + "epoch": 1.36, + "learning_rate": 0.00019092465878028854, + "loss": 1.9961, + "step": 1147 + }, + { + "epoch": 1.36, + "learning_rate": 0.00019090893459712787, + "loss": 2.1239, + "step": 1148 + }, + { + "epoch": 1.36, + "learning_rate": 0.00019089319745233611, + "loss": 1.3481, + "step": 1149 + }, + { + "epoch": 1.36, + "learning_rate": 0.00019087744734815708, + "loss": 1.5035, + "step": 1150 + }, + { + "epoch": 1.37, + "learning_rate": 0.00019086168428683638, + "loss": 1.818, + "step": 1151 + }, + { + "epoch": 1.37, + "learning_rate": 0.00019084590827062145, + "loss": 2.0481, + "step": 1152 + }, + { + "epoch": 1.37, + "learning_rate": 0.00019083011930176165, + "loss": 1.4444, + "step": 1153 + }, + { + "epoch": 1.37, + "learning_rate": 0.00019081431738250814, + "loss": 1.6059, + "step": 1154 + }, + { + "epoch": 1.37, + "learning_rate": 0.0001907985025151139, + "loss": 2.0284, + "step": 1155 + }, + { + "epoch": 1.37, + "learning_rate": 0.0001907826747018338, + "loss": 1.8603, + "step": 1156 + }, + { + "epoch": 1.37, + "learning_rate": 0.00019076683394492455, + "loss": 1.7189, + "step": 1157 + }, + { + "epoch": 1.37, + "learning_rate": 0.00019075098024664468, + "loss": 1.7497, + "step": 1158 + }, + { + "epoch": 1.38, + "learning_rate": 0.00019073511360925458, + "loss": 1.7489, + "step": 1159 + }, + { + "epoch": 1.38, + "learning_rate": 0.0001907192340350165, + "loss": 1.6059, + "step": 1160 + }, + { + "epoch": 1.38, + "learning_rate": 0.00019070334152619453, + "loss": 1.4407, + "step": 1161 + }, + { + "epoch": 1.38, + "learning_rate": 0.00019068743608505455, + "loss": 1.7025, + "step": 1162 + }, + { + "epoch": 1.38, + "learning_rate": 0.00019067151771386438, + "loss": 1.7921, + "step": 1163 + }, + { + "epoch": 1.38, + "learning_rate": 0.0001906555864148936, + "loss": 1.6147, + "step": 1164 + }, + { + "epoch": 1.38, + "learning_rate": 0.0001906396421904137, + "loss": 1.6192, + "step": 1165 + }, + { + "epoch": 1.38, + "learning_rate": 0.00019062368504269795, + "loss": 1.4341, + "step": 1166 + }, + { + "epoch": 1.39, + "learning_rate": 0.00019060771497402147, + "loss": 1.3054, + "step": 1167 + }, + { + "epoch": 1.39, + "learning_rate": 0.0001905917319866613, + "loss": 2.041, + "step": 1168 + }, + { + "epoch": 1.39, + "learning_rate": 0.00019057573608289623, + "loss": 2.004, + "step": 1169 + }, + { + "epoch": 1.39, + "learning_rate": 0.00019055972726500695, + "loss": 1.4002, + "step": 1170 + }, + { + "epoch": 1.39, + "learning_rate": 0.00019054370553527595, + "loss": 1.5554, + "step": 1171 + }, + { + "epoch": 1.39, + "learning_rate": 0.00019052767089598754, + "loss": 1.9783, + "step": 1172 + }, + { + "epoch": 1.39, + "learning_rate": 0.000190511623349428, + "loss": 1.7443, + "step": 1173 + }, + { + "epoch": 1.39, + "learning_rate": 0.00019049556289788528, + "loss": 1.6089, + "step": 1174 + }, + { + "epoch": 1.39, + "learning_rate": 0.0001904794895436493, + "loss": 1.8784, + "step": 1175 + }, + { + "epoch": 1.4, + "learning_rate": 0.0001904634032890117, + "loss": 2.0985, + "step": 1176 + }, + { + "epoch": 1.4, + "learning_rate": 0.0001904473041362661, + "loss": 1.811, + "step": 1177 + }, + { + "epoch": 1.4, + "learning_rate": 0.00019043119208770793, + "loss": 1.407, + "step": 1178 + }, + { + "epoch": 1.4, + "learning_rate": 0.0001904150671456343, + "loss": 1.7269, + "step": 1179 + }, + { + "epoch": 1.4, + "learning_rate": 0.00019039892931234435, + "loss": 1.8374, + "step": 1180 + }, + { + "epoch": 1.4, + "learning_rate": 0.00019038277859013896, + "loss": 1.583, + "step": 1181 + }, + { + "epoch": 1.4, + "learning_rate": 0.00019036661498132086, + "loss": 1.6407, + "step": 1182 + }, + { + "epoch": 1.4, + "learning_rate": 0.00019035043848819464, + "loss": 2.0828, + "step": 1183 + }, + { + "epoch": 1.41, + "learning_rate": 0.00019033424911306672, + "loss": 1.7067, + "step": 1184 + }, + { + "epoch": 1.41, + "learning_rate": 0.00019031804685824534, + "loss": 1.55, + "step": 1185 + }, + { + "epoch": 1.41, + "learning_rate": 0.0001903018317260406, + "loss": 1.7573, + "step": 1186 + }, + { + "epoch": 1.41, + "learning_rate": 0.00019028560371876446, + "loss": 1.5666, + "step": 1187 + }, + { + "epoch": 1.41, + "learning_rate": 0.0001902693628387306, + "loss": 1.5192, + "step": 1188 + }, + { + "epoch": 1.41, + "learning_rate": 0.00019025310908825466, + "loss": 2.0093, + "step": 1189 + }, + { + "epoch": 1.41, + "learning_rate": 0.00019023684246965406, + "loss": 1.8414, + "step": 1190 + }, + { + "epoch": 1.41, + "learning_rate": 0.00019022056298524808, + "loss": 1.3696, + "step": 1191 + }, + { + "epoch": 1.42, + "learning_rate": 0.00019020427063735782, + "loss": 1.6336, + "step": 1192 + }, + { + "epoch": 1.42, + "learning_rate": 0.00019018796542830617, + "loss": 1.8528, + "step": 1193 + }, + { + "epoch": 1.42, + "learning_rate": 0.00019017164736041795, + "loss": 2.0523, + "step": 1194 + }, + { + "epoch": 1.42, + "learning_rate": 0.00019015531643601973, + "loss": 1.7526, + "step": 1195 + }, + { + "epoch": 1.42, + "learning_rate": 0.00019013897265743998, + "loss": 1.8391, + "step": 1196 + }, + { + "epoch": 1.42, + "learning_rate": 0.00019012261602700892, + "loss": 1.4257, + "step": 1197 + }, + { + "epoch": 1.42, + "learning_rate": 0.00019010624654705867, + "loss": 2.0911, + "step": 1198 + }, + { + "epoch": 1.42, + "learning_rate": 0.0001900898642199232, + "loss": 1.7578, + "step": 1199 + }, + { + "epoch": 1.42, + "learning_rate": 0.00019007346904793818, + "loss": 1.9003, + "step": 1200 + }, + { + "epoch": 1.43, + "learning_rate": 0.0001900570610334413, + "loss": 1.3918, + "step": 1201 + }, + { + "epoch": 1.43, + "learning_rate": 0.0001900406401787719, + "loss": 2.0365, + "step": 1202 + }, + { + "epoch": 1.43, + "learning_rate": 0.00019002420648627131, + "loss": 1.5184, + "step": 1203 + }, + { + "epoch": 1.43, + "learning_rate": 0.00019000775995828254, + "loss": 1.6412, + "step": 1204 + }, + { + "epoch": 1.43, + "learning_rate": 0.00018999130059715058, + "loss": 1.5031, + "step": 1205 + }, + { + "epoch": 1.43, + "learning_rate": 0.00018997482840522217, + "loss": 1.4421, + "step": 1206 + }, + { + "epoch": 1.43, + "learning_rate": 0.00018995834338484584, + "loss": 1.9431, + "step": 1207 + }, + { + "epoch": 1.43, + "learning_rate": 0.000189941845538372, + "loss": 1.8141, + "step": 1208 + }, + { + "epoch": 1.44, + "learning_rate": 0.0001899253348681529, + "loss": 1.7289, + "step": 1209 + }, + { + "epoch": 1.44, + "learning_rate": 0.00018990881137654258, + "loss": 1.7217, + "step": 1210 + }, + { + "epoch": 1.44, + "learning_rate": 0.000189892275065897, + "loss": 2.3727, + "step": 1211 + }, + { + "epoch": 1.44, + "learning_rate": 0.00018987572593857381, + "loss": 1.4833, + "step": 1212 + }, + { + "epoch": 1.44, + "learning_rate": 0.00018985916399693256, + "loss": 2.13, + "step": 1213 + }, + { + "epoch": 1.44, + "learning_rate": 0.00018984258924333464, + "loss": 1.875, + "step": 1214 + }, + { + "epoch": 1.44, + "learning_rate": 0.00018982600168014323, + "loss": 1.783, + "step": 1215 + }, + { + "epoch": 1.44, + "learning_rate": 0.00018980940130972337, + "loss": 1.6815, + "step": 1216 + }, + { + "epoch": 1.45, + "learning_rate": 0.0001897927881344419, + "loss": 2.049, + "step": 1217 + }, + { + "epoch": 1.45, + "learning_rate": 0.00018977616215666752, + "loss": 1.918, + "step": 1218 + }, + { + "epoch": 1.45, + "learning_rate": 0.0001897595233787707, + "loss": 1.5824, + "step": 1219 + }, + { + "epoch": 1.45, + "learning_rate": 0.00018974287180312377, + "loss": 1.7473, + "step": 1220 + }, + { + "epoch": 1.45, + "learning_rate": 0.00018972620743210093, + "loss": 1.6915, + "step": 1221 + }, + { + "epoch": 1.45, + "learning_rate": 0.0001897095302680781, + "loss": 1.7633, + "step": 1222 + }, + { + "epoch": 1.45, + "learning_rate": 0.00018969284031343308, + "loss": 1.6921, + "step": 1223 + }, + { + "epoch": 1.45, + "learning_rate": 0.00018967613757054554, + "loss": 1.5433, + "step": 1224 + }, + { + "epoch": 1.45, + "learning_rate": 0.00018965942204179686, + "loss": 1.9389, + "step": 1225 + }, + { + "epoch": 1.46, + "learning_rate": 0.00018964269372957038, + "loss": 1.5625, + "step": 1226 + }, + { + "epoch": 1.46, + "learning_rate": 0.00018962595263625115, + "loss": 1.4835, + "step": 1227 + }, + { + "epoch": 1.46, + "learning_rate": 0.00018960919876422611, + "loss": 1.8479, + "step": 1228 + }, + { + "epoch": 1.46, + "learning_rate": 0.00018959243211588397, + "loss": 1.7861, + "step": 1229 + }, + { + "epoch": 1.46, + "learning_rate": 0.00018957565269361531, + "loss": 1.867, + "step": 1230 + }, + { + "epoch": 1.46, + "learning_rate": 0.00018955886049981245, + "loss": 1.9383, + "step": 1231 + }, + { + "epoch": 1.46, + "learning_rate": 0.0001895420555368697, + "loss": 1.755, + "step": 1232 + }, + { + "epoch": 1.46, + "learning_rate": 0.000189525237807183, + "loss": 1.5166, + "step": 1233 + }, + { + "epoch": 1.47, + "learning_rate": 0.00018950840731315024, + "loss": 1.8629, + "step": 1234 + }, + { + "epoch": 1.47, + "learning_rate": 0.000189491564057171, + "loss": 1.6845, + "step": 1235 + }, + { + "epoch": 1.47, + "learning_rate": 0.00018947470804164685, + "loss": 1.4748, + "step": 1236 + }, + { + "epoch": 1.47, + "learning_rate": 0.00018945783926898105, + "loss": 1.8907, + "step": 1237 + }, + { + "epoch": 1.47, + "learning_rate": 0.00018944095774157873, + "loss": 1.5758, + "step": 1238 + }, + { + "epoch": 1.47, + "learning_rate": 0.00018942406346184683, + "loss": 1.6367, + "step": 1239 + }, + { + "epoch": 1.47, + "learning_rate": 0.00018940715643219407, + "loss": 1.7285, + "step": 1240 + }, + { + "epoch": 1.47, + "learning_rate": 0.00018939023665503108, + "loss": 1.5714, + "step": 1241 + }, + { + "epoch": 1.48, + "learning_rate": 0.0001893733041327702, + "loss": 1.9308, + "step": 1242 + }, + { + "epoch": 1.48, + "learning_rate": 0.00018935635886782568, + "loss": 1.9153, + "step": 1243 + }, + { + "epoch": 1.48, + "learning_rate": 0.00018933940086261351, + "loss": 1.8009, + "step": 1244 + }, + { + "epoch": 1.48, + "learning_rate": 0.00018932243011955154, + "loss": 1.7392, + "step": 1245 + }, + { + "epoch": 1.48, + "learning_rate": 0.00018930544664105944, + "loss": 1.821, + "step": 1246 + }, + { + "epoch": 1.48, + "learning_rate": 0.0001892884504295587, + "loss": 1.475, + "step": 1247 + }, + { + "epoch": 1.48, + "learning_rate": 0.00018927144148747255, + "loss": 1.8937, + "step": 1248 + }, + { + "epoch": 1.48, + "learning_rate": 0.00018925441981722618, + "loss": 1.6958, + "step": 1249 + }, + { + "epoch": 1.48, + "learning_rate": 0.00018923738542124644, + "loss": 1.6836, + "step": 1250 + }, + { + "epoch": 1.49, + "learning_rate": 0.00018922033830196208, + "loss": 2.0213, + "step": 1251 + }, + { + "epoch": 1.49, + "learning_rate": 0.00018920327846180365, + "loss": 1.9572, + "step": 1252 + }, + { + "epoch": 1.49, + "learning_rate": 0.00018918620590320352, + "loss": 1.9449, + "step": 1253 + }, + { + "epoch": 1.49, + "learning_rate": 0.00018916912062859583, + "loss": 1.7297, + "step": 1254 + }, + { + "epoch": 1.49, + "eval_loss": 2.0551259517669678, + "eval_runtime": 283.8338, + "eval_samples_per_second": 0.726, + "eval_steps_per_second": 0.726, + "step": 1254 + }, + { + "epoch": 1.49, + "learning_rate": 0.00018915202264041664, + "loss": 1.8158, + "step": 1255 + }, + { + "epoch": 1.49, + "learning_rate": 0.0001891349119411037, + "loss": 1.921, + "step": 1256 + }, + { + "epoch": 1.49, + "learning_rate": 0.00018911778853309658, + "loss": 1.5726, + "step": 1257 + }, + { + "epoch": 1.49, + "learning_rate": 0.0001891006524188368, + "loss": 1.6641, + "step": 1258 + }, + { + "epoch": 1.5, + "learning_rate": 0.00018908350360076752, + "loss": 1.5841, + "step": 1259 + }, + { + "epoch": 1.5, + "learning_rate": 0.00018906634208133385, + "loss": 1.8567, + "step": 1260 + }, + { + "epoch": 1.5, + "learning_rate": 0.00018904916786298257, + "loss": 1.5584, + "step": 1261 + }, + { + "epoch": 1.5, + "learning_rate": 0.00018903198094816242, + "loss": 1.6615, + "step": 1262 + }, + { + "epoch": 1.5, + "learning_rate": 0.00018901478133932385, + "loss": 1.7477, + "step": 1263 + }, + { + "epoch": 1.5, + "learning_rate": 0.00018899756903891914, + "loss": 1.3796, + "step": 1264 + }, + { + "epoch": 1.5, + "learning_rate": 0.00018898034404940238, + "loss": 1.7991, + "step": 1265 + }, + { + "epoch": 1.5, + "learning_rate": 0.00018896310637322953, + "loss": 1.4944, + "step": 1266 + }, + { + "epoch": 1.51, + "learning_rate": 0.00018894585601285827, + "loss": 1.5719, + "step": 1267 + }, + { + "epoch": 1.51, + "learning_rate": 0.00018892859297074812, + "loss": 1.5495, + "step": 1268 + }, + { + "epoch": 1.51, + "learning_rate": 0.00018891131724936043, + "loss": 1.7611, + "step": 1269 + }, + { + "epoch": 1.51, + "learning_rate": 0.00018889402885115833, + "loss": 1.5991, + "step": 1270 + }, + { + "epoch": 1.51, + "learning_rate": 0.00018887672777860676, + "loss": 1.8849, + "step": 1271 + }, + { + "epoch": 1.51, + "learning_rate": 0.0001888594140341725, + "loss": 1.6136, + "step": 1272 + }, + { + "epoch": 1.51, + "learning_rate": 0.0001888420876203241, + "loss": 1.8288, + "step": 1273 + }, + { + "epoch": 1.51, + "learning_rate": 0.0001888247485395319, + "loss": 1.6625, + "step": 1274 + }, + { + "epoch": 1.52, + "learning_rate": 0.00018880739679426816, + "loss": 1.49, + "step": 1275 + }, + { + "epoch": 1.52, + "learning_rate": 0.00018879003238700675, + "loss": 1.874, + "step": 1276 + }, + { + "epoch": 1.52, + "learning_rate": 0.00018877265532022352, + "loss": 1.751, + "step": 1277 + }, + { + "epoch": 1.52, + "learning_rate": 0.00018875526559639604, + "loss": 1.9882, + "step": 1278 + }, + { + "epoch": 1.52, + "learning_rate": 0.00018873786321800374, + "loss": 1.5214, + "step": 1279 + }, + { + "epoch": 1.52, + "learning_rate": 0.0001887204481875278, + "loss": 1.741, + "step": 1280 + }, + { + "epoch": 1.52, + "learning_rate": 0.00018870302050745118, + "loss": 1.7798, + "step": 1281 + }, + { + "epoch": 1.52, + "learning_rate": 0.00018868558018025878, + "loss": 1.9258, + "step": 1282 + }, + { + "epoch": 1.52, + "learning_rate": 0.0001886681272084371, + "loss": 1.9096, + "step": 1283 + }, + { + "epoch": 1.53, + "learning_rate": 0.00018865066159447466, + "loss": 1.6729, + "step": 1284 + }, + { + "epoch": 1.53, + "learning_rate": 0.00018863318334086157, + "loss": 1.6239, + "step": 1285 + }, + { + "epoch": 1.53, + "learning_rate": 0.00018861569245008994, + "loss": 1.9857, + "step": 1286 + }, + { + "epoch": 1.53, + "learning_rate": 0.00018859818892465354, + "loss": 1.9905, + "step": 1287 + }, + { + "epoch": 1.53, + "learning_rate": 0.000188580672767048, + "loss": 2.0073, + "step": 1288 + }, + { + "epoch": 1.53, + "learning_rate": 0.00018856314397977075, + "loss": 1.7109, + "step": 1289 + }, + { + "epoch": 1.53, + "learning_rate": 0.000188545602565321, + "loss": 1.3727, + "step": 1290 + }, + { + "epoch": 1.53, + "learning_rate": 0.00018852804852619975, + "loss": 1.7045, + "step": 1291 + }, + { + "epoch": 1.54, + "learning_rate": 0.00018851048186490992, + "loss": 1.9042, + "step": 1292 + }, + { + "epoch": 1.54, + "learning_rate": 0.00018849290258395602, + "loss": 1.7174, + "step": 1293 + }, + { + "epoch": 1.54, + "learning_rate": 0.00018847531068584452, + "loss": 1.6502, + "step": 1294 + }, + { + "epoch": 1.54, + "learning_rate": 0.00018845770617308366, + "loss": 1.8582, + "step": 1295 + }, + { + "epoch": 1.54, + "learning_rate": 0.0001884400890481834, + "loss": 1.4846, + "step": 1296 + }, + { + "epoch": 1.54, + "learning_rate": 0.00018842245931365562, + "loss": 1.5428, + "step": 1297 + }, + { + "epoch": 1.54, + "learning_rate": 0.00018840481697201392, + "loss": 1.7266, + "step": 1298 + }, + { + "epoch": 1.54, + "learning_rate": 0.0001883871620257737, + "loss": 1.9324, + "step": 1299 + }, + { + "epoch": 1.55, + "learning_rate": 0.00018836949447745215, + "loss": 1.577, + "step": 1300 + }, + { + "epoch": 1.55, + "learning_rate": 0.0001883518143295683, + "loss": 1.6388, + "step": 1301 + }, + { + "epoch": 1.55, + "learning_rate": 0.00018833412158464298, + "loss": 1.9201, + "step": 1302 + }, + { + "epoch": 1.55, + "learning_rate": 0.00018831641624519877, + "loss": 1.6478, + "step": 1303 + }, + { + "epoch": 1.55, + "learning_rate": 0.00018829869831376005, + "loss": 1.6826, + "step": 1304 + }, + { + "epoch": 1.55, + "learning_rate": 0.00018828096779285303, + "loss": 1.8513, + "step": 1305 + }, + { + "epoch": 1.55, + "learning_rate": 0.00018826322468500566, + "loss": 1.571, + "step": 1306 + }, + { + "epoch": 1.55, + "learning_rate": 0.00018824546899274777, + "loss": 1.1602, + "step": 1307 + }, + { + "epoch": 1.55, + "learning_rate": 0.0001882277007186109, + "loss": 1.9998, + "step": 1308 + }, + { + "epoch": 1.56, + "learning_rate": 0.0001882099198651284, + "loss": 1.7034, + "step": 1309 + }, + { + "epoch": 1.56, + "learning_rate": 0.0001881921264348355, + "loss": 1.4031, + "step": 1310 + }, + { + "epoch": 1.56, + "learning_rate": 0.00018817432043026911, + "loss": 1.8413, + "step": 1311 + }, + { + "epoch": 1.56, + "learning_rate": 0.00018815650185396797, + "loss": 1.6606, + "step": 1312 + }, + { + "epoch": 1.56, + "learning_rate": 0.00018813867070847264, + "loss": 1.5792, + "step": 1313 + }, + { + "epoch": 1.56, + "learning_rate": 0.00018812082699632546, + "loss": 1.4525, + "step": 1314 + }, + { + "epoch": 1.56, + "learning_rate": 0.00018810297072007054, + "loss": 1.4906, + "step": 1315 + }, + { + "epoch": 1.56, + "learning_rate": 0.00018808510188225377, + "loss": 1.6284, + "step": 1316 + }, + { + "epoch": 1.57, + "learning_rate": 0.0001880672204854229, + "loss": 1.7281, + "step": 1317 + }, + { + "epoch": 1.57, + "learning_rate": 0.0001880493265321274, + "loss": 1.5345, + "step": 1318 + }, + { + "epoch": 1.57, + "learning_rate": 0.00018803142002491856, + "loss": 2.0933, + "step": 1319 + }, + { + "epoch": 1.57, + "learning_rate": 0.00018801350096634946, + "loss": 1.9372, + "step": 1320 + }, + { + "epoch": 1.57, + "learning_rate": 0.000187995569358975, + "loss": 1.7151, + "step": 1321 + }, + { + "epoch": 1.57, + "learning_rate": 0.00018797762520535177, + "loss": 1.4823, + "step": 1322 + }, + { + "epoch": 1.57, + "learning_rate": 0.0001879596685080383, + "loss": 2.0495, + "step": 1323 + }, + { + "epoch": 1.57, + "learning_rate": 0.00018794169926959474, + "loss": 2.2966, + "step": 1324 + }, + { + "epoch": 1.58, + "learning_rate": 0.00018792371749258314, + "loss": 1.7868, + "step": 1325 + }, + { + "epoch": 1.58, + "learning_rate": 0.00018790572317956735, + "loss": 1.9403, + "step": 1326 + }, + { + "epoch": 1.58, + "learning_rate": 0.00018788771633311292, + "loss": 1.6687, + "step": 1327 + }, + { + "epoch": 1.58, + "learning_rate": 0.00018786969695578723, + "loss": 1.8422, + "step": 1328 + }, + { + "epoch": 1.58, + "learning_rate": 0.00018785166505015948, + "loss": 1.5916, + "step": 1329 + }, + { + "epoch": 1.58, + "learning_rate": 0.00018783362061880062, + "loss": 1.9119, + "step": 1330 + }, + { + "epoch": 1.58, + "learning_rate": 0.00018781556366428336, + "loss": 1.4903, + "step": 1331 + }, + { + "epoch": 1.58, + "learning_rate": 0.00018779749418918227, + "loss": 1.9497, + "step": 1332 + }, + { + "epoch": 1.58, + "learning_rate": 0.00018777941219607364, + "loss": 1.9462, + "step": 1333 + }, + { + "epoch": 1.59, + "learning_rate": 0.00018776131768753556, + "loss": 2.0474, + "step": 1334 + }, + { + "epoch": 1.59, + "learning_rate": 0.00018774321066614795, + "loss": 1.4474, + "step": 1335 + }, + { + "epoch": 1.59, + "learning_rate": 0.00018772509113449245, + "loss": 1.8315, + "step": 1336 + }, + { + "epoch": 1.59, + "learning_rate": 0.00018770695909515247, + "loss": 1.7684, + "step": 1337 + }, + { + "epoch": 1.59, + "learning_rate": 0.00018768881455071332, + "loss": 1.2675, + "step": 1338 + }, + { + "epoch": 1.59, + "learning_rate": 0.000187670657503762, + "loss": 1.8226, + "step": 1339 + }, + { + "epoch": 1.59, + "learning_rate": 0.00018765248795688726, + "loss": 2.2112, + "step": 1340 + }, + { + "epoch": 1.59, + "learning_rate": 0.0001876343059126797, + "loss": 1.3627, + "step": 1341 + }, + { + "epoch": 1.6, + "learning_rate": 0.00018761611137373173, + "loss": 2.1488, + "step": 1342 + }, + { + "epoch": 1.6, + "learning_rate": 0.00018759790434263744, + "loss": 1.9842, + "step": 1343 + }, + { + "epoch": 1.6, + "learning_rate": 0.00018757968482199276, + "loss": 1.9775, + "step": 1344 + }, + { + "epoch": 1.6, + "learning_rate": 0.00018756145281439545, + "loss": 1.6835, + "step": 1345 + }, + { + "epoch": 1.6, + "learning_rate": 0.0001875432083224449, + "loss": 1.5272, + "step": 1346 + }, + { + "epoch": 1.6, + "learning_rate": 0.0001875249513487425, + "loss": 1.7539, + "step": 1347 + }, + { + "epoch": 1.6, + "learning_rate": 0.00018750668189589117, + "loss": 1.874, + "step": 1348 + }, + { + "epoch": 1.6, + "learning_rate": 0.00018748839996649583, + "loss": 1.5858, + "step": 1349 + }, + { + "epoch": 1.61, + "learning_rate": 0.00018747010556316305, + "loss": 1.9298, + "step": 1350 + }, + { + "epoch": 1.61, + "learning_rate": 0.0001874517986885012, + "loss": 1.5079, + "step": 1351 + }, + { + "epoch": 1.61, + "learning_rate": 0.00018743347934512046, + "loss": 1.884, + "step": 1352 + }, + { + "epoch": 1.61, + "learning_rate": 0.00018741514753563277, + "loss": 1.7978, + "step": 1353 + }, + { + "epoch": 1.61, + "learning_rate": 0.0001873968032626518, + "loss": 1.7735, + "step": 1354 + }, + { + "epoch": 1.61, + "learning_rate": 0.00018737844652879312, + "loss": 1.7227, + "step": 1355 + }, + { + "epoch": 1.61, + "learning_rate": 0.00018736007733667393, + "loss": 1.8458, + "step": 1356 + }, + { + "epoch": 1.61, + "learning_rate": 0.00018734169568891334, + "loss": 1.3268, + "step": 1357 + }, + { + "epoch": 1.61, + "learning_rate": 0.0001873233015881321, + "loss": 1.3782, + "step": 1358 + }, + { + "epoch": 1.62, + "learning_rate": 0.00018730489503695287, + "loss": 1.9614, + "step": 1359 + }, + { + "epoch": 1.62, + "learning_rate": 0.00018728647603800003, + "loss": 1.7755, + "step": 1360 + }, + { + "epoch": 1.62, + "learning_rate": 0.00018726804459389963, + "loss": 1.7961, + "step": 1361 + }, + { + "epoch": 1.62, + "learning_rate": 0.00018724960070727972, + "loss": 1.7158, + "step": 1362 + }, + { + "epoch": 1.62, + "learning_rate": 0.0001872311443807699, + "loss": 1.6303, + "step": 1363 + }, + { + "epoch": 1.62, + "learning_rate": 0.0001872126756170017, + "loss": 1.8734, + "step": 1364 + }, + { + "epoch": 1.62, + "learning_rate": 0.00018719419441860834, + "loss": 1.5143, + "step": 1365 + }, + { + "epoch": 1.62, + "learning_rate": 0.0001871757007882248, + "loss": 1.498, + "step": 1366 + }, + { + "epoch": 1.63, + "learning_rate": 0.0001871571947284879, + "loss": 1.0886, + "step": 1367 + }, + { + "epoch": 1.63, + "learning_rate": 0.00018713867624203621, + "loss": 1.6633, + "step": 1368 + }, + { + "epoch": 1.63, + "learning_rate": 0.00018712014533151008, + "loss": 1.8895, + "step": 1369 + }, + { + "epoch": 1.63, + "learning_rate": 0.00018710160199955156, + "loss": 1.4178, + "step": 1370 + }, + { + "epoch": 1.63, + "learning_rate": 0.00018708304624880456, + "loss": 1.6814, + "step": 1371 + }, + { + "epoch": 1.63, + "learning_rate": 0.0001870644780819147, + "loss": 1.8671, + "step": 1372 + }, + { + "epoch": 1.63, + "learning_rate": 0.00018704589750152944, + "loss": 1.4786, + "step": 1373 + }, + { + "epoch": 1.63, + "learning_rate": 0.00018702730451029796, + "loss": 1.8622, + "step": 1374 + }, + { + "epoch": 1.64, + "learning_rate": 0.00018700869911087115, + "loss": 1.8891, + "step": 1375 + }, + { + "epoch": 1.64, + "learning_rate": 0.0001869900813059018, + "loss": 2.0493, + "step": 1376 + }, + { + "epoch": 1.64, + "learning_rate": 0.00018697145109804436, + "loss": 1.7238, + "step": 1377 + }, + { + "epoch": 1.64, + "learning_rate": 0.00018695280848995513, + "loss": 1.7826, + "step": 1378 + }, + { + "epoch": 1.64, + "learning_rate": 0.0001869341534842921, + "loss": 1.8557, + "step": 1379 + }, + { + "epoch": 1.64, + "learning_rate": 0.0001869154860837151, + "loss": 1.7492, + "step": 1380 + }, + { + "epoch": 1.64, + "learning_rate": 0.0001868968062908857, + "loss": 1.7441, + "step": 1381 + }, + { + "epoch": 1.64, + "learning_rate": 0.0001868781141084672, + "loss": 1.8322, + "step": 1382 + }, + { + "epoch": 1.64, + "learning_rate": 0.0001868594095391247, + "loss": 1.8177, + "step": 1383 + }, + { + "epoch": 1.65, + "learning_rate": 0.00018684069258552508, + "loss": 2.0001, + "step": 1384 + }, + { + "epoch": 1.65, + "learning_rate": 0.00018682196325033696, + "loss": 1.5046, + "step": 1385 + }, + { + "epoch": 1.65, + "learning_rate": 0.00018680322153623075, + "loss": 1.6789, + "step": 1386 + }, + { + "epoch": 1.65, + "learning_rate": 0.0001867844674458786, + "loss": 1.6951, + "step": 1387 + }, + { + "epoch": 1.65, + "learning_rate": 0.00018676570098195443, + "loss": 2.0334, + "step": 1388 + }, + { + "epoch": 1.65, + "learning_rate": 0.00018674692214713388, + "loss": 1.7833, + "step": 1389 + }, + { + "epoch": 1.65, + "learning_rate": 0.0001867281309440945, + "loss": 1.82, + "step": 1390 + }, + { + "epoch": 1.65, + "learning_rate": 0.00018670932737551547, + "loss": 1.8155, + "step": 1391 + }, + { + "epoch": 1.66, + "learning_rate": 0.00018669051144407775, + "loss": 1.7912, + "step": 1392 + }, + { + "epoch": 1.66, + "learning_rate": 0.00018667168315246406, + "loss": 1.5816, + "step": 1393 + }, + { + "epoch": 1.66, + "learning_rate": 0.00018665284250335895, + "loss": 1.7521, + "step": 1394 + }, + { + "epoch": 1.66, + "learning_rate": 0.00018663398949944865, + "loss": 1.4287, + "step": 1395 + }, + { + "epoch": 1.66, + "learning_rate": 0.00018661512414342127, + "loss": 1.6026, + "step": 1396 + }, + { + "epoch": 1.66, + "learning_rate": 0.00018659624643796647, + "loss": 1.6953, + "step": 1397 + }, + { + "epoch": 1.66, + "learning_rate": 0.00018657735638577587, + "loss": 1.8515, + "step": 1398 + }, + { + "epoch": 1.66, + "learning_rate": 0.00018655845398954276, + "loss": 2.0384, + "step": 1399 + }, + { + "epoch": 1.67, + "learning_rate": 0.00018653953925196225, + "loss": 1.5458, + "step": 1400 + }, + { + "epoch": 1.67, + "learning_rate": 0.00018652061217573114, + "loss": 1.7166, + "step": 1401 + }, + { + "epoch": 1.67, + "learning_rate": 0.000186501672763548, + "loss": 1.5653, + "step": 1402 + }, + { + "epoch": 1.67, + "learning_rate": 0.00018648272101811318, + "loss": 2.0928, + "step": 1403 + }, + { + "epoch": 1.67, + "learning_rate": 0.00018646375694212884, + "loss": 1.605, + "step": 1404 + }, + { + "epoch": 1.67, + "learning_rate": 0.00018644478053829878, + "loss": 1.4734, + "step": 1405 + }, + { + "epoch": 1.67, + "learning_rate": 0.00018642579180932865, + "loss": 2.0578, + "step": 1406 + }, + { + "epoch": 1.67, + "learning_rate": 0.00018640679075792582, + "loss": 1.9823, + "step": 1407 + }, + { + "epoch": 1.67, + "learning_rate": 0.00018638777738679943, + "loss": 2.0551, + "step": 1408 + }, + { + "epoch": 1.68, + "learning_rate": 0.00018636875169866036, + "loss": 1.6315, + "step": 1409 + }, + { + "epoch": 1.68, + "learning_rate": 0.0001863497136962213, + "loss": 1.8965, + "step": 1410 + }, + { + "epoch": 1.68, + "learning_rate": 0.0001863306633821966, + "loss": 1.3584, + "step": 1411 + }, + { + "epoch": 1.68, + "learning_rate": 0.00018631160075930245, + "loss": 1.9673, + "step": 1412 + }, + { + "epoch": 1.68, + "learning_rate": 0.00018629252583025676, + "loss": 1.5277, + "step": 1413 + }, + { + "epoch": 1.68, + "learning_rate": 0.0001862734385977792, + "loss": 1.6788, + "step": 1414 + }, + { + "epoch": 1.68, + "learning_rate": 0.00018625433906459116, + "loss": 1.432, + "step": 1415 + }, + { + "epoch": 1.68, + "learning_rate": 0.00018623522723341588, + "loss": 1.8102, + "step": 1416 + }, + { + "epoch": 1.69, + "learning_rate": 0.00018621610310697823, + "loss": 1.6713, + "step": 1417 + }, + { + "epoch": 1.69, + "learning_rate": 0.00018619696668800492, + "loss": 1.6989, + "step": 1418 + }, + { + "epoch": 1.69, + "learning_rate": 0.0001861778179792244, + "loss": 1.7645, + "step": 1419 + }, + { + "epoch": 1.69, + "learning_rate": 0.00018615865698336684, + "loss": 1.594, + "step": 1420 + }, + { + "epoch": 1.69, + "learning_rate": 0.00018613948370316415, + "loss": 1.8751, + "step": 1421 + }, + { + "epoch": 1.69, + "learning_rate": 0.00018612029814135014, + "loss": 1.64, + "step": 1422 + }, + { + "epoch": 1.69, + "learning_rate": 0.00018610110030066007, + "loss": 1.5066, + "step": 1423 + }, + { + "epoch": 1.69, + "learning_rate": 0.0001860818901838313, + "loss": 1.9817, + "step": 1424 + }, + { + "epoch": 1.7, + "learning_rate": 0.00018606266779360266, + "loss": 2.056, + "step": 1425 + }, + { + "epoch": 1.7, + "learning_rate": 0.0001860434331327149, + "loss": 1.6997, + "step": 1426 + }, + { + "epoch": 1.7, + "learning_rate": 0.00018602418620391044, + "loss": 1.5573, + "step": 1427 + }, + { + "epoch": 1.7, + "learning_rate": 0.0001860049270099335, + "loss": 1.8427, + "step": 1428 + }, + { + "epoch": 1.7, + "learning_rate": 0.00018598565555353, + "loss": 2.012, + "step": 1429 + }, + { + "epoch": 1.7, + "learning_rate": 0.00018596637183744763, + "loss": 1.7976, + "step": 1430 + }, + { + "epoch": 1.7, + "learning_rate": 0.00018594707586443585, + "loss": 1.4, + "step": 1431 + }, + { + "epoch": 1.7, + "learning_rate": 0.0001859277676372458, + "loss": 1.8717, + "step": 1432 + }, + { + "epoch": 1.7, + "learning_rate": 0.00018590844715863045, + "loss": 1.4311, + "step": 1433 + }, + { + "epoch": 1.71, + "learning_rate": 0.00018588911443134448, + "loss": 1.5903, + "step": 1434 + }, + { + "epoch": 1.71, + "learning_rate": 0.00018586976945814425, + "loss": 2.0898, + "step": 1435 + }, + { + "epoch": 1.71, + "learning_rate": 0.00018585041224178803, + "loss": 1.5302, + "step": 1436 + }, + { + "epoch": 1.71, + "learning_rate": 0.00018583104278503568, + "loss": 1.9582, + "step": 1437 + }, + { + "epoch": 1.71, + "learning_rate": 0.00018581166109064886, + "loss": 1.5264, + "step": 1438 + }, + { + "epoch": 1.71, + "learning_rate": 0.00018579226716139096, + "loss": 1.6551, + "step": 1439 + }, + { + "epoch": 1.71, + "learning_rate": 0.00018577286100002723, + "loss": 1.7774, + "step": 1440 + }, + { + "epoch": 1.71, + "learning_rate": 0.00018575344260932444, + "loss": 1.8316, + "step": 1441 + }, + { + "epoch": 1.72, + "learning_rate": 0.0001857340119920513, + "loss": 1.3916, + "step": 1442 + }, + { + "epoch": 1.72, + "learning_rate": 0.00018571456915097818, + "loss": 1.6728, + "step": 1443 + }, + { + "epoch": 1.72, + "learning_rate": 0.0001856951140888772, + "loss": 1.7247, + "step": 1444 + }, + { + "epoch": 1.72, + "learning_rate": 0.00018567564680852224, + "loss": 1.4539, + "step": 1445 + }, + { + "epoch": 1.72, + "learning_rate": 0.00018565616731268888, + "loss": 1.613, + "step": 1446 + }, + { + "epoch": 1.72, + "learning_rate": 0.0001856366756041545, + "loss": 1.757, + "step": 1447 + }, + { + "epoch": 1.72, + "learning_rate": 0.00018561717168569816, + "loss": 1.6903, + "step": 1448 + }, + { + "epoch": 1.72, + "learning_rate": 0.00018559765556010072, + "loss": 1.7322, + "step": 1449 + }, + { + "epoch": 1.73, + "learning_rate": 0.00018557812723014476, + "loss": 1.5627, + "step": 1450 + }, + { + "epoch": 1.73, + "learning_rate": 0.00018555858669861458, + "loss": 1.8751, + "step": 1451 + }, + { + "epoch": 1.73, + "learning_rate": 0.00018553903396829625, + "loss": 1.2721, + "step": 1452 + }, + { + "epoch": 1.73, + "learning_rate": 0.00018551946904197752, + "loss": 1.8167, + "step": 1453 + }, + { + "epoch": 1.73, + "learning_rate": 0.00018549989192244797, + "loss": 1.6602, + "step": 1454 + }, + { + "epoch": 1.73, + "learning_rate": 0.00018548030261249885, + "loss": 1.9053, + "step": 1455 + }, + { + "epoch": 1.73, + "learning_rate": 0.00018546070111492315, + "loss": 1.7721, + "step": 1456 + }, + { + "epoch": 1.73, + "learning_rate": 0.00018544108743251566, + "loss": 2.1421, + "step": 1457 + }, + { + "epoch": 1.73, + "learning_rate": 0.00018542146156807284, + "loss": 1.5076, + "step": 1458 + }, + { + "epoch": 1.74, + "learning_rate": 0.00018540182352439288, + "loss": 1.9039, + "step": 1459 + }, + { + "epoch": 1.74, + "learning_rate": 0.00018538217330427582, + "loss": 1.9777, + "step": 1460 + }, + { + "epoch": 1.74, + "learning_rate": 0.00018536251091052323, + "loss": 1.5702, + "step": 1461 + }, + { + "epoch": 1.74, + "learning_rate": 0.00018534283634593862, + "loss": 1.851, + "step": 1462 + }, + { + "epoch": 1.74, + "learning_rate": 0.00018532314961332717, + "loss": 1.5337, + "step": 1463 + }, + { + "epoch": 1.74, + "eval_loss": 2.068387508392334, + "eval_runtime": 283.4638, + "eval_samples_per_second": 0.727, + "eval_steps_per_second": 0.727, + "step": 1463 + }, + { + "epoch": 1.74, + "learning_rate": 0.00018530345071549574, + "loss": 1.7553, + "step": 1464 + }, + { + "epoch": 1.74, + "learning_rate": 0.00018528373965525296, + "loss": 1.4175, + "step": 1465 + }, + { + "epoch": 1.74, + "learning_rate": 0.00018526401643540922, + "loss": 1.7216, + "step": 1466 + }, + { + "epoch": 1.75, + "learning_rate": 0.00018524428105877664, + "loss": 1.6415, + "step": 1467 + }, + { + "epoch": 1.75, + "learning_rate": 0.00018522453352816896, + "loss": 1.7284, + "step": 1468 + }, + { + "epoch": 1.75, + "learning_rate": 0.00018520477384640187, + "loss": 1.8314, + "step": 1469 + }, + { + "epoch": 1.75, + "learning_rate": 0.00018518500201629258, + "loss": 1.8341, + "step": 1470 + }, + { + "epoch": 1.75, + "learning_rate": 0.00018516521804066015, + "loss": 1.4129, + "step": 1471 + }, + { + "epoch": 1.75, + "learning_rate": 0.00018514542192232537, + "loss": 1.4671, + "step": 1472 + }, + { + "epoch": 1.75, + "learning_rate": 0.00018512561366411067, + "loss": 1.6665, + "step": 1473 + }, + { + "epoch": 1.75, + "learning_rate": 0.00018510579326884034, + "loss": 1.5722, + "step": 1474 + }, + { + "epoch": 1.76, + "learning_rate": 0.0001850859607393403, + "loss": 1.9348, + "step": 1475 + }, + { + "epoch": 1.76, + "learning_rate": 0.0001850661160784383, + "loss": 1.5404, + "step": 1476 + }, + { + "epoch": 1.76, + "learning_rate": 0.00018504625928896363, + "loss": 1.4769, + "step": 1477 + }, + { + "epoch": 1.76, + "learning_rate": 0.00018502639037374757, + "loss": 1.4149, + "step": 1478 + }, + { + "epoch": 1.76, + "learning_rate": 0.0001850065093356229, + "loss": 1.958, + "step": 1479 + }, + { + "epoch": 1.76, + "learning_rate": 0.00018498661617742426, + "loss": 1.8319, + "step": 1480 + }, + { + "epoch": 1.76, + "learning_rate": 0.00018496671090198797, + "loss": 1.5948, + "step": 1481 + }, + { + "epoch": 1.76, + "learning_rate": 0.0001849467935121521, + "loss": 1.8469, + "step": 1482 + }, + { + "epoch": 1.76, + "learning_rate": 0.00018492686401075644, + "loss": 1.6798, + "step": 1483 + }, + { + "epoch": 1.77, + "learning_rate": 0.0001849069224006425, + "loss": 1.8197, + "step": 1484 + }, + { + "epoch": 1.77, + "learning_rate": 0.0001848869686846535, + "loss": 1.6613, + "step": 1485 + }, + { + "epoch": 1.77, + "learning_rate": 0.0001848670028656344, + "loss": 1.7322, + "step": 1486 + }, + { + "epoch": 1.77, + "learning_rate": 0.00018484702494643188, + "loss": 2.0493, + "step": 1487 + }, + { + "epoch": 1.77, + "learning_rate": 0.00018482703492989444, + "loss": 1.7182, + "step": 1488 + }, + { + "epoch": 1.77, + "learning_rate": 0.00018480703281887215, + "loss": 1.689, + "step": 1489 + }, + { + "epoch": 1.77, + "learning_rate": 0.00018478701861621686, + "loss": 1.9477, + "step": 1490 + }, + { + "epoch": 1.77, + "learning_rate": 0.0001847669923247822, + "loss": 1.8171, + "step": 1491 + }, + { + "epoch": 1.78, + "learning_rate": 0.00018474695394742345, + "loss": 1.7337, + "step": 1492 + }, + { + "epoch": 1.78, + "learning_rate": 0.0001847269034869977, + "loss": 1.6983, + "step": 1493 + }, + { + "epoch": 1.78, + "learning_rate": 0.0001847068409463636, + "loss": 1.6445, + "step": 1494 + }, + { + "epoch": 1.78, + "learning_rate": 0.0001846867663283818, + "loss": 1.9965, + "step": 1495 + }, + { + "epoch": 1.78, + "learning_rate": 0.0001846666796359143, + "loss": 1.6775, + "step": 1496 + }, + { + "epoch": 1.78, + "learning_rate": 0.0001846465808718252, + "loss": 1.8117, + "step": 1497 + }, + { + "epoch": 1.78, + "learning_rate": 0.00018462647003898006, + "loss": 1.8803, + "step": 1498 + }, + { + "epoch": 1.78, + "learning_rate": 0.00018460634714024624, + "loss": 1.3045, + "step": 1499 + }, + { + "epoch": 1.79, + "learning_rate": 0.00018458621217849286, + "loss": 1.7768, + "step": 1500 + }, + { + "epoch": 1.79, + "learning_rate": 0.00018456606515659073, + "loss": 2.0641, + "step": 1501 + }, + { + "epoch": 1.79, + "learning_rate": 0.0001845459060774123, + "loss": 1.3804, + "step": 1502 + }, + { + "epoch": 1.79, + "learning_rate": 0.00018452573494383192, + "loss": 1.6271, + "step": 1503 + }, + { + "epoch": 1.79, + "learning_rate": 0.00018450555175872547, + "loss": 1.8525, + "step": 1504 + }, + { + "epoch": 1.79, + "learning_rate": 0.00018448535652497073, + "loss": 1.5303, + "step": 1505 + }, + { + "epoch": 1.79, + "learning_rate": 0.000184465149245447, + "loss": 2.0368, + "step": 1506 + }, + { + "epoch": 1.79, + "learning_rate": 0.00018444492992303544, + "loss": 1.9951, + "step": 1507 + }, + { + "epoch": 1.79, + "learning_rate": 0.0001844246985606189, + "loss": 1.8715, + "step": 1508 + }, + { + "epoch": 1.8, + "learning_rate": 0.00018440445516108186, + "loss": 1.7373, + "step": 1509 + }, + { + "epoch": 1.8, + "learning_rate": 0.00018438419972731067, + "loss": 1.7667, + "step": 1510 + }, + { + "epoch": 1.8, + "learning_rate": 0.00018436393226219327, + "loss": 1.5134, + "step": 1511 + }, + { + "epoch": 1.8, + "learning_rate": 0.00018434365276861938, + "loss": 1.3891, + "step": 1512 + }, + { + "epoch": 1.8, + "learning_rate": 0.0001843233612494804, + "loss": 1.7066, + "step": 1513 + }, + { + "epoch": 1.8, + "learning_rate": 0.00018430305770766948, + "loss": 1.6366, + "step": 1514 + }, + { + "epoch": 1.8, + "learning_rate": 0.0001842827421460814, + "loss": 1.7838, + "step": 1515 + }, + { + "epoch": 1.8, + "learning_rate": 0.0001842624145676128, + "loss": 1.7884, + "step": 1516 + }, + { + "epoch": 1.81, + "learning_rate": 0.0001842420749751619, + "loss": 1.8428, + "step": 1517 + }, + { + "epoch": 1.81, + "learning_rate": 0.00018422172337162867, + "loss": 1.4987, + "step": 1518 + }, + { + "epoch": 1.81, + "learning_rate": 0.00018420135975991483, + "loss": 1.7576, + "step": 1519 + }, + { + "epoch": 1.81, + "learning_rate": 0.0001841809841429238, + "loss": 1.8522, + "step": 1520 + }, + { + "epoch": 1.81, + "learning_rate": 0.00018416059652356066, + "loss": 1.9308, + "step": 1521 + }, + { + "epoch": 1.81, + "learning_rate": 0.00018414019690473227, + "loss": 1.4658, + "step": 1522 + }, + { + "epoch": 1.81, + "learning_rate": 0.00018411978528934717, + "loss": 1.7072, + "step": 1523 + }, + { + "epoch": 1.81, + "learning_rate": 0.0001840993616803156, + "loss": 1.736, + "step": 1524 + }, + { + "epoch": 1.82, + "learning_rate": 0.0001840789260805495, + "loss": 1.7712, + "step": 1525 + }, + { + "epoch": 1.82, + "learning_rate": 0.0001840584784929626, + "loss": 1.2231, + "step": 1526 + }, + { + "epoch": 1.82, + "learning_rate": 0.00018403801892047023, + "loss": 1.8421, + "step": 1527 + }, + { + "epoch": 1.82, + "learning_rate": 0.00018401754736598947, + "loss": 1.2689, + "step": 1528 + }, + { + "epoch": 1.82, + "learning_rate": 0.00018399706383243918, + "loss": 1.8062, + "step": 1529 + }, + { + "epoch": 1.82, + "learning_rate": 0.0001839765683227398, + "loss": 1.6846, + "step": 1530 + }, + { + "epoch": 1.82, + "learning_rate": 0.0001839560608398136, + "loss": 1.8201, + "step": 1531 + }, + { + "epoch": 1.82, + "learning_rate": 0.00018393554138658441, + "loss": 1.6958, + "step": 1532 + }, + { + "epoch": 1.82, + "learning_rate": 0.00018391500996597796, + "loss": 1.8487, + "step": 1533 + }, + { + "epoch": 1.83, + "learning_rate": 0.0001838944665809215, + "loss": 1.9788, + "step": 1534 + }, + { + "epoch": 1.83, + "learning_rate": 0.00018387391123434412, + "loss": 1.6002, + "step": 1535 + }, + { + "epoch": 1.83, + "learning_rate": 0.00018385334392917658, + "loss": 1.3859, + "step": 1536 + }, + { + "epoch": 1.83, + "learning_rate": 0.00018383276466835127, + "loss": 2.0743, + "step": 1537 + }, + { + "epoch": 1.83, + "learning_rate": 0.00018381217345480235, + "loss": 1.8357, + "step": 1538 + }, + { + "epoch": 1.83, + "learning_rate": 0.00018379157029146573, + "loss": 1.7002, + "step": 1539 + }, + { + "epoch": 1.83, + "learning_rate": 0.00018377095518127897, + "loss": 1.3058, + "step": 1540 + }, + { + "epoch": 1.83, + "learning_rate": 0.00018375032812718124, + "loss": 1.8745, + "step": 1541 + }, + { + "epoch": 1.84, + "learning_rate": 0.00018372968913211364, + "loss": 1.7847, + "step": 1542 + }, + { + "epoch": 1.84, + "learning_rate": 0.00018370903819901874, + "loss": 1.8156, + "step": 1543 + }, + { + "epoch": 1.84, + "learning_rate": 0.00018368837533084095, + "loss": 2.0152, + "step": 1544 + }, + { + "epoch": 1.84, + "learning_rate": 0.00018366770053052634, + "loss": 1.5656, + "step": 1545 + }, + { + "epoch": 1.84, + "learning_rate": 0.00018364701380102266, + "loss": 1.5753, + "step": 1546 + }, + { + "epoch": 1.84, + "learning_rate": 0.00018362631514527947, + "loss": 1.3938, + "step": 1547 + }, + { + "epoch": 1.84, + "learning_rate": 0.00018360560456624788, + "loss": 1.9599, + "step": 1548 + }, + { + "epoch": 1.84, + "learning_rate": 0.00018358488206688075, + "loss": 1.8641, + "step": 1549 + }, + { + "epoch": 1.85, + "learning_rate": 0.00018356414765013267, + "loss": 1.8428, + "step": 1550 + }, + { + "epoch": 1.85, + "learning_rate": 0.00018354340131895998, + "loss": 1.6016, + "step": 1551 + }, + { + "epoch": 1.85, + "learning_rate": 0.00018352264307632056, + "loss": 1.5768, + "step": 1552 + }, + { + "epoch": 1.85, + "learning_rate": 0.00018350187292517415, + "loss": 1.5369, + "step": 1553 + }, + { + "epoch": 1.85, + "learning_rate": 0.0001834810908684821, + "loss": 1.9717, + "step": 1554 + }, + { + "epoch": 1.85, + "learning_rate": 0.00018346029690920746, + "loss": 1.943, + "step": 1555 + }, + { + "epoch": 1.85, + "learning_rate": 0.00018343949105031505, + "loss": 1.8166, + "step": 1556 + }, + { + "epoch": 1.85, + "learning_rate": 0.00018341867329477125, + "loss": 1.7149, + "step": 1557 + }, + { + "epoch": 1.85, + "learning_rate": 0.00018339784364554426, + "loss": 1.4657, + "step": 1558 + }, + { + "epoch": 1.86, + "learning_rate": 0.00018337700210560397, + "loss": 1.8693, + "step": 1559 + }, + { + "epoch": 1.86, + "learning_rate": 0.00018335614867792183, + "loss": 1.7656, + "step": 1560 + }, + { + "epoch": 1.86, + "learning_rate": 0.0001833352833654712, + "loss": 1.5123, + "step": 1561 + }, + { + "epoch": 1.86, + "learning_rate": 0.00018331440617122696, + "loss": 1.7884, + "step": 1562 + }, + { + "epoch": 1.86, + "learning_rate": 0.0001832935170981657, + "loss": 1.7309, + "step": 1563 + }, + { + "epoch": 1.86, + "learning_rate": 0.00018327261614926583, + "loss": 1.9628, + "step": 1564 + }, + { + "epoch": 1.86, + "learning_rate": 0.00018325170332750732, + "loss": 1.6409, + "step": 1565 + }, + { + "epoch": 1.86, + "learning_rate": 0.0001832307786358719, + "loss": 1.6093, + "step": 1566 + }, + { + "epoch": 1.87, + "learning_rate": 0.00018320984207734298, + "loss": 1.6111, + "step": 1567 + }, + { + "epoch": 1.87, + "learning_rate": 0.00018318889365490565, + "loss": 2.0085, + "step": 1568 + }, + { + "epoch": 1.87, + "learning_rate": 0.00018316793337154664, + "loss": 2.079, + "step": 1569 + }, + { + "epoch": 1.87, + "learning_rate": 0.00018314696123025454, + "loss": 1.5466, + "step": 1570 + }, + { + "epoch": 1.87, + "learning_rate": 0.00018312597723401942, + "loss": 2.0825, + "step": 1571 + }, + { + "epoch": 1.87, + "learning_rate": 0.0001831049813858332, + "loss": 1.9748, + "step": 1572 + }, + { + "epoch": 1.87, + "learning_rate": 0.00018308397368868945, + "loss": 1.6529, + "step": 1573 + }, + { + "epoch": 1.87, + "learning_rate": 0.00018306295414558335, + "loss": 1.7119, + "step": 1574 + }, + { + "epoch": 1.88, + "learning_rate": 0.00018304192275951184, + "loss": 1.8812, + "step": 1575 + }, + { + "epoch": 1.88, + "learning_rate": 0.00018302087953347352, + "loss": 1.8676, + "step": 1576 + }, + { + "epoch": 1.88, + "learning_rate": 0.00018299982447046877, + "loss": 1.879, + "step": 1577 + }, + { + "epoch": 1.88, + "learning_rate": 0.00018297875757349952, + "loss": 1.6282, + "step": 1578 + }, + { + "epoch": 1.88, + "learning_rate": 0.00018295767884556947, + "loss": 1.735, + "step": 1579 + }, + { + "epoch": 1.88, + "learning_rate": 0.00018293658828968397, + "loss": 1.5796, + "step": 1580 + }, + { + "epoch": 1.88, + "learning_rate": 0.00018291548590885007, + "loss": 1.8258, + "step": 1581 + }, + { + "epoch": 1.88, + "learning_rate": 0.00018289437170607658, + "loss": 1.7531, + "step": 1582 + }, + { + "epoch": 1.88, + "learning_rate": 0.00018287324568437381, + "loss": 1.6265, + "step": 1583 + }, + { + "epoch": 1.89, + "learning_rate": 0.00018285210784675394, + "loss": 1.7997, + "step": 1584 + }, + { + "epoch": 1.89, + "learning_rate": 0.00018283095819623078, + "loss": 1.955, + "step": 1585 + }, + { + "epoch": 1.89, + "learning_rate": 0.00018280979673581977, + "loss": 1.6542, + "step": 1586 + }, + { + "epoch": 1.89, + "learning_rate": 0.00018278862346853808, + "loss": 1.7634, + "step": 1587 + }, + { + "epoch": 1.89, + "learning_rate": 0.00018276743839740458, + "loss": 2.0077, + "step": 1588 + }, + { + "epoch": 1.89, + "learning_rate": 0.00018274624152543977, + "loss": 2.0254, + "step": 1589 + }, + { + "epoch": 1.89, + "learning_rate": 0.00018272503285566587, + "loss": 1.4464, + "step": 1590 + }, + { + "epoch": 1.89, + "learning_rate": 0.00018270381239110677, + "loss": 1.8643, + "step": 1591 + }, + { + "epoch": 1.9, + "learning_rate": 0.00018268258013478804, + "loss": 1.3278, + "step": 1592 + }, + { + "epoch": 1.9, + "learning_rate": 0.00018266133608973696, + "loss": 1.744, + "step": 1593 + }, + { + "epoch": 1.9, + "learning_rate": 0.00018264008025898248, + "loss": 1.5079, + "step": 1594 + }, + { + "epoch": 1.9, + "learning_rate": 0.00018261881264555516, + "loss": 1.9655, + "step": 1595 + }, + { + "epoch": 1.9, + "learning_rate": 0.0001825975332524873, + "loss": 2.0557, + "step": 1596 + }, + { + "epoch": 1.9, + "learning_rate": 0.000182576242082813, + "loss": 1.7174, + "step": 1597 + }, + { + "epoch": 1.9, + "learning_rate": 0.00018255493913956774, + "loss": 1.449, + "step": 1598 + }, + { + "epoch": 1.9, + "learning_rate": 0.00018253362442578896, + "loss": 1.9058, + "step": 1599 + }, + { + "epoch": 1.91, + "learning_rate": 0.00018251229794451567, + "loss": 1.3482, + "step": 1600 + }, + { + "epoch": 1.91, + "learning_rate": 0.00018249095969878853, + "loss": 1.7906, + "step": 1601 + }, + { + "epoch": 1.91, + "learning_rate": 0.00018246960969164994, + "loss": 1.6177, + "step": 1602 + }, + { + "epoch": 1.91, + "learning_rate": 0.00018244824792614393, + "loss": 1.5786, + "step": 1603 + }, + { + "epoch": 1.91, + "learning_rate": 0.00018242687440531618, + "loss": 1.6451, + "step": 1604 + }, + { + "epoch": 1.91, + "learning_rate": 0.00018240548913221416, + "loss": 1.3695, + "step": 1605 + }, + { + "epoch": 1.91, + "learning_rate": 0.0001823840921098869, + "loss": 1.6648, + "step": 1606 + }, + { + "epoch": 1.91, + "learning_rate": 0.00018236268334138515, + "loss": 2.1548, + "step": 1607 + }, + { + "epoch": 1.91, + "learning_rate": 0.00018234126282976133, + "loss": 1.6153, + "step": 1608 + }, + { + "epoch": 1.92, + "learning_rate": 0.0001823198305780696, + "loss": 1.741, + "step": 1609 + }, + { + "epoch": 1.92, + "learning_rate": 0.00018229838658936564, + "loss": 1.7827, + "step": 1610 + }, + { + "epoch": 1.92, + "learning_rate": 0.00018227693086670697, + "loss": 1.7343, + "step": 1611 + }, + { + "epoch": 1.92, + "learning_rate": 0.00018225546341315261, + "loss": 1.8149, + "step": 1612 + }, + { + "epoch": 1.92, + "learning_rate": 0.0001822339842317635, + "loss": 1.5497, + "step": 1613 + }, + { + "epoch": 1.92, + "learning_rate": 0.00018221249332560198, + "loss": 1.7659, + "step": 1614 + }, + { + "epoch": 1.92, + "learning_rate": 0.0001821909906977322, + "loss": 1.8992, + "step": 1615 + }, + { + "epoch": 1.92, + "learning_rate": 0.00018216947635122, + "loss": 1.8682, + "step": 1616 + }, + { + "epoch": 1.93, + "learning_rate": 0.00018214795028913288, + "loss": 1.9774, + "step": 1617 + }, + { + "epoch": 1.93, + "learning_rate": 0.0001821264125145399, + "loss": 1.9441, + "step": 1618 + }, + { + "epoch": 1.93, + "learning_rate": 0.00018210486303051195, + "loss": 2.0314, + "step": 1619 + }, + { + "epoch": 1.93, + "learning_rate": 0.0001820833018401215, + "loss": 1.8234, + "step": 1620 + }, + { + "epoch": 1.93, + "learning_rate": 0.00018206172894644272, + "loss": 1.9478, + "step": 1621 + }, + { + "epoch": 1.93, + "learning_rate": 0.00018204014435255135, + "loss": 1.7894, + "step": 1622 + }, + { + "epoch": 1.93, + "learning_rate": 0.000182018548061525, + "loss": 1.5469, + "step": 1623 + }, + { + "epoch": 1.93, + "learning_rate": 0.00018199694007644277, + "loss": 1.9419, + "step": 1624 + }, + { + "epoch": 1.94, + "learning_rate": 0.00018197532040038547, + "loss": 1.6686, + "step": 1625 + }, + { + "epoch": 1.94, + "learning_rate": 0.00018195368903643563, + "loss": 2.2525, + "step": 1626 + }, + { + "epoch": 1.94, + "learning_rate": 0.00018193204598767744, + "loss": 1.8076, + "step": 1627 + }, + { + "epoch": 1.94, + "learning_rate": 0.00018191039125719662, + "loss": 1.976, + "step": 1628 + }, + { + "epoch": 1.94, + "learning_rate": 0.00018188872484808076, + "loss": 1.6896, + "step": 1629 + }, + { + "epoch": 1.94, + "learning_rate": 0.00018186704676341898, + "loss": 1.6784, + "step": 1630 + }, + { + "epoch": 1.94, + "learning_rate": 0.00018184535700630213, + "loss": 1.9634, + "step": 1631 + }, + { + "epoch": 1.94, + "learning_rate": 0.00018182365557982264, + "loss": 1.7406, + "step": 1632 + }, + { + "epoch": 1.94, + "learning_rate": 0.00018180194248707473, + "loss": 1.7492, + "step": 1633 + }, + { + "epoch": 1.95, + "learning_rate": 0.00018178021773115414, + "loss": 1.7731, + "step": 1634 + }, + { + "epoch": 1.95, + "learning_rate": 0.00018175848131515837, + "loss": 1.6232, + "step": 1635 + }, + { + "epoch": 1.95, + "learning_rate": 0.0001817367332421866, + "loss": 1.7488, + "step": 1636 + }, + { + "epoch": 1.95, + "learning_rate": 0.0001817149735153396, + "loss": 1.3398, + "step": 1637 + }, + { + "epoch": 1.95, + "learning_rate": 0.00018169320213771983, + "loss": 1.4521, + "step": 1638 + }, + { + "epoch": 1.95, + "learning_rate": 0.00018167141911243145, + "loss": 1.6311, + "step": 1639 + }, + { + "epoch": 1.95, + "learning_rate": 0.00018164962444258014, + "loss": 1.8911, + "step": 1640 + }, + { + "epoch": 1.95, + "learning_rate": 0.00018162781813127346, + "loss": 1.9879, + "step": 1641 + }, + { + "epoch": 1.96, + "learning_rate": 0.0001816060001816205, + "loss": 1.5637, + "step": 1642 + }, + { + "epoch": 1.96, + "learning_rate": 0.00018158417059673196, + "loss": 1.7461, + "step": 1643 + }, + { + "epoch": 1.96, + "learning_rate": 0.0001815623293797203, + "loss": 1.6671, + "step": 1644 + }, + { + "epoch": 1.96, + "learning_rate": 0.0001815404765336996, + "loss": 1.2124, + "step": 1645 + }, + { + "epoch": 1.96, + "learning_rate": 0.0001815186120617856, + "loss": 1.6402, + "step": 1646 + }, + { + "epoch": 1.96, + "learning_rate": 0.0001814967359670957, + "loss": 1.8837, + "step": 1647 + }, + { + "epoch": 1.96, + "learning_rate": 0.00018147484825274893, + "loss": 1.8027, + "step": 1648 + }, + { + "epoch": 1.96, + "learning_rate": 0.00018145294892186605, + "loss": 1.7684, + "step": 1649 + }, + { + "epoch": 1.97, + "learning_rate": 0.0001814310379775694, + "loss": 1.8274, + "step": 1650 + }, + { + "epoch": 1.97, + "learning_rate": 0.000181409115422983, + "loss": 1.8292, + "step": 1651 + }, + { + "epoch": 1.97, + "learning_rate": 0.00018138718126123248, + "loss": 1.3492, + "step": 1652 + }, + { + "epoch": 1.97, + "learning_rate": 0.00018136523549544523, + "loss": 1.509, + "step": 1653 + }, + { + "epoch": 1.97, + "learning_rate": 0.00018134327812875024, + "loss": 1.7415, + "step": 1654 + }, + { + "epoch": 1.97, + "learning_rate": 0.00018132130916427816, + "loss": 1.5223, + "step": 1655 + }, + { + "epoch": 1.97, + "learning_rate": 0.00018129932860516126, + "loss": 1.9294, + "step": 1656 + }, + { + "epoch": 1.97, + "learning_rate": 0.00018127733645453348, + "loss": 2.0716, + "step": 1657 + }, + { + "epoch": 1.97, + "learning_rate": 0.00018125533271553043, + "loss": 1.57, + "step": 1658 + }, + { + "epoch": 1.98, + "learning_rate": 0.00018123331739128938, + "loss": 2.2132, + "step": 1659 + }, + { + "epoch": 1.98, + "learning_rate": 0.00018121129048494922, + "loss": 1.9006, + "step": 1660 + }, + { + "epoch": 1.98, + "learning_rate": 0.00018118925199965048, + "loss": 1.9319, + "step": 1661 + }, + { + "epoch": 1.98, + "learning_rate": 0.00018116720193853543, + "loss": 1.8103, + "step": 1662 + }, + { + "epoch": 1.98, + "learning_rate": 0.00018114514030474787, + "loss": 1.7028, + "step": 1663 + }, + { + "epoch": 1.98, + "learning_rate": 0.00018112306710143334, + "loss": 1.802, + "step": 1664 + }, + { + "epoch": 1.98, + "learning_rate": 0.000181100982331739, + "loss": 1.6835, + "step": 1665 + }, + { + "epoch": 1.98, + "learning_rate": 0.0001810788859988136, + "loss": 1.7223, + "step": 1666 + }, + { + "epoch": 1.99, + "learning_rate": 0.0001810567781058077, + "loss": 1.5829, + "step": 1667 + }, + { + "epoch": 1.99, + "learning_rate": 0.00018103465865587333, + "loss": 1.9863, + "step": 1668 + }, + { + "epoch": 1.99, + "learning_rate": 0.0001810125276521642, + "loss": 1.6398, + "step": 1669 + }, + { + "epoch": 1.99, + "learning_rate": 0.00018099038509783582, + "loss": 1.9261, + "step": 1670 + }, + { + "epoch": 1.99, + "learning_rate": 0.00018096823099604517, + "loss": 1.8882, + "step": 1671 + }, + { + "epoch": 1.99, + "learning_rate": 0.00018094606534995093, + "loss": 1.6716, + "step": 1672 + }, + { + "epoch": 1.99, + "eval_loss": 2.075261354446411, + "eval_runtime": 283.9438, + "eval_samples_per_second": 0.725, + "eval_steps_per_second": 0.725, + "step": 1672 + }, + { + "epoch": 1.99, + "learning_rate": 0.00018092388816271345, + "loss": 1.6688, + "step": 1673 + }, + { + "epoch": 1.99, + "learning_rate": 0.00018090169943749476, + "loss": 1.9127, + "step": 1674 + }, + { + "epoch": 2.0, + "learning_rate": 0.0001808794991774584, + "loss": 1.7214, + "step": 1675 + }, + { + "epoch": 2.0, + "learning_rate": 0.00018085728738576973, + "loss": 1.785, + "step": 1676 + }, + { + "epoch": 2.0, + "learning_rate": 0.00018083506406559561, + "loss": 1.5287, + "step": 1677 + }, + { + "epoch": 2.0, + "learning_rate": 0.00018081282922010464, + "loss": 1.9012, + "step": 1678 + }, + { + "epoch": 2.0, + "learning_rate": 0.00018079058285246698, + "loss": 1.3094, + "step": 1679 + }, + { + "epoch": 2.0, + "learning_rate": 0.0001807683249658545, + "loss": 1.818, + "step": 1680 + }, + { + "epoch": 2.0, + "learning_rate": 0.0001807460555634407, + "loss": 1.9389, + "step": 1681 + }, + { + "epoch": 2.0, + "learning_rate": 0.0001807237746484007, + "loss": 1.4334, + "step": 1682 + }, + { + "epoch": 2.0, + "learning_rate": 0.00018070148222391126, + "loss": 1.5422, + "step": 1683 + }, + { + "epoch": 2.01, + "learning_rate": 0.0001806791782931508, + "loss": 1.7899, + "step": 1684 + }, + { + "epoch": 2.01, + "learning_rate": 0.0001806568628592994, + "loss": 1.6106, + "step": 1685 + }, + { + "epoch": 2.01, + "learning_rate": 0.00018063453592553872, + "loss": 1.9807, + "step": 1686 + }, + { + "epoch": 2.01, + "learning_rate": 0.0001806121974950521, + "loss": 1.1762, + "step": 1687 + }, + { + "epoch": 2.01, + "learning_rate": 0.00018058984757102456, + "loss": 1.8338, + "step": 1688 + }, + { + "epoch": 2.01, + "learning_rate": 0.0001805674861566426, + "loss": 1.5556, + "step": 1689 + }, + { + "epoch": 2.0, + "learning_rate": 0.0001805451132550946, + "loss": 0.87, + "step": 1690 + }, + { + "epoch": 2.0, + "learning_rate": 0.00018052272886957038, + "loss": 1.0386, + "step": 1691 + }, + { + "epoch": 2.0, + "learning_rate": 0.0001805003330032615, + "loss": 0.8153, + "step": 1692 + }, + { + "epoch": 2.0, + "learning_rate": 0.00018047792565936102, + "loss": 1.1745, + "step": 1693 + }, + { + "epoch": 2.01, + "learning_rate": 0.00018045550684106388, + "loss": 1.1584, + "step": 1694 + }, + { + "epoch": 2.01, + "learning_rate": 0.00018043307655156644, + "loss": 1.0742, + "step": 1695 + }, + { + "epoch": 2.01, + "learning_rate": 0.00018041063479406675, + "loss": 1.0537, + "step": 1696 + }, + { + "epoch": 2.01, + "learning_rate": 0.0001803881815717646, + "loss": 1.0239, + "step": 1697 + }, + { + "epoch": 2.01, + "learning_rate": 0.0001803657168878612, + "loss": 0.9182, + "step": 1698 + }, + { + "epoch": 2.01, + "learning_rate": 0.00018034324074555965, + "loss": 1.1856, + "step": 1699 + }, + { + "epoch": 2.01, + "learning_rate": 0.00018032075314806448, + "loss": 1.3285, + "step": 1700 + }, + { + "epoch": 2.01, + "learning_rate": 0.00018029825409858198, + "loss": 1.2912, + "step": 1701 + }, + { + "epoch": 2.02, + "learning_rate": 0.00018027574360032, + "loss": 1.3666, + "step": 1702 + }, + { + "epoch": 2.02, + "learning_rate": 0.00018025322165648807, + "loss": 0.9621, + "step": 1703 + }, + { + "epoch": 2.02, + "learning_rate": 0.00018023068827029723, + "loss": 0.8484, + "step": 1704 + }, + { + "epoch": 2.02, + "learning_rate": 0.00018020814344496037, + "loss": 1.2236, + "step": 1705 + }, + { + "epoch": 2.02, + "learning_rate": 0.00018018558718369186, + "loss": 0.8155, + "step": 1706 + }, + { + "epoch": 2.02, + "learning_rate": 0.0001801630194897077, + "loss": 1.2047, + "step": 1707 + }, + { + "epoch": 2.02, + "learning_rate": 0.00018014044036622555, + "loss": 1.0269, + "step": 1708 + }, + { + "epoch": 2.02, + "learning_rate": 0.00018011784981646474, + "loss": 1.0536, + "step": 1709 + }, + { + "epoch": 2.03, + "learning_rate": 0.00018009524784364615, + "loss": 1.0516, + "step": 1710 + }, + { + "epoch": 2.03, + "learning_rate": 0.00018007263445099235, + "loss": 0.9087, + "step": 1711 + }, + { + "epoch": 2.03, + "learning_rate": 0.0001800500096417275, + "loss": 1.3057, + "step": 1712 + }, + { + "epoch": 2.03, + "learning_rate": 0.00018002737341907743, + "loss": 0.8791, + "step": 1713 + }, + { + "epoch": 2.03, + "learning_rate": 0.00018000472578626956, + "loss": 1.1667, + "step": 1714 + }, + { + "epoch": 2.03, + "learning_rate": 0.00017998206674653294, + "loss": 1.1026, + "step": 1715 + }, + { + "epoch": 2.03, + "learning_rate": 0.00017995939630309826, + "loss": 1.3228, + "step": 1716 + }, + { + "epoch": 2.03, + "learning_rate": 0.0001799367144591978, + "loss": 0.9173, + "step": 1717 + }, + { + "epoch": 2.03, + "learning_rate": 0.00017991402121806557, + "loss": 1.0067, + "step": 1718 + }, + { + "epoch": 2.04, + "learning_rate": 0.0001798913165829371, + "loss": 1.0256, + "step": 1719 + }, + { + "epoch": 2.04, + "learning_rate": 0.00017986860055704953, + "loss": 0.7645, + "step": 1720 + }, + { + "epoch": 2.04, + "learning_rate": 0.0001798458731436417, + "loss": 1.0567, + "step": 1721 + }, + { + "epoch": 2.04, + "learning_rate": 0.00017982313434595406, + "loss": 0.7465, + "step": 1722 + }, + { + "epoch": 2.04, + "learning_rate": 0.00017980038416722863, + "loss": 1.3268, + "step": 1723 + }, + { + "epoch": 2.04, + "learning_rate": 0.00017977762261070916, + "loss": 0.9917, + "step": 1724 + }, + { + "epoch": 2.04, + "learning_rate": 0.00017975484967964087, + "loss": 0.8592, + "step": 1725 + }, + { + "epoch": 2.04, + "learning_rate": 0.00017973206537727073, + "loss": 1.43, + "step": 1726 + }, + { + "epoch": 2.05, + "learning_rate": 0.00017970926970684725, + "loss": 1.3679, + "step": 1727 + }, + { + "epoch": 2.05, + "learning_rate": 0.00017968646267162063, + "loss": 1.2959, + "step": 1728 + }, + { + "epoch": 2.05, + "learning_rate": 0.00017966364427484267, + "loss": 1.0674, + "step": 1729 + }, + { + "epoch": 2.05, + "learning_rate": 0.00017964081451976672, + "loss": 1.1153, + "step": 1730 + }, + { + "epoch": 2.05, + "learning_rate": 0.00017961797340964783, + "loss": 1.0586, + "step": 1731 + }, + { + "epoch": 2.05, + "learning_rate": 0.00017959512094774266, + "loss": 1.2388, + "step": 1732 + }, + { + "epoch": 2.05, + "learning_rate": 0.00017957225713730949, + "loss": 1.257, + "step": 1733 + }, + { + "epoch": 2.05, + "learning_rate": 0.0001795493819816081, + "loss": 1.099, + "step": 1734 + }, + { + "epoch": 2.06, + "learning_rate": 0.0001795264954839001, + "loss": 0.9532, + "step": 1735 + }, + { + "epoch": 2.06, + "learning_rate": 0.00017950359764744859, + "loss": 1.2553, + "step": 1736 + }, + { + "epoch": 2.06, + "learning_rate": 0.00017948068847551825, + "loss": 0.9973, + "step": 1737 + }, + { + "epoch": 2.06, + "learning_rate": 0.00017945776797137543, + "loss": 1.0637, + "step": 1738 + }, + { + "epoch": 2.06, + "learning_rate": 0.00017943483613828815, + "loss": 1.1815, + "step": 1739 + }, + { + "epoch": 2.06, + "learning_rate": 0.00017941189297952597, + "loss": 0.8378, + "step": 1740 + }, + { + "epoch": 2.06, + "learning_rate": 0.00017938893849836002, + "loss": 0.9375, + "step": 1741 + }, + { + "epoch": 2.06, + "learning_rate": 0.00017936597269806322, + "loss": 0.9653, + "step": 1742 + }, + { + "epoch": 2.06, + "learning_rate": 0.0001793429955819099, + "loss": 1.221, + "step": 1743 + }, + { + "epoch": 2.07, + "learning_rate": 0.00017932000715317612, + "loss": 1.041, + "step": 1744 + }, + { + "epoch": 2.07, + "learning_rate": 0.00017929700741513955, + "loss": 1.0724, + "step": 1745 + }, + { + "epoch": 2.07, + "learning_rate": 0.00017927399637107945, + "loss": 1.1102, + "step": 1746 + }, + { + "epoch": 2.07, + "learning_rate": 0.00017925097402427667, + "loss": 0.8542, + "step": 1747 + }, + { + "epoch": 2.07, + "learning_rate": 0.0001792279403780137, + "loss": 1.2339, + "step": 1748 + }, + { + "epoch": 2.07, + "learning_rate": 0.00017920489543557465, + "loss": 0.8671, + "step": 1749 + }, + { + "epoch": 2.07, + "learning_rate": 0.0001791818392002452, + "loss": 0.9779, + "step": 1750 + }, + { + "epoch": 2.07, + "learning_rate": 0.0001791587716753127, + "loss": 1.1242, + "step": 1751 + }, + { + "epoch": 2.08, + "learning_rate": 0.00017913569286406603, + "loss": 0.9043, + "step": 1752 + }, + { + "epoch": 2.08, + "learning_rate": 0.0001791126027697958, + "loss": 0.7996, + "step": 1753 + }, + { + "epoch": 2.08, + "learning_rate": 0.00017908950139579406, + "loss": 0.8602, + "step": 1754 + }, + { + "epoch": 2.08, + "learning_rate": 0.00017906638874535462, + "loss": 1.0161, + "step": 1755 + }, + { + "epoch": 2.08, + "learning_rate": 0.00017904326482177284, + "loss": 0.8226, + "step": 1756 + }, + { + "epoch": 2.08, + "learning_rate": 0.00017902012962834566, + "loss": 1.3885, + "step": 1757 + }, + { + "epoch": 2.08, + "learning_rate": 0.0001789969831683717, + "loss": 1.2158, + "step": 1758 + }, + { + "epoch": 2.08, + "learning_rate": 0.00017897382544515108, + "loss": 1.3261, + "step": 1759 + }, + { + "epoch": 2.09, + "learning_rate": 0.00017895065646198567, + "loss": 1.2144, + "step": 1760 + }, + { + "epoch": 2.09, + "learning_rate": 0.00017892747622217875, + "loss": 0.9881, + "step": 1761 + }, + { + "epoch": 2.09, + "learning_rate": 0.0001789042847290354, + "loss": 1.0342, + "step": 1762 + }, + { + "epoch": 2.09, + "learning_rate": 0.00017888108198586217, + "loss": 0.7883, + "step": 1763 + }, + { + "epoch": 2.09, + "learning_rate": 0.00017885786799596732, + "loss": 0.9006, + "step": 1764 + }, + { + "epoch": 2.09, + "learning_rate": 0.00017883464276266064, + "loss": 1.3695, + "step": 1765 + }, + { + "epoch": 2.09, + "learning_rate": 0.0001788114062892535, + "loss": 1.0303, + "step": 1766 + }, + { + "epoch": 2.09, + "learning_rate": 0.00017878815857905897, + "loss": 1.3816, + "step": 1767 + }, + { + "epoch": 2.09, + "learning_rate": 0.0001787648996353916, + "loss": 0.8684, + "step": 1768 + }, + { + "epoch": 2.1, + "learning_rate": 0.00017874162946156772, + "loss": 1.1157, + "step": 1769 + }, + { + "epoch": 2.1, + "learning_rate": 0.00017871834806090501, + "loss": 1.0087, + "step": 1770 + }, + { + "epoch": 2.1, + "learning_rate": 0.000178695055436723, + "loss": 0.7173, + "step": 1771 + }, + { + "epoch": 2.1, + "learning_rate": 0.00017867175159234265, + "loss": 1.4784, + "step": 1772 + }, + { + "epoch": 2.1, + "learning_rate": 0.00017864843653108662, + "loss": 1.1401, + "step": 1773 + }, + { + "epoch": 2.1, + "learning_rate": 0.0001786251102562791, + "loss": 1.0952, + "step": 1774 + }, + { + "epoch": 2.1, + "learning_rate": 0.0001786017727712459, + "loss": 0.9443, + "step": 1775 + }, + { + "epoch": 2.1, + "learning_rate": 0.00017857842407931445, + "loss": 1.0682, + "step": 1776 + }, + { + "epoch": 2.11, + "learning_rate": 0.0001785550641838138, + "loss": 0.9402, + "step": 1777 + }, + { + "epoch": 2.11, + "learning_rate": 0.00017853169308807448, + "loss": 1.0576, + "step": 1778 + }, + { + "epoch": 2.11, + "learning_rate": 0.0001785083107954288, + "loss": 1.1425, + "step": 1779 + }, + { + "epoch": 2.11, + "learning_rate": 0.00017848491730921046, + "loss": 1.1402, + "step": 1780 + }, + { + "epoch": 2.11, + "learning_rate": 0.00017846151263275494, + "loss": 1.4482, + "step": 1781 + }, + { + "epoch": 2.11, + "learning_rate": 0.00017843809676939922, + "loss": 0.7765, + "step": 1782 + }, + { + "epoch": 2.11, + "learning_rate": 0.00017841466972248188, + "loss": 1.1478, + "step": 1783 + }, + { + "epoch": 2.11, + "learning_rate": 0.0001783912314953431, + "loss": 1.1876, + "step": 1784 + }, + { + "epoch": 2.12, + "learning_rate": 0.00017836778209132464, + "loss": 1.2036, + "step": 1785 + }, + { + "epoch": 2.12, + "learning_rate": 0.0001783443215137699, + "loss": 1.0297, + "step": 1786 + }, + { + "epoch": 2.12, + "learning_rate": 0.0001783208497660239, + "loss": 0.8186, + "step": 1787 + }, + { + "epoch": 2.12, + "learning_rate": 0.00017829736685143308, + "loss": 0.7258, + "step": 1788 + }, + { + "epoch": 2.12, + "learning_rate": 0.00017827387277334568, + "loss": 0.8072, + "step": 1789 + }, + { + "epoch": 2.12, + "learning_rate": 0.00017825036753511144, + "loss": 1.0474, + "step": 1790 + }, + { + "epoch": 2.12, + "learning_rate": 0.00017822685114008167, + "loss": 1.2141, + "step": 1791 + }, + { + "epoch": 2.12, + "learning_rate": 0.00017820332359160928, + "loss": 1.1443, + "step": 1792 + }, + { + "epoch": 2.12, + "learning_rate": 0.0001781797848930488, + "loss": 0.9864, + "step": 1793 + }, + { + "epoch": 2.13, + "learning_rate": 0.00017815623504775636, + "loss": 1.2998, + "step": 1794 + }, + { + "epoch": 2.13, + "learning_rate": 0.0001781326740590896, + "loss": 1.0672, + "step": 1795 + }, + { + "epoch": 2.13, + "learning_rate": 0.00017810910193040785, + "loss": 0.9152, + "step": 1796 + }, + { + "epoch": 2.13, + "learning_rate": 0.000178085518665072, + "loss": 1.2555, + "step": 1797 + }, + { + "epoch": 2.13, + "learning_rate": 0.00017806192426644444, + "loss": 1.2085, + "step": 1798 + }, + { + "epoch": 2.13, + "learning_rate": 0.00017803831873788926, + "loss": 1.6205, + "step": 1799 + }, + { + "epoch": 2.13, + "learning_rate": 0.0001780147020827721, + "loss": 1.3382, + "step": 1800 + }, + { + "epoch": 2.13, + "learning_rate": 0.00017799107430446016, + "loss": 1.3309, + "step": 1801 + }, + { + "epoch": 2.14, + "learning_rate": 0.00017796743540632223, + "loss": 1.2556, + "step": 1802 + }, + { + "epoch": 2.14, + "learning_rate": 0.00017794378539172877, + "loss": 0.829, + "step": 1803 + }, + { + "epoch": 2.14, + "learning_rate": 0.00017792012426405166, + "loss": 1.1711, + "step": 1804 + }, + { + "epoch": 2.14, + "learning_rate": 0.00017789645202666456, + "loss": 1.0128, + "step": 1805 + }, + { + "epoch": 2.14, + "learning_rate": 0.00017787276868294253, + "loss": 1.2074, + "step": 1806 + }, + { + "epoch": 2.14, + "learning_rate": 0.00017784907423626237, + "loss": 1.0996, + "step": 1807 + }, + { + "epoch": 2.14, + "learning_rate": 0.0001778253686900023, + "loss": 0.9608, + "step": 1808 + }, + { + "epoch": 2.14, + "learning_rate": 0.0001778016520475423, + "loss": 0.827, + "step": 1809 + }, + { + "epoch": 2.15, + "learning_rate": 0.00017777792431226383, + "loss": 1.2365, + "step": 1810 + }, + { + "epoch": 2.15, + "learning_rate": 0.00017775418548754993, + "loss": 1.0276, + "step": 1811 + }, + { + "epoch": 2.15, + "learning_rate": 0.0001777304355767852, + "loss": 0.8178, + "step": 1812 + }, + { + "epoch": 2.15, + "learning_rate": 0.0001777066745833559, + "loss": 1.1297, + "step": 1813 + }, + { + "epoch": 2.15, + "learning_rate": 0.00017768290251064987, + "loss": 1.1737, + "step": 1814 + }, + { + "epoch": 2.15, + "learning_rate": 0.00017765911936205644, + "loss": 1.1606, + "step": 1815 + }, + { + "epoch": 2.15, + "learning_rate": 0.00017763532514096658, + "loss": 1.2605, + "step": 1816 + }, + { + "epoch": 2.15, + "learning_rate": 0.0001776115198507728, + "loss": 1.2271, + "step": 1817 + }, + { + "epoch": 2.15, + "learning_rate": 0.00017758770349486923, + "loss": 0.9407, + "step": 1818 + }, + { + "epoch": 2.16, + "learning_rate": 0.0001775638760766516, + "loss": 1.0273, + "step": 1819 + }, + { + "epoch": 2.16, + "learning_rate": 0.00017754003759951715, + "loss": 1.0746, + "step": 1820 + }, + { + "epoch": 2.16, + "learning_rate": 0.00017751618806686472, + "loss": 1.0091, + "step": 1821 + }, + { + "epoch": 2.16, + "learning_rate": 0.00017749232748209473, + "loss": 0.997, + "step": 1822 + }, + { + "epoch": 2.16, + "learning_rate": 0.0001774684558486092, + "loss": 1.4814, + "step": 1823 + }, + { + "epoch": 2.16, + "learning_rate": 0.00017744457316981168, + "loss": 1.1407, + "step": 1824 + }, + { + "epoch": 2.16, + "learning_rate": 0.00017742067944910737, + "loss": 0.9824, + "step": 1825 + }, + { + "epoch": 2.16, + "learning_rate": 0.00017739677468990293, + "loss": 1.2603, + "step": 1826 + }, + { + "epoch": 2.17, + "learning_rate": 0.00017737285889560668, + "loss": 1.3721, + "step": 1827 + }, + { + "epoch": 2.17, + "learning_rate": 0.00017734893206962853, + "loss": 1.1186, + "step": 1828 + }, + { + "epoch": 2.17, + "learning_rate": 0.00017732499421537984, + "loss": 0.7693, + "step": 1829 + }, + { + "epoch": 2.17, + "learning_rate": 0.0001773010453362737, + "loss": 1.0449, + "step": 1830 + }, + { + "epoch": 2.17, + "learning_rate": 0.00017727708543572467, + "loss": 0.9331, + "step": 1831 + }, + { + "epoch": 2.17, + "learning_rate": 0.0001772531145171489, + "loss": 0.739, + "step": 1832 + }, + { + "epoch": 2.17, + "learning_rate": 0.00017722913258396417, + "loss": 0.9076, + "step": 1833 + }, + { + "epoch": 2.17, + "learning_rate": 0.00017720513963958968, + "loss": 1.3464, + "step": 1834 + }, + { + "epoch": 2.18, + "learning_rate": 0.00017718113568744638, + "loss": 0.8858, + "step": 1835 + }, + { + "epoch": 2.18, + "learning_rate": 0.00017715712073095672, + "loss": 1.3204, + "step": 1836 + }, + { + "epoch": 2.18, + "learning_rate": 0.00017713309477354467, + "loss": 1.0538, + "step": 1837 + }, + { + "epoch": 2.18, + "learning_rate": 0.0001771090578186358, + "loss": 1.44, + "step": 1838 + }, + { + "epoch": 2.18, + "learning_rate": 0.0001770850098696573, + "loss": 1.0167, + "step": 1839 + }, + { + "epoch": 2.18, + "learning_rate": 0.00017706095093003785, + "loss": 0.9724, + "step": 1840 + }, + { + "epoch": 2.18, + "learning_rate": 0.00017703688100320774, + "loss": 0.8055, + "step": 1841 + }, + { + "epoch": 2.18, + "learning_rate": 0.0001770128000925988, + "loss": 0.7363, + "step": 1842 + }, + { + "epoch": 2.18, + "learning_rate": 0.00017698870820164446, + "loss": 1.1329, + "step": 1843 + }, + { + "epoch": 2.19, + "learning_rate": 0.00017696460533377968, + "loss": 0.9487, + "step": 1844 + }, + { + "epoch": 2.19, + "learning_rate": 0.00017694049149244104, + "loss": 1.2571, + "step": 1845 + }, + { + "epoch": 2.19, + "learning_rate": 0.0001769163666810666, + "loss": 0.9148, + "step": 1846 + }, + { + "epoch": 2.19, + "learning_rate": 0.00017689223090309607, + "loss": 1.4676, + "step": 1847 + }, + { + "epoch": 2.19, + "learning_rate": 0.00017686808416197072, + "loss": 0.9395, + "step": 1848 + }, + { + "epoch": 2.19, + "learning_rate": 0.00017684392646113325, + "loss": 0.9632, + "step": 1849 + }, + { + "epoch": 2.19, + "learning_rate": 0.00017681975780402807, + "loss": 1.0037, + "step": 1850 + }, + { + "epoch": 2.19, + "learning_rate": 0.0001767955781941011, + "loss": 0.9557, + "step": 1851 + }, + { + "epoch": 2.2, + "learning_rate": 0.00017677138763479985, + "loss": 1.2799, + "step": 1852 + }, + { + "epoch": 2.2, + "learning_rate": 0.00017674718612957336, + "loss": 0.8461, + "step": 1853 + }, + { + "epoch": 2.2, + "learning_rate": 0.0001767229736818722, + "loss": 1.2762, + "step": 1854 + }, + { + "epoch": 2.2, + "learning_rate": 0.00017669875029514856, + "loss": 1.4801, + "step": 1855 + }, + { + "epoch": 2.2, + "learning_rate": 0.00017667451597285617, + "loss": 0.9849, + "step": 1856 + }, + { + "epoch": 2.2, + "learning_rate": 0.0001766502707184503, + "loss": 1.0875, + "step": 1857 + }, + { + "epoch": 2.2, + "learning_rate": 0.00017662601453538783, + "loss": 0.8346, + "step": 1858 + }, + { + "epoch": 2.2, + "learning_rate": 0.0001766017474271271, + "loss": 1.1933, + "step": 1859 + }, + { + "epoch": 2.21, + "learning_rate": 0.00017657746939712815, + "loss": 0.8789, + "step": 1860 + }, + { + "epoch": 2.21, + "learning_rate": 0.00017655318044885245, + "loss": 1.0091, + "step": 1861 + }, + { + "epoch": 2.21, + "learning_rate": 0.0001765288805857631, + "loss": 0.7371, + "step": 1862 + }, + { + "epoch": 2.21, + "learning_rate": 0.00017650456981132466, + "loss": 0.8131, + "step": 1863 + }, + { + "epoch": 2.21, + "learning_rate": 0.00017648024812900342, + "loss": 1.0795, + "step": 1864 + }, + { + "epoch": 2.21, + "learning_rate": 0.000176455915542267, + "loss": 0.9882, + "step": 1865 + }, + { + "epoch": 2.21, + "learning_rate": 0.00017643157205458483, + "loss": 1.212, + "step": 1866 + }, + { + "epoch": 2.21, + "learning_rate": 0.00017640721766942768, + "loss": 1.4755, + "step": 1867 + }, + { + "epoch": 2.21, + "learning_rate": 0.00017638285239026798, + "loss": 1.0391, + "step": 1868 + }, + { + "epoch": 2.22, + "learning_rate": 0.00017635847622057965, + "loss": 1.2568, + "step": 1869 + }, + { + "epoch": 2.22, + "learning_rate": 0.00017633408916383826, + "loss": 1.2138, + "step": 1870 + }, + { + "epoch": 2.22, + "learning_rate": 0.0001763096912235208, + "loss": 1.196, + "step": 1871 + }, + { + "epoch": 2.22, + "learning_rate": 0.00017628528240310596, + "loss": 1.1476, + "step": 1872 + }, + { + "epoch": 2.22, + "learning_rate": 0.00017626086270607384, + "loss": 1.1421, + "step": 1873 + }, + { + "epoch": 2.22, + "learning_rate": 0.00017623643213590619, + "loss": 1.0711, + "step": 1874 + }, + { + "epoch": 2.22, + "learning_rate": 0.0001762119906960863, + "loss": 0.8842, + "step": 1875 + }, + { + "epoch": 2.22, + "learning_rate": 0.00017618753839009893, + "loss": 0.798, + "step": 1876 + }, + { + "epoch": 2.23, + "learning_rate": 0.0001761630752214305, + "loss": 0.8591, + "step": 1877 + }, + { + "epoch": 2.23, + "learning_rate": 0.00017613860119356883, + "loss": 0.7646, + "step": 1878 + }, + { + "epoch": 2.23, + "learning_rate": 0.0001761141163100035, + "loss": 1.4113, + "step": 1879 + }, + { + "epoch": 2.23, + "learning_rate": 0.00017608962057422549, + "loss": 0.8605, + "step": 1880 + }, + { + "epoch": 2.23, + "learning_rate": 0.00017606511398972731, + "loss": 0.6179, + "step": 1881 + }, + { + "epoch": 2.23, + "eval_loss": 2.3971996307373047, + "eval_runtime": 283.7444, + "eval_samples_per_second": 0.726, + "eval_steps_per_second": 0.726, + "step": 1881 + }, + { + "epoch": 2.23, + "learning_rate": 0.0001760405965600031, + "loss": 0.8651, + "step": 1882 + }, + { + "epoch": 2.23, + "learning_rate": 0.0001760160682885485, + "loss": 1.3178, + "step": 1883 + }, + { + "epoch": 2.23, + "learning_rate": 0.00017599152917886071, + "loss": 0.9233, + "step": 1884 + }, + { + "epoch": 2.24, + "learning_rate": 0.00017596697923443847, + "loss": 0.9126, + "step": 1885 + }, + { + "epoch": 2.24, + "learning_rate": 0.0001759424184587821, + "loss": 0.9749, + "step": 1886 + }, + { + "epoch": 2.24, + "learning_rate": 0.00017591784685539334, + "loss": 1.1929, + "step": 1887 + }, + { + "epoch": 2.24, + "learning_rate": 0.00017589326442777565, + "loss": 1.2026, + "step": 1888 + }, + { + "epoch": 2.24, + "learning_rate": 0.00017586867117943392, + "loss": 1.1162, + "step": 1889 + }, + { + "epoch": 2.24, + "learning_rate": 0.00017584406711387463, + "loss": 0.9818, + "step": 1890 + }, + { + "epoch": 2.24, + "learning_rate": 0.0001758194522346057, + "loss": 0.9802, + "step": 1891 + }, + { + "epoch": 2.24, + "learning_rate": 0.0001757948265451368, + "loss": 0.8963, + "step": 1892 + }, + { + "epoch": 2.24, + "learning_rate": 0.00017577019004897897, + "loss": 1.0359, + "step": 1893 + }, + { + "epoch": 2.25, + "learning_rate": 0.00017574554274964478, + "loss": 1.0788, + "step": 1894 + }, + { + "epoch": 2.25, + "learning_rate": 0.00017572088465064848, + "loss": 0.9415, + "step": 1895 + }, + { + "epoch": 2.25, + "learning_rate": 0.0001756962157555057, + "loss": 1.0944, + "step": 1896 + }, + { + "epoch": 2.25, + "learning_rate": 0.00017567153606773373, + "loss": 1.357, + "step": 1897 + }, + { + "epoch": 2.25, + "learning_rate": 0.00017564684559085136, + "loss": 1.0108, + "step": 1898 + }, + { + "epoch": 2.25, + "learning_rate": 0.0001756221443283789, + "loss": 0.5337, + "step": 1899 + }, + { + "epoch": 2.25, + "learning_rate": 0.0001755974322838382, + "loss": 1.4234, + "step": 1900 + }, + { + "epoch": 2.25, + "learning_rate": 0.0001755727094607527, + "loss": 0.9083, + "step": 1901 + }, + { + "epoch": 2.26, + "learning_rate": 0.00017554797586264727, + "loss": 0.9199, + "step": 1902 + }, + { + "epoch": 2.26, + "learning_rate": 0.00017552323149304844, + "loss": 1.1885, + "step": 1903 + }, + { + "epoch": 2.26, + "learning_rate": 0.0001754984763554842, + "loss": 1.276, + "step": 1904 + }, + { + "epoch": 2.26, + "learning_rate": 0.0001754737104534841, + "loss": 0.8882, + "step": 1905 + }, + { + "epoch": 2.26, + "learning_rate": 0.00017544893379057918, + "loss": 0.993, + "step": 1906 + }, + { + "epoch": 2.26, + "learning_rate": 0.0001754241463703021, + "loss": 1.261, + "step": 1907 + }, + { + "epoch": 2.26, + "learning_rate": 0.00017539934819618696, + "loss": 0.9877, + "step": 1908 + }, + { + "epoch": 2.26, + "learning_rate": 0.00017537453927176947, + "loss": 0.9991, + "step": 1909 + }, + { + "epoch": 2.27, + "learning_rate": 0.00017534971960058685, + "loss": 1.2012, + "step": 1910 + }, + { + "epoch": 2.27, + "learning_rate": 0.0001753248891861778, + "loss": 0.864, + "step": 1911 + }, + { + "epoch": 2.27, + "learning_rate": 0.00017530004803208263, + "loss": 1.0382, + "step": 1912 + }, + { + "epoch": 2.27, + "learning_rate": 0.00017527519614184316, + "loss": 1.068, + "step": 1913 + }, + { + "epoch": 2.27, + "learning_rate": 0.00017525033351900268, + "loss": 0.8687, + "step": 1914 + }, + { + "epoch": 2.27, + "learning_rate": 0.0001752254601671061, + "loss": 1.1174, + "step": 1915 + }, + { + "epoch": 2.27, + "learning_rate": 0.0001752005760896998, + "loss": 1.269, + "step": 1916 + }, + { + "epoch": 2.27, + "learning_rate": 0.0001751756812903317, + "loss": 0.7387, + "step": 1917 + }, + { + "epoch": 2.27, + "learning_rate": 0.0001751507757725513, + "loss": 0.8484, + "step": 1918 + }, + { + "epoch": 2.28, + "learning_rate": 0.0001751258595399095, + "loss": 1.0092, + "step": 1919 + }, + { + "epoch": 2.28, + "learning_rate": 0.00017510093259595885, + "loss": 1.0145, + "step": 1920 + }, + { + "epoch": 2.28, + "learning_rate": 0.00017507599494425344, + "loss": 1.2969, + "step": 1921 + }, + { + "epoch": 2.28, + "learning_rate": 0.00017505104658834875, + "loss": 0.7925, + "step": 1922 + }, + { + "epoch": 2.28, + "learning_rate": 0.00017502608753180196, + "loss": 0.8974, + "step": 1923 + }, + { + "epoch": 2.28, + "learning_rate": 0.00017500111777817164, + "loss": 0.764, + "step": 1924 + }, + { + "epoch": 2.28, + "learning_rate": 0.0001749761373310179, + "loss": 1.1057, + "step": 1925 + }, + { + "epoch": 2.28, + "learning_rate": 0.00017495114619390246, + "loss": 0.8092, + "step": 1926 + }, + { + "epoch": 2.29, + "learning_rate": 0.00017492614437038845, + "loss": 0.9553, + "step": 1927 + }, + { + "epoch": 2.29, + "learning_rate": 0.00017490113186404067, + "loss": 1.0278, + "step": 1928 + }, + { + "epoch": 2.29, + "learning_rate": 0.0001748761086784253, + "loss": 1.2152, + "step": 1929 + }, + { + "epoch": 2.29, + "learning_rate": 0.00017485107481711012, + "loss": 1.5154, + "step": 1930 + }, + { + "epoch": 2.29, + "learning_rate": 0.0001748260302836644, + "loss": 1.1973, + "step": 1931 + }, + { + "epoch": 2.29, + "learning_rate": 0.00017480097508165896, + "loss": 0.9429, + "step": 1932 + }, + { + "epoch": 2.29, + "learning_rate": 0.0001747759092146661, + "loss": 1.5453, + "step": 1933 + }, + { + "epoch": 2.29, + "learning_rate": 0.0001747508326862597, + "loss": 1.1691, + "step": 1934 + }, + { + "epoch": 2.3, + "learning_rate": 0.00017472574550001508, + "loss": 1.2094, + "step": 1935 + }, + { + "epoch": 2.3, + "learning_rate": 0.00017470064765950918, + "loss": 1.0777, + "step": 1936 + }, + { + "epoch": 2.3, + "learning_rate": 0.00017467553916832035, + "loss": 1.0883, + "step": 1937 + }, + { + "epoch": 2.3, + "learning_rate": 0.00017465042003002857, + "loss": 0.9297, + "step": 1938 + }, + { + "epoch": 2.3, + "learning_rate": 0.00017462529024821522, + "loss": 0.7814, + "step": 1939 + }, + { + "epoch": 2.3, + "learning_rate": 0.00017460014982646334, + "loss": 1.3645, + "step": 1940 + }, + { + "epoch": 2.3, + "learning_rate": 0.0001745749987683573, + "loss": 1.0604, + "step": 1941 + }, + { + "epoch": 2.3, + "learning_rate": 0.00017454983707748317, + "loss": 0.9416, + "step": 1942 + }, + { + "epoch": 2.3, + "learning_rate": 0.00017452466475742845, + "loss": 1.4187, + "step": 1943 + }, + { + "epoch": 2.31, + "learning_rate": 0.00017449948181178215, + "loss": 1.1619, + "step": 1944 + }, + { + "epoch": 2.31, + "learning_rate": 0.00017447428824413482, + "loss": 1.1381, + "step": 1945 + }, + { + "epoch": 2.31, + "learning_rate": 0.00017444908405807845, + "loss": 1.2304, + "step": 1946 + }, + { + "epoch": 2.31, + "learning_rate": 0.0001744238692572067, + "loss": 1.2149, + "step": 1947 + }, + { + "epoch": 2.31, + "learning_rate": 0.00017439864384511463, + "loss": 0.8172, + "step": 1948 + }, + { + "epoch": 2.31, + "learning_rate": 0.00017437340782539877, + "loss": 1.0783, + "step": 1949 + }, + { + "epoch": 2.31, + "learning_rate": 0.00017434816120165728, + "loss": 1.0661, + "step": 1950 + }, + { + "epoch": 2.31, + "learning_rate": 0.00017432290397748982, + "loss": 1.1959, + "step": 1951 + }, + { + "epoch": 2.32, + "learning_rate": 0.0001742976361564974, + "loss": 1.0581, + "step": 1952 + }, + { + "epoch": 2.32, + "learning_rate": 0.00017427235774228274, + "loss": 0.8948, + "step": 1953 + }, + { + "epoch": 2.32, + "learning_rate": 0.00017424706873845, + "loss": 1.2565, + "step": 1954 + }, + { + "epoch": 2.32, + "learning_rate": 0.00017422176914860476, + "loss": 0.9237, + "step": 1955 + }, + { + "epoch": 2.32, + "learning_rate": 0.00017419645897635432, + "loss": 1.219, + "step": 1956 + }, + { + "epoch": 2.32, + "learning_rate": 0.00017417113822530727, + "loss": 1.4606, + "step": 1957 + }, + { + "epoch": 2.32, + "learning_rate": 0.00017414580689907377, + "loss": 0.714, + "step": 1958 + }, + { + "epoch": 2.32, + "learning_rate": 0.0001741204650012656, + "loss": 1.2223, + "step": 1959 + }, + { + "epoch": 2.33, + "learning_rate": 0.00017409511253549593, + "loss": 0.9828, + "step": 1960 + }, + { + "epoch": 2.33, + "learning_rate": 0.00017406974950537942, + "loss": 0.9954, + "step": 1961 + }, + { + "epoch": 2.33, + "learning_rate": 0.00017404437591453235, + "loss": 1.0307, + "step": 1962 + }, + { + "epoch": 2.33, + "learning_rate": 0.0001740189917665724, + "loss": 0.9331, + "step": 1963 + }, + { + "epoch": 2.33, + "learning_rate": 0.0001739935970651188, + "loss": 1.3517, + "step": 1964 + }, + { + "epoch": 2.33, + "learning_rate": 0.00017396819181379232, + "loss": 1.2024, + "step": 1965 + }, + { + "epoch": 2.33, + "learning_rate": 0.0001739427760162151, + "loss": 0.9696, + "step": 1966 + }, + { + "epoch": 2.33, + "learning_rate": 0.00017391734967601102, + "loss": 1.1559, + "step": 1967 + }, + { + "epoch": 2.33, + "learning_rate": 0.0001738919127968052, + "loss": 1.3104, + "step": 1968 + }, + { + "epoch": 2.34, + "learning_rate": 0.00017386646538222443, + "loss": 0.9073, + "step": 1969 + }, + { + "epoch": 2.34, + "learning_rate": 0.00017384100743589697, + "loss": 1.0539, + "step": 1970 + }, + { + "epoch": 2.34, + "learning_rate": 0.00017381553896145255, + "loss": 0.9873, + "step": 1971 + }, + { + "epoch": 2.34, + "learning_rate": 0.0001737900599625224, + "loss": 0.9466, + "step": 1972 + }, + { + "epoch": 2.34, + "learning_rate": 0.0001737645704427393, + "loss": 1.0639, + "step": 1973 + }, + { + "epoch": 2.34, + "learning_rate": 0.0001737390704057375, + "loss": 0.5843, + "step": 1974 + }, + { + "epoch": 2.34, + "learning_rate": 0.00017371355985515275, + "loss": 1.1318, + "step": 1975 + }, + { + "epoch": 2.34, + "learning_rate": 0.00017368803879462227, + "loss": 1.0116, + "step": 1976 + }, + { + "epoch": 2.35, + "learning_rate": 0.0001736625072277848, + "loss": 0.8845, + "step": 1977 + }, + { + "epoch": 2.35, + "learning_rate": 0.00017363696515828062, + "loss": 0.8081, + "step": 1978 + }, + { + "epoch": 2.35, + "learning_rate": 0.00017361141258975148, + "loss": 0.8795, + "step": 1979 + }, + { + "epoch": 2.35, + "learning_rate": 0.0001735858495258406, + "loss": 0.9725, + "step": 1980 + }, + { + "epoch": 2.35, + "learning_rate": 0.0001735602759701927, + "loss": 1.0164, + "step": 1981 + }, + { + "epoch": 2.35, + "learning_rate": 0.00017353469192645405, + "loss": 1.2937, + "step": 1982 + }, + { + "epoch": 2.35, + "learning_rate": 0.0001735090973982723, + "loss": 1.0842, + "step": 1983 + }, + { + "epoch": 2.35, + "learning_rate": 0.00017348349238929678, + "loss": 1.0043, + "step": 1984 + }, + { + "epoch": 2.36, + "learning_rate": 0.00017345787690317815, + "loss": 1.1302, + "step": 1985 + }, + { + "epoch": 2.36, + "learning_rate": 0.00017343225094356855, + "loss": 1.195, + "step": 1986 + }, + { + "epoch": 2.36, + "learning_rate": 0.00017340661451412183, + "loss": 1.1449, + "step": 1987 + }, + { + "epoch": 2.36, + "learning_rate": 0.00017338096761849309, + "loss": 1.2244, + "step": 1988 + }, + { + "epoch": 2.36, + "learning_rate": 0.00017335531026033897, + "loss": 0.9273, + "step": 1989 + }, + { + "epoch": 2.36, + "learning_rate": 0.00017332964244331776, + "loss": 1.0448, + "step": 1990 + }, + { + "epoch": 2.36, + "learning_rate": 0.00017330396417108908, + "loss": 1.0074, + "step": 1991 + }, + { + "epoch": 2.36, + "learning_rate": 0.00017327827544731412, + "loss": 0.9284, + "step": 1992 + }, + { + "epoch": 2.36, + "learning_rate": 0.0001732525762756555, + "loss": 1.0307, + "step": 1993 + }, + { + "epoch": 2.37, + "learning_rate": 0.00017322686665977737, + "loss": 1.1526, + "step": 1994 + }, + { + "epoch": 2.37, + "learning_rate": 0.00017320114660334535, + "loss": 0.819, + "step": 1995 + }, + { + "epoch": 2.37, + "learning_rate": 0.00017317541611002656, + "loss": 1.1029, + "step": 1996 + }, + { + "epoch": 2.37, + "learning_rate": 0.00017314967518348962, + "loss": 1.2471, + "step": 1997 + }, + { + "epoch": 2.37, + "learning_rate": 0.00017312392382740462, + "loss": 1.0156, + "step": 1998 + }, + { + "epoch": 2.37, + "learning_rate": 0.00017309816204544317, + "loss": 1.1843, + "step": 1999 + }, + { + "epoch": 2.37, + "learning_rate": 0.00017307238984127832, + "loss": 1.1588, + "step": 2000 + }, + { + "epoch": 2.37, + "learning_rate": 0.00017304660721858457, + "loss": 1.0157, + "step": 2001 + }, + { + "epoch": 2.38, + "learning_rate": 0.000173020814181038, + "loss": 1.0563, + "step": 2002 + }, + { + "epoch": 2.38, + "learning_rate": 0.00017299501073231622, + "loss": 1.1883, + "step": 2003 + }, + { + "epoch": 2.38, + "learning_rate": 0.00017296919687609808, + "loss": 0.9404, + "step": 2004 + }, + { + "epoch": 2.38, + "learning_rate": 0.00017294337261606417, + "loss": 1.2495, + "step": 2005 + }, + { + "epoch": 2.38, + "learning_rate": 0.00017291753795589643, + "loss": 1.0074, + "step": 2006 + }, + { + "epoch": 2.38, + "learning_rate": 0.00017289169289927837, + "loss": 1.1411, + "step": 2007 + }, + { + "epoch": 2.38, + "learning_rate": 0.00017286583744989488, + "loss": 0.9942, + "step": 2008 + }, + { + "epoch": 2.38, + "learning_rate": 0.00017283997161143239, + "loss": 0.952, + "step": 2009 + }, + { + "epoch": 2.39, + "learning_rate": 0.00017281409538757883, + "loss": 1.2966, + "step": 2010 + }, + { + "epoch": 2.39, + "learning_rate": 0.00017278820878202357, + "loss": 1.0836, + "step": 2011 + }, + { + "epoch": 2.39, + "learning_rate": 0.0001727623117984575, + "loss": 1.0984, + "step": 2012 + }, + { + "epoch": 2.39, + "learning_rate": 0.0001727364044405729, + "loss": 0.8822, + "step": 2013 + }, + { + "epoch": 2.39, + "learning_rate": 0.00017271048671206366, + "loss": 1.2014, + "step": 2014 + }, + { + "epoch": 2.39, + "learning_rate": 0.00017268455861662503, + "loss": 1.1779, + "step": 2015 + }, + { + "epoch": 2.39, + "learning_rate": 0.00017265862015795384, + "loss": 0.9966, + "step": 2016 + }, + { + "epoch": 2.39, + "learning_rate": 0.00017263267133974832, + "loss": 0.9536, + "step": 2017 + }, + { + "epoch": 2.39, + "learning_rate": 0.00017260671216570822, + "loss": 0.811, + "step": 2018 + }, + { + "epoch": 2.4, + "learning_rate": 0.00017258074263953472, + "loss": 0.8241, + "step": 2019 + }, + { + "epoch": 2.4, + "learning_rate": 0.00017255476276493056, + "loss": 1.1263, + "step": 2020 + }, + { + "epoch": 2.4, + "learning_rate": 0.00017252877254559986, + "loss": 0.995, + "step": 2021 + }, + { + "epoch": 2.4, + "learning_rate": 0.0001725027719852483, + "loss": 1.1481, + "step": 2022 + }, + { + "epoch": 2.4, + "learning_rate": 0.0001724767610875829, + "loss": 1.129, + "step": 2023 + }, + { + "epoch": 2.4, + "learning_rate": 0.00017245073985631238, + "loss": 0.5928, + "step": 2024 + }, + { + "epoch": 2.4, + "learning_rate": 0.00017242470829514672, + "loss": 0.8326, + "step": 2025 + }, + { + "epoch": 2.4, + "learning_rate": 0.00017239866640779745, + "loss": 1.1092, + "step": 2026 + }, + { + "epoch": 2.41, + "learning_rate": 0.00017237261419797756, + "loss": 1.5015, + "step": 2027 + }, + { + "epoch": 2.41, + "learning_rate": 0.0001723465516694016, + "loss": 0.9775, + "step": 2028 + }, + { + "epoch": 2.41, + "learning_rate": 0.00017232047882578548, + "loss": 0.9348, + "step": 2029 + }, + { + "epoch": 2.41, + "learning_rate": 0.0001722943956708466, + "loss": 0.6199, + "step": 2030 + }, + { + "epoch": 2.41, + "learning_rate": 0.00017226830220830384, + "loss": 1.1485, + "step": 2031 + }, + { + "epoch": 2.41, + "learning_rate": 0.00017224219844187764, + "loss": 1.1195, + "step": 2032 + }, + { + "epoch": 2.41, + "learning_rate": 0.00017221608437528973, + "loss": 1.0528, + "step": 2033 + }, + { + "epoch": 2.41, + "learning_rate": 0.00017218996001226345, + "loss": 1.1058, + "step": 2034 + }, + { + "epoch": 2.42, + "learning_rate": 0.00017216382535652355, + "loss": 1.1451, + "step": 2035 + }, + { + "epoch": 2.42, + "learning_rate": 0.0001721376804117963, + "loss": 1.2251, + "step": 2036 + }, + { + "epoch": 2.42, + "learning_rate": 0.00017211152518180936, + "loss": 1.0708, + "step": 2037 + }, + { + "epoch": 2.42, + "learning_rate": 0.00017208535967029188, + "loss": 1.0746, + "step": 2038 + }, + { + "epoch": 2.42, + "learning_rate": 0.00017205918388097456, + "loss": 1.3262, + "step": 2039 + }, + { + "epoch": 2.42, + "learning_rate": 0.00017203299781758943, + "loss": 0.7619, + "step": 2040 + }, + { + "epoch": 2.42, + "learning_rate": 0.00017200680148387007, + "loss": 1.01, + "step": 2041 + }, + { + "epoch": 2.42, + "learning_rate": 0.0001719805948835515, + "loss": 1.1651, + "step": 2042 + }, + { + "epoch": 2.42, + "learning_rate": 0.00017195437802037026, + "loss": 1.4671, + "step": 2043 + }, + { + "epoch": 2.43, + "learning_rate": 0.00017192815089806424, + "loss": 0.9857, + "step": 2044 + }, + { + "epoch": 2.43, + "learning_rate": 0.0001719019135203729, + "loss": 1.2613, + "step": 2045 + }, + { + "epoch": 2.43, + "learning_rate": 0.00017187566589103704, + "loss": 1.4386, + "step": 2046 + }, + { + "epoch": 2.43, + "learning_rate": 0.0001718494080137991, + "loss": 1.0965, + "step": 2047 + }, + { + "epoch": 2.43, + "learning_rate": 0.00017182313989240285, + "loss": 0.752, + "step": 2048 + }, + { + "epoch": 2.43, + "learning_rate": 0.00017179686153059352, + "loss": 0.9126, + "step": 2049 + }, + { + "epoch": 2.43, + "learning_rate": 0.00017177057293211784, + "loss": 1.5075, + "step": 2050 + }, + { + "epoch": 2.43, + "learning_rate": 0.000171744274100724, + "loss": 1.0407, + "step": 2051 + }, + { + "epoch": 2.44, + "learning_rate": 0.00017171796504016166, + "loss": 0.8263, + "step": 2052 + }, + { + "epoch": 2.44, + "learning_rate": 0.0001716916457541819, + "loss": 0.9453, + "step": 2053 + }, + { + "epoch": 2.44, + "learning_rate": 0.00017166531624653722, + "loss": 0.9777, + "step": 2054 + }, + { + "epoch": 2.44, + "learning_rate": 0.00017163897652098172, + "loss": 1.2129, + "step": 2055 + }, + { + "epoch": 2.44, + "learning_rate": 0.00017161262658127086, + "loss": 1.3642, + "step": 2056 + }, + { + "epoch": 2.44, + "learning_rate": 0.00017158626643116152, + "loss": 0.6798, + "step": 2057 + }, + { + "epoch": 2.44, + "learning_rate": 0.00017155989607441213, + "loss": 0.874, + "step": 2058 + }, + { + "epoch": 2.44, + "learning_rate": 0.00017153351551478247, + "loss": 1.0636, + "step": 2059 + }, + { + "epoch": 2.45, + "learning_rate": 0.0001715071247560339, + "loss": 1.0563, + "step": 2060 + }, + { + "epoch": 2.45, + "learning_rate": 0.0001714807238019291, + "loss": 1.1984, + "step": 2061 + }, + { + "epoch": 2.45, + "learning_rate": 0.00017145431265623234, + "loss": 0.9444, + "step": 2062 + }, + { + "epoch": 2.45, + "learning_rate": 0.0001714278913227092, + "loss": 0.7809, + "step": 2063 + }, + { + "epoch": 2.45, + "learning_rate": 0.00017140145980512684, + "loss": 1.649, + "step": 2064 + }, + { + "epoch": 2.45, + "learning_rate": 0.0001713750181072538, + "loss": 1.0956, + "step": 2065 + }, + { + "epoch": 2.45, + "learning_rate": 0.0001713485662328601, + "loss": 1.2845, + "step": 2066 + }, + { + "epoch": 2.45, + "learning_rate": 0.00017132210418571714, + "loss": 1.0484, + "step": 2067 + }, + { + "epoch": 2.45, + "learning_rate": 0.00017129563196959793, + "loss": 1.0291, + "step": 2068 + }, + { + "epoch": 2.46, + "learning_rate": 0.00017126914958827679, + "loss": 1.1226, + "step": 2069 + }, + { + "epoch": 2.46, + "learning_rate": 0.0001712426570455295, + "loss": 1.0119, + "step": 2070 + }, + { + "epoch": 2.46, + "learning_rate": 0.00017121615434513332, + "loss": 1.1663, + "step": 2071 + }, + { + "epoch": 2.46, + "learning_rate": 0.000171189641490867, + "loss": 1.1353, + "step": 2072 + }, + { + "epoch": 2.46, + "learning_rate": 0.00017116311848651064, + "loss": 1.0761, + "step": 2073 + }, + { + "epoch": 2.46, + "learning_rate": 0.00017113658533584594, + "loss": 1.1978, + "step": 2074 + }, + { + "epoch": 2.46, + "learning_rate": 0.00017111004204265582, + "loss": 1.3881, + "step": 2075 + }, + { + "epoch": 2.46, + "learning_rate": 0.00017108348861072484, + "loss": 1.3945, + "step": 2076 + }, + { + "epoch": 2.47, + "learning_rate": 0.00017105692504383897, + "loss": 1.3796, + "step": 2077 + }, + { + "epoch": 2.47, + "learning_rate": 0.00017103035134578555, + "loss": 1.1721, + "step": 2078 + }, + { + "epoch": 2.47, + "learning_rate": 0.0001710037675203534, + "loss": 1.0061, + "step": 2079 + }, + { + "epoch": 2.47, + "learning_rate": 0.00017097717357133284, + "loss": 1.2456, + "step": 2080 + }, + { + "epoch": 2.47, + "learning_rate": 0.00017095056950251555, + "loss": 0.788, + "step": 2081 + }, + { + "epoch": 2.47, + "learning_rate": 0.0001709239553176947, + "loss": 1.16, + "step": 2082 + }, + { + "epoch": 2.47, + "learning_rate": 0.0001708973310206649, + "loss": 1.0498, + "step": 2083 + }, + { + "epoch": 2.47, + "learning_rate": 0.00017087069661522218, + "loss": 0.8993, + "step": 2084 + }, + { + "epoch": 2.48, + "learning_rate": 0.00017084405210516406, + "loss": 1.2088, + "step": 2085 + }, + { + "epoch": 2.48, + "learning_rate": 0.0001708173974942894, + "loss": 1.0897, + "step": 2086 + }, + { + "epoch": 2.48, + "learning_rate": 0.00017079073278639863, + "loss": 1.2718, + "step": 2087 + }, + { + "epoch": 2.48, + "learning_rate": 0.00017076405798529355, + "loss": 1.2325, + "step": 2088 + }, + { + "epoch": 2.48, + "learning_rate": 0.00017073737309477736, + "loss": 1.0555, + "step": 2089 + }, + { + "epoch": 2.48, + "learning_rate": 0.00017071067811865476, + "loss": 1.1428, + "step": 2090 + }, + { + "epoch": 2.48, + "eval_loss": 2.3191208839416504, + "eval_runtime": 284.1375, + "eval_samples_per_second": 0.725, + "eval_steps_per_second": 0.725, + "step": 2090 + }, + { + "epoch": 2.48, + "learning_rate": 0.0001706839730607319, + "loss": 1.0908, + "step": 2091 + }, + { + "epoch": 2.48, + "learning_rate": 0.0001706572579248163, + "loss": 1.2092, + "step": 2092 + }, + { + "epoch": 2.48, + "learning_rate": 0.000170630532714717, + "loss": 1.1735, + "step": 2093 + }, + { + "epoch": 2.49, + "learning_rate": 0.0001706037974342444, + "loss": 1.2716, + "step": 2094 + }, + { + "epoch": 2.49, + "learning_rate": 0.00017057705208721035, + "loss": 1.0095, + "step": 2095 + }, + { + "epoch": 2.49, + "learning_rate": 0.0001705502966774282, + "loss": 1.3059, + "step": 2096 + }, + { + "epoch": 2.49, + "learning_rate": 0.00017052353120871266, + "loss": 0.8269, + "step": 2097 + }, + { + "epoch": 2.49, + "learning_rate": 0.0001704967556848799, + "loss": 1.0615, + "step": 2098 + }, + { + "epoch": 2.49, + "learning_rate": 0.00017046997010974755, + "loss": 1.2709, + "step": 2099 + }, + { + "epoch": 2.49, + "learning_rate": 0.00017044317448713461, + "loss": 1.1633, + "step": 2100 + }, + { + "epoch": 2.49, + "learning_rate": 0.00017041636882086158, + "loss": 0.9273, + "step": 2101 + }, + { + "epoch": 2.5, + "learning_rate": 0.00017038955311475038, + "loss": 1.3117, + "step": 2102 + }, + { + "epoch": 2.5, + "learning_rate": 0.0001703627273726243, + "loss": 0.8883, + "step": 2103 + }, + { + "epoch": 2.5, + "learning_rate": 0.00017033589159830815, + "loss": 1.1371, + "step": 2104 + }, + { + "epoch": 2.5, + "learning_rate": 0.00017030904579562806, + "loss": 1.5402, + "step": 2105 + }, + { + "epoch": 2.5, + "learning_rate": 0.00017028218996841172, + "loss": 0.9156, + "step": 2106 + }, + { + "epoch": 2.5, + "learning_rate": 0.00017025532412048817, + "loss": 1.0962, + "step": 2107 + }, + { + "epoch": 2.5, + "learning_rate": 0.0001702284482556879, + "loss": 0.9402, + "step": 2108 + }, + { + "epoch": 2.5, + "learning_rate": 0.00017020156237784279, + "loss": 0.8146, + "step": 2109 + }, + { + "epoch": 2.51, + "learning_rate": 0.0001701746664907862, + "loss": 1.1718, + "step": 2110 + }, + { + "epoch": 2.51, + "learning_rate": 0.00017014776059835288, + "loss": 1.0618, + "step": 2111 + }, + { + "epoch": 2.51, + "learning_rate": 0.00017012084470437907, + "loss": 1.4796, + "step": 2112 + }, + { + "epoch": 2.51, + "learning_rate": 0.00017009391881270237, + "loss": 0.8402, + "step": 2113 + }, + { + "epoch": 2.51, + "learning_rate": 0.00017006698292716178, + "loss": 1.1641, + "step": 2114 + }, + { + "epoch": 2.51, + "learning_rate": 0.0001700400370515978, + "loss": 1.241, + "step": 2115 + }, + { + "epoch": 2.51, + "learning_rate": 0.00017001308118985237, + "loss": 0.8683, + "step": 2116 + }, + { + "epoch": 2.51, + "learning_rate": 0.00016998611534576873, + "loss": 1.2697, + "step": 2117 + }, + { + "epoch": 2.52, + "learning_rate": 0.00016995913952319168, + "loss": 0.9233, + "step": 2118 + }, + { + "epoch": 2.52, + "learning_rate": 0.00016993215372596737, + "loss": 1.2472, + "step": 2119 + }, + { + "epoch": 2.52, + "learning_rate": 0.00016990515795794334, + "loss": 1.2541, + "step": 2120 + }, + { + "epoch": 2.52, + "learning_rate": 0.00016987815222296865, + "loss": 1.0016, + "step": 2121 + }, + { + "epoch": 2.52, + "learning_rate": 0.00016985113652489374, + "loss": 1.0678, + "step": 2122 + }, + { + "epoch": 2.52, + "learning_rate": 0.00016982411086757037, + "loss": 1.6066, + "step": 2123 + }, + { + "epoch": 2.52, + "learning_rate": 0.00016979707525485192, + "loss": 1.229, + "step": 2124 + }, + { + "epoch": 2.52, + "learning_rate": 0.00016977002969059302, + "loss": 0.752, + "step": 2125 + }, + { + "epoch": 2.52, + "learning_rate": 0.00016974297417864977, + "loss": 0.8752, + "step": 2126 + }, + { + "epoch": 2.53, + "learning_rate": 0.0001697159087228797, + "loss": 0.8896, + "step": 2127 + }, + { + "epoch": 2.53, + "learning_rate": 0.00016968883332714186, + "loss": 0.9657, + "step": 2128 + }, + { + "epoch": 2.53, + "learning_rate": 0.0001696617479952964, + "loss": 1.3657, + "step": 2129 + }, + { + "epoch": 2.53, + "learning_rate": 0.0001696346527312053, + "loss": 0.9876, + "step": 2130 + }, + { + "epoch": 2.53, + "learning_rate": 0.00016960754753873162, + "loss": 1.0165, + "step": 2131 + }, + { + "epoch": 2.53, + "learning_rate": 0.00016958043242174003, + "loss": 1.625, + "step": 2132 + }, + { + "epoch": 2.53, + "learning_rate": 0.00016955330738409655, + "loss": 1.5502, + "step": 2133 + }, + { + "epoch": 2.53, + "learning_rate": 0.00016952617242966864, + "loss": 1.0793, + "step": 2134 + }, + { + "epoch": 2.54, + "learning_rate": 0.00016949902756232507, + "loss": 1.4425, + "step": 2135 + }, + { + "epoch": 2.54, + "learning_rate": 0.00016947187278593622, + "loss": 1.3124, + "step": 2136 + }, + { + "epoch": 2.54, + "learning_rate": 0.00016944470810437365, + "loss": 0.927, + "step": 2137 + }, + { + "epoch": 2.54, + "learning_rate": 0.00016941753352151055, + "loss": 1.1911, + "step": 2138 + }, + { + "epoch": 2.54, + "learning_rate": 0.00016939034904122138, + "loss": 1.0768, + "step": 2139 + }, + { + "epoch": 2.54, + "learning_rate": 0.00016936315466738205, + "loss": 1.1277, + "step": 2140 + }, + { + "epoch": 2.54, + "learning_rate": 0.00016933595040386984, + "loss": 0.812, + "step": 2141 + }, + { + "epoch": 2.54, + "learning_rate": 0.0001693087362545636, + "loss": 0.8299, + "step": 2142 + }, + { + "epoch": 2.55, + "learning_rate": 0.00016928151222334338, + "loss": 1.1125, + "step": 2143 + }, + { + "epoch": 2.55, + "learning_rate": 0.00016925427831409077, + "loss": 1.1835, + "step": 2144 + }, + { + "epoch": 2.55, + "learning_rate": 0.00016922703453068873, + "loss": 1.2007, + "step": 2145 + }, + { + "epoch": 2.55, + "learning_rate": 0.00016919978087702163, + "loss": 0.8524, + "step": 2146 + }, + { + "epoch": 2.55, + "learning_rate": 0.00016917251735697523, + "loss": 0.9497, + "step": 2147 + }, + { + "epoch": 2.55, + "learning_rate": 0.00016914524397443673, + "loss": 1.1004, + "step": 2148 + }, + { + "epoch": 2.55, + "learning_rate": 0.00016911796073329466, + "loss": 0.8347, + "step": 2149 + }, + { + "epoch": 2.55, + "learning_rate": 0.00016909066763743912, + "loss": 0.9492, + "step": 2150 + }, + { + "epoch": 2.55, + "learning_rate": 0.00016906336469076148, + "loss": 1.1406, + "step": 2151 + }, + { + "epoch": 2.56, + "learning_rate": 0.00016903605189715447, + "loss": 1.0137, + "step": 2152 + }, + { + "epoch": 2.56, + "learning_rate": 0.0001690087292605124, + "loss": 1.0624, + "step": 2153 + }, + { + "epoch": 2.56, + "learning_rate": 0.00016898139678473076, + "loss": 1.1767, + "step": 2154 + }, + { + "epoch": 2.56, + "learning_rate": 0.0001689540544737067, + "loss": 1.4184, + "step": 2155 + }, + { + "epoch": 2.56, + "learning_rate": 0.00016892670233133856, + "loss": 0.957, + "step": 2156 + }, + { + "epoch": 2.56, + "learning_rate": 0.00016889934036152618, + "loss": 1.0399, + "step": 2157 + }, + { + "epoch": 2.56, + "learning_rate": 0.00016887196856817073, + "loss": 1.2009, + "step": 2158 + }, + { + "epoch": 2.56, + "learning_rate": 0.00016884458695517495, + "loss": 1.3977, + "step": 2159 + }, + { + "epoch": 2.57, + "learning_rate": 0.00016881719552644273, + "loss": 1.1328, + "step": 2160 + }, + { + "epoch": 2.57, + "learning_rate": 0.00016878979428587955, + "loss": 1.5007, + "step": 2161 + }, + { + "epoch": 2.57, + "learning_rate": 0.00016876238323739221, + "loss": 1.1248, + "step": 2162 + }, + { + "epoch": 2.57, + "learning_rate": 0.00016873496238488899, + "loss": 1.0358, + "step": 2163 + }, + { + "epoch": 2.57, + "learning_rate": 0.00016870753173227945, + "loss": 1.2961, + "step": 2164 + }, + { + "epoch": 2.57, + "learning_rate": 0.00016868009128347459, + "loss": 0.9435, + "step": 2165 + }, + { + "epoch": 2.57, + "learning_rate": 0.00016865264104238683, + "loss": 0.9642, + "step": 2166 + }, + { + "epoch": 2.57, + "learning_rate": 0.00016862518101293, + "loss": 1.0169, + "step": 2167 + }, + { + "epoch": 2.58, + "learning_rate": 0.00016859771119901929, + "loss": 1.0904, + "step": 2168 + }, + { + "epoch": 2.58, + "learning_rate": 0.0001685702316045713, + "loss": 1.3178, + "step": 2169 + }, + { + "epoch": 2.58, + "learning_rate": 0.00016854274223350397, + "loss": 1.1395, + "step": 2170 + }, + { + "epoch": 2.58, + "learning_rate": 0.00016851524308973678, + "loss": 1.1207, + "step": 2171 + }, + { + "epoch": 2.58, + "learning_rate": 0.00016848773417719044, + "loss": 1.3544, + "step": 2172 + }, + { + "epoch": 2.58, + "learning_rate": 0.00016846021549978715, + "loss": 1.3503, + "step": 2173 + }, + { + "epoch": 2.58, + "learning_rate": 0.00016843268706145042, + "loss": 1.4276, + "step": 2174 + }, + { + "epoch": 2.58, + "learning_rate": 0.00016840514886610529, + "loss": 0.9888, + "step": 2175 + }, + { + "epoch": 2.58, + "learning_rate": 0.00016837760091767802, + "loss": 1.0913, + "step": 2176 + }, + { + "epoch": 2.59, + "learning_rate": 0.0001683500432200964, + "loss": 1.4781, + "step": 2177 + }, + { + "epoch": 2.59, + "learning_rate": 0.00016832247577728955, + "loss": 1.2657, + "step": 2178 + }, + { + "epoch": 2.59, + "learning_rate": 0.000168294898593188, + "loss": 0.9206, + "step": 2179 + }, + { + "epoch": 2.59, + "learning_rate": 0.0001682673116717236, + "loss": 0.9218, + "step": 2180 + }, + { + "epoch": 2.59, + "learning_rate": 0.0001682397150168297, + "loss": 1.2719, + "step": 2181 + }, + { + "epoch": 2.59, + "learning_rate": 0.00016821210863244096, + "loss": 0.984, + "step": 2182 + }, + { + "epoch": 2.59, + "learning_rate": 0.00016818449252249345, + "loss": 1.4641, + "step": 2183 + }, + { + "epoch": 2.59, + "learning_rate": 0.0001681568666909246, + "loss": 1.2571, + "step": 2184 + }, + { + "epoch": 2.6, + "learning_rate": 0.00016812923114167328, + "loss": 1.2025, + "step": 2185 + }, + { + "epoch": 2.6, + "learning_rate": 0.00016810158587867973, + "loss": 0.9621, + "step": 2186 + }, + { + "epoch": 2.6, + "learning_rate": 0.00016807393090588553, + "loss": 1.0016, + "step": 2187 + }, + { + "epoch": 2.6, + "learning_rate": 0.00016804626622723368, + "loss": 1.031, + "step": 2188 + }, + { + "epoch": 2.6, + "learning_rate": 0.00016801859184666857, + "loss": 0.7573, + "step": 2189 + }, + { + "epoch": 2.6, + "learning_rate": 0.00016799090776813597, + "loss": 1.2694, + "step": 2190 + }, + { + "epoch": 2.6, + "learning_rate": 0.000167963213995583, + "loss": 1.196, + "step": 2191 + }, + { + "epoch": 2.6, + "learning_rate": 0.00016793551053295822, + "loss": 0.8754, + "step": 2192 + }, + { + "epoch": 2.61, + "learning_rate": 0.00016790779738421152, + "loss": 1.1743, + "step": 2193 + }, + { + "epoch": 2.61, + "learning_rate": 0.0001678800745532942, + "loss": 1.0921, + "step": 2194 + }, + { + "epoch": 2.61, + "learning_rate": 0.00016785234204415888, + "loss": 0.8778, + "step": 2195 + }, + { + "epoch": 2.61, + "learning_rate": 0.0001678245998607597, + "loss": 1.0528, + "step": 2196 + }, + { + "epoch": 2.61, + "learning_rate": 0.00016779684800705203, + "loss": 1.0255, + "step": 2197 + }, + { + "epoch": 2.61, + "learning_rate": 0.0001677690864869927, + "loss": 0.6344, + "step": 2198 + }, + { + "epoch": 2.61, + "learning_rate": 0.00016774131530453992, + "loss": 0.8691, + "step": 2199 + }, + { + "epoch": 2.61, + "learning_rate": 0.00016771353446365318, + "loss": 1.2061, + "step": 2200 + }, + { + "epoch": 2.61, + "learning_rate": 0.0001676857439682935, + "loss": 1.1759, + "step": 2201 + }, + { + "epoch": 2.62, + "learning_rate": 0.00016765794382242314, + "loss": 1.1118, + "step": 2202 + }, + { + "epoch": 2.62, + "learning_rate": 0.00016763013403000584, + "loss": 1.3005, + "step": 2203 + }, + { + "epoch": 2.62, + "learning_rate": 0.00016760231459500666, + "loss": 1.0415, + "step": 2204 + }, + { + "epoch": 2.62, + "learning_rate": 0.000167574485521392, + "loss": 0.824, + "step": 2205 + }, + { + "epoch": 2.62, + "learning_rate": 0.00016754664681312975, + "loss": 0.6682, + "step": 2206 + }, + { + "epoch": 2.62, + "learning_rate": 0.00016751879847418905, + "loss": 1.9204, + "step": 2207 + }, + { + "epoch": 2.62, + "learning_rate": 0.00016749094050854047, + "loss": 0.9931, + "step": 2208 + }, + { + "epoch": 2.62, + "learning_rate": 0.00016746307292015602, + "loss": 0.8898, + "step": 2209 + }, + { + "epoch": 2.63, + "learning_rate": 0.00016743519571300888, + "loss": 1.3337, + "step": 2210 + }, + { + "epoch": 2.63, + "learning_rate": 0.00016740730889107383, + "loss": 1.2947, + "step": 2211 + }, + { + "epoch": 2.63, + "learning_rate": 0.0001673794124583269, + "loss": 1.1882, + "step": 2212 + }, + { + "epoch": 2.63, + "learning_rate": 0.0001673515064187455, + "loss": 1.5408, + "step": 2213 + }, + { + "epoch": 2.63, + "learning_rate": 0.00016732359077630847, + "loss": 1.1273, + "step": 2214 + }, + { + "epoch": 2.63, + "learning_rate": 0.0001672956655349959, + "loss": 0.8954, + "step": 2215 + }, + { + "epoch": 2.63, + "learning_rate": 0.00016726773069878934, + "loss": 1.1747, + "step": 2216 + }, + { + "epoch": 2.63, + "learning_rate": 0.00016723978627167173, + "loss": 0.807, + "step": 2217 + }, + { + "epoch": 2.64, + "learning_rate": 0.00016721183225762727, + "loss": 1.2512, + "step": 2218 + }, + { + "epoch": 2.64, + "learning_rate": 0.00016718386866064166, + "loss": 1.0796, + "step": 2219 + }, + { + "epoch": 2.64, + "learning_rate": 0.00016715589548470185, + "loss": 1.0905, + "step": 2220 + }, + { + "epoch": 2.64, + "learning_rate": 0.00016712791273379622, + "loss": 1.3779, + "step": 2221 + }, + { + "epoch": 2.64, + "learning_rate": 0.00016709992041191452, + "loss": 1.2015, + "step": 2222 + }, + { + "epoch": 2.64, + "learning_rate": 0.00016707191852304782, + "loss": 0.8612, + "step": 2223 + }, + { + "epoch": 2.64, + "learning_rate": 0.0001670439070711886, + "loss": 1.1819, + "step": 2224 + }, + { + "epoch": 2.64, + "learning_rate": 0.00016701588606033064, + "loss": 1.2715, + "step": 2225 + }, + { + "epoch": 2.64, + "learning_rate": 0.0001669878554944692, + "loss": 1.3681, + "step": 2226 + }, + { + "epoch": 2.65, + "learning_rate": 0.00016695981537760072, + "loss": 1.1254, + "step": 2227 + }, + { + "epoch": 2.65, + "learning_rate": 0.0001669317657137232, + "loss": 0.9476, + "step": 2228 + }, + { + "epoch": 2.65, + "learning_rate": 0.0001669037065068359, + "loss": 1.235, + "step": 2229 + }, + { + "epoch": 2.65, + "learning_rate": 0.00016687563776093941, + "loss": 0.7356, + "step": 2230 + }, + { + "epoch": 2.65, + "learning_rate": 0.00016684755948003573, + "loss": 0.7901, + "step": 2231 + }, + { + "epoch": 2.65, + "learning_rate": 0.00016681947166812824, + "loss": 1.317, + "step": 2232 + }, + { + "epoch": 2.65, + "learning_rate": 0.00016679137432922163, + "loss": 0.8832, + "step": 2233 + }, + { + "epoch": 2.65, + "learning_rate": 0.00016676326746732195, + "loss": 1.2776, + "step": 2234 + }, + { + "epoch": 2.66, + "learning_rate": 0.00016673515108643665, + "loss": 1.0435, + "step": 2235 + }, + { + "epoch": 2.66, + "learning_rate": 0.0001667070251905745, + "loss": 1.0957, + "step": 2236 + }, + { + "epoch": 2.66, + "learning_rate": 0.00016667888978374567, + "loss": 1.0862, + "step": 2237 + }, + { + "epoch": 2.66, + "learning_rate": 0.00016665074486996165, + "loss": 1.1112, + "step": 2238 + }, + { + "epoch": 2.66, + "learning_rate": 0.0001666225904532352, + "loss": 1.3633, + "step": 2239 + }, + { + "epoch": 2.66, + "learning_rate": 0.00016659442653758064, + "loss": 1.444, + "step": 2240 + }, + { + "epoch": 2.66, + "learning_rate": 0.00016656625312701348, + "loss": 0.8248, + "step": 2241 + }, + { + "epoch": 2.66, + "learning_rate": 0.00016653807022555067, + "loss": 1.2522, + "step": 2242 + }, + { + "epoch": 2.67, + "learning_rate": 0.0001665098778372104, + "loss": 1.2107, + "step": 2243 + }, + { + "epoch": 2.67, + "learning_rate": 0.0001664816759660124, + "loss": 1.0813, + "step": 2244 + }, + { + "epoch": 2.67, + "learning_rate": 0.00016645346461597753, + "loss": 1.1136, + "step": 2245 + }, + { + "epoch": 2.67, + "learning_rate": 0.00016642524379112817, + "loss": 1.1003, + "step": 2246 + }, + { + "epoch": 2.67, + "learning_rate": 0.000166397013495488, + "loss": 1.0635, + "step": 2247 + }, + { + "epoch": 2.67, + "learning_rate": 0.00016636877373308204, + "loss": 1.0575, + "step": 2248 + }, + { + "epoch": 2.67, + "learning_rate": 0.00016634052450793663, + "loss": 0.7693, + "step": 2249 + }, + { + "epoch": 2.67, + "learning_rate": 0.00016631226582407952, + "loss": 1.5965, + "step": 2250 + }, + { + "epoch": 2.67, + "learning_rate": 0.0001662839976855398, + "loss": 1.0989, + "step": 2251 + }, + { + "epoch": 2.68, + "learning_rate": 0.00016625572009634787, + "loss": 0.9198, + "step": 2252 + }, + { + "epoch": 2.68, + "learning_rate": 0.00016622743306053548, + "loss": 1.0896, + "step": 2253 + }, + { + "epoch": 2.68, + "learning_rate": 0.00016619913658213578, + "loss": 1.015, + "step": 2254 + }, + { + "epoch": 2.68, + "learning_rate": 0.0001661708306651832, + "loss": 0.8572, + "step": 2255 + }, + { + "epoch": 2.68, + "learning_rate": 0.00016614251531371353, + "loss": 1.1508, + "step": 2256 + }, + { + "epoch": 2.68, + "learning_rate": 0.000166114190531764, + "loss": 1.1852, + "step": 2257 + }, + { + "epoch": 2.68, + "learning_rate": 0.00016608585632337306, + "loss": 0.932, + "step": 2258 + }, + { + "epoch": 2.68, + "learning_rate": 0.00016605751269258053, + "loss": 1.2542, + "step": 2259 + }, + { + "epoch": 2.69, + "learning_rate": 0.00016602915964342757, + "loss": 0.943, + "step": 2260 + }, + { + "epoch": 2.69, + "learning_rate": 0.00016600079717995678, + "loss": 1.2438, + "step": 2261 + }, + { + "epoch": 2.69, + "learning_rate": 0.00016597242530621203, + "loss": 0.9928, + "step": 2262 + }, + { + "epoch": 2.69, + "learning_rate": 0.00016594404402623845, + "loss": 0.9516, + "step": 2263 + }, + { + "epoch": 2.69, + "learning_rate": 0.00016591565334408265, + "loss": 1.1689, + "step": 2264 + }, + { + "epoch": 2.69, + "learning_rate": 0.0001658872532637925, + "loss": 1.3155, + "step": 2265 + }, + { + "epoch": 2.69, + "learning_rate": 0.00016585884378941725, + "loss": 1.1596, + "step": 2266 + }, + { + "epoch": 2.69, + "learning_rate": 0.00016583042492500746, + "loss": 0.9956, + "step": 2267 + }, + { + "epoch": 2.7, + "learning_rate": 0.00016580199667461508, + "loss": 0.9289, + "step": 2268 + }, + { + "epoch": 2.7, + "learning_rate": 0.00016577355904229325, + "loss": 1.3225, + "step": 2269 + }, + { + "epoch": 2.7, + "learning_rate": 0.00016574511203209667, + "loss": 1.0384, + "step": 2270 + }, + { + "epoch": 2.7, + "learning_rate": 0.0001657166556480812, + "loss": 0.697, + "step": 2271 + }, + { + "epoch": 2.7, + "learning_rate": 0.00016568818989430416, + "loss": 0.7702, + "step": 2272 + }, + { + "epoch": 2.7, + "learning_rate": 0.00016565971477482404, + "loss": 1.1041, + "step": 2273 + }, + { + "epoch": 2.7, + "learning_rate": 0.00016563123029370093, + "loss": 1.0462, + "step": 2274 + }, + { + "epoch": 2.7, + "learning_rate": 0.0001656027364549959, + "loss": 1.0797, + "step": 2275 + }, + { + "epoch": 2.7, + "learning_rate": 0.0001655742332627717, + "loss": 1.3301, + "step": 2276 + }, + { + "epoch": 2.71, + "learning_rate": 0.0001655457207210922, + "loss": 1.0467, + "step": 2277 + }, + { + "epoch": 2.71, + "learning_rate": 0.00016551719883402271, + "loss": 0.9432, + "step": 2278 + }, + { + "epoch": 2.71, + "learning_rate": 0.00016548866760562978, + "loss": 1.1808, + "step": 2279 + }, + { + "epoch": 2.71, + "learning_rate": 0.00016546012703998138, + "loss": 1.1094, + "step": 2280 + }, + { + "epoch": 2.71, + "learning_rate": 0.00016543157714114673, + "loss": 1.3914, + "step": 2281 + }, + { + "epoch": 2.71, + "learning_rate": 0.00016540301791319645, + "loss": 1.0402, + "step": 2282 + }, + { + "epoch": 2.71, + "learning_rate": 0.00016537444936020246, + "loss": 0.9815, + "step": 2283 + }, + { + "epoch": 2.71, + "learning_rate": 0.000165345871486238, + "loss": 0.9722, + "step": 2284 + }, + { + "epoch": 2.72, + "learning_rate": 0.00016531728429537766, + "loss": 0.919, + "step": 2285 + }, + { + "epoch": 2.72, + "learning_rate": 0.00016528868779169738, + "loss": 1.1242, + "step": 2286 + }, + { + "epoch": 2.72, + "learning_rate": 0.00016526008197927436, + "loss": 1.1794, + "step": 2287 + }, + { + "epoch": 2.72, + "learning_rate": 0.00016523146686218718, + "loss": 1.434, + "step": 2288 + }, + { + "epoch": 2.72, + "learning_rate": 0.00016520284244451574, + "loss": 0.8463, + "step": 2289 + }, + { + "epoch": 2.72, + "learning_rate": 0.00016517420873034123, + "loss": 1.1736, + "step": 2290 + }, + { + "epoch": 2.72, + "learning_rate": 0.0001651455657237462, + "loss": 1.0431, + "step": 2291 + }, + { + "epoch": 2.72, + "learning_rate": 0.00016511691342881453, + "loss": 1.2796, + "step": 2292 + }, + { + "epoch": 2.73, + "learning_rate": 0.0001650882518496314, + "loss": 1.0578, + "step": 2293 + }, + { + "epoch": 2.73, + "learning_rate": 0.00016505958099028334, + "loss": 1.3914, + "step": 2294 + }, + { + "epoch": 2.73, + "learning_rate": 0.0001650309008548582, + "loss": 1.0046, + "step": 2295 + }, + { + "epoch": 2.73, + "learning_rate": 0.0001650022114474451, + "loss": 1.0246, + "step": 2296 + }, + { + "epoch": 2.73, + "learning_rate": 0.00016497351277213458, + "loss": 1.2789, + "step": 2297 + }, + { + "epoch": 2.73, + "learning_rate": 0.00016494480483301836, + "loss": 1.0036, + "step": 2298 + }, + { + "epoch": 2.73, + "learning_rate": 0.00016491608763418968, + "loss": 0.886, + "step": 2299 + }, + { + "epoch": 2.73, + "eval_loss": 2.3017475605010986, + "eval_runtime": 283.8846, + "eval_samples_per_second": 0.726, + "eval_steps_per_second": 0.726, + "step": 2299 + }, + { + "epoch": 2.73, + "learning_rate": 0.0001648873611797429, + "loss": 1.3953, + "step": 2300 + }, + { + "epoch": 2.73, + "learning_rate": 0.0001648586254737738, + "loss": 0.6972, + "step": 2301 + }, + { + "epoch": 2.74, + "learning_rate": 0.00016482988052037947, + "loss": 1.2311, + "step": 2302 + }, + { + "epoch": 2.74, + "learning_rate": 0.00016480112632365833, + "loss": 1.327, + "step": 2303 + }, + { + "epoch": 2.74, + "learning_rate": 0.0001647723628877101, + "loss": 0.9534, + "step": 2304 + }, + { + "epoch": 2.74, + "learning_rate": 0.0001647435902166358, + "loss": 0.9164, + "step": 2305 + }, + { + "epoch": 2.74, + "learning_rate": 0.0001647148083145378, + "loss": 1.1038, + "step": 2306 + }, + { + "epoch": 2.74, + "learning_rate": 0.00016468601718551976, + "loss": 1.0444, + "step": 2307 + }, + { + "epoch": 2.74, + "learning_rate": 0.00016465721683368666, + "loss": 1.2635, + "step": 2308 + }, + { + "epoch": 2.74, + "learning_rate": 0.00016462840726314486, + "loss": 1.1647, + "step": 2309 + }, + { + "epoch": 2.75, + "learning_rate": 0.00016459958847800187, + "loss": 1.3617, + "step": 2310 + }, + { + "epoch": 2.75, + "learning_rate": 0.00016457076048236675, + "loss": 1.2355, + "step": 2311 + }, + { + "epoch": 2.75, + "learning_rate": 0.00016454192328034962, + "loss": 0.9989, + "step": 2312 + }, + { + "epoch": 2.75, + "learning_rate": 0.00016451307687606213, + "loss": 1.1218, + "step": 2313 + }, + { + "epoch": 2.75, + "learning_rate": 0.00016448422127361706, + "loss": 0.8967, + "step": 2314 + }, + { + "epoch": 2.75, + "learning_rate": 0.0001644553564771287, + "loss": 1.159, + "step": 2315 + }, + { + "epoch": 2.75, + "learning_rate": 0.0001644264824907124, + "loss": 1.5901, + "step": 2316 + }, + { + "epoch": 2.75, + "learning_rate": 0.0001643975993184851, + "loss": 0.979, + "step": 2317 + }, + { + "epoch": 2.76, + "learning_rate": 0.00016436870696456482, + "loss": 0.8561, + "step": 2318 + }, + { + "epoch": 2.76, + "learning_rate": 0.00016433980543307107, + "loss": 0.9485, + "step": 2319 + }, + { + "epoch": 2.76, + "learning_rate": 0.00016431089472812444, + "loss": 0.7736, + "step": 2320 + }, + { + "epoch": 2.76, + "learning_rate": 0.00016428197485384707, + "loss": 1.2546, + "step": 2321 + }, + { + "epoch": 2.76, + "learning_rate": 0.00016425304581436226, + "loss": 0.9534, + "step": 2322 + }, + { + "epoch": 2.76, + "learning_rate": 0.0001642241076137947, + "loss": 0.8182, + "step": 2323 + }, + { + "epoch": 2.76, + "learning_rate": 0.0001641951602562703, + "loss": 1.1107, + "step": 2324 + }, + { + "epoch": 2.76, + "learning_rate": 0.0001641662037459164, + "loss": 1.0628, + "step": 2325 + }, + { + "epoch": 2.76, + "learning_rate": 0.00016413723808686147, + "loss": 1.6261, + "step": 2326 + }, + { + "epoch": 2.77, + "learning_rate": 0.0001641082632832354, + "loss": 1.0286, + "step": 2327 + }, + { + "epoch": 2.77, + "learning_rate": 0.0001640792793391694, + "loss": 0.5732, + "step": 2328 + }, + { + "epoch": 2.77, + "learning_rate": 0.00016405028625879594, + "loss": 1.0932, + "step": 2329 + }, + { + "epoch": 2.77, + "learning_rate": 0.00016402128404624882, + "loss": 1.2585, + "step": 2330 + }, + { + "epoch": 2.77, + "learning_rate": 0.00016399227270566308, + "loss": 0.8788, + "step": 2331 + }, + { + "epoch": 2.77, + "learning_rate": 0.0001639632522411751, + "loss": 1.1397, + "step": 2332 + }, + { + "epoch": 2.77, + "learning_rate": 0.00016393422265692262, + "loss": 1.3517, + "step": 2333 + }, + { + "epoch": 2.77, + "learning_rate": 0.0001639051839570446, + "loss": 1.1346, + "step": 2334 + }, + { + "epoch": 2.78, + "learning_rate": 0.00016387613614568126, + "loss": 0.9594, + "step": 2335 + }, + { + "epoch": 2.78, + "learning_rate": 0.0001638470792269743, + "loss": 1.0674, + "step": 2336 + }, + { + "epoch": 2.78, + "learning_rate": 0.00016381801320506653, + "loss": 0.9123, + "step": 2337 + }, + { + "epoch": 2.78, + "learning_rate": 0.00016378893808410215, + "loss": 1.1909, + "step": 2338 + }, + { + "epoch": 2.78, + "learning_rate": 0.00016375985386822664, + "loss": 1.0474, + "step": 2339 + }, + { + "epoch": 2.78, + "learning_rate": 0.00016373076056158675, + "loss": 0.8844, + "step": 2340 + }, + { + "epoch": 2.78, + "learning_rate": 0.0001637016581683306, + "loss": 1.1606, + "step": 2341 + }, + { + "epoch": 2.78, + "learning_rate": 0.00016367254669260749, + "loss": 0.6206, + "step": 2342 + }, + { + "epoch": 2.79, + "learning_rate": 0.00016364342613856816, + "loss": 0.7225, + "step": 2343 + }, + { + "epoch": 2.79, + "learning_rate": 0.00016361429651036446, + "loss": 1.1782, + "step": 2344 + }, + { + "epoch": 2.79, + "learning_rate": 0.00016358515781214977, + "loss": 1.0911, + "step": 2345 + }, + { + "epoch": 2.79, + "learning_rate": 0.00016355601004807856, + "loss": 1.2727, + "step": 2346 + }, + { + "epoch": 2.79, + "learning_rate": 0.00016352685322230663, + "loss": 0.8294, + "step": 2347 + }, + { + "epoch": 2.79, + "learning_rate": 0.00016349768733899117, + "loss": 1.1661, + "step": 2348 + }, + { + "epoch": 2.79, + "learning_rate": 0.00016346851240229057, + "loss": 0.8267, + "step": 2349 + }, + { + "epoch": 2.79, + "learning_rate": 0.00016343932841636456, + "loss": 1.2873, + "step": 2350 + }, + { + "epoch": 2.79, + "learning_rate": 0.00016341013538537412, + "loss": 1.2459, + "step": 2351 + }, + { + "epoch": 2.8, + "learning_rate": 0.00016338093331348156, + "loss": 0.8939, + "step": 2352 + }, + { + "epoch": 2.8, + "learning_rate": 0.00016335172220485042, + "loss": 1.024, + "step": 2353 + }, + { + "epoch": 2.8, + "learning_rate": 0.0001633225020636456, + "loss": 0.9981, + "step": 2354 + }, + { + "epoch": 2.8, + "learning_rate": 0.00016329327289403325, + "loss": 1.331, + "step": 2355 + }, + { + "epoch": 2.8, + "learning_rate": 0.00016326403470018084, + "loss": 0.7446, + "step": 2356 + }, + { + "epoch": 2.8, + "learning_rate": 0.00016323478748625703, + "loss": 1.1931, + "step": 2357 + }, + { + "epoch": 2.8, + "learning_rate": 0.00016320553125643187, + "loss": 1.1287, + "step": 2358 + }, + { + "epoch": 2.8, + "learning_rate": 0.00016317626601487667, + "loss": 1.109, + "step": 2359 + }, + { + "epoch": 2.81, + "learning_rate": 0.00016314699176576402, + "loss": 0.9946, + "step": 2360 + }, + { + "epoch": 2.81, + "learning_rate": 0.00016311770851326778, + "loss": 0.8347, + "step": 2361 + }, + { + "epoch": 2.81, + "learning_rate": 0.00016308841626156307, + "loss": 0.9214, + "step": 2362 + }, + { + "epoch": 2.81, + "learning_rate": 0.0001630591150148264, + "loss": 0.5907, + "step": 2363 + }, + { + "epoch": 2.81, + "learning_rate": 0.00016302980477723539, + "loss": 1.2412, + "step": 2364 + }, + { + "epoch": 2.81, + "learning_rate": 0.00016300048555296915, + "loss": 1.2908, + "step": 2365 + }, + { + "epoch": 2.81, + "learning_rate": 0.00016297115734620788, + "loss": 1.2345, + "step": 2366 + }, + { + "epoch": 2.81, + "learning_rate": 0.00016294182016113315, + "loss": 1.0418, + "step": 2367 + }, + { + "epoch": 2.82, + "learning_rate": 0.00016291247400192785, + "loss": 1.1457, + "step": 2368 + }, + { + "epoch": 2.82, + "learning_rate": 0.00016288311887277608, + "loss": 1.2529, + "step": 2369 + }, + { + "epoch": 2.82, + "learning_rate": 0.00016285375477786322, + "loss": 1.0013, + "step": 2370 + }, + { + "epoch": 2.82, + "learning_rate": 0.00016282438172137597, + "loss": 0.943, + "step": 2371 + }, + { + "epoch": 2.82, + "learning_rate": 0.00016279499970750226, + "loss": 0.7009, + "step": 2372 + }, + { + "epoch": 2.82, + "learning_rate": 0.00016276560874043137, + "loss": 0.9408, + "step": 2373 + }, + { + "epoch": 2.82, + "learning_rate": 0.0001627362088243538, + "loss": 1.1788, + "step": 2374 + }, + { + "epoch": 2.82, + "learning_rate": 0.0001627067999634613, + "loss": 0.8106, + "step": 2375 + }, + { + "epoch": 2.82, + "learning_rate": 0.00016267738216194696, + "loss": 1.1695, + "step": 2376 + }, + { + "epoch": 2.83, + "learning_rate": 0.0001626479554240051, + "loss": 0.9209, + "step": 2377 + }, + { + "epoch": 2.83, + "learning_rate": 0.00016261851975383137, + "loss": 0.9911, + "step": 2378 + }, + { + "epoch": 2.83, + "learning_rate": 0.00016258907515562262, + "loss": 1.3819, + "step": 2379 + }, + { + "epoch": 2.83, + "learning_rate": 0.000162559621633577, + "loss": 0.8926, + "step": 2380 + }, + { + "epoch": 2.83, + "learning_rate": 0.000162530159191894, + "loss": 1.0896, + "step": 2381 + }, + { + "epoch": 2.83, + "learning_rate": 0.00016250068783477424, + "loss": 0.8403, + "step": 2382 + }, + { + "epoch": 2.83, + "learning_rate": 0.00016247120756641972, + "loss": 0.7976, + "step": 2383 + }, + { + "epoch": 2.83, + "learning_rate": 0.0001624417183910337, + "loss": 0.8881, + "step": 2384 + }, + { + "epoch": 2.84, + "learning_rate": 0.0001624122203128207, + "loss": 0.8302, + "step": 2385 + }, + { + "epoch": 2.84, + "learning_rate": 0.0001623827133359865, + "loss": 1.3312, + "step": 2386 + }, + { + "epoch": 2.84, + "learning_rate": 0.0001623531974647381, + "loss": 1.003, + "step": 2387 + }, + { + "epoch": 2.84, + "learning_rate": 0.0001623236727032839, + "loss": 0.9487, + "step": 2388 + }, + { + "epoch": 2.84, + "learning_rate": 0.00016229413905583342, + "loss": 1.2259, + "step": 2389 + }, + { + "epoch": 2.84, + "learning_rate": 0.00016226459652659753, + "loss": 0.9327, + "step": 2390 + }, + { + "epoch": 2.84, + "learning_rate": 0.00016223504511978838, + "loss": 0.7336, + "step": 2391 + }, + { + "epoch": 2.84, + "learning_rate": 0.00016220548483961934, + "loss": 1.0454, + "step": 2392 + }, + { + "epoch": 2.85, + "learning_rate": 0.00016217591569030505, + "loss": 1.3371, + "step": 2393 + }, + { + "epoch": 2.85, + "learning_rate": 0.00016214633767606143, + "loss": 1.0814, + "step": 2394 + }, + { + "epoch": 2.85, + "learning_rate": 0.00016211675080110566, + "loss": 1.2274, + "step": 2395 + }, + { + "epoch": 2.85, + "learning_rate": 0.0001620871550696562, + "loss": 0.9775, + "step": 2396 + }, + { + "epoch": 2.85, + "learning_rate": 0.00016205755048593273, + "loss": 1.0323, + "step": 2397 + }, + { + "epoch": 2.85, + "learning_rate": 0.00016202793705415622, + "loss": 1.5101, + "step": 2398 + }, + { + "epoch": 2.85, + "learning_rate": 0.00016199831477854893, + "loss": 0.8118, + "step": 2399 + }, + { + "epoch": 2.85, + "learning_rate": 0.0001619686836633343, + "loss": 1.0233, + "step": 2400 + }, + { + "epoch": 2.85, + "learning_rate": 0.00016193904371273715, + "loss": 0.9038, + "step": 2401 + }, + { + "epoch": 2.86, + "learning_rate": 0.00016190939493098344, + "loss": 0.875, + "step": 2402 + }, + { + "epoch": 2.86, + "learning_rate": 0.00016187973732230038, + "loss": 1.3274, + "step": 2403 + }, + { + "epoch": 2.86, + "learning_rate": 0.00016185007089091665, + "loss": 1.081, + "step": 2404 + }, + { + "epoch": 2.86, + "learning_rate": 0.00016182039564106192, + "loss": 1.0841, + "step": 2405 + }, + { + "epoch": 2.86, + "learning_rate": 0.00016179071157696728, + "loss": 1.3208, + "step": 2406 + }, + { + "epoch": 2.86, + "learning_rate": 0.000161761018702865, + "loss": 1.1854, + "step": 2407 + }, + { + "epoch": 2.86, + "learning_rate": 0.0001617313170229887, + "loss": 1.0651, + "step": 2408 + }, + { + "epoch": 2.86, + "learning_rate": 0.0001617016065415731, + "loss": 1.1398, + "step": 2409 + }, + { + "epoch": 2.87, + "learning_rate": 0.00016167188726285434, + "loss": 1.2778, + "step": 2410 + }, + { + "epoch": 2.87, + "learning_rate": 0.00016164215919106968, + "loss": 1.6758, + "step": 2411 + }, + { + "epoch": 2.87, + "learning_rate": 0.0001616124223304577, + "loss": 0.8341, + "step": 2412 + }, + { + "epoch": 2.87, + "learning_rate": 0.00016158267668525832, + "loss": 0.9513, + "step": 2413 + }, + { + "epoch": 2.87, + "learning_rate": 0.00016155292225971253, + "loss": 0.9617, + "step": 2414 + }, + { + "epoch": 2.87, + "learning_rate": 0.00016152315905806268, + "loss": 0.8664, + "step": 2415 + }, + { + "epoch": 2.87, + "learning_rate": 0.00016149338708455237, + "loss": 1.331, + "step": 2416 + }, + { + "epoch": 2.87, + "learning_rate": 0.00016146360634342643, + "loss": 1.4212, + "step": 2417 + }, + { + "epoch": 2.88, + "learning_rate": 0.00016143381683893094, + "loss": 1.2126, + "step": 2418 + }, + { + "epoch": 2.88, + "learning_rate": 0.00016140401857531322, + "loss": 0.934, + "step": 2419 + }, + { + "epoch": 2.88, + "learning_rate": 0.00016137421155682183, + "loss": 1.2417, + "step": 2420 + }, + { + "epoch": 2.88, + "learning_rate": 0.0001613443957877067, + "loss": 1.637, + "step": 2421 + }, + { + "epoch": 2.88, + "learning_rate": 0.00016131457127221881, + "loss": 1.1456, + "step": 2422 + }, + { + "epoch": 2.88, + "learning_rate": 0.00016128473801461053, + "loss": 0.9402, + "step": 2423 + }, + { + "epoch": 2.88, + "learning_rate": 0.0001612548960191354, + "loss": 1.3797, + "step": 2424 + }, + { + "epoch": 2.88, + "learning_rate": 0.0001612250452900483, + "loss": 0.8191, + "step": 2425 + }, + { + "epoch": 2.88, + "learning_rate": 0.0001611951858316052, + "loss": 1.1725, + "step": 2426 + }, + { + "epoch": 2.89, + "learning_rate": 0.00016116531764806346, + "loss": 1.5701, + "step": 2427 + }, + { + "epoch": 2.89, + "learning_rate": 0.00016113544074368164, + "loss": 1.0591, + "step": 2428 + }, + { + "epoch": 2.89, + "learning_rate": 0.00016110555512271953, + "loss": 1.03, + "step": 2429 + }, + { + "epoch": 2.89, + "learning_rate": 0.0001610756607894382, + "loss": 1.1829, + "step": 2430 + }, + { + "epoch": 2.89, + "learning_rate": 0.00016104575774809985, + "loss": 1.2222, + "step": 2431 + }, + { + "epoch": 2.89, + "learning_rate": 0.00016101584600296804, + "loss": 1.1537, + "step": 2432 + }, + { + "epoch": 2.89, + "learning_rate": 0.00016098592555830753, + "loss": 1.0973, + "step": 2433 + }, + { + "epoch": 2.89, + "learning_rate": 0.00016095599641838436, + "loss": 1.0793, + "step": 2434 + }, + { + "epoch": 2.9, + "learning_rate": 0.00016092605858746573, + "loss": 1.3484, + "step": 2435 + }, + { + "epoch": 2.9, + "learning_rate": 0.0001608961120698201, + "loss": 1.1689, + "step": 2436 + }, + { + "epoch": 2.9, + "learning_rate": 0.00016086615686971726, + "loss": 1.0864, + "step": 2437 + }, + { + "epoch": 2.9, + "learning_rate": 0.00016083619299142813, + "loss": 1.2451, + "step": 2438 + }, + { + "epoch": 2.9, + "learning_rate": 0.0001608062204392249, + "loss": 0.9593, + "step": 2439 + }, + { + "epoch": 2.9, + "learning_rate": 0.00016077623921738102, + "loss": 0.9816, + "step": 2440 + }, + { + "epoch": 2.9, + "learning_rate": 0.00016074624933017112, + "loss": 1.0845, + "step": 2441 + }, + { + "epoch": 2.9, + "learning_rate": 0.00016071625078187114, + "loss": 0.9875, + "step": 2442 + }, + { + "epoch": 2.91, + "learning_rate": 0.0001606862435767582, + "loss": 0.8758, + "step": 2443 + }, + { + "epoch": 2.91, + "learning_rate": 0.00016065622771911067, + "loss": 0.9499, + "step": 2444 + }, + { + "epoch": 2.91, + "learning_rate": 0.00016062620321320823, + "loss": 1.1133, + "step": 2445 + }, + { + "epoch": 2.91, + "learning_rate": 0.0001605961700633316, + "loss": 0.7228, + "step": 2446 + }, + { + "epoch": 2.91, + "learning_rate": 0.00016056612827376293, + "loss": 1.2297, + "step": 2447 + }, + { + "epoch": 2.91, + "learning_rate": 0.0001605360778487855, + "loss": 1.0251, + "step": 2448 + }, + { + "epoch": 2.91, + "learning_rate": 0.00016050601879268386, + "loss": 0.8097, + "step": 2449 + }, + { + "epoch": 2.91, + "learning_rate": 0.00016047595110974376, + "loss": 0.9872, + "step": 2450 + }, + { + "epoch": 2.91, + "learning_rate": 0.0001604458748042522, + "loss": 1.1119, + "step": 2451 + }, + { + "epoch": 2.92, + "learning_rate": 0.0001604157898804974, + "loss": 0.8256, + "step": 2452 + }, + { + "epoch": 2.92, + "learning_rate": 0.00016038569634276882, + "loss": 0.9036, + "step": 2453 + }, + { + "epoch": 2.92, + "learning_rate": 0.00016035559419535716, + "loss": 1.1173, + "step": 2454 + }, + { + "epoch": 2.92, + "learning_rate": 0.00016032548344255428, + "loss": 1.3173, + "step": 2455 + }, + { + "epoch": 2.92, + "learning_rate": 0.00016029536408865337, + "loss": 0.717, + "step": 2456 + }, + { + "epoch": 2.92, + "learning_rate": 0.00016026523613794878, + "loss": 0.9806, + "step": 2457 + }, + { + "epoch": 2.92, + "learning_rate": 0.00016023509959473605, + "loss": 1.1509, + "step": 2458 + }, + { + "epoch": 2.92, + "learning_rate": 0.00016020495446331207, + "loss": 1.0454, + "step": 2459 + }, + { + "epoch": 2.93, + "learning_rate": 0.0001601748007479748, + "loss": 1.183, + "step": 2460 + }, + { + "epoch": 2.93, + "learning_rate": 0.0001601446384530236, + "loss": 1.2611, + "step": 2461 + }, + { + "epoch": 2.93, + "learning_rate": 0.00016011446758275888, + "loss": 1.0377, + "step": 2462 + }, + { + "epoch": 2.93, + "learning_rate": 0.00016008428814148236, + "loss": 1.2111, + "step": 2463 + }, + { + "epoch": 2.93, + "learning_rate": 0.00016005410013349698, + "loss": 1.0952, + "step": 2464 + }, + { + "epoch": 2.93, + "learning_rate": 0.00016002390356310685, + "loss": 0.7589, + "step": 2465 + }, + { + "epoch": 2.93, + "learning_rate": 0.00015999369843461742, + "loss": 0.8543, + "step": 2466 + }, + { + "epoch": 2.93, + "learning_rate": 0.00015996348475233525, + "loss": 1.1509, + "step": 2467 + }, + { + "epoch": 2.94, + "learning_rate": 0.0001599332625205681, + "loss": 1.287, + "step": 2468 + }, + { + "epoch": 2.94, + "learning_rate": 0.00015990303174362512, + "loss": 1.0401, + "step": 2469 + }, + { + "epoch": 2.94, + "learning_rate": 0.0001598727924258164, + "loss": 1.0247, + "step": 2470 + }, + { + "epoch": 2.94, + "learning_rate": 0.00015984254457145354, + "loss": 1.1537, + "step": 2471 + }, + { + "epoch": 2.94, + "learning_rate": 0.00015981228818484917, + "loss": 0.9606, + "step": 2472 + }, + { + "epoch": 2.94, + "learning_rate": 0.0001597820232703172, + "loss": 0.8709, + "step": 2473 + }, + { + "epoch": 2.94, + "learning_rate": 0.00015975174983217275, + "loss": 1.2827, + "step": 2474 + }, + { + "epoch": 2.94, + "learning_rate": 0.00015972146787473213, + "loss": 0.8057, + "step": 2475 + }, + { + "epoch": 2.94, + "learning_rate": 0.0001596911774023129, + "loss": 1.0857, + "step": 2476 + }, + { + "epoch": 2.95, + "learning_rate": 0.00015966087841923386, + "loss": 1.1731, + "step": 2477 + }, + { + "epoch": 2.95, + "learning_rate": 0.0001596305709298149, + "loss": 0.8871, + "step": 2478 + }, + { + "epoch": 2.95, + "learning_rate": 0.00015960025493837727, + "loss": 1.0671, + "step": 2479 + }, + { + "epoch": 2.95, + "learning_rate": 0.00015956993044924334, + "loss": 1.3735, + "step": 2480 + }, + { + "epoch": 2.95, + "learning_rate": 0.00015953959746673675, + "loss": 1.4655, + "step": 2481 + }, + { + "epoch": 2.95, + "learning_rate": 0.00015950925599518228, + "loss": 1.3975, + "step": 2482 + }, + { + "epoch": 2.95, + "learning_rate": 0.00015947890603890602, + "loss": 0.9468, + "step": 2483 + }, + { + "epoch": 2.95, + "learning_rate": 0.0001594485476022352, + "loss": 0.9976, + "step": 2484 + }, + { + "epoch": 2.96, + "learning_rate": 0.00015941818068949818, + "loss": 0.6732, + "step": 2485 + }, + { + "epoch": 2.96, + "learning_rate": 0.00015938780530502474, + "loss": 0.9848, + "step": 2486 + }, + { + "epoch": 2.96, + "learning_rate": 0.00015935742145314568, + "loss": 1.2441, + "step": 2487 + }, + { + "epoch": 2.96, + "learning_rate": 0.0001593270291381931, + "loss": 0.9631, + "step": 2488 + }, + { + "epoch": 2.96, + "learning_rate": 0.00015929662836450029, + "loss": 0.8868, + "step": 2489 + }, + { + "epoch": 2.96, + "learning_rate": 0.0001592662191364017, + "loss": 0.9063, + "step": 2490 + }, + { + "epoch": 2.96, + "learning_rate": 0.00015923580145823303, + "loss": 0.6886, + "step": 2491 + }, + { + "epoch": 2.96, + "learning_rate": 0.0001592053753343312, + "loss": 1.0702, + "step": 2492 + }, + { + "epoch": 2.97, + "learning_rate": 0.0001591749407690343, + "loss": 1.3879, + "step": 2493 + }, + { + "epoch": 2.97, + "learning_rate": 0.00015914449776668167, + "loss": 1.1048, + "step": 2494 + }, + { + "epoch": 2.97, + "learning_rate": 0.0001591140463316137, + "loss": 0.9921, + "step": 2495 + }, + { + "epoch": 2.97, + "learning_rate": 0.00015908358646817225, + "loss": 1.3042, + "step": 2496 + }, + { + "epoch": 2.97, + "learning_rate": 0.00015905311818070015, + "loss": 0.8413, + "step": 2497 + }, + { + "epoch": 2.97, + "learning_rate": 0.00015902264147354153, + "loss": 1.5201, + "step": 2498 + }, + { + "epoch": 2.97, + "learning_rate": 0.0001589921563510417, + "loss": 1.0727, + "step": 2499 + }, + { + "epoch": 2.97, + "learning_rate": 0.0001589616628175472, + "loss": 1.0439, + "step": 2500 + }, + { + "epoch": 2.97, + "learning_rate": 0.0001589311608774057, + "loss": 1.2308, + "step": 2501 + }, + { + "epoch": 2.98, + "learning_rate": 0.00015890065053496613, + "loss": 1.1155, + "step": 2502 + }, + { + "epoch": 2.98, + "learning_rate": 0.00015887013179457862, + "loss": 1.3345, + "step": 2503 + }, + { + "epoch": 2.98, + "learning_rate": 0.00015883960466059444, + "loss": 0.9551, + "step": 2504 + }, + { + "epoch": 2.98, + "learning_rate": 0.0001588090691373661, + "loss": 1.0713, + "step": 2505 + }, + { + "epoch": 2.98, + "learning_rate": 0.00015877852522924732, + "loss": 1.299, + "step": 2506 + }, + { + "epoch": 2.98, + "learning_rate": 0.000158747972940593, + "loss": 0.8535, + "step": 2507 + }, + { + "epoch": 2.98, + "learning_rate": 0.0001587174122757592, + "loss": 0.9924, + "step": 2508 + }, + { + "epoch": 2.98, + "eval_loss": 2.328662395477295, + "eval_runtime": 283.7765, + "eval_samples_per_second": 0.726, + "eval_steps_per_second": 0.726, + "step": 2508 + }, + { + "epoch": 2.98, + "learning_rate": 0.0001586868432391032, + "loss": 1.0512, + "step": 2509 + }, + { + "epoch": 2.99, + "learning_rate": 0.00015865626583498355, + "loss": 1.2775, + "step": 2510 + }, + { + "epoch": 2.99, + "learning_rate": 0.00015862568006775983, + "loss": 0.7054, + "step": 2511 + }, + { + "epoch": 2.99, + "learning_rate": 0.00015859508594179294, + "loss": 0.8524, + "step": 2512 + }, + { + "epoch": 2.99, + "learning_rate": 0.00015856448346144496, + "loss": 0.9871, + "step": 2513 + }, + { + "epoch": 2.99, + "learning_rate": 0.00015853387263107909, + "loss": 0.8642, + "step": 2514 + }, + { + "epoch": 2.99, + "learning_rate": 0.00015850325345505975, + "loss": 1.1789, + "step": 2515 + }, + { + "epoch": 2.99, + "learning_rate": 0.00015847262593775266, + "loss": 1.2765, + "step": 2516 + }, + { + "epoch": 2.99, + "learning_rate": 0.00015844199008352458, + "loss": 0.6272, + "step": 2517 + }, + { + "epoch": 3.0, + "learning_rate": 0.00015841134589674352, + "loss": 1.3037, + "step": 2518 + }, + { + "epoch": 3.0, + "learning_rate": 0.00015838069338177863, + "loss": 1.054, + "step": 2519 + }, + { + "epoch": 3.0, + "learning_rate": 0.00015835003254300039, + "loss": 1.1942, + "step": 2520 + }, + { + "epoch": 3.0, + "learning_rate": 0.00015831936338478025, + "loss": 0.8866, + "step": 2521 + }, + { + "epoch": 3.0, + "learning_rate": 0.00015828868591149104, + "loss": 1.1444, + "step": 2522 + }, + { + "epoch": 3.0, + "learning_rate": 0.00015825800012750666, + "loss": 0.8597, + "step": 2523 + }, + { + "epoch": 3.0, + "learning_rate": 0.0001582273060372023, + "loss": 0.7731, + "step": 2524 + }, + { + "epoch": 3.0, + "learning_rate": 0.00015819660364495416, + "loss": 1.1953, + "step": 2525 + }, + { + "epoch": 3.0, + "learning_rate": 0.0001581658929551398, + "loss": 1.3946, + "step": 2526 + }, + { + "epoch": 3.01, + "learning_rate": 0.00015813517397213791, + "loss": 1.0173, + "step": 2527 + }, + { + "epoch": 3.01, + "learning_rate": 0.00015810444670032831, + "loss": 1.1762, + "step": 2528 + }, + { + "epoch": 3.01, + "learning_rate": 0.00015807371114409202, + "loss": 0.7283, + "step": 2529 + }, + { + "epoch": 3.01, + "learning_rate": 0.00015804296730781135, + "loss": 1.1515, + "step": 2530 + }, + { + "epoch": 3.01, + "learning_rate": 0.00015801221519586958, + "loss": 0.9389, + "step": 2531 + }, + { + "epoch": 3.01, + "learning_rate": 0.0001579814548126514, + "loss": 1.1869, + "step": 2532 + }, + { + "epoch": 3.01, + "learning_rate": 0.00015795068616254247, + "loss": 1.2957, + "step": 2533 + }, + { + "epoch": 3.01, + "learning_rate": 0.00015791990924992981, + "loss": 1.0514, + "step": 2534 + }, + { + "epoch": 3.02, + "learning_rate": 0.00015788912407920148, + "loss": 0.6762, + "step": 2535 + }, + { + "epoch": 3.0, + "learning_rate": 0.00015785833065474683, + "loss": 0.4121, + "step": 2536 + }, + { + "epoch": 3.0, + "learning_rate": 0.00015782752898095627, + "loss": 0.4532, + "step": 2537 + }, + { + "epoch": 3.0, + "learning_rate": 0.0001577967190622215, + "loss": 0.4847, + "step": 2538 + }, + { + "epoch": 3.0, + "learning_rate": 0.0001577659009029353, + "loss": 0.8313, + "step": 2539 + }, + { + "epoch": 3.01, + "learning_rate": 0.00015773507450749172, + "loss": 0.5304, + "step": 2540 + }, + { + "epoch": 3.01, + "learning_rate": 0.00015770423988028588, + "loss": 0.6003, + "step": 2541 + }, + { + "epoch": 3.01, + "learning_rate": 0.00015767339702571414, + "loss": 0.3988, + "step": 2542 + }, + { + "epoch": 3.01, + "learning_rate": 0.00015764254594817398, + "loss": 0.6133, + "step": 2543 + }, + { + "epoch": 3.01, + "learning_rate": 0.0001576116866520642, + "loss": 0.4858, + "step": 2544 + }, + { + "epoch": 3.01, + "learning_rate": 0.00015758081914178456, + "loss": 0.3691, + "step": 2545 + }, + { + "epoch": 3.01, + "learning_rate": 0.0001575499434217361, + "loss": 0.5441, + "step": 2546 + }, + { + "epoch": 3.01, + "learning_rate": 0.0001575190594963211, + "loss": 0.4605, + "step": 2547 + }, + { + "epoch": 3.02, + "learning_rate": 0.00015748816736994284, + "loss": 0.3681, + "step": 2548 + }, + { + "epoch": 3.02, + "learning_rate": 0.00015745726704700593, + "loss": 0.4113, + "step": 2549 + }, + { + "epoch": 3.02, + "learning_rate": 0.00015742635853191608, + "loss": 0.5233, + "step": 2550 + }, + { + "epoch": 3.02, + "learning_rate": 0.00015739544182908014, + "loss": 0.356, + "step": 2551 + }, + { + "epoch": 3.02, + "learning_rate": 0.00015736451694290616, + "loss": 0.4105, + "step": 2552 + }, + { + "epoch": 3.02, + "learning_rate": 0.00015733358387780337, + "loss": 0.4451, + "step": 2553 + }, + { + "epoch": 3.02, + "learning_rate": 0.00015730264263818212, + "loss": 0.5023, + "step": 2554 + }, + { + "epoch": 3.02, + "learning_rate": 0.000157271693228454, + "loss": 0.3671, + "step": 2555 + }, + { + "epoch": 3.03, + "learning_rate": 0.0001572407356530317, + "loss": 0.7077, + "step": 2556 + }, + { + "epoch": 3.03, + "learning_rate": 0.00015720976991632913, + "loss": 0.4439, + "step": 2557 + }, + { + "epoch": 3.03, + "learning_rate": 0.00015717879602276122, + "loss": 0.5961, + "step": 2558 + }, + { + "epoch": 3.03, + "learning_rate": 0.0001571478139767443, + "loss": 0.4269, + "step": 2559 + }, + { + "epoch": 3.03, + "learning_rate": 0.00015711682378269565, + "loss": 0.3427, + "step": 2560 + }, + { + "epoch": 3.03, + "learning_rate": 0.00015708582544503386, + "loss": 0.5736, + "step": 2561 + }, + { + "epoch": 3.03, + "learning_rate": 0.00015705481896817854, + "loss": 0.3707, + "step": 2562 + }, + { + "epoch": 3.03, + "learning_rate": 0.0001570238043565506, + "loss": 0.4076, + "step": 2563 + }, + { + "epoch": 3.03, + "learning_rate": 0.000156992781614572, + "loss": 0.6514, + "step": 2564 + }, + { + "epoch": 3.04, + "learning_rate": 0.00015696175074666598, + "loss": 0.4012, + "step": 2565 + }, + { + "epoch": 3.04, + "learning_rate": 0.0001569307117572568, + "loss": 0.3492, + "step": 2566 + }, + { + "epoch": 3.04, + "learning_rate": 0.00015689966465076992, + "loss": 0.4121, + "step": 2567 + }, + { + "epoch": 3.04, + "learning_rate": 0.00015686860943163206, + "loss": 0.5769, + "step": 2568 + }, + { + "epoch": 3.04, + "learning_rate": 0.00015683754610427094, + "loss": 0.4872, + "step": 2569 + }, + { + "epoch": 3.04, + "learning_rate": 0.00015680647467311557, + "loss": 0.5518, + "step": 2570 + }, + { + "epoch": 3.04, + "learning_rate": 0.00015677539514259608, + "loss": 0.411, + "step": 2571 + }, + { + "epoch": 3.04, + "learning_rate": 0.00015674430751714361, + "loss": 0.3443, + "step": 2572 + }, + { + "epoch": 3.05, + "learning_rate": 0.00015671321180119074, + "loss": 0.3706, + "step": 2573 + }, + { + "epoch": 3.05, + "learning_rate": 0.0001566821079991709, + "loss": 0.6168, + "step": 2574 + }, + { + "epoch": 3.05, + "learning_rate": 0.0001566509961155189, + "loss": 0.3726, + "step": 2575 + }, + { + "epoch": 3.05, + "learning_rate": 0.00015661987615467058, + "loss": 0.3976, + "step": 2576 + }, + { + "epoch": 3.05, + "learning_rate": 0.00015658874812106297, + "loss": 0.3697, + "step": 2577 + }, + { + "epoch": 3.05, + "learning_rate": 0.00015655761201913425, + "loss": 0.2759, + "step": 2578 + }, + { + "epoch": 3.05, + "learning_rate": 0.00015652646785332378, + "loss": 0.3572, + "step": 2579 + }, + { + "epoch": 3.05, + "learning_rate": 0.000156495315628072, + "loss": 0.5333, + "step": 2580 + }, + { + "epoch": 3.06, + "learning_rate": 0.00015646415534782056, + "loss": 0.4004, + "step": 2581 + }, + { + "epoch": 3.06, + "learning_rate": 0.0001564329870170122, + "loss": 0.4736, + "step": 2582 + }, + { + "epoch": 3.06, + "learning_rate": 0.00015640181064009088, + "loss": 0.4814, + "step": 2583 + }, + { + "epoch": 3.06, + "learning_rate": 0.00015637062622150168, + "loss": 0.3351, + "step": 2584 + }, + { + "epoch": 3.06, + "learning_rate": 0.00015633943376569081, + "loss": 0.4497, + "step": 2585 + }, + { + "epoch": 3.06, + "learning_rate": 0.00015630823327710558, + "loss": 0.4202, + "step": 2586 + }, + { + "epoch": 3.06, + "learning_rate": 0.00015627702476019457, + "loss": 0.5934, + "step": 2587 + }, + { + "epoch": 3.06, + "learning_rate": 0.0001562458082194074, + "loss": 0.4664, + "step": 2588 + }, + { + "epoch": 3.06, + "learning_rate": 0.00015621458365919487, + "loss": 0.4077, + "step": 2589 + }, + { + "epoch": 3.07, + "learning_rate": 0.00015618335108400893, + "loss": 0.5244, + "step": 2590 + }, + { + "epoch": 3.07, + "learning_rate": 0.00015615211049830268, + "loss": 0.5042, + "step": 2591 + }, + { + "epoch": 3.07, + "learning_rate": 0.00015612086190653027, + "loss": 0.3442, + "step": 2592 + }, + { + "epoch": 3.07, + "learning_rate": 0.00015608960531314717, + "loss": 0.6337, + "step": 2593 + }, + { + "epoch": 3.07, + "learning_rate": 0.00015605834072260984, + "loss": 0.3542, + "step": 2594 + }, + { + "epoch": 3.07, + "learning_rate": 0.0001560270681393759, + "loss": 0.5113, + "step": 2595 + }, + { + "epoch": 3.07, + "learning_rate": 0.0001559957875679042, + "loss": 0.4346, + "step": 2596 + }, + { + "epoch": 3.07, + "learning_rate": 0.00015596449901265463, + "loss": 0.5231, + "step": 2597 + }, + { + "epoch": 3.08, + "learning_rate": 0.00015593320247808822, + "loss": 0.5193, + "step": 2598 + }, + { + "epoch": 3.08, + "learning_rate": 0.0001559018979686673, + "loss": 0.3575, + "step": 2599 + }, + { + "epoch": 3.08, + "learning_rate": 0.00015587058548885505, + "loss": 0.6356, + "step": 2600 + }, + { + "epoch": 3.08, + "learning_rate": 0.00015583926504311605, + "loss": 0.3313, + "step": 2601 + }, + { + "epoch": 3.08, + "learning_rate": 0.00015580793663591585, + "loss": 0.356, + "step": 2602 + }, + { + "epoch": 3.08, + "learning_rate": 0.00015577660027172127, + "loss": 0.5498, + "step": 2603 + }, + { + "epoch": 3.08, + "learning_rate": 0.0001557452559550001, + "loss": 0.3973, + "step": 2604 + }, + { + "epoch": 3.08, + "learning_rate": 0.0001557139036902215, + "loss": 0.4751, + "step": 2605 + }, + { + "epoch": 3.09, + "learning_rate": 0.00015568254348185544, + "loss": 0.4297, + "step": 2606 + }, + { + "epoch": 3.09, + "learning_rate": 0.00015565117533437335, + "loss": 0.4299, + "step": 2607 + }, + { + "epoch": 3.09, + "learning_rate": 0.00015561979925224754, + "loss": 0.4651, + "step": 2608 + }, + { + "epoch": 3.09, + "learning_rate": 0.00015558841523995162, + "loss": 0.474, + "step": 2609 + }, + { + "epoch": 3.09, + "learning_rate": 0.00015555702330196023, + "loss": 0.4143, + "step": 2610 + }, + { + "epoch": 3.09, + "learning_rate": 0.0001555256234427492, + "loss": 0.393, + "step": 2611 + }, + { + "epoch": 3.09, + "learning_rate": 0.00015549421566679546, + "loss": 0.3738, + "step": 2612 + }, + { + "epoch": 3.09, + "learning_rate": 0.00015546279997857704, + "loss": 0.4394, + "step": 2613 + }, + { + "epoch": 3.09, + "learning_rate": 0.0001554313763825732, + "loss": 0.3702, + "step": 2614 + }, + { + "epoch": 3.1, + "learning_rate": 0.00015539994488326418, + "loss": 0.4594, + "step": 2615 + }, + { + "epoch": 3.1, + "learning_rate": 0.00015536850548513147, + "loss": 0.3249, + "step": 2616 + }, + { + "epoch": 3.1, + "learning_rate": 0.00015533705819265764, + "loss": 0.3857, + "step": 2617 + }, + { + "epoch": 3.1, + "learning_rate": 0.0001553056030103264, + "loss": 0.3272, + "step": 2618 + }, + { + "epoch": 3.1, + "learning_rate": 0.00015527413994262257, + "loss": 0.5204, + "step": 2619 + }, + { + "epoch": 3.1, + "learning_rate": 0.00015524266899403206, + "loss": 0.3653, + "step": 2620 + }, + { + "epoch": 3.1, + "learning_rate": 0.000155211190169042, + "loss": 0.4698, + "step": 2621 + }, + { + "epoch": 3.1, + "learning_rate": 0.0001551797034721405, + "loss": 0.5949, + "step": 2622 + }, + { + "epoch": 3.11, + "learning_rate": 0.00015514820890781693, + "loss": 0.4074, + "step": 2623 + }, + { + "epoch": 3.11, + "learning_rate": 0.00015511670648056178, + "loss": 0.3586, + "step": 2624 + }, + { + "epoch": 3.11, + "learning_rate": 0.0001550851961948665, + "loss": 0.6494, + "step": 2625 + }, + { + "epoch": 3.11, + "learning_rate": 0.00015505367805522383, + "loss": 0.4914, + "step": 2626 + }, + { + "epoch": 3.11, + "learning_rate": 0.0001550221520661276, + "loss": 0.4594, + "step": 2627 + }, + { + "epoch": 3.11, + "learning_rate": 0.00015499061823207266, + "loss": 0.4102, + "step": 2628 + }, + { + "epoch": 3.11, + "learning_rate": 0.00015495907655755506, + "loss": 0.4229, + "step": 2629 + }, + { + "epoch": 3.11, + "learning_rate": 0.000154927527047072, + "loss": 0.7218, + "step": 2630 + }, + { + "epoch": 3.12, + "learning_rate": 0.0001548959697051217, + "loss": 0.6929, + "step": 2631 + }, + { + "epoch": 3.12, + "learning_rate": 0.00015486440453620358, + "loss": 0.3628, + "step": 2632 + }, + { + "epoch": 3.12, + "learning_rate": 0.00015483283154481815, + "loss": 0.4433, + "step": 2633 + }, + { + "epoch": 3.12, + "learning_rate": 0.00015480125073546704, + "loss": 0.3912, + "step": 2634 + }, + { + "epoch": 3.12, + "learning_rate": 0.0001547696621126529, + "loss": 0.3682, + "step": 2635 + }, + { + "epoch": 3.12, + "learning_rate": 0.00015473806568087968, + "loss": 0.354, + "step": 2636 + }, + { + "epoch": 3.12, + "learning_rate": 0.0001547064614446523, + "loss": 0.4789, + "step": 2637 + }, + { + "epoch": 3.12, + "learning_rate": 0.0001546748494084768, + "loss": 0.382, + "step": 2638 + }, + { + "epoch": 3.12, + "learning_rate": 0.00015464322957686041, + "loss": 0.4954, + "step": 2639 + }, + { + "epoch": 3.13, + "learning_rate": 0.00015461160195431148, + "loss": 0.3273, + "step": 2640 + }, + { + "epoch": 3.13, + "learning_rate": 0.0001545799665453393, + "loss": 0.3414, + "step": 2641 + }, + { + "epoch": 3.13, + "learning_rate": 0.00015454832335445447, + "loss": 0.5479, + "step": 2642 + }, + { + "epoch": 3.13, + "learning_rate": 0.0001545166723861686, + "loss": 0.4963, + "step": 2643 + }, + { + "epoch": 3.13, + "learning_rate": 0.00015448501364499445, + "loss": 0.5547, + "step": 2644 + }, + { + "epoch": 3.13, + "learning_rate": 0.0001544533471354458, + "loss": 0.4637, + "step": 2645 + }, + { + "epoch": 3.13, + "learning_rate": 0.00015442167286203767, + "loss": 0.4248, + "step": 2646 + }, + { + "epoch": 3.13, + "learning_rate": 0.00015438999082928608, + "loss": 0.4213, + "step": 2647 + }, + { + "epoch": 3.14, + "learning_rate": 0.00015435830104170822, + "loss": 0.3734, + "step": 2648 + }, + { + "epoch": 3.14, + "learning_rate": 0.00015432660350382234, + "loss": 0.4627, + "step": 2649 + }, + { + "epoch": 3.14, + "learning_rate": 0.0001542948982201479, + "loss": 0.3422, + "step": 2650 + }, + { + "epoch": 3.14, + "learning_rate": 0.00015426318519520525, + "loss": 0.4409, + "step": 2651 + }, + { + "epoch": 3.14, + "learning_rate": 0.00015423146443351607, + "loss": 0.3717, + "step": 2652 + }, + { + "epoch": 3.14, + "learning_rate": 0.00015419973593960298, + "loss": 0.4349, + "step": 2653 + }, + { + "epoch": 3.14, + "learning_rate": 0.00015416799971798985, + "loss": 0.5349, + "step": 2654 + }, + { + "epoch": 3.14, + "learning_rate": 0.0001541362557732015, + "loss": 0.4511, + "step": 2655 + }, + { + "epoch": 3.15, + "learning_rate": 0.000154104504109764, + "loss": 0.5997, + "step": 2656 + }, + { + "epoch": 3.15, + "learning_rate": 0.00015407274473220434, + "loss": 0.661, + "step": 2657 + }, + { + "epoch": 3.15, + "learning_rate": 0.00015404097764505083, + "loss": 0.3456, + "step": 2658 + }, + { + "epoch": 3.15, + "learning_rate": 0.00015400920285283268, + "loss": 0.3416, + "step": 2659 + }, + { + "epoch": 3.15, + "learning_rate": 0.00015397742036008034, + "loss": 0.4707, + "step": 2660 + }, + { + "epoch": 3.15, + "learning_rate": 0.00015394563017132526, + "loss": 0.3221, + "step": 2661 + }, + { + "epoch": 3.15, + "learning_rate": 0.00015391383229110007, + "loss": 0.6108, + "step": 2662 + }, + { + "epoch": 3.15, + "learning_rate": 0.00015388202672393834, + "loss": 0.5504, + "step": 2663 + }, + { + "epoch": 3.15, + "learning_rate": 0.00015385021347437498, + "loss": 0.3973, + "step": 2664 + }, + { + "epoch": 3.16, + "learning_rate": 0.00015381839254694583, + "loss": 0.5149, + "step": 2665 + }, + { + "epoch": 3.16, + "learning_rate": 0.00015378656394618787, + "loss": 0.5853, + "step": 2666 + }, + { + "epoch": 3.16, + "learning_rate": 0.0001537547276766391, + "loss": 0.517, + "step": 2667 + }, + { + "epoch": 3.16, + "learning_rate": 0.00015372288374283875, + "loss": 0.5485, + "step": 2668 + }, + { + "epoch": 3.16, + "learning_rate": 0.00015369103214932703, + "loss": 0.4907, + "step": 2669 + }, + { + "epoch": 3.16, + "learning_rate": 0.0001536591729006453, + "loss": 0.3169, + "step": 2670 + }, + { + "epoch": 3.16, + "learning_rate": 0.00015362730600133596, + "loss": 0.5431, + "step": 2671 + }, + { + "epoch": 3.16, + "learning_rate": 0.00015359543145594258, + "loss": 0.2586, + "step": 2672 + }, + { + "epoch": 3.17, + "learning_rate": 0.00015356354926900979, + "loss": 0.5251, + "step": 2673 + }, + { + "epoch": 3.17, + "learning_rate": 0.00015353165944508325, + "loss": 0.4104, + "step": 2674 + }, + { + "epoch": 3.17, + "learning_rate": 0.00015349976198870973, + "loss": 0.4825, + "step": 2675 + }, + { + "epoch": 3.17, + "learning_rate": 0.00015346785690443718, + "loss": 0.5274, + "step": 2676 + }, + { + "epoch": 3.17, + "learning_rate": 0.0001534359441968145, + "loss": 0.3878, + "step": 2677 + }, + { + "epoch": 3.17, + "learning_rate": 0.0001534040238703918, + "loss": 0.5132, + "step": 2678 + }, + { + "epoch": 3.17, + "learning_rate": 0.00015337209592972023, + "loss": 0.5145, + "step": 2679 + }, + { + "epoch": 3.17, + "learning_rate": 0.00015334016037935196, + "loss": 0.5548, + "step": 2680 + }, + { + "epoch": 3.18, + "learning_rate": 0.00015330821722384037, + "loss": 0.7494, + "step": 2681 + }, + { + "epoch": 3.18, + "learning_rate": 0.00015327626646773976, + "loss": 0.5569, + "step": 2682 + }, + { + "epoch": 3.18, + "learning_rate": 0.00015324430811560573, + "loss": 0.2622, + "step": 2683 + }, + { + "epoch": 3.18, + "learning_rate": 0.0001532123421719948, + "loss": 0.3749, + "step": 2684 + }, + { + "epoch": 3.18, + "learning_rate": 0.00015318036864146457, + "loss": 0.3959, + "step": 2685 + }, + { + "epoch": 3.18, + "learning_rate": 0.0001531483875285738, + "loss": 0.5243, + "step": 2686 + }, + { + "epoch": 3.18, + "learning_rate": 0.0001531163988378823, + "loss": 0.3115, + "step": 2687 + }, + { + "epoch": 3.18, + "learning_rate": 0.00015308440257395093, + "loss": 0.2385, + "step": 2688 + }, + { + "epoch": 3.18, + "learning_rate": 0.00015305239874134174, + "loss": 0.4431, + "step": 2689 + }, + { + "epoch": 3.19, + "learning_rate": 0.0001530203873446177, + "loss": 0.378, + "step": 2690 + }, + { + "epoch": 3.19, + "learning_rate": 0.00015298836838834298, + "loss": 0.4521, + "step": 2691 + }, + { + "epoch": 3.19, + "learning_rate": 0.00015295634187708279, + "loss": 0.6309, + "step": 2692 + }, + { + "epoch": 3.19, + "learning_rate": 0.00015292430781540335, + "loss": 0.4355, + "step": 2693 + }, + { + "epoch": 3.19, + "learning_rate": 0.00015289226620787208, + "loss": 0.4537, + "step": 2694 + }, + { + "epoch": 3.19, + "learning_rate": 0.0001528602170590574, + "loss": 0.4305, + "step": 2695 + }, + { + "epoch": 3.19, + "learning_rate": 0.00015282816037352878, + "loss": 0.5355, + "step": 2696 + }, + { + "epoch": 3.19, + "learning_rate": 0.00015279609615585687, + "loss": 0.5243, + "step": 2697 + }, + { + "epoch": 3.2, + "learning_rate": 0.0001527640244106133, + "loss": 0.5334, + "step": 2698 + }, + { + "epoch": 3.2, + "learning_rate": 0.00015273194514237078, + "loss": 0.5409, + "step": 2699 + }, + { + "epoch": 3.2, + "learning_rate": 0.0001526998583557031, + "loss": 0.4042, + "step": 2700 + }, + { + "epoch": 3.2, + "learning_rate": 0.00015266776405518524, + "loss": 0.5536, + "step": 2701 + }, + { + "epoch": 3.2, + "learning_rate": 0.000152635662245393, + "loss": 0.2743, + "step": 2702 + }, + { + "epoch": 3.2, + "learning_rate": 0.00015260355293090353, + "loss": 0.4762, + "step": 2703 + }, + { + "epoch": 3.2, + "learning_rate": 0.00015257143611629482, + "loss": 0.4552, + "step": 2704 + }, + { + "epoch": 3.2, + "learning_rate": 0.0001525393118061461, + "loss": 0.5395, + "step": 2705 + }, + { + "epoch": 3.21, + "learning_rate": 0.0001525071800050375, + "loss": 0.4297, + "step": 2706 + }, + { + "epoch": 3.21, + "learning_rate": 0.00015247504071755046, + "loss": 0.364, + "step": 2707 + }, + { + "epoch": 3.21, + "learning_rate": 0.00015244289394826722, + "loss": 0.9499, + "step": 2708 + }, + { + "epoch": 3.21, + "learning_rate": 0.00015241073970177126, + "loss": 0.579, + "step": 2709 + }, + { + "epoch": 3.21, + "learning_rate": 0.000152378577982647, + "loss": 0.3111, + "step": 2710 + }, + { + "epoch": 3.21, + "learning_rate": 0.0001523464087954801, + "loss": 0.3345, + "step": 2711 + }, + { + "epoch": 3.21, + "learning_rate": 0.00015231423214485715, + "loss": 0.4628, + "step": 2712 + }, + { + "epoch": 3.21, + "learning_rate": 0.00015228204803536586, + "loss": 0.4803, + "step": 2713 + }, + { + "epoch": 3.21, + "learning_rate": 0.0001522498564715949, + "loss": 0.4164, + "step": 2714 + }, + { + "epoch": 3.22, + "learning_rate": 0.00015221765745813417, + "loss": 0.6468, + "step": 2715 + }, + { + "epoch": 3.22, + "learning_rate": 0.00015218545099957449, + "loss": 0.4495, + "step": 2716 + }, + { + "epoch": 3.22, + "learning_rate": 0.00015215323710050785, + "loss": 0.4184, + "step": 2717 + }, + { + "epoch": 3.22, + "eval_loss": 2.9206559658050537, + "eval_runtime": 283.9002, + "eval_samples_per_second": 0.726, + "eval_steps_per_second": 0.726, + "step": 2717 + }, + { + "epoch": 3.22, + "learning_rate": 0.00015212101576552722, + "loss": 0.4215, + "step": 2718 + }, + { + "epoch": 3.22, + "learning_rate": 0.00015208878699922668, + "loss": 1.4488, + "step": 2719 + }, + { + "epoch": 3.22, + "learning_rate": 0.0001520565508062013, + "loss": 0.4449, + "step": 2720 + }, + { + "epoch": 3.22, + "learning_rate": 0.0001520243071910473, + "loss": 0.2853, + "step": 2721 + }, + { + "epoch": 3.22, + "learning_rate": 0.00015199205615836191, + "loss": 0.4572, + "step": 2722 + }, + { + "epoch": 3.23, + "learning_rate": 0.00015195979771274342, + "loss": 0.4436, + "step": 2723 + }, + { + "epoch": 3.23, + "learning_rate": 0.0001519275318587912, + "loss": 0.38, + "step": 2724 + }, + { + "epoch": 3.23, + "learning_rate": 0.00015189525860110563, + "loss": 0.4956, + "step": 2725 + }, + { + "epoch": 3.23, + "learning_rate": 0.00015186297794428816, + "loss": 0.8514, + "step": 2726 + }, + { + "epoch": 3.23, + "learning_rate": 0.00015183068989294132, + "loss": 0.4518, + "step": 2727 + }, + { + "epoch": 3.23, + "learning_rate": 0.00015179839445166873, + "loss": 0.5581, + "step": 2728 + }, + { + "epoch": 3.23, + "learning_rate": 0.00015176609162507497, + "loss": 0.5828, + "step": 2729 + }, + { + "epoch": 3.23, + "learning_rate": 0.00015173378141776568, + "loss": 0.4109, + "step": 2730 + }, + { + "epoch": 3.24, + "learning_rate": 0.00015170146383434768, + "loss": 0.5762, + "step": 2731 + }, + { + "epoch": 3.24, + "learning_rate": 0.00015166913887942868, + "loss": 0.4502, + "step": 2732 + }, + { + "epoch": 3.24, + "learning_rate": 0.00015163680655761757, + "loss": 0.3736, + "step": 2733 + }, + { + "epoch": 3.24, + "learning_rate": 0.00015160446687352417, + "loss": 0.3771, + "step": 2734 + }, + { + "epoch": 3.24, + "learning_rate": 0.00015157211983175947, + "loss": 0.469, + "step": 2735 + }, + { + "epoch": 3.24, + "learning_rate": 0.00015153976543693542, + "loss": 0.665, + "step": 2736 + }, + { + "epoch": 3.24, + "learning_rate": 0.00015150740369366508, + "loss": 0.3495, + "step": 2737 + }, + { + "epoch": 3.24, + "learning_rate": 0.0001514750346065625, + "loss": 0.4513, + "step": 2738 + }, + { + "epoch": 3.24, + "learning_rate": 0.0001514426581802428, + "loss": 0.4571, + "step": 2739 + }, + { + "epoch": 3.25, + "learning_rate": 0.00015141027441932216, + "loss": 0.4197, + "step": 2740 + }, + { + "epoch": 3.25, + "learning_rate": 0.00015137788332841785, + "loss": 0.3396, + "step": 2741 + }, + { + "epoch": 3.25, + "learning_rate": 0.00015134548491214806, + "loss": 0.3547, + "step": 2742 + }, + { + "epoch": 3.25, + "learning_rate": 0.00015131307917513214, + "loss": 0.3073, + "step": 2743 + }, + { + "epoch": 3.25, + "learning_rate": 0.00015128066612199044, + "loss": 0.7091, + "step": 2744 + }, + { + "epoch": 3.25, + "learning_rate": 0.00015124824575734428, + "loss": 0.2845, + "step": 2745 + }, + { + "epoch": 3.25, + "learning_rate": 0.00015121581808581622, + "loss": 0.2903, + "step": 2746 + }, + { + "epoch": 3.25, + "learning_rate": 0.00015118338311202964, + "loss": 0.4065, + "step": 2747 + }, + { + "epoch": 3.26, + "learning_rate": 0.00015115094084060916, + "loss": 0.6152, + "step": 2748 + }, + { + "epoch": 3.26, + "learning_rate": 0.00015111849127618022, + "loss": 0.5352, + "step": 2749 + }, + { + "epoch": 3.26, + "learning_rate": 0.0001510860344233695, + "loss": 0.414, + "step": 2750 + }, + { + "epoch": 3.26, + "learning_rate": 0.00015105357028680457, + "loss": 0.4756, + "step": 2751 + }, + { + "epoch": 3.26, + "learning_rate": 0.00015102109887111422, + "loss": 0.4644, + "step": 2752 + }, + { + "epoch": 3.26, + "learning_rate": 0.00015098862018092808, + "loss": 0.4231, + "step": 2753 + }, + { + "epoch": 3.26, + "learning_rate": 0.00015095613422087692, + "loss": 0.4617, + "step": 2754 + }, + { + "epoch": 3.26, + "learning_rate": 0.0001509236409955925, + "loss": 0.5876, + "step": 2755 + }, + { + "epoch": 3.27, + "learning_rate": 0.0001508911405097077, + "loss": 0.5696, + "step": 2756 + }, + { + "epoch": 3.27, + "learning_rate": 0.00015085863276785637, + "loss": 0.3826, + "step": 2757 + }, + { + "epoch": 3.27, + "learning_rate": 0.0001508261177746734, + "loss": 0.4338, + "step": 2758 + }, + { + "epoch": 3.27, + "learning_rate": 0.0001507935955347947, + "loss": 0.3546, + "step": 2759 + }, + { + "epoch": 3.27, + "learning_rate": 0.00015076106605285724, + "loss": 0.413, + "step": 2760 + }, + { + "epoch": 3.27, + "learning_rate": 0.000150728529333499, + "loss": 0.3954, + "step": 2761 + }, + { + "epoch": 3.27, + "learning_rate": 0.00015069598538135906, + "loss": 0.5214, + "step": 2762 + }, + { + "epoch": 3.27, + "learning_rate": 0.0001506634342010774, + "loss": 0.5239, + "step": 2763 + }, + { + "epoch": 3.27, + "learning_rate": 0.00015063087579729519, + "loss": 0.8681, + "step": 2764 + }, + { + "epoch": 3.28, + "learning_rate": 0.00015059831017465449, + "loss": 0.4616, + "step": 2765 + }, + { + "epoch": 3.28, + "learning_rate": 0.00015056573733779848, + "loss": 0.4721, + "step": 2766 + }, + { + "epoch": 3.28, + "learning_rate": 0.00015053315729137128, + "loss": 0.4449, + "step": 2767 + }, + { + "epoch": 3.28, + "learning_rate": 0.0001505005700400182, + "loss": 0.569, + "step": 2768 + }, + { + "epoch": 3.28, + "learning_rate": 0.00015046797558838535, + "loss": 0.4926, + "step": 2769 + }, + { + "epoch": 3.28, + "learning_rate": 0.00015043537394112007, + "loss": 0.462, + "step": 2770 + }, + { + "epoch": 3.28, + "learning_rate": 0.00015040276510287063, + "loss": 0.6983, + "step": 2771 + }, + { + "epoch": 3.28, + "learning_rate": 0.00015037014907828632, + "loss": 0.4644, + "step": 2772 + }, + { + "epoch": 3.29, + "learning_rate": 0.0001503375258720175, + "loss": 0.5924, + "step": 2773 + }, + { + "epoch": 3.29, + "learning_rate": 0.00015030489548871544, + "loss": 0.5282, + "step": 2774 + }, + { + "epoch": 3.29, + "learning_rate": 0.00015027225793303264, + "loss": 0.4757, + "step": 2775 + }, + { + "epoch": 3.29, + "learning_rate": 0.00015023961320962247, + "loss": 0.5014, + "step": 2776 + }, + { + "epoch": 3.29, + "learning_rate": 0.0001502069613231393, + "loss": 0.3455, + "step": 2777 + }, + { + "epoch": 3.29, + "learning_rate": 0.00015017430227823864, + "loss": 0.4525, + "step": 2778 + }, + { + "epoch": 3.29, + "learning_rate": 0.0001501416360795769, + "loss": 0.51, + "step": 2779 + }, + { + "epoch": 3.29, + "learning_rate": 0.00015010896273181165, + "loss": 0.3766, + "step": 2780 + }, + { + "epoch": 3.3, + "learning_rate": 0.0001500762822396013, + "loss": 0.3162, + "step": 2781 + }, + { + "epoch": 3.3, + "learning_rate": 0.00015004359460760546, + "loss": 0.406, + "step": 2782 + }, + { + "epoch": 3.3, + "learning_rate": 0.00015001089984048463, + "loss": 0.4671, + "step": 2783 + }, + { + "epoch": 3.3, + "learning_rate": 0.00014997819794290034, + "loss": 0.4299, + "step": 2784 + }, + { + "epoch": 3.3, + "learning_rate": 0.00014994548891951524, + "loss": 0.5494, + "step": 2785 + }, + { + "epoch": 3.3, + "learning_rate": 0.0001499127727749929, + "loss": 0.351, + "step": 2786 + }, + { + "epoch": 3.3, + "learning_rate": 0.00014988004951399785, + "loss": 0.3807, + "step": 2787 + }, + { + "epoch": 3.3, + "learning_rate": 0.00014984731914119586, + "loss": 0.3999, + "step": 2788 + }, + { + "epoch": 3.3, + "learning_rate": 0.0001498145816612534, + "loss": 0.7609, + "step": 2789 + }, + { + "epoch": 3.31, + "learning_rate": 0.00014978183707883827, + "loss": 0.4466, + "step": 2790 + }, + { + "epoch": 3.31, + "learning_rate": 0.00014974908539861905, + "loss": 0.592, + "step": 2791 + }, + { + "epoch": 3.31, + "learning_rate": 0.00014971632662526545, + "loss": 0.4786, + "step": 2792 + }, + { + "epoch": 3.31, + "learning_rate": 0.00014968356076344814, + "loss": 0.4087, + "step": 2793 + }, + { + "epoch": 3.31, + "learning_rate": 0.0001496507878178388, + "loss": 0.3811, + "step": 2794 + }, + { + "epoch": 3.31, + "learning_rate": 0.00014961800779311014, + "loss": 0.4091, + "step": 2795 + }, + { + "epoch": 3.31, + "learning_rate": 0.00014958522069393593, + "loss": 0.6861, + "step": 2796 + }, + { + "epoch": 3.31, + "learning_rate": 0.00014955242652499084, + "loss": 0.3346, + "step": 2797 + }, + { + "epoch": 3.32, + "learning_rate": 0.00014951962529095064, + "loss": 0.5417, + "step": 2798 + }, + { + "epoch": 3.32, + "learning_rate": 0.000149486816996492, + "loss": 0.7325, + "step": 2799 + }, + { + "epoch": 3.32, + "learning_rate": 0.00014945400164629278, + "loss": 0.5007, + "step": 2800 + }, + { + "epoch": 3.32, + "learning_rate": 0.00014942117924503164, + "loss": 0.4217, + "step": 2801 + }, + { + "epoch": 3.32, + "learning_rate": 0.00014938834979738835, + "loss": 0.5265, + "step": 2802 + }, + { + "epoch": 3.32, + "learning_rate": 0.00014935551330804372, + "loss": 0.4376, + "step": 2803 + }, + { + "epoch": 3.32, + "learning_rate": 0.0001493226697816795, + "loss": 0.5068, + "step": 2804 + }, + { + "epoch": 3.32, + "learning_rate": 0.00014928981922297842, + "loss": 0.6248, + "step": 2805 + }, + { + "epoch": 3.33, + "learning_rate": 0.0001492569616366243, + "loss": 0.593, + "step": 2806 + }, + { + "epoch": 3.33, + "learning_rate": 0.0001492240970273019, + "loss": 0.6713, + "step": 2807 + }, + { + "epoch": 3.33, + "learning_rate": 0.00014919122539969697, + "loss": 0.5736, + "step": 2808 + }, + { + "epoch": 3.33, + "learning_rate": 0.00014915834675849633, + "loss": 0.3006, + "step": 2809 + }, + { + "epoch": 3.33, + "learning_rate": 0.00014912546110838775, + "loss": 0.5175, + "step": 2810 + }, + { + "epoch": 3.33, + "learning_rate": 0.00014909256845405998, + "loss": 0.52, + "step": 2811 + }, + { + "epoch": 3.33, + "learning_rate": 0.00014905966880020282, + "loss": 0.5491, + "step": 2812 + }, + { + "epoch": 3.33, + "learning_rate": 0.00014902676215150702, + "loss": 0.6007, + "step": 2813 + }, + { + "epoch": 3.33, + "learning_rate": 0.0001489938485126644, + "loss": 0.6552, + "step": 2814 + }, + { + "epoch": 3.34, + "learning_rate": 0.00014896092788836763, + "loss": 0.3624, + "step": 2815 + }, + { + "epoch": 3.34, + "learning_rate": 0.0001489280002833106, + "loss": 0.2626, + "step": 2816 + }, + { + "epoch": 3.34, + "learning_rate": 0.00014889506570218796, + "loss": 0.409, + "step": 2817 + }, + { + "epoch": 3.34, + "learning_rate": 0.00014886212414969553, + "loss": 0.473, + "step": 2818 + }, + { + "epoch": 3.34, + "learning_rate": 0.00014882917563052998, + "loss": 0.4205, + "step": 2819 + }, + { + "epoch": 3.34, + "learning_rate": 0.00014879622014938915, + "loss": 0.4603, + "step": 2820 + }, + { + "epoch": 3.34, + "learning_rate": 0.0001487632577109717, + "loss": 0.3522, + "step": 2821 + }, + { + "epoch": 3.34, + "learning_rate": 0.0001487302883199774, + "loss": 0.3787, + "step": 2822 + }, + { + "epoch": 3.35, + "learning_rate": 0.00014869731198110695, + "loss": 0.6, + "step": 2823 + }, + { + "epoch": 3.35, + "learning_rate": 0.000148664328699062, + "loss": 0.4291, + "step": 2824 + }, + { + "epoch": 3.35, + "learning_rate": 0.00014863133847854533, + "loss": 0.4358, + "step": 2825 + }, + { + "epoch": 3.35, + "learning_rate": 0.0001485983413242606, + "loss": 0.4144, + "step": 2826 + }, + { + "epoch": 3.35, + "learning_rate": 0.0001485653372409125, + "loss": 0.842, + "step": 2827 + }, + { + "epoch": 3.35, + "learning_rate": 0.00014853232623320662, + "loss": 0.3398, + "step": 2828 + }, + { + "epoch": 3.35, + "learning_rate": 0.00014849930830584972, + "loss": 0.5005, + "step": 2829 + }, + { + "epoch": 3.35, + "learning_rate": 0.00014846628346354933, + "loss": 0.5777, + "step": 2830 + }, + { + "epoch": 3.36, + "learning_rate": 0.00014843325171101413, + "loss": 0.3953, + "step": 2831 + }, + { + "epoch": 3.36, + "learning_rate": 0.00014840021305295373, + "loss": 0.4056, + "step": 2832 + }, + { + "epoch": 3.36, + "learning_rate": 0.00014836716749407872, + "loss": 0.7682, + "step": 2833 + }, + { + "epoch": 3.36, + "learning_rate": 0.0001483341150391006, + "loss": 0.3208, + "step": 2834 + }, + { + "epoch": 3.36, + "learning_rate": 0.00014830105569273204, + "loss": 0.4317, + "step": 2835 + }, + { + "epoch": 3.36, + "learning_rate": 0.00014826798945968654, + "loss": 0.363, + "step": 2836 + }, + { + "epoch": 3.36, + "learning_rate": 0.00014823491634467862, + "loss": 0.3784, + "step": 2837 + }, + { + "epoch": 3.36, + "learning_rate": 0.00014820183635242374, + "loss": 0.9267, + "step": 2838 + }, + { + "epoch": 3.36, + "learning_rate": 0.0001481687494876385, + "loss": 0.4245, + "step": 2839 + }, + { + "epoch": 3.37, + "learning_rate": 0.00014813565575504022, + "loss": 0.3929, + "step": 2840 + }, + { + "epoch": 3.37, + "learning_rate": 0.00014810255515934747, + "loss": 0.5171, + "step": 2841 + }, + { + "epoch": 3.37, + "learning_rate": 0.00014806944770527958, + "loss": 0.5181, + "step": 2842 + }, + { + "epoch": 3.37, + "learning_rate": 0.00014803633339755703, + "loss": 0.4765, + "step": 2843 + }, + { + "epoch": 3.37, + "learning_rate": 0.00014800321224090114, + "loss": 0.4433, + "step": 2844 + }, + { + "epoch": 3.37, + "learning_rate": 0.00014797008424003428, + "loss": 0.461, + "step": 2845 + }, + { + "epoch": 3.37, + "learning_rate": 0.0001479369493996798, + "loss": 0.5688, + "step": 2846 + }, + { + "epoch": 3.37, + "learning_rate": 0.00014790380772456197, + "loss": 0.4822, + "step": 2847 + }, + { + "epoch": 3.38, + "learning_rate": 0.0001478706592194061, + "loss": 0.4993, + "step": 2848 + }, + { + "epoch": 3.38, + "learning_rate": 0.00014783750388893842, + "loss": 0.3967, + "step": 2849 + }, + { + "epoch": 3.38, + "learning_rate": 0.00014780434173788617, + "loss": 0.4708, + "step": 2850 + }, + { + "epoch": 3.38, + "learning_rate": 0.00014777117277097758, + "loss": 0.5721, + "step": 2851 + }, + { + "epoch": 3.38, + "learning_rate": 0.00014773799699294176, + "loss": 0.5276, + "step": 2852 + }, + { + "epoch": 3.38, + "learning_rate": 0.00014770481440850891, + "loss": 0.4135, + "step": 2853 + }, + { + "epoch": 3.38, + "learning_rate": 0.0001476716250224101, + "loss": 0.716, + "step": 2854 + }, + { + "epoch": 3.38, + "learning_rate": 0.00014763842883937743, + "loss": 0.3663, + "step": 2855 + }, + { + "epoch": 3.39, + "learning_rate": 0.00014760522586414396, + "loss": 0.4105, + "step": 2856 + }, + { + "epoch": 3.39, + "learning_rate": 0.00014757201610144372, + "loss": 0.4554, + "step": 2857 + }, + { + "epoch": 3.39, + "learning_rate": 0.00014753879955601163, + "loss": 0.4366, + "step": 2858 + }, + { + "epoch": 3.39, + "learning_rate": 0.0001475055762325837, + "loss": 0.3752, + "step": 2859 + }, + { + "epoch": 3.39, + "learning_rate": 0.00014747234613589685, + "loss": 0.3747, + "step": 2860 + }, + { + "epoch": 3.39, + "learning_rate": 0.000147439109270689, + "loss": 0.5533, + "step": 2861 + }, + { + "epoch": 3.39, + "learning_rate": 0.00014740586564169892, + "loss": 0.4962, + "step": 2862 + }, + { + "epoch": 3.39, + "learning_rate": 0.00014737261525366648, + "loss": 0.5318, + "step": 2863 + }, + { + "epoch": 3.39, + "learning_rate": 0.00014733935811133244, + "loss": 0.4592, + "step": 2864 + }, + { + "epoch": 3.4, + "learning_rate": 0.00014730609421943855, + "loss": 0.429, + "step": 2865 + }, + { + "epoch": 3.4, + "learning_rate": 0.00014727282358272754, + "loss": 0.4163, + "step": 2866 + }, + { + "epoch": 3.4, + "learning_rate": 0.00014723954620594304, + "loss": 0.4811, + "step": 2867 + }, + { + "epoch": 3.4, + "learning_rate": 0.0001472062620938297, + "loss": 0.4662, + "step": 2868 + }, + { + "epoch": 3.4, + "learning_rate": 0.00014717297125113311, + "loss": 0.531, + "step": 2869 + }, + { + "epoch": 3.4, + "learning_rate": 0.0001471396736825998, + "loss": 0.3233, + "step": 2870 + }, + { + "epoch": 3.4, + "learning_rate": 0.00014710636939297724, + "loss": 0.4171, + "step": 2871 + }, + { + "epoch": 3.4, + "learning_rate": 0.000147073058387014, + "loss": 0.5412, + "step": 2872 + }, + { + "epoch": 3.41, + "learning_rate": 0.00014703974066945943, + "loss": 0.4357, + "step": 2873 + }, + { + "epoch": 3.41, + "learning_rate": 0.00014700641624506392, + "loss": 0.3889, + "step": 2874 + }, + { + "epoch": 3.41, + "learning_rate": 0.0001469730851185788, + "loss": 0.456, + "step": 2875 + }, + { + "epoch": 3.41, + "learning_rate": 0.00014693974729475636, + "loss": 0.4365, + "step": 2876 + }, + { + "epoch": 3.41, + "learning_rate": 0.0001469064027783499, + "loss": 0.3947, + "step": 2877 + }, + { + "epoch": 3.41, + "learning_rate": 0.00014687305157411355, + "loss": 0.5718, + "step": 2878 + }, + { + "epoch": 3.41, + "learning_rate": 0.0001468396936868025, + "loss": 0.4652, + "step": 2879 + }, + { + "epoch": 3.41, + "learning_rate": 0.00014680632912117286, + "loss": 0.4242, + "step": 2880 + }, + { + "epoch": 3.42, + "learning_rate": 0.0001467729578819817, + "loss": 0.5045, + "step": 2881 + }, + { + "epoch": 3.42, + "learning_rate": 0.00014673957997398695, + "loss": 0.4098, + "step": 2882 + }, + { + "epoch": 3.42, + "learning_rate": 0.00014670619540194766, + "loss": 0.597, + "step": 2883 + }, + { + "epoch": 3.42, + "learning_rate": 0.00014667280417062374, + "loss": 0.5208, + "step": 2884 + }, + { + "epoch": 3.42, + "learning_rate": 0.00014663940628477598, + "loss": 0.4881, + "step": 2885 + }, + { + "epoch": 3.42, + "learning_rate": 0.00014660600174916627, + "loss": 0.5234, + "step": 2886 + }, + { + "epoch": 3.42, + "learning_rate": 0.0001465725905685573, + "loss": 0.439, + "step": 2887 + }, + { + "epoch": 3.42, + "learning_rate": 0.00014653917274771284, + "loss": 0.4498, + "step": 2888 + }, + { + "epoch": 3.42, + "learning_rate": 0.00014650574829139747, + "loss": 0.4837, + "step": 2889 + }, + { + "epoch": 3.43, + "learning_rate": 0.00014647231720437686, + "loss": 0.4232, + "step": 2890 + }, + { + "epoch": 3.43, + "learning_rate": 0.00014643887949141753, + "loss": 0.4467, + "step": 2891 + }, + { + "epoch": 3.43, + "learning_rate": 0.00014640543515728695, + "loss": 0.3566, + "step": 2892 + }, + { + "epoch": 3.43, + "learning_rate": 0.00014637198420675354, + "loss": 0.3888, + "step": 2893 + }, + { + "epoch": 3.43, + "learning_rate": 0.00014633852664458673, + "loss": 0.326, + "step": 2894 + }, + { + "epoch": 3.43, + "learning_rate": 0.0001463050624755568, + "loss": 0.3608, + "step": 2895 + }, + { + "epoch": 3.43, + "learning_rate": 0.00014627159170443502, + "loss": 0.5326, + "step": 2896 + }, + { + "epoch": 3.43, + "learning_rate": 0.00014623811433599359, + "loss": 0.3171, + "step": 2897 + }, + { + "epoch": 3.44, + "learning_rate": 0.00014620463037500568, + "loss": 0.4619, + "step": 2898 + }, + { + "epoch": 3.44, + "learning_rate": 0.00014617113982624526, + "loss": 0.7739, + "step": 2899 + }, + { + "epoch": 3.44, + "learning_rate": 0.00014613764269448751, + "loss": 0.4327, + "step": 2900 + }, + { + "epoch": 3.44, + "learning_rate": 0.0001461041389845083, + "loss": 0.6078, + "step": 2901 + }, + { + "epoch": 3.44, + "learning_rate": 0.00014607062870108456, + "loss": 0.3863, + "step": 2902 + }, + { + "epoch": 3.44, + "learning_rate": 0.00014603711184899408, + "loss": 0.4787, + "step": 2903 + }, + { + "epoch": 3.44, + "learning_rate": 0.00014600358843301568, + "loss": 0.2997, + "step": 2904 + }, + { + "epoch": 3.44, + "learning_rate": 0.00014597005845792905, + "loss": 0.3657, + "step": 2905 + }, + { + "epoch": 3.45, + "learning_rate": 0.00014593652192851486, + "loss": 0.334, + "step": 2906 + }, + { + "epoch": 3.45, + "learning_rate": 0.00014590297884955463, + "loss": 0.6809, + "step": 2907 + }, + { + "epoch": 3.45, + "learning_rate": 0.0001458694292258309, + "loss": 0.4739, + "step": 2908 + }, + { + "epoch": 3.45, + "learning_rate": 0.00014583587306212713, + "loss": 0.4139, + "step": 2909 + }, + { + "epoch": 3.45, + "learning_rate": 0.00014580231036322768, + "loss": 0.3307, + "step": 2910 + }, + { + "epoch": 3.45, + "learning_rate": 0.00014576874113391789, + "loss": 0.4155, + "step": 2911 + }, + { + "epoch": 3.45, + "learning_rate": 0.00014573516537898394, + "loss": 0.4461, + "step": 2912 + }, + { + "epoch": 3.45, + "learning_rate": 0.00014570158310321305, + "loss": 0.4775, + "step": 2913 + }, + { + "epoch": 3.45, + "learning_rate": 0.0001456679943113933, + "loss": 0.344, + "step": 2914 + }, + { + "epoch": 3.46, + "learning_rate": 0.00014563439900831373, + "loss": 0.3568, + "step": 2915 + }, + { + "epoch": 3.46, + "learning_rate": 0.00014560079719876424, + "loss": 0.3808, + "step": 2916 + }, + { + "epoch": 3.46, + "learning_rate": 0.0001455671888875358, + "loss": 0.5467, + "step": 2917 + }, + { + "epoch": 3.46, + "learning_rate": 0.00014553357407942022, + "loss": 0.5267, + "step": 2918 + }, + { + "epoch": 3.46, + "learning_rate": 0.00014549995277921015, + "loss": 0.4476, + "step": 2919 + }, + { + "epoch": 3.46, + "learning_rate": 0.00014546632499169937, + "loss": 0.4463, + "step": 2920 + }, + { + "epoch": 3.46, + "learning_rate": 0.00014543269072168235, + "loss": 0.5553, + "step": 2921 + }, + { + "epoch": 3.46, + "learning_rate": 0.00014539904997395468, + "loss": 0.5476, + "step": 2922 + }, + { + "epoch": 3.47, + "learning_rate": 0.0001453654027533128, + "loss": 0.4443, + "step": 2923 + }, + { + "epoch": 3.47, + "learning_rate": 0.00014533174906455404, + "loss": 0.4353, + "step": 2924 + }, + { + "epoch": 3.47, + "learning_rate": 0.00014529808891247667, + "loss": 0.4479, + "step": 2925 + }, + { + "epoch": 3.47, + "learning_rate": 0.00014526442230187995, + "loss": 0.3951, + "step": 2926 + }, + { + "epoch": 3.47, + "eval_loss": 2.882225751876831, + "eval_runtime": 283.9462, + "eval_samples_per_second": 0.725, + "eval_steps_per_second": 0.725, + "step": 2926 + }, + { + "epoch": 3.47, + "learning_rate": 0.00014523074923756394, + "loss": 0.679, + "step": 2927 + }, + { + "epoch": 3.47, + "learning_rate": 0.0001451970697243297, + "loss": 0.4178, + "step": 2928 + }, + { + "epoch": 3.47, + "learning_rate": 0.0001451633837669792, + "loss": 0.4121, + "step": 2929 + }, + { + "epoch": 3.47, + "learning_rate": 0.00014512969137031538, + "loss": 0.3929, + "step": 2930 + }, + { + "epoch": 3.48, + "learning_rate": 0.00014509599253914195, + "loss": 0.366, + "step": 2931 + }, + { + "epoch": 3.48, + "learning_rate": 0.0001450622872782637, + "loss": 0.3528, + "step": 2932 + }, + { + "epoch": 3.48, + "learning_rate": 0.00014502857559248617, + "loss": 0.5003, + "step": 2933 + }, + { + "epoch": 3.48, + "learning_rate": 0.00014499485748661604, + "loss": 0.4901, + "step": 2934 + }, + { + "epoch": 3.48, + "learning_rate": 0.00014496113296546067, + "loss": 0.4538, + "step": 2935 + }, + { + "epoch": 3.48, + "learning_rate": 0.00014492740203382847, + "loss": 0.4549, + "step": 2936 + }, + { + "epoch": 3.48, + "learning_rate": 0.0001448936646965288, + "loss": 0.5464, + "step": 2937 + }, + { + "epoch": 3.48, + "learning_rate": 0.00014485992095837177, + "loss": 0.43, + "step": 2938 + }, + { + "epoch": 3.48, + "learning_rate": 0.00014482617082416858, + "loss": 0.5893, + "step": 2939 + }, + { + "epoch": 3.49, + "learning_rate": 0.0001447924142987312, + "loss": 0.4947, + "step": 2940 + }, + { + "epoch": 3.49, + "learning_rate": 0.00014475865138687262, + "loss": 0.4903, + "step": 2941 + }, + { + "epoch": 3.49, + "learning_rate": 0.0001447248820934067, + "loss": 0.4933, + "step": 2942 + }, + { + "epoch": 3.49, + "learning_rate": 0.00014469110642314817, + "loss": 0.4516, + "step": 2943 + }, + { + "epoch": 3.49, + "learning_rate": 0.0001446573243809127, + "loss": 0.469, + "step": 2944 + }, + { + "epoch": 3.49, + "learning_rate": 0.00014462353597151684, + "loss": 0.6531, + "step": 2945 + }, + { + "epoch": 3.49, + "learning_rate": 0.00014458974119977818, + "loss": 0.2754, + "step": 2946 + }, + { + "epoch": 3.49, + "learning_rate": 0.0001445559400705151, + "loss": 0.5676, + "step": 2947 + }, + { + "epoch": 3.5, + "learning_rate": 0.00014452213258854684, + "loss": 0.5903, + "step": 2948 + }, + { + "epoch": 3.5, + "learning_rate": 0.00014448831875869364, + "loss": 0.5022, + "step": 2949 + }, + { + "epoch": 3.5, + "learning_rate": 0.0001444544985857766, + "loss": 0.3509, + "step": 2950 + }, + { + "epoch": 3.5, + "learning_rate": 0.00014442067207461775, + "loss": 0.3921, + "step": 2951 + }, + { + "epoch": 3.5, + "learning_rate": 0.00014438683923004005, + "loss": 0.4997, + "step": 2952 + }, + { + "epoch": 3.5, + "learning_rate": 0.00014435300005686728, + "loss": 0.6218, + "step": 2953 + }, + { + "epoch": 3.5, + "learning_rate": 0.00014431915455992414, + "loss": 0.4097, + "step": 2954 + }, + { + "epoch": 3.5, + "learning_rate": 0.00014428530274403632, + "loss": 0.3478, + "step": 2955 + }, + { + "epoch": 3.51, + "learning_rate": 0.00014425144461403035, + "loss": 0.4506, + "step": 2956 + }, + { + "epoch": 3.51, + "learning_rate": 0.00014421758017473362, + "loss": 0.4025, + "step": 2957 + }, + { + "epoch": 3.51, + "learning_rate": 0.00014418370943097448, + "loss": 0.3838, + "step": 2958 + }, + { + "epoch": 3.51, + "learning_rate": 0.00014414983238758217, + "loss": 0.6366, + "step": 2959 + }, + { + "epoch": 3.51, + "learning_rate": 0.00014411594904938682, + "loss": 0.4649, + "step": 2960 + }, + { + "epoch": 3.51, + "learning_rate": 0.00014408205942121943, + "loss": 0.3361, + "step": 2961 + }, + { + "epoch": 3.51, + "learning_rate": 0.00014404816350791188, + "loss": 0.3692, + "step": 2962 + }, + { + "epoch": 3.51, + "learning_rate": 0.0001440142613142971, + "loss": 0.6162, + "step": 2963 + }, + { + "epoch": 3.52, + "learning_rate": 0.00014398035284520874, + "loss": 0.5935, + "step": 2964 + }, + { + "epoch": 3.52, + "learning_rate": 0.0001439464381054814, + "loss": 0.545, + "step": 2965 + }, + { + "epoch": 3.52, + "learning_rate": 0.00014391251709995061, + "loss": 0.4178, + "step": 2966 + }, + { + "epoch": 3.52, + "learning_rate": 0.00014387858983345276, + "loss": 0.5552, + "step": 2967 + }, + { + "epoch": 3.52, + "learning_rate": 0.0001438446563108251, + "loss": 0.4506, + "step": 2968 + }, + { + "epoch": 3.52, + "learning_rate": 0.00014381071653690587, + "loss": 0.429, + "step": 2969 + }, + { + "epoch": 3.52, + "learning_rate": 0.00014377677051653404, + "loss": 0.3897, + "step": 2970 + }, + { + "epoch": 3.52, + "learning_rate": 0.0001437428182545497, + "loss": 0.4663, + "step": 2971 + }, + { + "epoch": 3.52, + "learning_rate": 0.00014370885975579364, + "loss": 0.4643, + "step": 2972 + }, + { + "epoch": 3.53, + "learning_rate": 0.0001436748950251076, + "loss": 0.5433, + "step": 2973 + }, + { + "epoch": 3.53, + "learning_rate": 0.0001436409240673342, + "loss": 0.4967, + "step": 2974 + }, + { + "epoch": 3.53, + "learning_rate": 0.000143606946887317, + "loss": 0.3717, + "step": 2975 + }, + { + "epoch": 3.53, + "learning_rate": 0.00014357296348990037, + "loss": 0.4166, + "step": 2976 + }, + { + "epoch": 3.53, + "learning_rate": 0.0001435389738799296, + "loss": 0.455, + "step": 2977 + }, + { + "epoch": 3.53, + "learning_rate": 0.00014350497806225087, + "loss": 0.4603, + "step": 2978 + }, + { + "epoch": 3.53, + "learning_rate": 0.00014347097604171127, + "loss": 0.4325, + "step": 2979 + }, + { + "epoch": 3.53, + "learning_rate": 0.0001434369678231587, + "loss": 0.4375, + "step": 2980 + }, + { + "epoch": 3.54, + "learning_rate": 0.00014340295341144202, + "loss": 0.4932, + "step": 2981 + }, + { + "epoch": 3.54, + "learning_rate": 0.00014336893281141096, + "loss": 0.5264, + "step": 2982 + }, + { + "epoch": 3.54, + "learning_rate": 0.00014333490602791608, + "loss": 0.4677, + "step": 2983 + }, + { + "epoch": 3.54, + "learning_rate": 0.00014330087306580887, + "loss": 0.6505, + "step": 2984 + }, + { + "epoch": 3.54, + "learning_rate": 0.00014326683392994167, + "loss": 0.4451, + "step": 2985 + }, + { + "epoch": 3.54, + "learning_rate": 0.00014323278862516775, + "loss": 0.4025, + "step": 2986 + }, + { + "epoch": 3.54, + "learning_rate": 0.0001431987371563412, + "loss": 0.5084, + "step": 2987 + }, + { + "epoch": 3.54, + "learning_rate": 0.000143164679528317, + "loss": 0.4806, + "step": 2988 + }, + { + "epoch": 3.55, + "learning_rate": 0.00014313061574595115, + "loss": 0.3954, + "step": 2989 + }, + { + "epoch": 3.55, + "learning_rate": 0.00014309654581410024, + "loss": 0.4339, + "step": 2990 + }, + { + "epoch": 3.55, + "learning_rate": 0.000143062469737622, + "loss": 0.6739, + "step": 2991 + }, + { + "epoch": 3.55, + "learning_rate": 0.00014302838752137487, + "loss": 0.6414, + "step": 2992 + }, + { + "epoch": 3.55, + "learning_rate": 0.00014299429917021827, + "loss": 0.5075, + "step": 2993 + }, + { + "epoch": 3.55, + "learning_rate": 0.00014296020468901246, + "loss": 0.4105, + "step": 2994 + }, + { + "epoch": 3.55, + "learning_rate": 0.00014292610408261856, + "loss": 0.7371, + "step": 2995 + }, + { + "epoch": 3.55, + "learning_rate": 0.00014289199735589852, + "loss": 0.7485, + "step": 2996 + }, + { + "epoch": 3.55, + "learning_rate": 0.00014285788451371534, + "loss": 0.7629, + "step": 2997 + }, + { + "epoch": 3.56, + "learning_rate": 0.00014282376556093264, + "loss": 0.3849, + "step": 2998 + }, + { + "epoch": 3.56, + "learning_rate": 0.00014278964050241512, + "loss": 0.5355, + "step": 2999 + }, + { + "epoch": 3.56, + "learning_rate": 0.00014275550934302823, + "loss": 0.4077, + "step": 3000 + }, + { + "epoch": 3.56, + "learning_rate": 0.00014272137208763832, + "loss": 0.5352, + "step": 3001 + }, + { + "epoch": 3.56, + "learning_rate": 0.00014268722874111265, + "loss": 0.5257, + "step": 3002 + }, + { + "epoch": 3.56, + "learning_rate": 0.00014265307930831932, + "loss": 0.4265, + "step": 3003 + }, + { + "epoch": 3.56, + "learning_rate": 0.00014261892379412728, + "loss": 0.5776, + "step": 3004 + }, + { + "epoch": 3.56, + "learning_rate": 0.0001425847622034063, + "loss": 0.3521, + "step": 3005 + }, + { + "epoch": 3.57, + "learning_rate": 0.00014255059454102722, + "loss": 0.6203, + "step": 3006 + }, + { + "epoch": 3.57, + "learning_rate": 0.00014251642081186146, + "loss": 0.5238, + "step": 3007 + }, + { + "epoch": 3.57, + "learning_rate": 0.00014248224102078152, + "loss": 0.3887, + "step": 3008 + }, + { + "epoch": 3.57, + "learning_rate": 0.00014244805517266067, + "loss": 0.5001, + "step": 3009 + }, + { + "epoch": 3.57, + "learning_rate": 0.0001424138632723731, + "loss": 0.555, + "step": 3010 + }, + { + "epoch": 3.57, + "learning_rate": 0.0001423796653247938, + "loss": 0.6137, + "step": 3011 + }, + { + "epoch": 3.57, + "learning_rate": 0.00014234546133479867, + "loss": 0.8052, + "step": 3012 + }, + { + "epoch": 3.57, + "learning_rate": 0.0001423112513072644, + "loss": 0.5392, + "step": 3013 + }, + { + "epoch": 3.58, + "learning_rate": 0.00014227703524706867, + "loss": 0.5067, + "step": 3014 + }, + { + "epoch": 3.58, + "learning_rate": 0.0001422428131590899, + "loss": 0.4016, + "step": 3015 + }, + { + "epoch": 3.58, + "learning_rate": 0.00014220858504820742, + "loss": 0.4165, + "step": 3016 + }, + { + "epoch": 3.58, + "learning_rate": 0.00014217435091930141, + "loss": 0.7395, + "step": 3017 + }, + { + "epoch": 3.58, + "learning_rate": 0.00014214011077725292, + "loss": 0.4985, + "step": 3018 + }, + { + "epoch": 3.58, + "learning_rate": 0.00014210586462694384, + "loss": 0.4821, + "step": 3019 + }, + { + "epoch": 3.58, + "learning_rate": 0.00014207161247325691, + "loss": 0.6046, + "step": 3020 + }, + { + "epoch": 3.58, + "learning_rate": 0.00014203735432107576, + "loss": 0.568, + "step": 3021 + }, + { + "epoch": 3.58, + "learning_rate": 0.00014200309017528486, + "loss": 0.7383, + "step": 3022 + }, + { + "epoch": 3.59, + "learning_rate": 0.0001419688200407695, + "loss": 0.5296, + "step": 3023 + }, + { + "epoch": 3.59, + "learning_rate": 0.00014193454392241592, + "loss": 0.6391, + "step": 3024 + }, + { + "epoch": 3.59, + "learning_rate": 0.00014190026182511102, + "loss": 0.4523, + "step": 3025 + }, + { + "epoch": 3.59, + "learning_rate": 0.0001418659737537428, + "loss": 0.482, + "step": 3026 + }, + { + "epoch": 3.59, + "learning_rate": 0.00014183167971319998, + "loss": 0.4519, + "step": 3027 + }, + { + "epoch": 3.59, + "learning_rate": 0.00014179737970837207, + "loss": 0.4156, + "step": 3028 + }, + { + "epoch": 3.59, + "learning_rate": 0.00014176307374414956, + "loss": 0.5142, + "step": 3029 + }, + { + "epoch": 3.59, + "learning_rate": 0.00014172876182542372, + "loss": 0.4068, + "step": 3030 + }, + { + "epoch": 3.6, + "learning_rate": 0.00014169444395708666, + "loss": 0.5908, + "step": 3031 + }, + { + "epoch": 3.6, + "learning_rate": 0.0001416601201440314, + "loss": 0.511, + "step": 3032 + }, + { + "epoch": 3.6, + "learning_rate": 0.00014162579039115174, + "loss": 0.5165, + "step": 3033 + }, + { + "epoch": 3.6, + "learning_rate": 0.00014159145470334235, + "loss": 0.4449, + "step": 3034 + }, + { + "epoch": 3.6, + "learning_rate": 0.00014155711308549878, + "loss": 0.4808, + "step": 3035 + }, + { + "epoch": 3.6, + "learning_rate": 0.00014152276554251736, + "loss": 0.5365, + "step": 3036 + }, + { + "epoch": 3.6, + "learning_rate": 0.00014148841207929527, + "loss": 0.6016, + "step": 3037 + }, + { + "epoch": 3.6, + "learning_rate": 0.0001414540527007307, + "loss": 0.379, + "step": 3038 + }, + { + "epoch": 3.61, + "learning_rate": 0.00014141968741172238, + "loss": 0.6687, + "step": 3039 + }, + { + "epoch": 3.61, + "learning_rate": 0.00014138531621717018, + "loss": 0.6219, + "step": 3040 + }, + { + "epoch": 3.61, + "learning_rate": 0.0001413509391219746, + "loss": 0.3408, + "step": 3041 + }, + { + "epoch": 3.61, + "learning_rate": 0.00014131655613103708, + "loss": 0.5148, + "step": 3042 + }, + { + "epoch": 3.61, + "learning_rate": 0.0001412821672492599, + "loss": 0.3811, + "step": 3043 + }, + { + "epoch": 3.61, + "learning_rate": 0.0001412477724815462, + "loss": 0.4691, + "step": 3044 + }, + { + "epoch": 3.61, + "learning_rate": 0.00014121337183279988, + "loss": 0.6919, + "step": 3045 + }, + { + "epoch": 3.61, + "learning_rate": 0.0001411789653079257, + "loss": 0.5804, + "step": 3046 + }, + { + "epoch": 3.61, + "learning_rate": 0.00014114455291182933, + "loss": 0.418, + "step": 3047 + }, + { + "epoch": 3.62, + "learning_rate": 0.0001411101346494172, + "loss": 0.4422, + "step": 3048 + }, + { + "epoch": 3.62, + "learning_rate": 0.0001410757105255966, + "loss": 0.389, + "step": 3049 + }, + { + "epoch": 3.62, + "learning_rate": 0.0001410412805452757, + "loss": 0.4083, + "step": 3050 + }, + { + "epoch": 3.62, + "learning_rate": 0.0001410068447133634, + "loss": 0.8703, + "step": 3051 + }, + { + "epoch": 3.62, + "learning_rate": 0.00014097240303476954, + "loss": 0.4724, + "step": 3052 + }, + { + "epoch": 3.62, + "learning_rate": 0.00014093795551440474, + "loss": 0.6257, + "step": 3053 + }, + { + "epoch": 3.62, + "learning_rate": 0.00014090350215718048, + "loss": 0.5212, + "step": 3054 + }, + { + "epoch": 3.62, + "learning_rate": 0.00014086904296800902, + "loss": 0.4429, + "step": 3055 + }, + { + "epoch": 3.63, + "learning_rate": 0.00014083457795180355, + "loss": 0.3496, + "step": 3056 + }, + { + "epoch": 3.63, + "learning_rate": 0.00014080010711347798, + "loss": 0.3402, + "step": 3057 + }, + { + "epoch": 3.63, + "learning_rate": 0.0001407656304579471, + "loss": 0.4783, + "step": 3058 + }, + { + "epoch": 3.63, + "learning_rate": 0.00014073114799012653, + "loss": 0.3987, + "step": 3059 + }, + { + "epoch": 3.63, + "learning_rate": 0.00014069665971493274, + "loss": 0.4755, + "step": 3060 + }, + { + "epoch": 3.63, + "learning_rate": 0.00014066216563728303, + "loss": 0.4792, + "step": 3061 + }, + { + "epoch": 3.63, + "learning_rate": 0.00014062766576209546, + "loss": 0.4275, + "step": 3062 + }, + { + "epoch": 3.63, + "learning_rate": 0.00014059316009428893, + "loss": 0.3598, + "step": 3063 + }, + { + "epoch": 3.64, + "learning_rate": 0.00014055864863878325, + "loss": 0.4887, + "step": 3064 + }, + { + "epoch": 3.64, + "learning_rate": 0.000140524131400499, + "loss": 0.5421, + "step": 3065 + }, + { + "epoch": 3.64, + "learning_rate": 0.00014048960838435753, + "loss": 0.352, + "step": 3066 + }, + { + "epoch": 3.64, + "learning_rate": 0.00014045507959528118, + "loss": 0.3124, + "step": 3067 + }, + { + "epoch": 3.64, + "learning_rate": 0.00014042054503819287, + "loss": 0.3955, + "step": 3068 + }, + { + "epoch": 3.64, + "learning_rate": 0.00014038600471801658, + "loss": 0.455, + "step": 3069 + }, + { + "epoch": 3.64, + "learning_rate": 0.00014035145863967692, + "loss": 0.5177, + "step": 3070 + }, + { + "epoch": 3.64, + "learning_rate": 0.00014031690680809945, + "loss": 0.4205, + "step": 3071 + }, + { + "epoch": 3.64, + "learning_rate": 0.00014028234922821054, + "loss": 0.4832, + "step": 3072 + }, + { + "epoch": 3.65, + "learning_rate": 0.0001402477859049373, + "loss": 0.3496, + "step": 3073 + }, + { + "epoch": 3.65, + "learning_rate": 0.0001402132168432077, + "loss": 0.5404, + "step": 3074 + }, + { + "epoch": 3.65, + "learning_rate": 0.00014017864204795058, + "loss": 0.5106, + "step": 3075 + }, + { + "epoch": 3.65, + "learning_rate": 0.0001401440615240955, + "loss": 0.6611, + "step": 3076 + }, + { + "epoch": 3.65, + "learning_rate": 0.00014010947527657295, + "loss": 0.3879, + "step": 3077 + }, + { + "epoch": 3.65, + "learning_rate": 0.0001400748833103141, + "loss": 0.3054, + "step": 3078 + }, + { + "epoch": 3.65, + "learning_rate": 0.00014004028563025108, + "loss": 0.3461, + "step": 3079 + }, + { + "epoch": 3.65, + "learning_rate": 0.0001400056822413167, + "loss": 0.482, + "step": 3080 + }, + { + "epoch": 3.66, + "learning_rate": 0.0001399710731484447, + "loss": 0.3285, + "step": 3081 + }, + { + "epoch": 3.66, + "learning_rate": 0.00013993645835656953, + "loss": 0.363, + "step": 3082 + }, + { + "epoch": 3.66, + "learning_rate": 0.00013990183787062661, + "loss": 0.5092, + "step": 3083 + }, + { + "epoch": 3.66, + "learning_rate": 0.00013986721169555194, + "loss": 0.3009, + "step": 3084 + }, + { + "epoch": 3.66, + "learning_rate": 0.00013983257983628253, + "loss": 0.3831, + "step": 3085 + }, + { + "epoch": 3.66, + "learning_rate": 0.0001397979422977561, + "loss": 0.3718, + "step": 3086 + }, + { + "epoch": 3.66, + "learning_rate": 0.00013976329908491118, + "loss": 0.3401, + "step": 3087 + }, + { + "epoch": 3.66, + "learning_rate": 0.00013972865020268722, + "loss": 0.5294, + "step": 3088 + }, + { + "epoch": 3.67, + "learning_rate": 0.00013969399565602435, + "loss": 0.5054, + "step": 3089 + }, + { + "epoch": 3.67, + "learning_rate": 0.0001396593354498635, + "loss": 0.4247, + "step": 3090 + }, + { + "epoch": 3.67, + "learning_rate": 0.00013962466958914658, + "loss": 0.431, + "step": 3091 + }, + { + "epoch": 3.67, + "learning_rate": 0.00013958999807881604, + "loss": 0.6341, + "step": 3092 + }, + { + "epoch": 3.67, + "learning_rate": 0.0001395553209238154, + "loss": 0.5126, + "step": 3093 + }, + { + "epoch": 3.67, + "learning_rate": 0.00013952063812908881, + "loss": 0.3775, + "step": 3094 + }, + { + "epoch": 3.67, + "learning_rate": 0.0001394859496995813, + "loss": 0.5149, + "step": 3095 + }, + { + "epoch": 3.67, + "learning_rate": 0.00013945125564023868, + "loss": 0.2879, + "step": 3096 + }, + { + "epoch": 3.67, + "learning_rate": 0.00013941655595600756, + "loss": 0.5621, + "step": 3097 + }, + { + "epoch": 3.68, + "learning_rate": 0.00013938185065183532, + "loss": 0.408, + "step": 3098 + }, + { + "epoch": 3.68, + "learning_rate": 0.00013934713973267024, + "loss": 0.4247, + "step": 3099 + }, + { + "epoch": 3.68, + "learning_rate": 0.0001393124232034613, + "loss": 0.4224, + "step": 3100 + }, + { + "epoch": 3.68, + "learning_rate": 0.0001392777010691584, + "loss": 0.4142, + "step": 3101 + }, + { + "epoch": 3.68, + "learning_rate": 0.00013924297333471204, + "loss": 0.6004, + "step": 3102 + }, + { + "epoch": 3.68, + "learning_rate": 0.00013920824000507374, + "loss": 0.6016, + "step": 3103 + }, + { + "epoch": 3.68, + "learning_rate": 0.0001391735010851956, + "loss": 0.4669, + "step": 3104 + }, + { + "epoch": 3.68, + "learning_rate": 0.00013913875658003074, + "loss": 0.3987, + "step": 3105 + }, + { + "epoch": 3.69, + "learning_rate": 0.0001391040064945329, + "loss": 0.471, + "step": 3106 + }, + { + "epoch": 3.69, + "learning_rate": 0.0001390692508336568, + "loss": 0.6135, + "step": 3107 + }, + { + "epoch": 3.69, + "learning_rate": 0.00013903448960235766, + "loss": 0.5369, + "step": 3108 + }, + { + "epoch": 3.69, + "learning_rate": 0.00013899972280559183, + "loss": 0.3295, + "step": 3109 + }, + { + "epoch": 3.69, + "learning_rate": 0.0001389649504483162, + "loss": 0.309, + "step": 3110 + }, + { + "epoch": 3.69, + "learning_rate": 0.00013893017253548858, + "loss": 0.4026, + "step": 3111 + }, + { + "epoch": 3.69, + "learning_rate": 0.00013889538907206755, + "loss": 0.4724, + "step": 3112 + }, + { + "epoch": 3.69, + "learning_rate": 0.0001388606000630125, + "loss": 0.3606, + "step": 3113 + }, + { + "epoch": 3.7, + "learning_rate": 0.0001388258055132835, + "loss": 0.4894, + "step": 3114 + }, + { + "epoch": 3.7, + "learning_rate": 0.0001387910054278416, + "loss": 0.4832, + "step": 3115 + }, + { + "epoch": 3.7, + "learning_rate": 0.0001387561998116484, + "loss": 0.4604, + "step": 3116 + }, + { + "epoch": 3.7, + "learning_rate": 0.00013872138866966656, + "loss": 0.4377, + "step": 3117 + }, + { + "epoch": 3.7, + "learning_rate": 0.00013868657200685934, + "loss": 0.3965, + "step": 3118 + }, + { + "epoch": 3.7, + "learning_rate": 0.0001386517498281908, + "loss": 0.7653, + "step": 3119 + }, + { + "epoch": 3.7, + "learning_rate": 0.00013861692213862584, + "loss": 0.5213, + "step": 3120 + }, + { + "epoch": 3.7, + "learning_rate": 0.00013858208894313017, + "loss": 0.9296, + "step": 3121 + }, + { + "epoch": 3.7, + "learning_rate": 0.00013854725024667016, + "loss": 0.7738, + "step": 3122 + }, + { + "epoch": 3.71, + "learning_rate": 0.00013851240605421315, + "loss": 0.5826, + "step": 3123 + }, + { + "epoch": 3.71, + "learning_rate": 0.0001384775563707271, + "loss": 0.5502, + "step": 3124 + }, + { + "epoch": 3.71, + "learning_rate": 0.00013844270120118085, + "loss": 0.3535, + "step": 3125 + }, + { + "epoch": 3.71, + "learning_rate": 0.0001384078405505439, + "loss": 0.4853, + "step": 3126 + }, + { + "epoch": 3.71, + "learning_rate": 0.00013837297442378675, + "loss": 0.5819, + "step": 3127 + }, + { + "epoch": 3.71, + "learning_rate": 0.00013833810282588044, + "loss": 0.3728, + "step": 3128 + }, + { + "epoch": 3.71, + "learning_rate": 0.00013830322576179697, + "loss": 0.3327, + "step": 3129 + }, + { + "epoch": 3.71, + "learning_rate": 0.000138268343236509, + "loss": 0.4618, + "step": 3130 + }, + { + "epoch": 3.72, + "learning_rate": 0.00013823345525499004, + "loss": 0.3377, + "step": 3131 + }, + { + "epoch": 3.72, + "learning_rate": 0.00013819856182221434, + "loss": 0.3154, + "step": 3132 + }, + { + "epoch": 3.72, + "learning_rate": 0.00013816366294315695, + "loss": 0.5116, + "step": 3133 + }, + { + "epoch": 3.72, + "learning_rate": 0.0001381287586227937, + "loss": 0.4987, + "step": 3134 + }, + { + "epoch": 3.72, + "learning_rate": 0.00013809384886610118, + "loss": 0.5596, + "step": 3135 + }, + { + "epoch": 3.72, + "eval_loss": 2.939779281616211, + "eval_runtime": 283.9953, + "eval_samples_per_second": 0.725, + "eval_steps_per_second": 0.725, + "step": 3135 + }, + { + "epoch": 3.72, + "learning_rate": 0.00013805893367805678, + "loss": 0.5128, + "step": 3136 + }, + { + "epoch": 3.72, + "learning_rate": 0.0001380240130636386, + "loss": 0.3149, + "step": 3137 + }, + { + "epoch": 3.72, + "learning_rate": 0.00013798908702782558, + "loss": 0.4984, + "step": 3138 + }, + { + "epoch": 3.73, + "learning_rate": 0.0001379541555755974, + "loss": 0.626, + "step": 3139 + }, + { + "epoch": 3.73, + "learning_rate": 0.00013791921871193457, + "loss": 0.4949, + "step": 3140 + }, + { + "epoch": 3.73, + "learning_rate": 0.00013788427644181823, + "loss": 0.5654, + "step": 3141 + }, + { + "epoch": 3.73, + "learning_rate": 0.0001378493287702305, + "loss": 0.4197, + "step": 3142 + }, + { + "epoch": 3.73, + "learning_rate": 0.00013781437570215406, + "loss": 0.4341, + "step": 3143 + }, + { + "epoch": 3.73, + "learning_rate": 0.00013777941724257253, + "loss": 0.3576, + "step": 3144 + }, + { + "epoch": 3.73, + "learning_rate": 0.00013774445339647014, + "loss": 0.3098, + "step": 3145 + }, + { + "epoch": 3.73, + "learning_rate": 0.00013770948416883205, + "loss": 0.6052, + "step": 3146 + }, + { + "epoch": 3.73, + "learning_rate": 0.00013767450956464407, + "loss": 0.4327, + "step": 3147 + }, + { + "epoch": 3.74, + "learning_rate": 0.00013763952958889287, + "loss": 0.4717, + "step": 3148 + }, + { + "epoch": 3.74, + "learning_rate": 0.0001376045442465657, + "loss": 0.5263, + "step": 3149 + }, + { + "epoch": 3.74, + "learning_rate": 0.00013756955354265085, + "loss": 0.5021, + "step": 3150 + }, + { + "epoch": 3.74, + "learning_rate": 0.00013753455748213714, + "loss": 0.4066, + "step": 3151 + }, + { + "epoch": 3.74, + "learning_rate": 0.00013749955607001433, + "loss": 0.3461, + "step": 3152 + }, + { + "epoch": 3.74, + "learning_rate": 0.00013746454931127278, + "loss": 0.4318, + "step": 3153 + }, + { + "epoch": 3.74, + "learning_rate": 0.00013742953721090372, + "loss": 0.4195, + "step": 3154 + }, + { + "epoch": 3.74, + "learning_rate": 0.0001373945197738991, + "loss": 0.3862, + "step": 3155 + }, + { + "epoch": 3.75, + "learning_rate": 0.00013735949700525163, + "loss": 0.5916, + "step": 3156 + }, + { + "epoch": 3.75, + "learning_rate": 0.00013732446890995484, + "loss": 0.5336, + "step": 3157 + }, + { + "epoch": 3.75, + "learning_rate": 0.00013728943549300295, + "loss": 0.4104, + "step": 3158 + }, + { + "epoch": 3.75, + "learning_rate": 0.00013725439675939095, + "loss": 0.541, + "step": 3159 + }, + { + "epoch": 3.75, + "learning_rate": 0.00013721935271411464, + "loss": 0.5173, + "step": 3160 + }, + { + "epoch": 3.75, + "learning_rate": 0.00013718430336217045, + "loss": 0.3866, + "step": 3161 + }, + { + "epoch": 3.75, + "learning_rate": 0.00013714924870855571, + "loss": 0.6113, + "step": 3162 + }, + { + "epoch": 3.75, + "learning_rate": 0.00013711418875826846, + "loss": 0.5817, + "step": 3163 + }, + { + "epoch": 3.76, + "learning_rate": 0.0001370791235163075, + "loss": 0.5331, + "step": 3164 + }, + { + "epoch": 3.76, + "learning_rate": 0.00013704405298767229, + "loss": 0.5744, + "step": 3165 + }, + { + "epoch": 3.76, + "learning_rate": 0.0001370089771773632, + "loss": 0.494, + "step": 3166 + }, + { + "epoch": 3.76, + "learning_rate": 0.00013697389609038124, + "loss": 0.4537, + "step": 3167 + }, + { + "epoch": 3.76, + "learning_rate": 0.00013693880973172822, + "loss": 0.5494, + "step": 3168 + }, + { + "epoch": 3.76, + "learning_rate": 0.00013690371810640665, + "loss": 0.537, + "step": 3169 + }, + { + "epoch": 3.76, + "learning_rate": 0.0001368686212194199, + "loss": 0.4698, + "step": 3170 + }, + { + "epoch": 3.76, + "learning_rate": 0.00013683351907577194, + "loss": 0.5254, + "step": 3171 + }, + { + "epoch": 3.76, + "learning_rate": 0.00013679841168046767, + "loss": 0.3857, + "step": 3172 + }, + { + "epoch": 3.77, + "learning_rate": 0.00013676329903851254, + "loss": 0.4464, + "step": 3173 + }, + { + "epoch": 3.77, + "learning_rate": 0.0001367281811549129, + "loss": 0.5651, + "step": 3174 + }, + { + "epoch": 3.77, + "learning_rate": 0.0001366930580346758, + "loss": 0.4192, + "step": 3175 + }, + { + "epoch": 3.77, + "learning_rate": 0.000136657929682809, + "loss": 0.3364, + "step": 3176 + }, + { + "epoch": 3.77, + "learning_rate": 0.00013662279610432104, + "loss": 0.3539, + "step": 3177 + }, + { + "epoch": 3.77, + "learning_rate": 0.00013658765730422125, + "loss": 0.6074, + "step": 3178 + }, + { + "epoch": 3.77, + "learning_rate": 0.00013655251328751957, + "loss": 0.5322, + "step": 3179 + }, + { + "epoch": 3.77, + "learning_rate": 0.00013651736405922686, + "loss": 0.4176, + "step": 3180 + }, + { + "epoch": 3.78, + "learning_rate": 0.00013648220962435458, + "loss": 0.4878, + "step": 3181 + }, + { + "epoch": 3.78, + "learning_rate": 0.000136447049987915, + "loss": 0.6351, + "step": 3182 + }, + { + "epoch": 3.78, + "learning_rate": 0.00013641188515492109, + "loss": 0.4487, + "step": 3183 + }, + { + "epoch": 3.78, + "learning_rate": 0.0001363767151303866, + "loss": 0.4451, + "step": 3184 + }, + { + "epoch": 3.78, + "learning_rate": 0.00013634153991932607, + "loss": 0.4944, + "step": 3185 + }, + { + "epoch": 3.78, + "learning_rate": 0.0001363063595267547, + "loss": 0.5932, + "step": 3186 + }, + { + "epoch": 3.78, + "learning_rate": 0.00013627117395768833, + "loss": 0.4964, + "step": 3187 + }, + { + "epoch": 3.78, + "learning_rate": 0.0001362359832171438, + "loss": 0.6795, + "step": 3188 + }, + { + "epoch": 3.79, + "learning_rate": 0.00013620078731013845, + "loss": 0.3862, + "step": 3189 + }, + { + "epoch": 3.79, + "learning_rate": 0.0001361655862416905, + "loss": 0.3425, + "step": 3190 + }, + { + "epoch": 3.79, + "learning_rate": 0.0001361303800168188, + "loss": 0.4361, + "step": 3191 + }, + { + "epoch": 3.79, + "learning_rate": 0.0001360951686405431, + "loss": 0.5774, + "step": 3192 + }, + { + "epoch": 3.79, + "learning_rate": 0.00013605995211788365, + "loss": 0.4044, + "step": 3193 + }, + { + "epoch": 3.79, + "learning_rate": 0.00013602473045386165, + "loss": 0.3858, + "step": 3194 + }, + { + "epoch": 3.79, + "learning_rate": 0.00013598950365349883, + "loss": 0.6136, + "step": 3195 + }, + { + "epoch": 3.79, + "learning_rate": 0.00013595427172181785, + "loss": 0.329, + "step": 3196 + }, + { + "epoch": 3.79, + "learning_rate": 0.00013591903466384203, + "loss": 0.3898, + "step": 3197 + }, + { + "epoch": 3.8, + "learning_rate": 0.00013588379248459536, + "loss": 0.4809, + "step": 3198 + }, + { + "epoch": 3.8, + "learning_rate": 0.00013584854518910262, + "loss": 0.4108, + "step": 3199 + }, + { + "epoch": 3.8, + "learning_rate": 0.00013581329278238927, + "loss": 0.4655, + "step": 3200 + }, + { + "epoch": 3.8, + "learning_rate": 0.00013577803526948162, + "loss": 0.4657, + "step": 3201 + }, + { + "epoch": 3.8, + "learning_rate": 0.00013574277265540654, + "loss": 0.4842, + "step": 3202 + }, + { + "epoch": 3.8, + "learning_rate": 0.00013570750494519175, + "loss": 0.4593, + "step": 3203 + }, + { + "epoch": 3.8, + "learning_rate": 0.00013567223214386564, + "loss": 0.435, + "step": 3204 + }, + { + "epoch": 3.8, + "learning_rate": 0.00013563695425645737, + "loss": 0.7146, + "step": 3205 + }, + { + "epoch": 3.81, + "learning_rate": 0.00013560167128799674, + "loss": 0.5027, + "step": 3206 + }, + { + "epoch": 3.81, + "learning_rate": 0.00013556638324351442, + "loss": 0.4844, + "step": 3207 + }, + { + "epoch": 3.81, + "learning_rate": 0.00013553109012804163, + "loss": 0.7605, + "step": 3208 + }, + { + "epoch": 3.81, + "learning_rate": 0.00013549579194661044, + "loss": 0.396, + "step": 3209 + }, + { + "epoch": 3.81, + "learning_rate": 0.00013546048870425356, + "loss": 0.5178, + "step": 3210 + }, + { + "epoch": 3.81, + "learning_rate": 0.00013542518040600453, + "loss": 0.6946, + "step": 3211 + }, + { + "epoch": 3.81, + "learning_rate": 0.0001353898670568975, + "loss": 0.5054, + "step": 3212 + }, + { + "epoch": 3.81, + "learning_rate": 0.00013535454866196739, + "loss": 0.4495, + "step": 3213 + }, + { + "epoch": 3.82, + "learning_rate": 0.00013531922522624982, + "loss": 0.5138, + "step": 3214 + }, + { + "epoch": 3.82, + "learning_rate": 0.0001352838967547812, + "loss": 0.4706, + "step": 3215 + }, + { + "epoch": 3.82, + "learning_rate": 0.00013524856325259848, + "loss": 0.5193, + "step": 3216 + }, + { + "epoch": 3.82, + "learning_rate": 0.0001352132247247396, + "loss": 0.4436, + "step": 3217 + }, + { + "epoch": 3.82, + "learning_rate": 0.00013517788117624292, + "loss": 0.4139, + "step": 3218 + }, + { + "epoch": 3.82, + "learning_rate": 0.0001351425326121478, + "loss": 0.5937, + "step": 3219 + }, + { + "epoch": 3.82, + "learning_rate": 0.000135107179037494, + "loss": 0.3375, + "step": 3220 + }, + { + "epoch": 3.82, + "learning_rate": 0.00013507182045732234, + "loss": 0.3712, + "step": 3221 + }, + { + "epoch": 3.82, + "learning_rate": 0.00013503645687667408, + "loss": 0.3424, + "step": 3222 + }, + { + "epoch": 3.83, + "learning_rate": 0.00013500108830059133, + "loss": 0.3333, + "step": 3223 + }, + { + "epoch": 3.83, + "learning_rate": 0.00013496571473411688, + "loss": 0.4042, + "step": 3224 + }, + { + "epoch": 3.83, + "learning_rate": 0.00013493033618229417, + "loss": 0.4963, + "step": 3225 + }, + { + "epoch": 3.83, + "learning_rate": 0.0001348949526501675, + "loss": 0.3946, + "step": 3226 + }, + { + "epoch": 3.83, + "learning_rate": 0.00013485956414278178, + "loss": 0.5807, + "step": 3227 + }, + { + "epoch": 3.83, + "learning_rate": 0.00013482417066518256, + "loss": 0.4561, + "step": 3228 + }, + { + "epoch": 3.83, + "learning_rate": 0.00013478877222241627, + "loss": 0.4964, + "step": 3229 + }, + { + "epoch": 3.83, + "learning_rate": 0.00013475336881952986, + "loss": 0.6429, + "step": 3230 + }, + { + "epoch": 3.84, + "learning_rate": 0.00013471796046157116, + "loss": 0.5466, + "step": 3231 + }, + { + "epoch": 3.84, + "learning_rate": 0.00013468254715358861, + "loss": 0.3882, + "step": 3232 + }, + { + "epoch": 3.84, + "learning_rate": 0.00013464712890063138, + "loss": 0.5006, + "step": 3233 + }, + { + "epoch": 3.84, + "learning_rate": 0.0001346117057077493, + "loss": 0.494, + "step": 3234 + }, + { + "epoch": 3.84, + "learning_rate": 0.00013457627757999303, + "loss": 0.5444, + "step": 3235 + }, + { + "epoch": 3.84, + "learning_rate": 0.00013454084452241372, + "loss": 0.3714, + "step": 3236 + }, + { + "epoch": 3.84, + "learning_rate": 0.00013450540654006348, + "loss": 0.3335, + "step": 3237 + }, + { + "epoch": 3.84, + "learning_rate": 0.0001344699636379949, + "loss": 0.4771, + "step": 3238 + }, + { + "epoch": 3.85, + "learning_rate": 0.00013443451582126144, + "loss": 0.466, + "step": 3239 + }, + { + "epoch": 3.85, + "learning_rate": 0.00013439906309491712, + "loss": 0.5537, + "step": 3240 + }, + { + "epoch": 3.85, + "learning_rate": 0.00013436360546401676, + "loss": 0.5899, + "step": 3241 + }, + { + "epoch": 3.85, + "learning_rate": 0.00013432814293361584, + "loss": 0.443, + "step": 3242 + }, + { + "epoch": 3.85, + "learning_rate": 0.00013429267550877055, + "loss": 0.4238, + "step": 3243 + }, + { + "epoch": 3.85, + "learning_rate": 0.00013425720319453773, + "loss": 0.6529, + "step": 3244 + }, + { + "epoch": 3.85, + "learning_rate": 0.00013422172599597505, + "loss": 0.6163, + "step": 3245 + }, + { + "epoch": 3.85, + "learning_rate": 0.00013418624391814068, + "loss": 0.5183, + "step": 3246 + }, + { + "epoch": 3.85, + "learning_rate": 0.00013415075696609363, + "loss": 0.7659, + "step": 3247 + }, + { + "epoch": 3.86, + "learning_rate": 0.0001341152651448936, + "loss": 0.3717, + "step": 3248 + }, + { + "epoch": 3.86, + "learning_rate": 0.0001340797684596009, + "loss": 0.6885, + "step": 3249 + }, + { + "epoch": 3.86, + "learning_rate": 0.0001340442669152766, + "loss": 0.4483, + "step": 3250 + }, + { + "epoch": 3.86, + "learning_rate": 0.0001340087605169825, + "loss": 0.3417, + "step": 3251 + }, + { + "epoch": 3.86, + "learning_rate": 0.00013397324926978094, + "loss": 0.4751, + "step": 3252 + }, + { + "epoch": 3.86, + "learning_rate": 0.00013393773317873508, + "loss": 0.4448, + "step": 3253 + }, + { + "epoch": 3.86, + "learning_rate": 0.00013390221224890878, + "loss": 0.6278, + "step": 3254 + }, + { + "epoch": 3.86, + "learning_rate": 0.00013386668648536655, + "loss": 0.2995, + "step": 3255 + }, + { + "epoch": 3.87, + "learning_rate": 0.00013383115589317353, + "loss": 0.535, + "step": 3256 + }, + { + "epoch": 3.87, + "learning_rate": 0.00013379562047739568, + "loss": 0.4972, + "step": 3257 + }, + { + "epoch": 3.87, + "learning_rate": 0.00013376008024309948, + "loss": 0.4821, + "step": 3258 + }, + { + "epoch": 3.87, + "learning_rate": 0.0001337245351953523, + "loss": 0.392, + "step": 3259 + }, + { + "epoch": 3.87, + "learning_rate": 0.000133688985339222, + "loss": 0.413, + "step": 3260 + }, + { + "epoch": 3.87, + "learning_rate": 0.00013365343067977726, + "loss": 0.4689, + "step": 3261 + }, + { + "epoch": 3.87, + "learning_rate": 0.00013361787122208744, + "loss": 0.4737, + "step": 3262 + }, + { + "epoch": 3.87, + "learning_rate": 0.00013358230697122246, + "loss": 0.5033, + "step": 3263 + }, + { + "epoch": 3.88, + "learning_rate": 0.00013354673793225302, + "loss": 0.4901, + "step": 3264 + }, + { + "epoch": 3.88, + "learning_rate": 0.00013351116411025054, + "loss": 0.5776, + "step": 3265 + }, + { + "epoch": 3.88, + "learning_rate": 0.00013347558551028702, + "loss": 0.5005, + "step": 3266 + }, + { + "epoch": 3.88, + "learning_rate": 0.00013344000213743522, + "loss": 0.6475, + "step": 3267 + }, + { + "epoch": 3.88, + "learning_rate": 0.00013340441399676856, + "loss": 0.4394, + "step": 3268 + }, + { + "epoch": 3.88, + "learning_rate": 0.0001333688210933611, + "loss": 0.4351, + "step": 3269 + }, + { + "epoch": 3.88, + "learning_rate": 0.0001333332234322876, + "loss": 0.4526, + "step": 3270 + }, + { + "epoch": 3.88, + "learning_rate": 0.0001332976210186236, + "loss": 0.3006, + "step": 3271 + }, + { + "epoch": 3.88, + "learning_rate": 0.00013326201385744518, + "loss": 0.382, + "step": 3272 + }, + { + "epoch": 3.89, + "learning_rate": 0.00013322640195382907, + "loss": 0.3488, + "step": 3273 + }, + { + "epoch": 3.89, + "learning_rate": 0.00013319078531285285, + "loss": 0.5538, + "step": 3274 + }, + { + "epoch": 3.89, + "learning_rate": 0.00013315516393959463, + "loss": 0.5328, + "step": 3275 + }, + { + "epoch": 3.89, + "learning_rate": 0.00013311953783913324, + "loss": 0.5216, + "step": 3276 + }, + { + "epoch": 3.89, + "learning_rate": 0.0001330839070165482, + "loss": 0.3845, + "step": 3277 + }, + { + "epoch": 3.89, + "learning_rate": 0.0001330482714769197, + "loss": 0.5293, + "step": 3278 + }, + { + "epoch": 3.89, + "learning_rate": 0.00013301263122532855, + "loss": 0.5415, + "step": 3279 + }, + { + "epoch": 3.89, + "learning_rate": 0.0001329769862668563, + "loss": 0.5309, + "step": 3280 + }, + { + "epoch": 3.9, + "learning_rate": 0.00013294133660658516, + "loss": 0.4629, + "step": 3281 + }, + { + "epoch": 3.9, + "learning_rate": 0.00013290568224959794, + "loss": 0.4329, + "step": 3282 + }, + { + "epoch": 3.9, + "learning_rate": 0.00013287002320097821, + "loss": 0.3973, + "step": 3283 + }, + { + "epoch": 3.9, + "learning_rate": 0.0001328343594658102, + "loss": 0.3417, + "step": 3284 + }, + { + "epoch": 3.9, + "learning_rate": 0.00013279869104917873, + "loss": 0.4784, + "step": 3285 + }, + { + "epoch": 3.9, + "learning_rate": 0.00013276301795616936, + "loss": 0.3668, + "step": 3286 + }, + { + "epoch": 3.9, + "learning_rate": 0.0001327273401918683, + "loss": 0.3726, + "step": 3287 + }, + { + "epoch": 3.9, + "learning_rate": 0.00013269165776136238, + "loss": 0.518, + "step": 3288 + }, + { + "epoch": 3.91, + "learning_rate": 0.00013265597066973922, + "loss": 0.3864, + "step": 3289 + }, + { + "epoch": 3.91, + "learning_rate": 0.00013262027892208694, + "loss": 0.4249, + "step": 3290 + }, + { + "epoch": 3.91, + "learning_rate": 0.00013258458252349444, + "loss": 0.395, + "step": 3291 + }, + { + "epoch": 3.91, + "learning_rate": 0.00013254888147905126, + "loss": 0.8359, + "step": 3292 + }, + { + "epoch": 3.91, + "learning_rate": 0.00013251317579384756, + "loss": 0.5028, + "step": 3293 + }, + { + "epoch": 3.91, + "learning_rate": 0.0001324774654729742, + "loss": 0.4216, + "step": 3294 + }, + { + "epoch": 3.91, + "learning_rate": 0.0001324417505215227, + "loss": 0.6145, + "step": 3295 + }, + { + "epoch": 3.91, + "learning_rate": 0.00013240603094458522, + "loss": 0.6158, + "step": 3296 + }, + { + "epoch": 3.91, + "learning_rate": 0.00013237030674725464, + "loss": 0.5101, + "step": 3297 + }, + { + "epoch": 3.92, + "learning_rate": 0.0001323345779346244, + "loss": 0.6933, + "step": 3298 + }, + { + "epoch": 3.92, + "learning_rate": 0.0001322988445117886, + "loss": 0.4192, + "step": 3299 + }, + { + "epoch": 3.92, + "learning_rate": 0.0001322631064838422, + "loss": 0.4549, + "step": 3300 + }, + { + "epoch": 3.92, + "learning_rate": 0.00013222736385588054, + "loss": 0.4947, + "step": 3301 + }, + { + "epoch": 3.92, + "learning_rate": 0.00013219161663299982, + "loss": 0.5383, + "step": 3302 + }, + { + "epoch": 3.92, + "learning_rate": 0.00013215586482029669, + "loss": 0.4919, + "step": 3303 + }, + { + "epoch": 3.92, + "learning_rate": 0.0001321201084228687, + "loss": 0.4603, + "step": 3304 + }, + { + "epoch": 3.92, + "learning_rate": 0.00013208434744581385, + "loss": 0.3127, + "step": 3305 + }, + { + "epoch": 3.93, + "learning_rate": 0.00013204858189423097, + "loss": 0.754, + "step": 3306 + }, + { + "epoch": 3.93, + "learning_rate": 0.00013201281177321935, + "loss": 0.3746, + "step": 3307 + }, + { + "epoch": 3.93, + "learning_rate": 0.00013197703708787913, + "loss": 0.5576, + "step": 3308 + }, + { + "epoch": 3.93, + "learning_rate": 0.0001319412578433109, + "loss": 0.4992, + "step": 3309 + }, + { + "epoch": 3.93, + "learning_rate": 0.00013190547404461598, + "loss": 0.4533, + "step": 3310 + }, + { + "epoch": 3.93, + "learning_rate": 0.0001318696856968965, + "loss": 0.4155, + "step": 3311 + }, + { + "epoch": 3.93, + "learning_rate": 0.00013183389280525497, + "loss": 0.3661, + "step": 3312 + }, + { + "epoch": 3.93, + "learning_rate": 0.00013179809537479476, + "loss": 0.4512, + "step": 3313 + }, + { + "epoch": 3.94, + "learning_rate": 0.00013176229341061975, + "loss": 0.5895, + "step": 3314 + }, + { + "epoch": 3.94, + "learning_rate": 0.00013172648691783454, + "loss": 0.3308, + "step": 3315 + }, + { + "epoch": 3.94, + "learning_rate": 0.00013169067590154432, + "loss": 0.4128, + "step": 3316 + }, + { + "epoch": 3.94, + "learning_rate": 0.00013165486036685503, + "loss": 0.5432, + "step": 3317 + }, + { + "epoch": 3.94, + "learning_rate": 0.0001316190403188731, + "loss": 0.4297, + "step": 3318 + }, + { + "epoch": 3.94, + "learning_rate": 0.00013158321576270575, + "loss": 0.4259, + "step": 3319 + }, + { + "epoch": 3.94, + "learning_rate": 0.0001315473867034608, + "loss": 0.4428, + "step": 3320 + }, + { + "epoch": 3.94, + "learning_rate": 0.0001315115531462466, + "loss": 0.6495, + "step": 3321 + }, + { + "epoch": 3.94, + "learning_rate": 0.00013147571509617228, + "loss": 0.5706, + "step": 3322 + }, + { + "epoch": 3.95, + "learning_rate": 0.0001314398725583476, + "loss": 0.3647, + "step": 3323 + }, + { + "epoch": 3.95, + "learning_rate": 0.0001314040255378829, + "loss": 0.4864, + "step": 3324 + }, + { + "epoch": 3.95, + "learning_rate": 0.00013136817403988917, + "loss": 0.4197, + "step": 3325 + }, + { + "epoch": 3.95, + "learning_rate": 0.00013133231806947805, + "loss": 0.4818, + "step": 3326 + }, + { + "epoch": 3.95, + "learning_rate": 0.00013129645763176184, + "loss": 0.4201, + "step": 3327 + }, + { + "epoch": 3.95, + "learning_rate": 0.0001312605927318534, + "loss": 0.4352, + "step": 3328 + }, + { + "epoch": 3.95, + "learning_rate": 0.0001312247233748664, + "loss": 0.2785, + "step": 3329 + }, + { + "epoch": 3.95, + "learning_rate": 0.0001311888495659149, + "loss": 0.4424, + "step": 3330 + }, + { + "epoch": 3.96, + "learning_rate": 0.00013115297131011382, + "loss": 0.4258, + "step": 3331 + }, + { + "epoch": 3.96, + "learning_rate": 0.00013111708861257855, + "loss": 0.4332, + "step": 3332 + } + ], + "logging_steps": 1, + "max_steps": 8330, + "num_input_tokens_seen": 0, + "num_train_epochs": 10, + "save_steps": 833, + "total_flos": 1.1678909660399665e+19, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-3332/trainer_state.json:com.dropbox.attrs b/checkpoint-3332/trainer_state.json:com.dropbox.attrs new file mode 100644 index 0000000000000000000000000000000000000000..1ee9dbae7ee56dfa3b8e67f40cbf5c91984cdd5f Binary files /dev/null and b/checkpoint-3332/trainer_state.json:com.dropbox.attrs differ diff --git a/checkpoint-3332/training_args.bin b/checkpoint-3332/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..b74ebd11d7429fe3b4fc4524a3b2d80be486b207 --- /dev/null +++ b/checkpoint-3332/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:008c2f6eb84a5df4b149629ed295f775de2745857ece42b151bce88afb911869 +size 4859 diff --git a/checkpoint-3332/training_args.bin:com.dropbox.attrs b/checkpoint-3332/training_args.bin:com.dropbox.attrs new file mode 100644 index 0000000000000000000000000000000000000000..ffe32c070622a02ccf62b565de31130fb16609ce Binary files /dev/null and b/checkpoint-3332/training_args.bin:com.dropbox.attrs differ diff --git a/checkpoint-4165/README.md b/checkpoint-4165/README.md new file mode 100644 index 0000000000000000000000000000000000000000..bd5a5d669d6a6bdd984240b8e8bb0a3445b36cda --- /dev/null +++ b/checkpoint-4165/README.md @@ -0,0 +1,218 @@ +--- +library_name: peft +base_model: mistralai/Mixtral-8x7B-v0.1 +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] + + +## Training procedure + +The following `bitsandbytes` quantization config was used during training: +- quant_method: bitsandbytes +- load_in_8bit: False +- load_in_4bit: True +- llm_int8_threshold: 6.0 +- llm_int8_skip_modules: None +- llm_int8_enable_fp32_cpu_offload: False +- llm_int8_has_fp16_weight: False +- bnb_4bit_quant_type: nf4 +- bnb_4bit_use_double_quant: True +- bnb_4bit_compute_dtype: bfloat16 + +### Framework versions + +- PEFT 0.7.0 \ No newline at end of file diff --git a/checkpoint-4165/README.md:com.dropbox.attrs b/checkpoint-4165/README.md:com.dropbox.attrs new file mode 100644 index 0000000000000000000000000000000000000000..742d48d310ed788c62650b5e2b05a5984862117e Binary files /dev/null and b/checkpoint-4165/README.md:com.dropbox.attrs differ diff --git a/checkpoint-4165/adapter_config.json b/checkpoint-4165/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..1c60bdd91f1a6b73161ce005f7160d2490fd5c8a --- /dev/null +++ b/checkpoint-4165/adapter_config.json @@ -0,0 +1,32 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "mistralai/Mixtral-8x7B-v0.1", + "bias": "none", + "fan_in_fan_out": null, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 64, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "k_proj", + "w1", + "gate", + "w2", + "q_proj", + "w3", + "o_proj", + "v_proj" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-4165/adapter_config.json:com.dropbox.attrs b/checkpoint-4165/adapter_config.json:com.dropbox.attrs new file mode 100644 index 0000000000000000000000000000000000000000..73d0543343fcc90a29326359185ac5c839148b65 Binary files /dev/null and b/checkpoint-4165/adapter_config.json:com.dropbox.attrs differ diff --git a/checkpoint-4165/adapter_model.safetensors b/checkpoint-4165/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a20e4ab9a1878d232b5cae6f89086006cacf2ada --- /dev/null +++ b/checkpoint-4165/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:75e8409fd75d6737abc14ae7d214c4b6235759b1472d5fc69a6e2bf87d2150df +size 3875879784 diff --git a/checkpoint-4165/adapter_model.safetensors:com.dropbox.attrs b/checkpoint-4165/adapter_model.safetensors:com.dropbox.attrs new file mode 100644 index 0000000000000000000000000000000000000000..d52bcc89b53d9461c74fef2786aa8233ab654fd2 Binary files /dev/null and b/checkpoint-4165/adapter_model.safetensors:com.dropbox.attrs differ diff --git a/checkpoint-4165/optimizer.pt b/checkpoint-4165/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..e2142de25ad7fbabff81907d1943147a525be5df --- /dev/null +++ b/checkpoint-4165/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:392e12045d33cf98614290b947d1350cb61f0283f17168b7aa9a5591c8866410 +size 1943844127 diff --git a/checkpoint-4165/optimizer.pt:com.dropbox.attrs b/checkpoint-4165/optimizer.pt:com.dropbox.attrs new file mode 100644 index 0000000000000000000000000000000000000000..86300b00d29a9d85f88c14ca896987cc6f09786a Binary files /dev/null and b/checkpoint-4165/optimizer.pt:com.dropbox.attrs differ diff --git a/checkpoint-4165/rng_state.pth b/checkpoint-4165/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..73f93be7fad32b80faea3d812a3f621b43e8af81 --- /dev/null +++ b/checkpoint-4165/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:11a5fc4dc6499a7d54d02a029e272d15defccd92d64b2103b2defa9358c5cb2c +size 14575 diff --git a/checkpoint-4165/rng_state.pth:com.dropbox.attrs b/checkpoint-4165/rng_state.pth:com.dropbox.attrs new file mode 100644 index 0000000000000000000000000000000000000000..efdb558e4d125445320b019bca01d63dd63e75e3 Binary files /dev/null and b/checkpoint-4165/rng_state.pth:com.dropbox.attrs differ diff --git a/checkpoint-4165/scheduler.pt b/checkpoint-4165/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..ec0ce657afa6e8e81abadf06cb9561e11b4083f8 --- /dev/null +++ b/checkpoint-4165/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:844122fb3255d7552c2d49de519b6d737b8391693908ec71f80d185bffa3d134 +size 627 diff --git a/checkpoint-4165/scheduler.pt:com.dropbox.attrs b/checkpoint-4165/scheduler.pt:com.dropbox.attrs new file mode 100644 index 0000000000000000000000000000000000000000..ee72b37d76f9be9cd7760c13d4fd58d9f266e3b8 Binary files /dev/null and b/checkpoint-4165/scheduler.pt:com.dropbox.attrs differ diff --git a/checkpoint-4165/trainer_state.json b/checkpoint-4165/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..714a59af08793c20f90a58478160d2a5c4c7f7f6 --- /dev/null +++ b/checkpoint-4165/trainer_state.json @@ -0,0 +1,25171 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 4.942376950780313, + "eval_steps": 209, + "global_step": 4165, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 2e-05, + "loss": 2.1426, + "step": 1 + }, + { + "epoch": 0.0, + "eval_loss": 2.071432113647461, + "eval_runtime": 279.6718, + "eval_samples_per_second": 0.737, + "eval_steps_per_second": 0.737, + "step": 1 + }, + { + "epoch": 0.0, + "learning_rate": 4e-05, + "loss": 2.4033, + "step": 2 + }, + { + "epoch": 0.0, + "learning_rate": 6e-05, + "loss": 2.1893, + "step": 3 + }, + { + "epoch": 0.0, + "learning_rate": 8e-05, + "loss": 2.3226, + "step": 4 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001, + "loss": 2.2485, + "step": 5 + }, + { + "epoch": 0.01, + "learning_rate": 0.00012, + "loss": 1.9704, + "step": 6 + }, + { + "epoch": 0.01, + "learning_rate": 0.00014, + "loss": 1.6929, + "step": 7 + }, + { + "epoch": 0.01, + "learning_rate": 0.00016, + "loss": 2.2957, + "step": 8 + }, + { + "epoch": 0.01, + "learning_rate": 0.00018, + "loss": 1.9907, + "step": 9 + }, + { + "epoch": 0.01, + "learning_rate": 0.0002, + "loss": 2.1295, + "step": 10 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999999287109068, + "loss": 2.2249, + "step": 11 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999997148436365, + "loss": 2.1733, + "step": 12 + }, + { + "epoch": 0.02, + "learning_rate": 0.000199999935839822, + "loss": 2.1404, + "step": 13 + }, + { + "epoch": 0.02, + "learning_rate": 0.00019999988593747084, + "loss": 2.0236, + "step": 14 + }, + { + "epoch": 0.02, + "learning_rate": 0.00019999982177731722, + "loss": 1.9639, + "step": 15 + }, + { + "epoch": 0.02, + "learning_rate": 0.00019999974335937034, + "loss": 1.692, + "step": 16 + }, + { + "epoch": 0.02, + "learning_rate": 0.00019999965068364137, + "loss": 2.3609, + "step": 17 + }, + { + "epoch": 0.02, + "learning_rate": 0.00019999954375014348, + "loss": 2.3553, + "step": 18 + }, + { + "epoch": 0.02, + "learning_rate": 0.00019999942255889198, + "loss": 1.5733, + "step": 19 + }, + { + "epoch": 0.02, + "learning_rate": 0.00019999928710990412, + "loss": 1.7505, + "step": 20 + }, + { + "epoch": 0.03, + "learning_rate": 0.00019999913740319922, + "loss": 2.3068, + "step": 21 + }, + { + "epoch": 0.03, + "learning_rate": 0.00019999897343879862, + "loss": 1.8371, + "step": 22 + }, + { + "epoch": 0.03, + "learning_rate": 0.0001999987952167257, + "loss": 1.9852, + "step": 23 + }, + { + "epoch": 0.03, + "learning_rate": 0.00019999860273700585, + "loss": 1.9625, + "step": 24 + }, + { + "epoch": 0.03, + "learning_rate": 0.00019999839599966655, + "loss": 2.1089, + "step": 25 + }, + { + "epoch": 0.03, + "learning_rate": 0.00019999817500473724, + "loss": 2.1086, + "step": 26 + }, + { + "epoch": 0.03, + "learning_rate": 0.00019999793975224945, + "loss": 2.0284, + "step": 27 + }, + { + "epoch": 0.03, + "learning_rate": 0.00019999769024223673, + "loss": 2.3641, + "step": 28 + }, + { + "epoch": 0.03, + "learning_rate": 0.00019999742647473464, + "loss": 1.963, + "step": 29 + }, + { + "epoch": 0.04, + "learning_rate": 0.00019999714844978078, + "loss": 2.0635, + "step": 30 + }, + { + "epoch": 0.04, + "learning_rate": 0.0001999968561674148, + "loss": 1.9304, + "step": 31 + }, + { + "epoch": 0.04, + "learning_rate": 0.00019999654962767839, + "loss": 1.4124, + "step": 32 + }, + { + "epoch": 0.04, + "learning_rate": 0.00019999622883061518, + "loss": 2.1444, + "step": 33 + }, + { + "epoch": 0.04, + "learning_rate": 0.00019999589377627102, + "loss": 1.6477, + "step": 34 + }, + { + "epoch": 0.04, + "learning_rate": 0.0001999955444646936, + "loss": 2.2601, + "step": 35 + }, + { + "epoch": 0.04, + "learning_rate": 0.00019999518089593282, + "loss": 1.6256, + "step": 36 + }, + { + "epoch": 0.04, + "learning_rate": 0.0001999948030700404, + "loss": 1.9155, + "step": 37 + }, + { + "epoch": 0.05, + "learning_rate": 0.00019999441098707025, + "loss": 2.1408, + "step": 38 + }, + { + "epoch": 0.05, + "learning_rate": 0.00019999400464707832, + "loss": 2.104, + "step": 39 + }, + { + "epoch": 0.05, + "learning_rate": 0.0001999935840501225, + "loss": 1.9841, + "step": 40 + }, + { + "epoch": 0.05, + "learning_rate": 0.00019999314919626272, + "loss": 1.5924, + "step": 41 + }, + { + "epoch": 0.05, + "learning_rate": 0.00019999270008556108, + "loss": 1.9956, + "step": 42 + }, + { + "epoch": 0.05, + "learning_rate": 0.00019999223671808154, + "loss": 1.4673, + "step": 43 + }, + { + "epoch": 0.05, + "learning_rate": 0.00019999175909389018, + "loss": 2.1595, + "step": 44 + }, + { + "epoch": 0.05, + "learning_rate": 0.00019999126721305513, + "loss": 1.8439, + "step": 45 + }, + { + "epoch": 0.06, + "learning_rate": 0.00019999076107564648, + "loss": 1.9961, + "step": 46 + }, + { + "epoch": 0.06, + "learning_rate": 0.00019999024068173638, + "loss": 2.1504, + "step": 47 + }, + { + "epoch": 0.06, + "learning_rate": 0.00019998970603139912, + "loss": 2.2907, + "step": 48 + }, + { + "epoch": 0.06, + "learning_rate": 0.0001999891571247108, + "loss": 1.5709, + "step": 49 + }, + { + "epoch": 0.06, + "learning_rate": 0.0001999885939617498, + "loss": 2.4504, + "step": 50 + }, + { + "epoch": 0.06, + "learning_rate": 0.00019998801654259632, + "loss": 2.3787, + "step": 51 + }, + { + "epoch": 0.06, + "learning_rate": 0.0001999874248673328, + "loss": 2.0434, + "step": 52 + }, + { + "epoch": 0.06, + "learning_rate": 0.00019998681893604347, + "loss": 2.1671, + "step": 53 + }, + { + "epoch": 0.06, + "learning_rate": 0.0001999861987488148, + "loss": 1.7432, + "step": 54 + }, + { + "epoch": 0.07, + "learning_rate": 0.00019998556430573521, + "loss": 1.7737, + "step": 55 + }, + { + "epoch": 0.07, + "learning_rate": 0.00019998491560689513, + "loss": 2.0122, + "step": 56 + }, + { + "epoch": 0.07, + "learning_rate": 0.0001999842526523871, + "loss": 1.7545, + "step": 57 + }, + { + "epoch": 0.07, + "learning_rate": 0.00019998357544230558, + "loss": 2.201, + "step": 58 + }, + { + "epoch": 0.07, + "learning_rate": 0.00019998288397674716, + "loss": 2.0396, + "step": 59 + }, + { + "epoch": 0.07, + "learning_rate": 0.0001999821782558104, + "loss": 1.9275, + "step": 60 + }, + { + "epoch": 0.07, + "learning_rate": 0.00019998145827959598, + "loss": 1.7797, + "step": 61 + }, + { + "epoch": 0.07, + "learning_rate": 0.0001999807240482065, + "loss": 2.1463, + "step": 62 + }, + { + "epoch": 0.08, + "learning_rate": 0.00019997997556174665, + "loss": 1.935, + "step": 63 + }, + { + "epoch": 0.08, + "learning_rate": 0.0001999792128203232, + "loss": 2.1182, + "step": 64 + }, + { + "epoch": 0.08, + "learning_rate": 0.0001999784358240448, + "loss": 2.2297, + "step": 65 + }, + { + "epoch": 0.08, + "learning_rate": 0.00019997764457302234, + "loss": 2.1052, + "step": 66 + }, + { + "epoch": 0.08, + "learning_rate": 0.0001999768390673686, + "loss": 2.0777, + "step": 67 + }, + { + "epoch": 0.08, + "learning_rate": 0.00019997601930719835, + "loss": 2.1419, + "step": 68 + }, + { + "epoch": 0.08, + "learning_rate": 0.0001999751852926286, + "loss": 2.2586, + "step": 69 + }, + { + "epoch": 0.08, + "learning_rate": 0.00019997433702377817, + "loss": 1.9089, + "step": 70 + }, + { + "epoch": 0.09, + "learning_rate": 0.00019997347450076801, + "loss": 2.0587, + "step": 71 + }, + { + "epoch": 0.09, + "learning_rate": 0.00019997259772372116, + "loss": 2.4143, + "step": 72 + }, + { + "epoch": 0.09, + "learning_rate": 0.00019997170669276256, + "loss": 1.947, + "step": 73 + }, + { + "epoch": 0.09, + "learning_rate": 0.00019997080140801932, + "loss": 2.008, + "step": 74 + }, + { + "epoch": 0.09, + "learning_rate": 0.00019996988186962041, + "loss": 2.4912, + "step": 75 + }, + { + "epoch": 0.09, + "learning_rate": 0.00019996894807769707, + "loss": 2.0279, + "step": 76 + }, + { + "epoch": 0.09, + "learning_rate": 0.00019996800003238232, + "loss": 1.9914, + "step": 77 + }, + { + "epoch": 0.09, + "learning_rate": 0.0001999670377338114, + "loss": 1.9091, + "step": 78 + }, + { + "epoch": 0.09, + "learning_rate": 0.00019996606118212148, + "loss": 1.8038, + "step": 79 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019996507037745183, + "loss": 2.3573, + "step": 80 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019996406531994364, + "loss": 2.3204, + "step": 81 + }, + { + "epoch": 0.1, + "learning_rate": 0.0001999630460097403, + "loss": 2.1619, + "step": 82 + }, + { + "epoch": 0.1, + "learning_rate": 0.0001999620124469871, + "loss": 1.9977, + "step": 83 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019996096463183142, + "loss": 2.195, + "step": 84 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019995990256442263, + "loss": 1.9909, + "step": 85 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019995882624491217, + "loss": 2.2001, + "step": 86 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019995773567345354, + "loss": 1.5795, + "step": 87 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019995663085020212, + "loss": 2.174, + "step": 88 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019995551177531557, + "loss": 1.9605, + "step": 89 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019995437844895334, + "loss": 2.1768, + "step": 90 + }, + { + "epoch": 0.11, + "learning_rate": 0.0001999532308712771, + "loss": 1.6906, + "step": 91 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019995206904245037, + "loss": 2.1029, + "step": 92 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019995089296263893, + "loss": 2.0652, + "step": 93 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019994970263201035, + "loss": 2.1733, + "step": 94 + }, + { + "epoch": 0.11, + "learning_rate": 0.0001999484980507344, + "loss": 1.9413, + "step": 95 + }, + { + "epoch": 0.12, + "learning_rate": 0.0001999472792189828, + "loss": 1.9538, + "step": 96 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019994604613692935, + "loss": 2.4158, + "step": 97 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019994479880474988, + "loss": 1.8964, + "step": 98 + }, + { + "epoch": 0.12, + "learning_rate": 0.0001999435372226222, + "loss": 2.3135, + "step": 99 + }, + { + "epoch": 0.12, + "learning_rate": 0.0001999422613907262, + "loss": 2.127, + "step": 100 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019994097130924374, + "loss": 1.9954, + "step": 101 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019993966697835883, + "loss": 2.1363, + "step": 102 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019993834839825738, + "loss": 1.7779, + "step": 103 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019993701556912742, + "loss": 2.0923, + "step": 104 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019993566849115898, + "loss": 1.9183, + "step": 105 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019993430716454413, + "loss": 1.7894, + "step": 106 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019993293158947694, + "loss": 2.0094, + "step": 107 + }, + { + "epoch": 0.13, + "learning_rate": 0.0001999315417661536, + "loss": 2.1469, + "step": 108 + }, + { + "epoch": 0.13, + "learning_rate": 0.0001999301376947722, + "loss": 1.6924, + "step": 109 + }, + { + "epoch": 0.13, + "learning_rate": 0.0001999287193755329, + "loss": 2.1794, + "step": 110 + }, + { + "epoch": 0.13, + "learning_rate": 0.000199927286808638, + "loss": 2.1338, + "step": 111 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019992583999429178, + "loss": 1.9988, + "step": 112 + }, + { + "epoch": 0.14, + "learning_rate": 0.0001999243789327004, + "loss": 2.0735, + "step": 113 + }, + { + "epoch": 0.14, + "learning_rate": 0.0001999229036240723, + "loss": 2.0521, + "step": 114 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019992141406861776, + "loss": 1.9441, + "step": 115 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019991991026654918, + "loss": 2.1244, + "step": 116 + }, + { + "epoch": 0.14, + "learning_rate": 0.0001999183922180809, + "loss": 1.7937, + "step": 117 + }, + { + "epoch": 0.14, + "learning_rate": 0.0001999168599234295, + "loss": 2.2603, + "step": 118 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019991531338281332, + "loss": 2.1846, + "step": 119 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019991375259645293, + "loss": 2.3241, + "step": 120 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019991217756457085, + "loss": 2.0926, + "step": 121 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019991058828739165, + "loss": 2.0092, + "step": 122 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019990898476514193, + "loss": 1.8076, + "step": 123 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019990736699805029, + "loss": 2.0369, + "step": 124 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019990573498634742, + "loss": 2.0488, + "step": 125 + }, + { + "epoch": 0.15, + "learning_rate": 0.000199904088730266, + "loss": 2.1534, + "step": 126 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019990242823004074, + "loss": 2.1406, + "step": 127 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019990075348590839, + "loss": 1.9379, + "step": 128 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019989906449810775, + "loss": 1.9781, + "step": 129 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019989736126687963, + "loss": 1.973, + "step": 130 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019989564379246683, + "loss": 1.6825, + "step": 131 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019989391207511428, + "loss": 2.0843, + "step": 132 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019989216611506887, + "loss": 1.8547, + "step": 133 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019989040591257952, + "loss": 1.7626, + "step": 134 + }, + { + "epoch": 0.16, + "learning_rate": 0.0001998886314678972, + "loss": 2.0531, + "step": 135 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019988684278127497, + "loss": 2.0031, + "step": 136 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019988503985296773, + "loss": 1.9342, + "step": 137 + }, + { + "epoch": 0.17, + "learning_rate": 0.00019988322268323268, + "loss": 2.3297, + "step": 138 + }, + { + "epoch": 0.17, + "learning_rate": 0.00019988139127232878, + "loss": 2.3401, + "step": 139 + }, + { + "epoch": 0.17, + "learning_rate": 0.00019987954562051725, + "loss": 1.8983, + "step": 140 + }, + { + "epoch": 0.17, + "learning_rate": 0.0001998776857280612, + "loss": 2.0621, + "step": 141 + }, + { + "epoch": 0.17, + "learning_rate": 0.00019987581159522578, + "loss": 2.0574, + "step": 142 + }, + { + "epoch": 0.17, + "learning_rate": 0.00019987392322227824, + "loss": 1.9516, + "step": 143 + }, + { + "epoch": 0.17, + "learning_rate": 0.00019987202060948783, + "loss": 2.1402, + "step": 144 + }, + { + "epoch": 0.17, + "learning_rate": 0.00019987010375712577, + "loss": 1.8903, + "step": 145 + }, + { + "epoch": 0.18, + "learning_rate": 0.00019986817266546539, + "loss": 1.8248, + "step": 146 + }, + { + "epoch": 0.18, + "learning_rate": 0.00019986622733478204, + "loss": 1.9877, + "step": 147 + }, + { + "epoch": 0.18, + "learning_rate": 0.00019986426776535306, + "loss": 1.6272, + "step": 148 + }, + { + "epoch": 0.18, + "learning_rate": 0.00019986229395745785, + "loss": 1.8605, + "step": 149 + }, + { + "epoch": 0.18, + "learning_rate": 0.00019986030591137783, + "loss": 1.6848, + "step": 150 + }, + { + "epoch": 0.18, + "learning_rate": 0.00019985830362739647, + "loss": 2.1922, + "step": 151 + }, + { + "epoch": 0.18, + "learning_rate": 0.0001998562871057992, + "loss": 2.0238, + "step": 152 + }, + { + "epoch": 0.18, + "learning_rate": 0.0001998542563468736, + "loss": 2.2246, + "step": 153 + }, + { + "epoch": 0.18, + "learning_rate": 0.00019985221135090914, + "loss": 1.9438, + "step": 154 + }, + { + "epoch": 0.19, + "learning_rate": 0.00019985015211819744, + "loss": 2.2136, + "step": 155 + }, + { + "epoch": 0.19, + "learning_rate": 0.0001998480786490321, + "loss": 2.4563, + "step": 156 + }, + { + "epoch": 0.19, + "learning_rate": 0.00019984599094370874, + "loss": 2.2138, + "step": 157 + }, + { + "epoch": 0.19, + "learning_rate": 0.00019984388900252503, + "loss": 2.2679, + "step": 158 + }, + { + "epoch": 0.19, + "learning_rate": 0.00019984177282578064, + "loss": 1.9537, + "step": 159 + }, + { + "epoch": 0.19, + "learning_rate": 0.0001998396424137773, + "loss": 2.0803, + "step": 160 + }, + { + "epoch": 0.19, + "learning_rate": 0.0001998374977668188, + "loss": 2.0282, + "step": 161 + }, + { + "epoch": 0.19, + "learning_rate": 0.00019983533888521087, + "loss": 2.0157, + "step": 162 + }, + { + "epoch": 0.2, + "learning_rate": 0.0001998331657692613, + "loss": 1.7837, + "step": 163 + }, + { + "epoch": 0.2, + "learning_rate": 0.00019983097841928, + "loss": 2.1556, + "step": 164 + }, + { + "epoch": 0.2, + "learning_rate": 0.00019982877683557879, + "loss": 2.1447, + "step": 165 + }, + { + "epoch": 0.2, + "learning_rate": 0.00019982656101847162, + "loss": 2.4139, + "step": 166 + }, + { + "epoch": 0.2, + "learning_rate": 0.0001998243309682743, + "loss": 1.6788, + "step": 167 + }, + { + "epoch": 0.2, + "learning_rate": 0.00019982208668530493, + "loss": 1.9008, + "step": 168 + }, + { + "epoch": 0.2, + "learning_rate": 0.0001998198281698834, + "loss": 2.173, + "step": 169 + }, + { + "epoch": 0.2, + "learning_rate": 0.00019981755542233177, + "loss": 2.1837, + "step": 170 + }, + { + "epoch": 0.21, + "learning_rate": 0.00019981526844297404, + "loss": 2.0639, + "step": 171 + }, + { + "epoch": 0.21, + "learning_rate": 0.00019981296723213632, + "loss": 2.3864, + "step": 172 + }, + { + "epoch": 0.21, + "learning_rate": 0.00019981065179014673, + "loss": 1.923, + "step": 173 + }, + { + "epoch": 0.21, + "learning_rate": 0.00019980832211733535, + "loss": 1.9192, + "step": 174 + }, + { + "epoch": 0.21, + "learning_rate": 0.00019980597821403438, + "loss": 2.0335, + "step": 175 + }, + { + "epoch": 0.21, + "learning_rate": 0.000199803620080578, + "loss": 1.8172, + "step": 176 + }, + { + "epoch": 0.21, + "learning_rate": 0.0001998012477173024, + "loss": 2.0294, + "step": 177 + }, + { + "epoch": 0.21, + "learning_rate": 0.00019979886112454586, + "loss": 2.2889, + "step": 178 + }, + { + "epoch": 0.21, + "learning_rate": 0.00019979646030264867, + "loss": 1.8498, + "step": 179 + }, + { + "epoch": 0.22, + "learning_rate": 0.0001997940452519531, + "loss": 2.0797, + "step": 180 + }, + { + "epoch": 0.22, + "learning_rate": 0.0001997916159728035, + "loss": 2.2356, + "step": 181 + }, + { + "epoch": 0.22, + "learning_rate": 0.0001997891724655462, + "loss": 2.1187, + "step": 182 + }, + { + "epoch": 0.22, + "learning_rate": 0.00019978671473052964, + "loss": 1.9301, + "step": 183 + }, + { + "epoch": 0.22, + "learning_rate": 0.00019978424276810423, + "loss": 1.8582, + "step": 184 + }, + { + "epoch": 0.22, + "learning_rate": 0.0001997817565786224, + "loss": 2.144, + "step": 185 + }, + { + "epoch": 0.22, + "learning_rate": 0.00019977925616243862, + "loss": 2.0595, + "step": 186 + }, + { + "epoch": 0.22, + "learning_rate": 0.00019977674151990945, + "loss": 1.9104, + "step": 187 + }, + { + "epoch": 0.23, + "learning_rate": 0.00019977421265139332, + "loss": 1.9727, + "step": 188 + }, + { + "epoch": 0.23, + "learning_rate": 0.00019977166955725088, + "loss": 1.8727, + "step": 189 + }, + { + "epoch": 0.23, + "learning_rate": 0.0001997691122378447, + "loss": 2.0611, + "step": 190 + }, + { + "epoch": 0.23, + "learning_rate": 0.0001997665406935394, + "loss": 2.0745, + "step": 191 + }, + { + "epoch": 0.23, + "learning_rate": 0.0001997639549247016, + "loss": 1.9974, + "step": 192 + }, + { + "epoch": 0.23, + "learning_rate": 0.00019976135493169996, + "loss": 1.9856, + "step": 193 + }, + { + "epoch": 0.23, + "learning_rate": 0.00019975874071490526, + "loss": 1.778, + "step": 194 + }, + { + "epoch": 0.23, + "learning_rate": 0.00019975611227469016, + "loss": 1.8347, + "step": 195 + }, + { + "epoch": 0.24, + "learning_rate": 0.0001997534696114294, + "loss": 1.5555, + "step": 196 + }, + { + "epoch": 0.24, + "learning_rate": 0.00019975081272549989, + "loss": 1.5625, + "step": 197 + }, + { + "epoch": 0.24, + "learning_rate": 0.00019974814161728032, + "loss": 1.9997, + "step": 198 + }, + { + "epoch": 0.24, + "learning_rate": 0.00019974545628715157, + "loss": 1.9523, + "step": 199 + }, + { + "epoch": 0.24, + "learning_rate": 0.00019974275673549654, + "loss": 2.1557, + "step": 200 + }, + { + "epoch": 0.24, + "learning_rate": 0.00019974004296270006, + "loss": 1.8306, + "step": 201 + }, + { + "epoch": 0.24, + "learning_rate": 0.00019973731496914914, + "loss": 2.0051, + "step": 202 + }, + { + "epoch": 0.24, + "learning_rate": 0.00019973457275523264, + "loss": 2.201, + "step": 203 + }, + { + "epoch": 0.24, + "learning_rate": 0.0001997318163213416, + "loss": 2.2446, + "step": 204 + }, + { + "epoch": 0.25, + "learning_rate": 0.00019972904566786903, + "loss": 2.1172, + "step": 205 + }, + { + "epoch": 0.25, + "learning_rate": 0.00019972626079520995, + "loss": 1.9849, + "step": 206 + }, + { + "epoch": 0.25, + "learning_rate": 0.00019972346170376142, + "loss": 1.9774, + "step": 207 + }, + { + "epoch": 0.25, + "learning_rate": 0.0001997206483939225, + "loss": 1.7625, + "step": 208 + }, + { + "epoch": 0.25, + "learning_rate": 0.00019971782086609436, + "loss": 2.2346, + "step": 209 + }, + { + "epoch": 0.25, + "eval_loss": 2.00066876411438, + "eval_runtime": 282.7648, + "eval_samples_per_second": 0.729, + "eval_steps_per_second": 0.729, + "step": 209 + }, + { + "epoch": 0.25, + "learning_rate": 0.00019971497912068013, + "loss": 2.4185, + "step": 210 + }, + { + "epoch": 0.25, + "learning_rate": 0.00019971212315808497, + "loss": 1.946, + "step": 211 + }, + { + "epoch": 0.25, + "learning_rate": 0.00019970925297871605, + "loss": 2.0049, + "step": 212 + }, + { + "epoch": 0.26, + "learning_rate": 0.00019970636858298267, + "loss": 1.9545, + "step": 213 + }, + { + "epoch": 0.26, + "learning_rate": 0.00019970346997129598, + "loss": 1.9636, + "step": 214 + }, + { + "epoch": 0.26, + "learning_rate": 0.00019970055714406938, + "loss": 1.9068, + "step": 215 + }, + { + "epoch": 0.26, + "learning_rate": 0.00019969763010171807, + "loss": 1.5749, + "step": 216 + }, + { + "epoch": 0.26, + "learning_rate": 0.00019969468884465942, + "loss": 1.7676, + "step": 217 + }, + { + "epoch": 0.26, + "learning_rate": 0.0001996917333733128, + "loss": 2.0329, + "step": 218 + }, + { + "epoch": 0.26, + "learning_rate": 0.0001996887636880996, + "loss": 1.9307, + "step": 219 + }, + { + "epoch": 0.26, + "learning_rate": 0.00019968577978944323, + "loss": 2.134, + "step": 220 + }, + { + "epoch": 0.27, + "learning_rate": 0.00019968278167776908, + "loss": 2.0911, + "step": 221 + }, + { + "epoch": 0.27, + "learning_rate": 0.00019967976935350467, + "loss": 2.5057, + "step": 222 + }, + { + "epoch": 0.27, + "learning_rate": 0.0001996767428170795, + "loss": 1.9267, + "step": 223 + }, + { + "epoch": 0.27, + "learning_rate": 0.00019967370206892503, + "loss": 2.3569, + "step": 224 + }, + { + "epoch": 0.27, + "learning_rate": 0.00019967064710947488, + "loss": 1.992, + "step": 225 + }, + { + "epoch": 0.27, + "learning_rate": 0.00019966757793916454, + "loss": 2.01, + "step": 226 + }, + { + "epoch": 0.27, + "learning_rate": 0.00019966449455843165, + "loss": 1.8037, + "step": 227 + }, + { + "epoch": 0.27, + "learning_rate": 0.00019966139696771587, + "loss": 2.2498, + "step": 228 + }, + { + "epoch": 0.27, + "learning_rate": 0.00019965828516745876, + "loss": 1.6563, + "step": 229 + }, + { + "epoch": 0.28, + "learning_rate": 0.0001996551591581041, + "loss": 1.979, + "step": 230 + }, + { + "epoch": 0.28, + "learning_rate": 0.0001996520189400975, + "loss": 2.1553, + "step": 231 + }, + { + "epoch": 0.28, + "learning_rate": 0.0001996488645138867, + "loss": 1.8743, + "step": 232 + }, + { + "epoch": 0.28, + "learning_rate": 0.00019964569587992148, + "loss": 2.1907, + "step": 233 + }, + { + "epoch": 0.28, + "learning_rate": 0.00019964251303865362, + "loss": 2.0644, + "step": 234 + }, + { + "epoch": 0.28, + "learning_rate": 0.00019963931599053692, + "loss": 2.1721, + "step": 235 + }, + { + "epoch": 0.28, + "learning_rate": 0.0001996361047360272, + "loss": 2.2267, + "step": 236 + }, + { + "epoch": 0.28, + "learning_rate": 0.0001996328792755823, + "loss": 1.9445, + "step": 237 + }, + { + "epoch": 0.29, + "learning_rate": 0.00019962963960966213, + "loss": 2.2003, + "step": 238 + }, + { + "epoch": 0.29, + "learning_rate": 0.0001996263857387286, + "loss": 2.3114, + "step": 239 + }, + { + "epoch": 0.29, + "learning_rate": 0.0001996231176632456, + "loss": 1.8553, + "step": 240 + }, + { + "epoch": 0.29, + "learning_rate": 0.00019961983538367914, + "loss": 2.1349, + "step": 241 + }, + { + "epoch": 0.29, + "learning_rate": 0.00019961653890049715, + "loss": 1.8784, + "step": 242 + }, + { + "epoch": 0.29, + "learning_rate": 0.0001996132282141697, + "loss": 2.0118, + "step": 243 + }, + { + "epoch": 0.29, + "learning_rate": 0.00019960990332516874, + "loss": 1.9938, + "step": 244 + }, + { + "epoch": 0.29, + "learning_rate": 0.00019960656423396834, + "loss": 2.2582, + "step": 245 + }, + { + "epoch": 0.3, + "learning_rate": 0.00019960321094104465, + "loss": 2.1807, + "step": 246 + }, + { + "epoch": 0.3, + "learning_rate": 0.00019959984344687578, + "loss": 1.9084, + "step": 247 + }, + { + "epoch": 0.3, + "learning_rate": 0.00019959646175194174, + "loss": 2.2879, + "step": 248 + }, + { + "epoch": 0.3, + "learning_rate": 0.0001995930658567248, + "loss": 1.942, + "step": 249 + }, + { + "epoch": 0.3, + "learning_rate": 0.00019958965576170908, + "loss": 2.1313, + "step": 250 + }, + { + "epoch": 0.3, + "learning_rate": 0.00019958623146738088, + "loss": 2.3202, + "step": 251 + }, + { + "epoch": 0.3, + "learning_rate": 0.0001995827929742283, + "loss": 1.7832, + "step": 252 + }, + { + "epoch": 0.3, + "learning_rate": 0.00019957934028274162, + "loss": 1.7103, + "step": 253 + }, + { + "epoch": 0.3, + "learning_rate": 0.00019957587339341321, + "loss": 1.9912, + "step": 254 + }, + { + "epoch": 0.31, + "learning_rate": 0.0001995723923067373, + "loss": 1.6686, + "step": 255 + }, + { + "epoch": 0.31, + "learning_rate": 0.00019956889702321023, + "loss": 1.966, + "step": 256 + }, + { + "epoch": 0.31, + "learning_rate": 0.00019956538754333034, + "loss": 2.2287, + "step": 257 + }, + { + "epoch": 0.31, + "learning_rate": 0.00019956186386759804, + "loss": 1.4866, + "step": 258 + }, + { + "epoch": 0.31, + "learning_rate": 0.0001995583259965157, + "loss": 1.9599, + "step": 259 + }, + { + "epoch": 0.31, + "learning_rate": 0.00019955477393058773, + "loss": 1.9273, + "step": 260 + }, + { + "epoch": 0.31, + "learning_rate": 0.0001995512076703206, + "loss": 1.847, + "step": 261 + }, + { + "epoch": 0.31, + "learning_rate": 0.00019954762721622279, + "loss": 2.0535, + "step": 262 + }, + { + "epoch": 0.32, + "learning_rate": 0.0001995440325688048, + "loss": 2.4403, + "step": 263 + }, + { + "epoch": 0.32, + "learning_rate": 0.00019954042372857908, + "loss": 1.8712, + "step": 264 + }, + { + "epoch": 0.32, + "learning_rate": 0.00019953680069606026, + "loss": 2.1837, + "step": 265 + }, + { + "epoch": 0.32, + "learning_rate": 0.00019953316347176488, + "loss": 2.0398, + "step": 266 + }, + { + "epoch": 0.32, + "learning_rate": 0.0001995295120562115, + "loss": 2.1135, + "step": 267 + }, + { + "epoch": 0.32, + "learning_rate": 0.00019952584644992075, + "loss": 2.0358, + "step": 268 + }, + { + "epoch": 0.32, + "learning_rate": 0.00019952216665341526, + "loss": 2.3282, + "step": 269 + }, + { + "epoch": 0.32, + "learning_rate": 0.0001995184726672197, + "loss": 1.9741, + "step": 270 + }, + { + "epoch": 0.33, + "learning_rate": 0.00019951476449186074, + "loss": 1.7523, + "step": 271 + }, + { + "epoch": 0.33, + "learning_rate": 0.00019951104212786712, + "loss": 2.1509, + "step": 272 + }, + { + "epoch": 0.33, + "learning_rate": 0.0001995073055757695, + "loss": 2.0865, + "step": 273 + }, + { + "epoch": 0.33, + "learning_rate": 0.00019950355483610067, + "loss": 1.8972, + "step": 274 + }, + { + "epoch": 0.33, + "learning_rate": 0.00019949978990939542, + "loss": 2.4693, + "step": 275 + }, + { + "epoch": 0.33, + "learning_rate": 0.0001994960107961905, + "loss": 1.9307, + "step": 276 + }, + { + "epoch": 0.33, + "learning_rate": 0.0001994922174970248, + "loss": 2.0097, + "step": 277 + }, + { + "epoch": 0.33, + "learning_rate": 0.0001994884100124391, + "loss": 1.6561, + "step": 278 + }, + { + "epoch": 0.33, + "learning_rate": 0.0001994845883429763, + "loss": 2.3069, + "step": 279 + }, + { + "epoch": 0.34, + "learning_rate": 0.00019948075248918124, + "loss": 2.0134, + "step": 280 + }, + { + "epoch": 0.34, + "learning_rate": 0.00019947690245160091, + "loss": 2.1061, + "step": 281 + }, + { + "epoch": 0.34, + "learning_rate": 0.00019947303823078416, + "loss": 2.0855, + "step": 282 + }, + { + "epoch": 0.34, + "learning_rate": 0.00019946915982728197, + "loss": 1.5672, + "step": 283 + }, + { + "epoch": 0.34, + "learning_rate": 0.0001994652672416473, + "loss": 1.7289, + "step": 284 + }, + { + "epoch": 0.34, + "learning_rate": 0.00019946136047443522, + "loss": 1.9013, + "step": 285 + }, + { + "epoch": 0.34, + "learning_rate": 0.00019945743952620268, + "loss": 2.3105, + "step": 286 + }, + { + "epoch": 0.34, + "learning_rate": 0.00019945350439750872, + "loss": 2.341, + "step": 287 + }, + { + "epoch": 0.35, + "learning_rate": 0.00019944955508891443, + "loss": 1.88, + "step": 288 + }, + { + "epoch": 0.35, + "learning_rate": 0.0001994455916009829, + "loss": 1.913, + "step": 289 + }, + { + "epoch": 0.35, + "learning_rate": 0.00019944161393427922, + "loss": 1.9513, + "step": 290 + }, + { + "epoch": 0.35, + "learning_rate": 0.00019943762208937053, + "loss": 2.3331, + "step": 291 + }, + { + "epoch": 0.35, + "learning_rate": 0.00019943361606682597, + "loss": 2.3024, + "step": 292 + }, + { + "epoch": 0.35, + "learning_rate": 0.00019942959586721672, + "loss": 2.2222, + "step": 293 + }, + { + "epoch": 0.35, + "learning_rate": 0.00019942556149111598, + "loss": 2.1003, + "step": 294 + }, + { + "epoch": 0.35, + "learning_rate": 0.0001994215129390989, + "loss": 1.9038, + "step": 295 + }, + { + "epoch": 0.36, + "learning_rate": 0.00019941745021174282, + "loss": 1.6068, + "step": 296 + }, + { + "epoch": 0.36, + "learning_rate": 0.00019941337330962693, + "loss": 1.8894, + "step": 297 + }, + { + "epoch": 0.36, + "learning_rate": 0.00019940928223333252, + "loss": 2.3158, + "step": 298 + }, + { + "epoch": 0.36, + "learning_rate": 0.0001994051769834429, + "loss": 2.1015, + "step": 299 + }, + { + "epoch": 0.36, + "learning_rate": 0.00019940105756054337, + "loss": 2.1519, + "step": 300 + }, + { + "epoch": 0.36, + "learning_rate": 0.00019939692396522127, + "loss": 1.7233, + "step": 301 + }, + { + "epoch": 0.36, + "learning_rate": 0.00019939277619806598, + "loss": 1.85, + "step": 302 + }, + { + "epoch": 0.36, + "learning_rate": 0.00019938861425966887, + "loss": 2.2368, + "step": 303 + }, + { + "epoch": 0.36, + "learning_rate": 0.00019938443815062335, + "loss": 1.765, + "step": 304 + }, + { + "epoch": 0.37, + "learning_rate": 0.0001993802478715248, + "loss": 1.6333, + "step": 305 + }, + { + "epoch": 0.37, + "learning_rate": 0.00019937604342297073, + "loss": 2.191, + "step": 306 + }, + { + "epoch": 0.37, + "learning_rate": 0.00019937182480556055, + "loss": 2.2402, + "step": 307 + }, + { + "epoch": 0.37, + "learning_rate": 0.00019936759201989577, + "loss": 2.0568, + "step": 308 + }, + { + "epoch": 0.37, + "learning_rate": 0.0001993633450665799, + "loss": 2.4314, + "step": 309 + }, + { + "epoch": 0.37, + "learning_rate": 0.00019935908394621844, + "loss": 2.0556, + "step": 310 + }, + { + "epoch": 0.37, + "learning_rate": 0.00019935480865941894, + "loss": 2.0988, + "step": 311 + }, + { + "epoch": 0.37, + "learning_rate": 0.00019935051920679094, + "loss": 2.0964, + "step": 312 + }, + { + "epoch": 0.38, + "learning_rate": 0.00019934621558894607, + "loss": 1.9365, + "step": 313 + }, + { + "epoch": 0.38, + "learning_rate": 0.0001993418978064979, + "loss": 1.6224, + "step": 314 + }, + { + "epoch": 0.38, + "learning_rate": 0.00019933756586006202, + "loss": 2.144, + "step": 315 + }, + { + "epoch": 0.38, + "learning_rate": 0.00019933321975025616, + "loss": 2.2899, + "step": 316 + }, + { + "epoch": 0.38, + "learning_rate": 0.00019932885947769992, + "loss": 1.8865, + "step": 317 + }, + { + "epoch": 0.38, + "learning_rate": 0.000199324485043015, + "loss": 2.3996, + "step": 318 + }, + { + "epoch": 0.38, + "learning_rate": 0.0001993200964468251, + "loss": 1.3858, + "step": 319 + }, + { + "epoch": 0.38, + "learning_rate": 0.00019931569368975588, + "loss": 2.2231, + "step": 320 + }, + { + "epoch": 0.39, + "learning_rate": 0.00019931127677243516, + "loss": 2.0537, + "step": 321 + }, + { + "epoch": 0.39, + "learning_rate": 0.00019930684569549264, + "loss": 2.1381, + "step": 322 + }, + { + "epoch": 0.39, + "learning_rate": 0.00019930240045956012, + "loss": 2.0152, + "step": 323 + }, + { + "epoch": 0.39, + "learning_rate": 0.0001992979410652714, + "loss": 2.0293, + "step": 324 + }, + { + "epoch": 0.39, + "learning_rate": 0.00019929346751326228, + "loss": 1.7457, + "step": 325 + }, + { + "epoch": 0.39, + "learning_rate": 0.00019928897980417057, + "loss": 1.987, + "step": 326 + }, + { + "epoch": 0.39, + "learning_rate": 0.00019928447793863616, + "loss": 2.2451, + "step": 327 + }, + { + "epoch": 0.39, + "learning_rate": 0.00019927996191730093, + "loss": 2.3312, + "step": 328 + }, + { + "epoch": 0.39, + "learning_rate": 0.0001992754317408087, + "loss": 1.8771, + "step": 329 + }, + { + "epoch": 0.4, + "learning_rate": 0.0001992708874098054, + "loss": 1.833, + "step": 330 + }, + { + "epoch": 0.4, + "learning_rate": 0.00019926632892493896, + "loss": 1.9343, + "step": 331 + }, + { + "epoch": 0.4, + "learning_rate": 0.00019926175628685937, + "loss": 2.2328, + "step": 332 + }, + { + "epoch": 0.4, + "learning_rate": 0.0001992571694962185, + "loss": 1.9916, + "step": 333 + }, + { + "epoch": 0.4, + "learning_rate": 0.0001992525685536704, + "loss": 1.9497, + "step": 334 + }, + { + "epoch": 0.4, + "learning_rate": 0.000199247953459871, + "loss": 2.029, + "step": 335 + }, + { + "epoch": 0.4, + "learning_rate": 0.00019924332421547835, + "loss": 2.0326, + "step": 336 + }, + { + "epoch": 0.4, + "learning_rate": 0.0001992386808211525, + "loss": 2.6406, + "step": 337 + }, + { + "epoch": 0.41, + "learning_rate": 0.00019923402327755546, + "loss": 2.3811, + "step": 338 + }, + { + "epoch": 0.41, + "learning_rate": 0.00019922935158535129, + "loss": 1.6143, + "step": 339 + }, + { + "epoch": 0.41, + "learning_rate": 0.00019922466574520608, + "loss": 2.2182, + "step": 340 + }, + { + "epoch": 0.41, + "learning_rate": 0.00019921996575778794, + "loss": 2.218, + "step": 341 + }, + { + "epoch": 0.41, + "learning_rate": 0.000199215251623767, + "loss": 1.8615, + "step": 342 + }, + { + "epoch": 0.41, + "learning_rate": 0.00019921052334381534, + "loss": 2.165, + "step": 343 + }, + { + "epoch": 0.41, + "learning_rate": 0.00019920578091860716, + "loss": 2.1627, + "step": 344 + }, + { + "epoch": 0.41, + "learning_rate": 0.0001992010243488186, + "loss": 2.154, + "step": 345 + }, + { + "epoch": 0.42, + "learning_rate": 0.00019919625363512786, + "loss": 1.5966, + "step": 346 + }, + { + "epoch": 0.42, + "learning_rate": 0.00019919146877821512, + "loss": 2.0903, + "step": 347 + }, + { + "epoch": 0.42, + "learning_rate": 0.0001991866697787626, + "loss": 2.2322, + "step": 348 + }, + { + "epoch": 0.42, + "learning_rate": 0.00019918185663745456, + "loss": 1.9319, + "step": 349 + }, + { + "epoch": 0.42, + "learning_rate": 0.00019917702935497725, + "loss": 2.1367, + "step": 350 + }, + { + "epoch": 0.42, + "learning_rate": 0.00019917218793201886, + "loss": 2.1767, + "step": 351 + }, + { + "epoch": 0.42, + "learning_rate": 0.00019916733236926976, + "loss": 2.1009, + "step": 352 + }, + { + "epoch": 0.42, + "learning_rate": 0.0001991624626674222, + "loss": 2.1286, + "step": 353 + }, + { + "epoch": 0.42, + "learning_rate": 0.0001991575788271705, + "loss": 2.181, + "step": 354 + }, + { + "epoch": 0.43, + "learning_rate": 0.00019915268084921101, + "loss": 2.12, + "step": 355 + }, + { + "epoch": 0.43, + "learning_rate": 0.00019914776873424206, + "loss": 1.9895, + "step": 356 + }, + { + "epoch": 0.43, + "learning_rate": 0.000199142842482964, + "loss": 1.9285, + "step": 357 + }, + { + "epoch": 0.43, + "learning_rate": 0.0001991379020960792, + "loss": 2.2376, + "step": 358 + }, + { + "epoch": 0.43, + "learning_rate": 0.0001991329475742921, + "loss": 2.1274, + "step": 359 + }, + { + "epoch": 0.43, + "learning_rate": 0.00019912797891830908, + "loss": 2.0043, + "step": 360 + }, + { + "epoch": 0.43, + "learning_rate": 0.00019912299612883852, + "loss": 2.022, + "step": 361 + }, + { + "epoch": 0.43, + "learning_rate": 0.00019911799920659093, + "loss": 1.7343, + "step": 362 + }, + { + "epoch": 0.44, + "learning_rate": 0.0001991129881522787, + "loss": 2.0621, + "step": 363 + }, + { + "epoch": 0.44, + "learning_rate": 0.00019910796296661632, + "loss": 1.5116, + "step": 364 + }, + { + "epoch": 0.44, + "learning_rate": 0.0001991029236503203, + "loss": 2.0485, + "step": 365 + }, + { + "epoch": 0.44, + "learning_rate": 0.00019909787020410907, + "loss": 1.971, + "step": 366 + }, + { + "epoch": 0.44, + "learning_rate": 0.00019909280262870324, + "loss": 1.9724, + "step": 367 + }, + { + "epoch": 0.44, + "learning_rate": 0.00019908772092482524, + "loss": 1.318, + "step": 368 + }, + { + "epoch": 0.44, + "learning_rate": 0.00019908262509319964, + "loss": 2.0539, + "step": 369 + }, + { + "epoch": 0.44, + "learning_rate": 0.00019907751513455302, + "loss": 2.1097, + "step": 370 + }, + { + "epoch": 0.45, + "learning_rate": 0.00019907239104961392, + "loss": 2.0632, + "step": 371 + }, + { + "epoch": 0.45, + "learning_rate": 0.00019906725283911296, + "loss": 2.1897, + "step": 372 + }, + { + "epoch": 0.45, + "learning_rate": 0.00019906210050378266, + "loss": 2.2002, + "step": 373 + }, + { + "epoch": 0.45, + "learning_rate": 0.00019905693404435773, + "loss": 1.9005, + "step": 374 + }, + { + "epoch": 0.45, + "learning_rate": 0.00019905175346157474, + "loss": 1.9873, + "step": 375 + }, + { + "epoch": 0.45, + "learning_rate": 0.00019904655875617233, + "loss": 1.7215, + "step": 376 + }, + { + "epoch": 0.45, + "learning_rate": 0.00019904134992889113, + "loss": 2.0434, + "step": 377 + }, + { + "epoch": 0.45, + "learning_rate": 0.00019903612698047383, + "loss": 2.4223, + "step": 378 + }, + { + "epoch": 0.45, + "learning_rate": 0.00019903088991166513, + "loss": 2.0837, + "step": 379 + }, + { + "epoch": 0.46, + "learning_rate": 0.00019902563872321172, + "loss": 2.2389, + "step": 380 + }, + { + "epoch": 0.46, + "learning_rate": 0.00019902037341586225, + "loss": 1.7205, + "step": 381 + }, + { + "epoch": 0.46, + "learning_rate": 0.0001990150939903675, + "loss": 1.9577, + "step": 382 + }, + { + "epoch": 0.46, + "learning_rate": 0.00019900980044748015, + "loss": 1.8778, + "step": 383 + }, + { + "epoch": 0.46, + "learning_rate": 0.000199004492787955, + "loss": 2.2213, + "step": 384 + }, + { + "epoch": 0.46, + "learning_rate": 0.00019899917101254874, + "loss": 2.0927, + "step": 385 + }, + { + "epoch": 0.46, + "learning_rate": 0.00019899383512202019, + "loss": 2.2921, + "step": 386 + }, + { + "epoch": 0.46, + "learning_rate": 0.0001989884851171301, + "loss": 2.2983, + "step": 387 + }, + { + "epoch": 0.47, + "learning_rate": 0.0001989831209986413, + "loss": 1.8052, + "step": 388 + }, + { + "epoch": 0.47, + "learning_rate": 0.00019897774276731857, + "loss": 1.7741, + "step": 389 + }, + { + "epoch": 0.47, + "learning_rate": 0.00019897235042392873, + "loss": 1.779, + "step": 390 + }, + { + "epoch": 0.47, + "learning_rate": 0.00019896694396924063, + "loss": 1.6924, + "step": 391 + }, + { + "epoch": 0.47, + "learning_rate": 0.00019896152340402509, + "loss": 2.036, + "step": 392 + }, + { + "epoch": 0.47, + "learning_rate": 0.00019895608872905494, + "loss": 2.04, + "step": 393 + }, + { + "epoch": 0.47, + "learning_rate": 0.0001989506399451051, + "loss": 2.1702, + "step": 394 + }, + { + "epoch": 0.47, + "learning_rate": 0.00019894517705295245, + "loss": 1.9429, + "step": 395 + }, + { + "epoch": 0.48, + "learning_rate": 0.00019893970005337584, + "loss": 2.0528, + "step": 396 + }, + { + "epoch": 0.48, + "learning_rate": 0.00019893420894715618, + "loss": 1.7906, + "step": 397 + }, + { + "epoch": 0.48, + "learning_rate": 0.0001989287037350764, + "loss": 2.3494, + "step": 398 + }, + { + "epoch": 0.48, + "learning_rate": 0.00019892318441792138, + "loss": 1.7415, + "step": 399 + }, + { + "epoch": 0.48, + "learning_rate": 0.0001989176509964781, + "loss": 2.0184, + "step": 400 + }, + { + "epoch": 0.48, + "learning_rate": 0.0001989121034715355, + "loss": 1.9277, + "step": 401 + }, + { + "epoch": 0.48, + "learning_rate": 0.0001989065418438845, + "loss": 2.2168, + "step": 402 + }, + { + "epoch": 0.48, + "learning_rate": 0.00019890096611431814, + "loss": 2.6114, + "step": 403 + }, + { + "epoch": 0.48, + "learning_rate": 0.00019889537628363133, + "loss": 2.0713, + "step": 404 + }, + { + "epoch": 0.49, + "learning_rate": 0.00019888977235262104, + "loss": 2.2966, + "step": 405 + }, + { + "epoch": 0.49, + "learning_rate": 0.00019888415432208636, + "loss": 2.5206, + "step": 406 + }, + { + "epoch": 0.49, + "learning_rate": 0.00019887852219282822, + "loss": 2.4503, + "step": 407 + }, + { + "epoch": 0.49, + "learning_rate": 0.00019887287596564966, + "loss": 2.102, + "step": 408 + }, + { + "epoch": 0.49, + "learning_rate": 0.00019886721564135572, + "loss": 2.3275, + "step": 409 + }, + { + "epoch": 0.49, + "learning_rate": 0.00019886154122075343, + "loss": 2.0481, + "step": 410 + }, + { + "epoch": 0.49, + "learning_rate": 0.00019885585270465182, + "loss": 1.8395, + "step": 411 + }, + { + "epoch": 0.49, + "learning_rate": 0.00019885015009386202, + "loss": 2.3535, + "step": 412 + }, + { + "epoch": 0.5, + "learning_rate": 0.000198844433389197, + "loss": 2.0147, + "step": 413 + }, + { + "epoch": 0.5, + "learning_rate": 0.0001988387025914719, + "loss": 2.1919, + "step": 414 + }, + { + "epoch": 0.5, + "learning_rate": 0.0001988329577015038, + "loss": 2.156, + "step": 415 + }, + { + "epoch": 0.5, + "learning_rate": 0.00019882719872011176, + "loss": 2.2672, + "step": 416 + }, + { + "epoch": 0.5, + "learning_rate": 0.00019882142564811694, + "loss": 2.3242, + "step": 417 + }, + { + "epoch": 0.5, + "learning_rate": 0.0001988156384863424, + "loss": 2.0259, + "step": 418 + }, + { + "epoch": 0.5, + "eval_loss": 1.9941134452819824, + "eval_runtime": 282.533, + "eval_samples_per_second": 0.729, + "eval_steps_per_second": 0.729, + "step": 418 + }, + { + "epoch": 0.5, + "learning_rate": 0.00019880983723561332, + "loss": 1.7039, + "step": 419 + }, + { + "epoch": 0.5, + "learning_rate": 0.00019880402189675678, + "loss": 2.1007, + "step": 420 + }, + { + "epoch": 0.51, + "learning_rate": 0.00019879819247060193, + "loss": 2.2297, + "step": 421 + }, + { + "epoch": 0.51, + "learning_rate": 0.00019879234895797996, + "loss": 1.6166, + "step": 422 + }, + { + "epoch": 0.51, + "learning_rate": 0.000198786491359724, + "loss": 2.408, + "step": 423 + }, + { + "epoch": 0.51, + "learning_rate": 0.00019878061967666915, + "loss": 1.686, + "step": 424 + }, + { + "epoch": 0.51, + "learning_rate": 0.0001987747339096527, + "loss": 2.0492, + "step": 425 + }, + { + "epoch": 0.51, + "learning_rate": 0.00019876883405951377, + "loss": 2.2179, + "step": 426 + }, + { + "epoch": 0.51, + "learning_rate": 0.00019876292012709356, + "loss": 1.8812, + "step": 427 + }, + { + "epoch": 0.51, + "learning_rate": 0.00019875699211323528, + "loss": 2.2888, + "step": 428 + }, + { + "epoch": 0.52, + "learning_rate": 0.00019875105001878409, + "loss": 2.0561, + "step": 429 + }, + { + "epoch": 0.52, + "learning_rate": 0.00019874509384458725, + "loss": 1.9299, + "step": 430 + }, + { + "epoch": 0.52, + "learning_rate": 0.00019873912359149397, + "loss": 2.1999, + "step": 431 + }, + { + "epoch": 0.52, + "learning_rate": 0.00019873313926035548, + "loss": 1.8509, + "step": 432 + }, + { + "epoch": 0.52, + "learning_rate": 0.00019872714085202503, + "loss": 1.8281, + "step": 433 + }, + { + "epoch": 0.52, + "learning_rate": 0.0001987211283673578, + "loss": 1.8359, + "step": 434 + }, + { + "epoch": 0.52, + "learning_rate": 0.0001987151018072111, + "loss": 2.2844, + "step": 435 + }, + { + "epoch": 0.52, + "learning_rate": 0.00019870906117244416, + "loss": 1.9397, + "step": 436 + }, + { + "epoch": 0.52, + "learning_rate": 0.00019870300646391824, + "loss": 2.302, + "step": 437 + }, + { + "epoch": 0.53, + "learning_rate": 0.00019869693768249661, + "loss": 2.1176, + "step": 438 + }, + { + "epoch": 0.53, + "learning_rate": 0.00019869085482904458, + "loss": 2.1909, + "step": 439 + }, + { + "epoch": 0.53, + "learning_rate": 0.0001986847579044294, + "loss": 2.2382, + "step": 440 + }, + { + "epoch": 0.53, + "learning_rate": 0.00019867864690952035, + "loss": 2.0988, + "step": 441 + }, + { + "epoch": 0.53, + "learning_rate": 0.00019867252184518878, + "loss": 2.2136, + "step": 442 + }, + { + "epoch": 0.53, + "learning_rate": 0.0001986663827123079, + "loss": 1.9324, + "step": 443 + }, + { + "epoch": 0.53, + "learning_rate": 0.00019866022951175308, + "loss": 2.1274, + "step": 444 + }, + { + "epoch": 0.53, + "learning_rate": 0.00019865406224440165, + "loss": 1.8625, + "step": 445 + }, + { + "epoch": 0.54, + "learning_rate": 0.00019864788091113287, + "loss": 2.0009, + "step": 446 + }, + { + "epoch": 0.54, + "learning_rate": 0.0001986416855128281, + "loss": 2.2245, + "step": 447 + }, + { + "epoch": 0.54, + "learning_rate": 0.00019863547605037063, + "loss": 2.0654, + "step": 448 + }, + { + "epoch": 0.54, + "learning_rate": 0.00019862925252464586, + "loss": 1.4339, + "step": 449 + }, + { + "epoch": 0.54, + "learning_rate": 0.00019862301493654108, + "loss": 2.1347, + "step": 450 + }, + { + "epoch": 0.54, + "learning_rate": 0.00019861676328694562, + "loss": 1.7029, + "step": 451 + }, + { + "epoch": 0.54, + "learning_rate": 0.00019861049757675088, + "loss": 2.0081, + "step": 452 + }, + { + "epoch": 0.54, + "learning_rate": 0.00019860421780685018, + "loss": 1.9994, + "step": 453 + }, + { + "epoch": 0.55, + "learning_rate": 0.0001985979239781389, + "loss": 1.9325, + "step": 454 + }, + { + "epoch": 0.55, + "learning_rate": 0.00019859161609151436, + "loss": 1.8502, + "step": 455 + }, + { + "epoch": 0.55, + "learning_rate": 0.000198585294147876, + "loss": 2.3779, + "step": 456 + }, + { + "epoch": 0.55, + "learning_rate": 0.00019857895814812509, + "loss": 2.0303, + "step": 457 + }, + { + "epoch": 0.55, + "learning_rate": 0.0001985726080931651, + "loss": 1.9898, + "step": 458 + }, + { + "epoch": 0.55, + "learning_rate": 0.00019856624398390137, + "loss": 1.7648, + "step": 459 + }, + { + "epoch": 0.55, + "learning_rate": 0.00019855986582124126, + "loss": 1.7822, + "step": 460 + }, + { + "epoch": 0.55, + "learning_rate": 0.0001985534736060942, + "loss": 1.9219, + "step": 461 + }, + { + "epoch": 0.55, + "learning_rate": 0.00019854706733937155, + "loss": 2.1789, + "step": 462 + }, + { + "epoch": 0.56, + "learning_rate": 0.00019854064702198675, + "loss": 1.9091, + "step": 463 + }, + { + "epoch": 0.56, + "learning_rate": 0.00019853421265485514, + "loss": 1.9941, + "step": 464 + }, + { + "epoch": 0.56, + "learning_rate": 0.0001985277642388941, + "loss": 1.904, + "step": 465 + }, + { + "epoch": 0.56, + "learning_rate": 0.00019852130177502316, + "loss": 1.6299, + "step": 466 + }, + { + "epoch": 0.56, + "learning_rate": 0.0001985148252641636, + "loss": 1.7712, + "step": 467 + }, + { + "epoch": 0.56, + "learning_rate": 0.00019850833470723886, + "loss": 1.6825, + "step": 468 + }, + { + "epoch": 0.56, + "learning_rate": 0.0001985018301051744, + "loss": 1.7408, + "step": 469 + }, + { + "epoch": 0.56, + "learning_rate": 0.00019849531145889758, + "loss": 2.0622, + "step": 470 + }, + { + "epoch": 0.57, + "learning_rate": 0.00019848877876933784, + "loss": 1.5699, + "step": 471 + }, + { + "epoch": 0.57, + "learning_rate": 0.0001984822320374266, + "loss": 2.0253, + "step": 472 + }, + { + "epoch": 0.57, + "learning_rate": 0.00019847567126409724, + "loss": 2.2186, + "step": 473 + }, + { + "epoch": 0.57, + "learning_rate": 0.00019846909645028523, + "loss": 2.0872, + "step": 474 + }, + { + "epoch": 0.57, + "learning_rate": 0.000198462507596928, + "loss": 1.9362, + "step": 475 + }, + { + "epoch": 0.57, + "learning_rate": 0.00019845590470496497, + "loss": 2.4109, + "step": 476 + }, + { + "epoch": 0.57, + "learning_rate": 0.00019844928777533753, + "loss": 2.2626, + "step": 477 + }, + { + "epoch": 0.57, + "learning_rate": 0.00019844265680898918, + "loss": 2.0874, + "step": 478 + }, + { + "epoch": 0.58, + "learning_rate": 0.0001984360118068653, + "loss": 2.1606, + "step": 479 + }, + { + "epoch": 0.58, + "learning_rate": 0.0001984293527699133, + "loss": 2.063, + "step": 480 + }, + { + "epoch": 0.58, + "learning_rate": 0.00019842267969908265, + "loss": 1.9065, + "step": 481 + }, + { + "epoch": 0.58, + "learning_rate": 0.0001984159925953248, + "loss": 1.9511, + "step": 482 + }, + { + "epoch": 0.58, + "learning_rate": 0.00019840929145959317, + "loss": 2.056, + "step": 483 + }, + { + "epoch": 0.58, + "learning_rate": 0.00019840257629284317, + "loss": 2.2353, + "step": 484 + }, + { + "epoch": 0.58, + "learning_rate": 0.00019839584709603226, + "loss": 1.9401, + "step": 485 + }, + { + "epoch": 0.58, + "learning_rate": 0.0001983891038701199, + "loss": 1.9648, + "step": 486 + }, + { + "epoch": 0.58, + "learning_rate": 0.00019838234661606748, + "loss": 1.753, + "step": 487 + }, + { + "epoch": 0.59, + "learning_rate": 0.00019837557533483846, + "loss": 1.7805, + "step": 488 + }, + { + "epoch": 0.59, + "learning_rate": 0.00019836879002739827, + "loss": 2.192, + "step": 489 + }, + { + "epoch": 0.59, + "learning_rate": 0.00019836199069471437, + "loss": 1.9112, + "step": 490 + }, + { + "epoch": 0.59, + "learning_rate": 0.00019835517733775615, + "loss": 2.0119, + "step": 491 + }, + { + "epoch": 0.59, + "learning_rate": 0.0001983483499574951, + "loss": 1.8932, + "step": 492 + }, + { + "epoch": 0.59, + "learning_rate": 0.00019834150855490464, + "loss": 1.5968, + "step": 493 + }, + { + "epoch": 0.59, + "learning_rate": 0.00019833465313096017, + "loss": 2.1493, + "step": 494 + }, + { + "epoch": 0.59, + "learning_rate": 0.00019832778368663917, + "loss": 1.8863, + "step": 495 + }, + { + "epoch": 0.6, + "learning_rate": 0.000198320900222921, + "loss": 2.2134, + "step": 496 + }, + { + "epoch": 0.6, + "learning_rate": 0.00019831400274078717, + "loss": 2.2831, + "step": 497 + }, + { + "epoch": 0.6, + "learning_rate": 0.00019830709124122112, + "loss": 2.0266, + "step": 498 + }, + { + "epoch": 0.6, + "learning_rate": 0.0001983001657252082, + "loss": 2.3392, + "step": 499 + }, + { + "epoch": 0.6, + "learning_rate": 0.00019829322619373588, + "loss": 1.8426, + "step": 500 + }, + { + "epoch": 0.6, + "learning_rate": 0.00019828627264779363, + "loss": 2.0742, + "step": 501 + }, + { + "epoch": 0.6, + "learning_rate": 0.0001982793050883728, + "loss": 1.9578, + "step": 502 + }, + { + "epoch": 0.6, + "learning_rate": 0.00019827232351646686, + "loss": 2.0863, + "step": 503 + }, + { + "epoch": 0.61, + "learning_rate": 0.0001982653279330712, + "loss": 2.2881, + "step": 504 + }, + { + "epoch": 0.61, + "learning_rate": 0.00019825831833918323, + "loss": 1.8869, + "step": 505 + }, + { + "epoch": 0.61, + "learning_rate": 0.0001982512947358024, + "loss": 1.8997, + "step": 506 + }, + { + "epoch": 0.61, + "learning_rate": 0.00019824425712393012, + "loss": 1.8945, + "step": 507 + }, + { + "epoch": 0.61, + "learning_rate": 0.00019823720550456977, + "loss": 1.9496, + "step": 508 + }, + { + "epoch": 0.61, + "learning_rate": 0.0001982301398787268, + "loss": 2.1066, + "step": 509 + }, + { + "epoch": 0.61, + "learning_rate": 0.00019822306024740852, + "loss": 1.958, + "step": 510 + }, + { + "epoch": 0.61, + "learning_rate": 0.00019821596661162447, + "loss": 2.1112, + "step": 511 + }, + { + "epoch": 0.61, + "learning_rate": 0.00019820885897238596, + "loss": 2.1012, + "step": 512 + }, + { + "epoch": 0.62, + "learning_rate": 0.0001982017373307064, + "loss": 2.2623, + "step": 513 + }, + { + "epoch": 0.62, + "learning_rate": 0.00019819460168760117, + "loss": 2.5058, + "step": 514 + }, + { + "epoch": 0.62, + "learning_rate": 0.0001981874520440877, + "loss": 2.1367, + "step": 515 + }, + { + "epoch": 0.62, + "learning_rate": 0.00019818028840118532, + "loss": 2.2743, + "step": 516 + }, + { + "epoch": 0.62, + "learning_rate": 0.00019817311075991543, + "loss": 1.5517, + "step": 517 + }, + { + "epoch": 0.62, + "learning_rate": 0.0001981659191213014, + "loss": 1.9569, + "step": 518 + }, + { + "epoch": 0.62, + "learning_rate": 0.00019815871348636863, + "loss": 2.0566, + "step": 519 + }, + { + "epoch": 0.62, + "learning_rate": 0.00019815149385614444, + "loss": 1.8859, + "step": 520 + }, + { + "epoch": 0.63, + "learning_rate": 0.00019814426023165825, + "loss": 2.0298, + "step": 521 + }, + { + "epoch": 0.63, + "learning_rate": 0.00019813701261394136, + "loss": 2.0614, + "step": 522 + }, + { + "epoch": 0.63, + "learning_rate": 0.00019812975100402715, + "loss": 2.221, + "step": 523 + }, + { + "epoch": 0.63, + "learning_rate": 0.00019812247540295096, + "loss": 2.1255, + "step": 524 + }, + { + "epoch": 0.63, + "learning_rate": 0.00019811518581175014, + "loss": 2.1885, + "step": 525 + }, + { + "epoch": 0.63, + "learning_rate": 0.000198107882231464, + "loss": 2.3918, + "step": 526 + }, + { + "epoch": 0.63, + "learning_rate": 0.00019810056466313392, + "loss": 2.2759, + "step": 527 + }, + { + "epoch": 0.63, + "learning_rate": 0.00019809323310780318, + "loss": 1.9727, + "step": 528 + }, + { + "epoch": 0.64, + "learning_rate": 0.0001980858875665171, + "loss": 2.0417, + "step": 529 + }, + { + "epoch": 0.64, + "learning_rate": 0.00019807852804032305, + "loss": 1.645, + "step": 530 + }, + { + "epoch": 0.64, + "learning_rate": 0.0001980711545302703, + "loss": 1.7943, + "step": 531 + }, + { + "epoch": 0.64, + "learning_rate": 0.00019806376703741015, + "loss": 1.8844, + "step": 532 + }, + { + "epoch": 0.64, + "learning_rate": 0.00019805636556279588, + "loss": 2.1128, + "step": 533 + }, + { + "epoch": 0.64, + "learning_rate": 0.0001980489501074828, + "loss": 2.0272, + "step": 534 + }, + { + "epoch": 0.64, + "learning_rate": 0.00019804152067252816, + "loss": 2.0916, + "step": 535 + }, + { + "epoch": 0.64, + "learning_rate": 0.00019803407725899131, + "loss": 1.7287, + "step": 536 + }, + { + "epoch": 0.64, + "learning_rate": 0.00019802661986793342, + "loss": 2.0667, + "step": 537 + }, + { + "epoch": 0.65, + "learning_rate": 0.00019801914850041784, + "loss": 2.4016, + "step": 538 + }, + { + "epoch": 0.65, + "learning_rate": 0.00019801166315750978, + "loss": 1.8557, + "step": 539 + }, + { + "epoch": 0.65, + "learning_rate": 0.0001980041638402765, + "loss": 1.8072, + "step": 540 + }, + { + "epoch": 0.65, + "learning_rate": 0.00019799665054978722, + "loss": 2.2252, + "step": 541 + }, + { + "epoch": 0.65, + "learning_rate": 0.00019798912328711322, + "loss": 2.1377, + "step": 542 + }, + { + "epoch": 0.65, + "learning_rate": 0.00019798158205332764, + "loss": 2.0306, + "step": 543 + }, + { + "epoch": 0.65, + "learning_rate": 0.00019797402684950576, + "loss": 1.7428, + "step": 544 + }, + { + "epoch": 0.65, + "learning_rate": 0.00019796645767672477, + "loss": 2.0843, + "step": 545 + }, + { + "epoch": 0.66, + "learning_rate": 0.00019795887453606388, + "loss": 1.9175, + "step": 546 + }, + { + "epoch": 0.66, + "learning_rate": 0.00019795127742860423, + "loss": 1.6673, + "step": 547 + }, + { + "epoch": 0.66, + "learning_rate": 0.0001979436663554291, + "loss": 1.5553, + "step": 548 + }, + { + "epoch": 0.66, + "learning_rate": 0.00019793604131762357, + "loss": 1.604, + "step": 549 + }, + { + "epoch": 0.66, + "learning_rate": 0.00019792840231627482, + "loss": 2.023, + "step": 550 + }, + { + "epoch": 0.66, + "learning_rate": 0.00019792074935247206, + "loss": 1.8399, + "step": 551 + }, + { + "epoch": 0.66, + "learning_rate": 0.00019791308242730638, + "loss": 1.8579, + "step": 552 + }, + { + "epoch": 0.66, + "learning_rate": 0.00019790540154187094, + "loss": 2.2135, + "step": 553 + }, + { + "epoch": 0.67, + "learning_rate": 0.00019789770669726087, + "loss": 1.7894, + "step": 554 + }, + { + "epoch": 0.67, + "learning_rate": 0.00019788999789457326, + "loss": 2.1723, + "step": 555 + }, + { + "epoch": 0.67, + "learning_rate": 0.00019788227513490723, + "loss": 2.0881, + "step": 556 + }, + { + "epoch": 0.67, + "learning_rate": 0.00019787453841936393, + "loss": 1.7181, + "step": 557 + }, + { + "epoch": 0.67, + "learning_rate": 0.00019786678774904638, + "loss": 1.8725, + "step": 558 + }, + { + "epoch": 0.67, + "learning_rate": 0.00019785902312505964, + "loss": 2.0544, + "step": 559 + }, + { + "epoch": 0.67, + "learning_rate": 0.00019785124454851084, + "loss": 1.7503, + "step": 560 + }, + { + "epoch": 0.67, + "learning_rate": 0.000197843452020509, + "loss": 2.01, + "step": 561 + }, + { + "epoch": 0.67, + "learning_rate": 0.00019783564554216518, + "loss": 1.748, + "step": 562 + }, + { + "epoch": 0.68, + "learning_rate": 0.0001978278251145924, + "loss": 2.0866, + "step": 563 + }, + { + "epoch": 0.68, + "learning_rate": 0.0001978199907389057, + "loss": 1.6046, + "step": 564 + }, + { + "epoch": 0.68, + "learning_rate": 0.00019781214241622208, + "loss": 1.9222, + "step": 565 + }, + { + "epoch": 0.68, + "learning_rate": 0.00019780428014766051, + "loss": 2.2003, + "step": 566 + }, + { + "epoch": 0.68, + "learning_rate": 0.00019779640393434206, + "loss": 2.0534, + "step": 567 + }, + { + "epoch": 0.68, + "learning_rate": 0.0001977885137773896, + "loss": 1.8609, + "step": 568 + }, + { + "epoch": 0.68, + "learning_rate": 0.00019778060967792817, + "loss": 2.0666, + "step": 569 + }, + { + "epoch": 0.68, + "learning_rate": 0.00019777269163708468, + "loss": 1.9512, + "step": 570 + }, + { + "epoch": 0.69, + "learning_rate": 0.00019776475965598814, + "loss": 1.8349, + "step": 571 + }, + { + "epoch": 0.69, + "learning_rate": 0.0001977568137357694, + "loss": 2.0507, + "step": 572 + }, + { + "epoch": 0.69, + "learning_rate": 0.00019774885387756138, + "loss": 1.7588, + "step": 573 + }, + { + "epoch": 0.69, + "learning_rate": 0.000197740880082499, + "loss": 2.0981, + "step": 574 + }, + { + "epoch": 0.69, + "learning_rate": 0.00019773289235171918, + "loss": 2.0953, + "step": 575 + }, + { + "epoch": 0.69, + "learning_rate": 0.00019772489068636077, + "loss": 2.0678, + "step": 576 + }, + { + "epoch": 0.69, + "learning_rate": 0.00019771687508756466, + "loss": 2.0136, + "step": 577 + }, + { + "epoch": 0.69, + "learning_rate": 0.0001977088455564736, + "loss": 1.9781, + "step": 578 + }, + { + "epoch": 0.7, + "learning_rate": 0.00019770080209423254, + "loss": 2.2185, + "step": 579 + }, + { + "epoch": 0.7, + "learning_rate": 0.00019769274470198827, + "loss": 1.8076, + "step": 580 + }, + { + "epoch": 0.7, + "learning_rate": 0.00019768467338088957, + "loss": 1.6888, + "step": 581 + }, + { + "epoch": 0.7, + "learning_rate": 0.00019767658813208726, + "loss": 2.1273, + "step": 582 + }, + { + "epoch": 0.7, + "learning_rate": 0.0001976684889567341, + "loss": 2.3232, + "step": 583 + }, + { + "epoch": 0.7, + "learning_rate": 0.00019766037585598487, + "loss": 2.366, + "step": 584 + }, + { + "epoch": 0.7, + "learning_rate": 0.00019765224883099635, + "loss": 1.8939, + "step": 585 + }, + { + "epoch": 0.7, + "learning_rate": 0.00019764410788292722, + "loss": 2.0162, + "step": 586 + }, + { + "epoch": 0.7, + "learning_rate": 0.00019763595301293822, + "loss": 2.2752, + "step": 587 + }, + { + "epoch": 0.71, + "learning_rate": 0.0001976277842221921, + "loss": 1.9461, + "step": 588 + }, + { + "epoch": 0.71, + "learning_rate": 0.0001976196015118535, + "loss": 1.9999, + "step": 589 + }, + { + "epoch": 0.71, + "learning_rate": 0.0001976114048830891, + "loss": 2.0169, + "step": 590 + }, + { + "epoch": 0.71, + "learning_rate": 0.00019760319433706757, + "loss": 2.1838, + "step": 591 + }, + { + "epoch": 0.71, + "learning_rate": 0.00019759496987495955, + "loss": 2.3513, + "step": 592 + }, + { + "epoch": 0.71, + "learning_rate": 0.0001975867314979377, + "loss": 1.9915, + "step": 593 + }, + { + "epoch": 0.71, + "learning_rate": 0.0001975784792071766, + "loss": 2.1973, + "step": 594 + }, + { + "epoch": 0.71, + "learning_rate": 0.00019757021300385286, + "loss": 2.3112, + "step": 595 + }, + { + "epoch": 0.72, + "learning_rate": 0.00019756193288914507, + "loss": 2.0992, + "step": 596 + }, + { + "epoch": 0.72, + "learning_rate": 0.00019755363886423376, + "loss": 2.4266, + "step": 597 + }, + { + "epoch": 0.72, + "learning_rate": 0.00019754533093030148, + "loss": 1.7649, + "step": 598 + }, + { + "epoch": 0.72, + "learning_rate": 0.0001975370090885328, + "loss": 1.7573, + "step": 599 + }, + { + "epoch": 0.72, + "learning_rate": 0.00019752867334011423, + "loss": 1.7949, + "step": 600 + }, + { + "epoch": 0.72, + "learning_rate": 0.0001975203236862342, + "loss": 2.0229, + "step": 601 + }, + { + "epoch": 0.72, + "learning_rate": 0.00019751196012808325, + "loss": 2.0519, + "step": 602 + }, + { + "epoch": 0.72, + "learning_rate": 0.00019750358266685383, + "loss": 2.0829, + "step": 603 + }, + { + "epoch": 0.73, + "learning_rate": 0.00019749519130374038, + "loss": 2.0153, + "step": 604 + }, + { + "epoch": 0.73, + "learning_rate": 0.00019748678603993933, + "loss": 1.8594, + "step": 605 + }, + { + "epoch": 0.73, + "learning_rate": 0.00019747836687664908, + "loss": 2.1385, + "step": 606 + }, + { + "epoch": 0.73, + "learning_rate": 0.00019746993381507003, + "loss": 2.1317, + "step": 607 + }, + { + "epoch": 0.73, + "learning_rate": 0.00019746148685640451, + "loss": 1.1676, + "step": 608 + }, + { + "epoch": 0.73, + "learning_rate": 0.0001974530260018569, + "loss": 2.2856, + "step": 609 + }, + { + "epoch": 0.73, + "learning_rate": 0.0001974445512526336, + "loss": 2.1973, + "step": 610 + }, + { + "epoch": 0.73, + "learning_rate": 0.00019743606260994278, + "loss": 1.6912, + "step": 611 + }, + { + "epoch": 0.73, + "learning_rate": 0.00019742756007499486, + "loss": 1.8091, + "step": 612 + }, + { + "epoch": 0.74, + "learning_rate": 0.00019741904364900208, + "loss": 2.0108, + "step": 613 + }, + { + "epoch": 0.74, + "learning_rate": 0.00019741051333317867, + "loss": 2.1061, + "step": 614 + }, + { + "epoch": 0.74, + "learning_rate": 0.00019740196912874087, + "loss": 1.8934, + "step": 615 + }, + { + "epoch": 0.74, + "learning_rate": 0.00019739341103690693, + "loss": 1.8599, + "step": 616 + }, + { + "epoch": 0.74, + "learning_rate": 0.00019738483905889703, + "loss": 2.0025, + "step": 617 + }, + { + "epoch": 0.74, + "learning_rate": 0.00019737625319593335, + "loss": 1.8247, + "step": 618 + }, + { + "epoch": 0.74, + "learning_rate": 0.00019736765344924005, + "loss": 2.222, + "step": 619 + }, + { + "epoch": 0.74, + "learning_rate": 0.00019735903982004324, + "loss": 2.116, + "step": 620 + }, + { + "epoch": 0.75, + "learning_rate": 0.0001973504123095711, + "loss": 1.9183, + "step": 621 + }, + { + "epoch": 0.75, + "learning_rate": 0.0001973417709190536, + "loss": 2.1507, + "step": 622 + }, + { + "epoch": 0.75, + "learning_rate": 0.00019733311564972296, + "loss": 1.7899, + "step": 623 + }, + { + "epoch": 0.75, + "learning_rate": 0.00019732444650281315, + "loss": 2.1005, + "step": 624 + }, + { + "epoch": 0.75, + "learning_rate": 0.0001973157634795602, + "loss": 2.2391, + "step": 625 + }, + { + "epoch": 0.75, + "learning_rate": 0.00019730706658120214, + "loss": 1.9466, + "step": 626 + }, + { + "epoch": 0.75, + "learning_rate": 0.000197298355808979, + "loss": 1.9854, + "step": 627 + }, + { + "epoch": 0.75, + "eval_loss": 1.9957869052886963, + "eval_runtime": 282.5544, + "eval_samples_per_second": 0.729, + "eval_steps_per_second": 0.729, + "step": 627 + }, + { + "epoch": 0.75, + "learning_rate": 0.00019728963116413266, + "loss": 2.1877, + "step": 628 + }, + { + "epoch": 0.76, + "learning_rate": 0.00019728089264790712, + "loss": 2.2194, + "step": 629 + }, + { + "epoch": 0.76, + "learning_rate": 0.00019727214026154827, + "loss": 1.9631, + "step": 630 + }, + { + "epoch": 0.76, + "learning_rate": 0.00019726337400630405, + "loss": 2.3506, + "step": 631 + }, + { + "epoch": 0.76, + "learning_rate": 0.00019725459388342432, + "loss": 2.0543, + "step": 632 + }, + { + "epoch": 0.76, + "learning_rate": 0.0001972457998941609, + "loss": 2.0402, + "step": 633 + }, + { + "epoch": 0.76, + "learning_rate": 0.00019723699203976766, + "loss": 1.9316, + "step": 634 + }, + { + "epoch": 0.76, + "learning_rate": 0.0001972281703215004, + "loss": 2.2024, + "step": 635 + }, + { + "epoch": 0.76, + "learning_rate": 0.00019721933474061692, + "loss": 1.6776, + "step": 636 + }, + { + "epoch": 0.76, + "learning_rate": 0.00019721048529837694, + "loss": 1.9757, + "step": 637 + }, + { + "epoch": 0.77, + "learning_rate": 0.00019720162199604222, + "loss": 1.7631, + "step": 638 + }, + { + "epoch": 0.77, + "learning_rate": 0.00019719274483487648, + "loss": 2.34, + "step": 639 + }, + { + "epoch": 0.77, + "learning_rate": 0.0001971838538161454, + "loss": 1.8469, + "step": 640 + }, + { + "epoch": 0.77, + "learning_rate": 0.00019717494894111662, + "loss": 2.3151, + "step": 641 + }, + { + "epoch": 0.77, + "learning_rate": 0.00019716603021105987, + "loss": 2.0661, + "step": 642 + }, + { + "epoch": 0.77, + "learning_rate": 0.00019715709762724667, + "loss": 2.0408, + "step": 643 + }, + { + "epoch": 0.77, + "learning_rate": 0.00019714815119095062, + "loss": 1.9848, + "step": 644 + }, + { + "epoch": 0.77, + "learning_rate": 0.00019713919090344736, + "loss": 2.3134, + "step": 645 + }, + { + "epoch": 0.78, + "learning_rate": 0.00019713021676601438, + "loss": 2.4947, + "step": 646 + }, + { + "epoch": 0.78, + "learning_rate": 0.0001971212287799312, + "loss": 2.0515, + "step": 647 + }, + { + "epoch": 0.78, + "learning_rate": 0.00019711222694647932, + "loss": 2.6216, + "step": 648 + }, + { + "epoch": 0.78, + "learning_rate": 0.00019710321126694216, + "loss": 1.6517, + "step": 649 + }, + { + "epoch": 0.78, + "learning_rate": 0.0001970941817426052, + "loss": 2.0408, + "step": 650 + }, + { + "epoch": 0.78, + "learning_rate": 0.00019708513837475588, + "loss": 1.8841, + "step": 651 + }, + { + "epoch": 0.78, + "learning_rate": 0.00019707608116468356, + "loss": 2.1966, + "step": 652 + }, + { + "epoch": 0.78, + "learning_rate": 0.00019706701011367955, + "loss": 1.7587, + "step": 653 + }, + { + "epoch": 0.79, + "learning_rate": 0.0001970579252230373, + "loss": 2.2196, + "step": 654 + }, + { + "epoch": 0.79, + "learning_rate": 0.00019704882649405198, + "loss": 1.8146, + "step": 655 + }, + { + "epoch": 0.79, + "learning_rate": 0.00019703971392802098, + "loss": 2.2932, + "step": 656 + }, + { + "epoch": 0.79, + "learning_rate": 0.00019703058752624353, + "loss": 1.923, + "step": 657 + }, + { + "epoch": 0.79, + "learning_rate": 0.0001970214472900208, + "loss": 2.2393, + "step": 658 + }, + { + "epoch": 0.79, + "learning_rate": 0.00019701229322065605, + "loss": 1.7338, + "step": 659 + }, + { + "epoch": 0.79, + "learning_rate": 0.00019700312531945442, + "loss": 1.7859, + "step": 660 + }, + { + "epoch": 0.79, + "learning_rate": 0.00019699394358772306, + "loss": 2.2719, + "step": 661 + }, + { + "epoch": 0.79, + "learning_rate": 0.00019698474802677107, + "loss": 1.576, + "step": 662 + }, + { + "epoch": 0.8, + "learning_rate": 0.00019697553863790956, + "loss": 2.3333, + "step": 663 + }, + { + "epoch": 0.8, + "learning_rate": 0.00019696631542245156, + "loss": 2.3508, + "step": 664 + }, + { + "epoch": 0.8, + "learning_rate": 0.00019695707838171216, + "loss": 2.1876, + "step": 665 + }, + { + "epoch": 0.8, + "learning_rate": 0.00019694782751700828, + "loss": 1.4863, + "step": 666 + }, + { + "epoch": 0.8, + "learning_rate": 0.00019693856282965898, + "loss": 1.8948, + "step": 667 + }, + { + "epoch": 0.8, + "learning_rate": 0.00019692928432098512, + "loss": 1.6867, + "step": 668 + }, + { + "epoch": 0.8, + "learning_rate": 0.00019691999199230963, + "loss": 1.7682, + "step": 669 + }, + { + "epoch": 0.8, + "learning_rate": 0.00019691068584495742, + "loss": 2.0914, + "step": 670 + }, + { + "epoch": 0.81, + "learning_rate": 0.00019690136588025535, + "loss": 2.1413, + "step": 671 + }, + { + "epoch": 0.81, + "learning_rate": 0.00019689203209953223, + "loss": 2.1275, + "step": 672 + }, + { + "epoch": 0.81, + "learning_rate": 0.0001968826845041188, + "loss": 1.9556, + "step": 673 + }, + { + "epoch": 0.81, + "learning_rate": 0.00019687332309534792, + "loss": 2.2209, + "step": 674 + }, + { + "epoch": 0.81, + "learning_rate": 0.00019686394787455424, + "loss": 1.9853, + "step": 675 + }, + { + "epoch": 0.81, + "learning_rate": 0.00019685455884307454, + "loss": 2.0877, + "step": 676 + }, + { + "epoch": 0.81, + "learning_rate": 0.00019684515600224743, + "loss": 2.1607, + "step": 677 + }, + { + "epoch": 0.81, + "learning_rate": 0.00019683573935341358, + "loss": 2.2664, + "step": 678 + }, + { + "epoch": 0.82, + "learning_rate": 0.00019682630889791556, + "loss": 1.8527, + "step": 679 + }, + { + "epoch": 0.82, + "learning_rate": 0.000196816864637098, + "loss": 1.8417, + "step": 680 + }, + { + "epoch": 0.82, + "learning_rate": 0.00019680740657230738, + "loss": 1.9853, + "step": 681 + }, + { + "epoch": 0.82, + "learning_rate": 0.00019679793470489228, + "loss": 1.8419, + "step": 682 + }, + { + "epoch": 0.82, + "learning_rate": 0.00019678844903620317, + "loss": 1.9971, + "step": 683 + }, + { + "epoch": 0.82, + "learning_rate": 0.00019677894956759246, + "loss": 1.9843, + "step": 684 + }, + { + "epoch": 0.82, + "learning_rate": 0.00019676943630041462, + "loss": 2.376, + "step": 685 + }, + { + "epoch": 0.82, + "learning_rate": 0.00019675990923602598, + "loss": 2.1558, + "step": 686 + }, + { + "epoch": 0.82, + "learning_rate": 0.00019675036837578494, + "loss": 1.5752, + "step": 687 + }, + { + "epoch": 0.83, + "learning_rate": 0.0001967408137210518, + "loss": 1.6704, + "step": 688 + }, + { + "epoch": 0.83, + "learning_rate": 0.00019673124527318881, + "loss": 2.1389, + "step": 689 + }, + { + "epoch": 0.83, + "learning_rate": 0.00019672166303356028, + "loss": 2.126, + "step": 690 + }, + { + "epoch": 0.83, + "learning_rate": 0.00019671206700353237, + "loss": 1.9402, + "step": 691 + }, + { + "epoch": 0.83, + "learning_rate": 0.00019670245718447335, + "loss": 1.6701, + "step": 692 + }, + { + "epoch": 0.83, + "learning_rate": 0.00019669283357775328, + "loss": 1.8134, + "step": 693 + }, + { + "epoch": 0.83, + "learning_rate": 0.0001966831961847443, + "loss": 2.1642, + "step": 694 + }, + { + "epoch": 0.83, + "learning_rate": 0.00019667354500682054, + "loss": 1.8455, + "step": 695 + }, + { + "epoch": 0.84, + "learning_rate": 0.000196663880045358, + "loss": 1.9646, + "step": 696 + }, + { + "epoch": 0.84, + "learning_rate": 0.0001966542013017347, + "loss": 1.9855, + "step": 697 + }, + { + "epoch": 0.84, + "learning_rate": 0.00019664450877733062, + "loss": 1.7029, + "step": 698 + }, + { + "epoch": 0.84, + "learning_rate": 0.00019663480247352773, + "loss": 1.9789, + "step": 699 + }, + { + "epoch": 0.84, + "learning_rate": 0.0001966250823917099, + "loss": 1.8751, + "step": 700 + }, + { + "epoch": 0.84, + "learning_rate": 0.00019661534853326301, + "loss": 2.3644, + "step": 701 + }, + { + "epoch": 0.84, + "learning_rate": 0.00019660560089957492, + "loss": 1.8006, + "step": 702 + }, + { + "epoch": 0.84, + "learning_rate": 0.0001965958394920354, + "loss": 2.2799, + "step": 703 + }, + { + "epoch": 0.85, + "learning_rate": 0.00019658606431203622, + "loss": 1.9258, + "step": 704 + }, + { + "epoch": 0.85, + "learning_rate": 0.0001965762753609711, + "loss": 1.9521, + "step": 705 + }, + { + "epoch": 0.85, + "learning_rate": 0.00019656647264023575, + "loss": 1.9675, + "step": 706 + }, + { + "epoch": 0.85, + "learning_rate": 0.00019655665615122783, + "loss": 2.3686, + "step": 707 + }, + { + "epoch": 0.85, + "learning_rate": 0.00019654682589534693, + "loss": 2.1448, + "step": 708 + }, + { + "epoch": 0.85, + "learning_rate": 0.00019653698187399466, + "loss": 2.2475, + "step": 709 + }, + { + "epoch": 0.85, + "learning_rate": 0.0001965271240885745, + "loss": 1.9417, + "step": 710 + }, + { + "epoch": 0.85, + "learning_rate": 0.0001965172525404921, + "loss": 2.154, + "step": 711 + }, + { + "epoch": 0.85, + "learning_rate": 0.00019650736723115475, + "loss": 2.0646, + "step": 712 + }, + { + "epoch": 0.86, + "learning_rate": 0.00019649746816197196, + "loss": 2.235, + "step": 713 + }, + { + "epoch": 0.86, + "learning_rate": 0.00019648755533435518, + "loss": 1.7122, + "step": 714 + }, + { + "epoch": 0.86, + "learning_rate": 0.00019647762874971765, + "loss": 2.0635, + "step": 715 + }, + { + "epoch": 0.86, + "learning_rate": 0.00019646768840947474, + "loss": 1.8904, + "step": 716 + }, + { + "epoch": 0.86, + "learning_rate": 0.00019645773431504373, + "loss": 1.608, + "step": 717 + }, + { + "epoch": 0.86, + "learning_rate": 0.00019644776646784388, + "loss": 2.2307, + "step": 718 + }, + { + "epoch": 0.86, + "learning_rate": 0.0001964377848692963, + "loss": 2.176, + "step": 719 + }, + { + "epoch": 0.86, + "learning_rate": 0.00019642778952082426, + "loss": 2.1984, + "step": 720 + }, + { + "epoch": 0.87, + "learning_rate": 0.0001964177804238528, + "loss": 2.2625, + "step": 721 + }, + { + "epoch": 0.87, + "learning_rate": 0.00019640775757980903, + "loss": 2.3142, + "step": 722 + }, + { + "epoch": 0.87, + "learning_rate": 0.00019639772099012197, + "loss": 2.2366, + "step": 723 + }, + { + "epoch": 0.87, + "learning_rate": 0.00019638767065622266, + "loss": 1.7823, + "step": 724 + }, + { + "epoch": 0.87, + "learning_rate": 0.000196377606579544, + "loss": 2.0677, + "step": 725 + }, + { + "epoch": 0.87, + "learning_rate": 0.00019636752876152095, + "loss": 1.3337, + "step": 726 + }, + { + "epoch": 0.87, + "learning_rate": 0.00019635743720359037, + "loss": 2.055, + "step": 727 + }, + { + "epoch": 0.87, + "learning_rate": 0.0001963473319071911, + "loss": 1.9888, + "step": 728 + }, + { + "epoch": 0.88, + "learning_rate": 0.00019633721287376393, + "loss": 1.9258, + "step": 729 + }, + { + "epoch": 0.88, + "learning_rate": 0.00019632708010475165, + "loss": 2.3768, + "step": 730 + }, + { + "epoch": 0.88, + "learning_rate": 0.0001963169336015989, + "loss": 1.993, + "step": 731 + }, + { + "epoch": 0.88, + "learning_rate": 0.00019630677336575242, + "loss": 2.1989, + "step": 732 + }, + { + "epoch": 0.88, + "learning_rate": 0.0001962965993986608, + "loss": 2.1216, + "step": 733 + }, + { + "epoch": 0.88, + "learning_rate": 0.00019628641170177464, + "loss": 2.2217, + "step": 734 + }, + { + "epoch": 0.88, + "learning_rate": 0.00019627621027654648, + "loss": 1.8809, + "step": 735 + }, + { + "epoch": 0.88, + "learning_rate": 0.00019626599512443077, + "loss": 2.0864, + "step": 736 + }, + { + "epoch": 0.88, + "learning_rate": 0.00019625576624688406, + "loss": 2.0627, + "step": 737 + }, + { + "epoch": 0.89, + "learning_rate": 0.00019624552364536473, + "loss": 2.1347, + "step": 738 + }, + { + "epoch": 0.89, + "learning_rate": 0.00019623526732133315, + "loss": 1.9998, + "step": 739 + }, + { + "epoch": 0.89, + "learning_rate": 0.00019622499727625162, + "loss": 2.1998, + "step": 740 + }, + { + "epoch": 0.89, + "learning_rate": 0.00019621471351158443, + "loss": 1.974, + "step": 741 + }, + { + "epoch": 0.89, + "learning_rate": 0.00019620441602879787, + "loss": 1.9425, + "step": 742 + }, + { + "epoch": 0.89, + "learning_rate": 0.00019619410482936008, + "loss": 2.6227, + "step": 743 + }, + { + "epoch": 0.89, + "learning_rate": 0.00019618377991474124, + "loss": 2.1209, + "step": 744 + }, + { + "epoch": 0.89, + "learning_rate": 0.00019617344128641345, + "loss": 2.0606, + "step": 745 + }, + { + "epoch": 0.9, + "learning_rate": 0.00019616308894585078, + "loss": 2.296, + "step": 746 + }, + { + "epoch": 0.9, + "learning_rate": 0.00019615272289452923, + "loss": 2.0415, + "step": 747 + }, + { + "epoch": 0.9, + "learning_rate": 0.0001961423431339268, + "loss": 1.9516, + "step": 748 + }, + { + "epoch": 0.9, + "learning_rate": 0.0001961319496655234, + "loss": 2.0468, + "step": 749 + }, + { + "epoch": 0.9, + "learning_rate": 0.0001961215424908009, + "loss": 1.877, + "step": 750 + }, + { + "epoch": 0.9, + "learning_rate": 0.0001961111216112432, + "loss": 1.8129, + "step": 751 + }, + { + "epoch": 0.9, + "learning_rate": 0.00019610068702833596, + "loss": 1.9984, + "step": 752 + }, + { + "epoch": 0.9, + "learning_rate": 0.00019609023874356707, + "loss": 1.9013, + "step": 753 + }, + { + "epoch": 0.91, + "learning_rate": 0.00019607977675842615, + "loss": 2.0546, + "step": 754 + }, + { + "epoch": 0.91, + "learning_rate": 0.00019606930107440485, + "loss": 2.2817, + "step": 755 + }, + { + "epoch": 0.91, + "learning_rate": 0.0001960588116929968, + "loss": 2.0578, + "step": 756 + }, + { + "epoch": 0.91, + "learning_rate": 0.00019604830861569755, + "loss": 2.3521, + "step": 757 + }, + { + "epoch": 0.91, + "learning_rate": 0.00019603779184400457, + "loss": 2.0392, + "step": 758 + }, + { + "epoch": 0.91, + "learning_rate": 0.0001960272613794174, + "loss": 1.9863, + "step": 759 + }, + { + "epoch": 0.91, + "learning_rate": 0.00019601671722343738, + "loss": 2.1889, + "step": 760 + }, + { + "epoch": 0.91, + "learning_rate": 0.0001960061593775679, + "loss": 2.0908, + "step": 761 + }, + { + "epoch": 0.91, + "learning_rate": 0.0001959955878433143, + "loss": 1.986, + "step": 762 + }, + { + "epoch": 0.92, + "learning_rate": 0.00019598500262218386, + "loss": 2.0339, + "step": 763 + }, + { + "epoch": 0.92, + "learning_rate": 0.00019597440371568574, + "loss": 2.0958, + "step": 764 + }, + { + "epoch": 0.92, + "learning_rate": 0.0001959637911253312, + "loss": 1.9866, + "step": 765 + }, + { + "epoch": 0.92, + "learning_rate": 0.00019595316485263327, + "loss": 2.2228, + "step": 766 + }, + { + "epoch": 0.92, + "learning_rate": 0.00019594252489910706, + "loss": 1.915, + "step": 767 + }, + { + "epoch": 0.92, + "learning_rate": 0.00019593187126626965, + "loss": 2.0741, + "step": 768 + }, + { + "epoch": 0.92, + "learning_rate": 0.00019592120395563994, + "loss": 2.5346, + "step": 769 + }, + { + "epoch": 0.92, + "learning_rate": 0.00019591052296873888, + "loss": 2.4908, + "step": 770 + }, + { + "epoch": 0.93, + "learning_rate": 0.00019589982830708937, + "loss": 2.1042, + "step": 771 + }, + { + "epoch": 0.93, + "learning_rate": 0.00019588911997221625, + "loss": 1.8676, + "step": 772 + }, + { + "epoch": 0.93, + "learning_rate": 0.0001958783979656462, + "loss": 1.9152, + "step": 773 + }, + { + "epoch": 0.93, + "learning_rate": 0.00019586766228890806, + "loss": 1.7784, + "step": 774 + }, + { + "epoch": 0.93, + "learning_rate": 0.0001958569129435324, + "loss": 2.0784, + "step": 775 + }, + { + "epoch": 0.93, + "learning_rate": 0.0001958461499310519, + "loss": 1.7262, + "step": 776 + }, + { + "epoch": 0.93, + "learning_rate": 0.00019583537325300118, + "loss": 2.4154, + "step": 777 + }, + { + "epoch": 0.93, + "learning_rate": 0.00019582458291091663, + "loss": 2.3185, + "step": 778 + }, + { + "epoch": 0.94, + "learning_rate": 0.00019581377890633684, + "loss": 2.0981, + "step": 779 + }, + { + "epoch": 0.94, + "learning_rate": 0.00019580296124080212, + "loss": 1.8952, + "step": 780 + }, + { + "epoch": 0.94, + "learning_rate": 0.00019579212991585493, + "loss": 1.7208, + "step": 781 + }, + { + "epoch": 0.94, + "learning_rate": 0.00019578128493303955, + "loss": 2.0209, + "step": 782 + }, + { + "epoch": 0.94, + "learning_rate": 0.00019577042629390217, + "loss": 2.1867, + "step": 783 + }, + { + "epoch": 0.94, + "learning_rate": 0.0001957595539999911, + "loss": 2.0805, + "step": 784 + }, + { + "epoch": 0.94, + "learning_rate": 0.00019574866805285645, + "loss": 2.0451, + "step": 785 + }, + { + "epoch": 0.94, + "learning_rate": 0.00019573776845405028, + "loss": 2.2056, + "step": 786 + }, + { + "epoch": 0.94, + "learning_rate": 0.0001957268552051267, + "loss": 2.0773, + "step": 787 + }, + { + "epoch": 0.95, + "learning_rate": 0.00019571592830764165, + "loss": 2.2036, + "step": 788 + }, + { + "epoch": 0.95, + "learning_rate": 0.00019570498776315309, + "loss": 1.7298, + "step": 789 + }, + { + "epoch": 0.95, + "learning_rate": 0.0001956940335732209, + "loss": 1.8931, + "step": 790 + }, + { + "epoch": 0.95, + "learning_rate": 0.0001956830657394069, + "loss": 2.1567, + "step": 791 + }, + { + "epoch": 0.95, + "learning_rate": 0.00019567208426327488, + "loss": 1.9471, + "step": 792 + }, + { + "epoch": 0.95, + "learning_rate": 0.00019566108914639054, + "loss": 1.8916, + "step": 793 + }, + { + "epoch": 0.95, + "learning_rate": 0.00019565008039032158, + "loss": 2.0111, + "step": 794 + }, + { + "epoch": 0.95, + "learning_rate": 0.00019563905799663752, + "loss": 2.1374, + "step": 795 + }, + { + "epoch": 0.96, + "learning_rate": 0.00019562802196691003, + "loss": 2.3083, + "step": 796 + }, + { + "epoch": 0.96, + "learning_rate": 0.00019561697230271254, + "loss": 2.0381, + "step": 797 + }, + { + "epoch": 0.96, + "learning_rate": 0.0001956059090056205, + "loss": 2.1909, + "step": 798 + }, + { + "epoch": 0.96, + "learning_rate": 0.00019559483207721133, + "loss": 1.9893, + "step": 799 + }, + { + "epoch": 0.96, + "learning_rate": 0.0001955837415190643, + "loss": 2.3178, + "step": 800 + }, + { + "epoch": 0.96, + "learning_rate": 0.0001955726373327607, + "loss": 2.0815, + "step": 801 + }, + { + "epoch": 0.96, + "learning_rate": 0.00019556151951988376, + "loss": 1.6012, + "step": 802 + }, + { + "epoch": 0.96, + "learning_rate": 0.00019555038808201865, + "loss": 1.4965, + "step": 803 + }, + { + "epoch": 0.97, + "learning_rate": 0.00019553924302075242, + "loss": 2.3069, + "step": 804 + }, + { + "epoch": 0.97, + "learning_rate": 0.00019552808433767415, + "loss": 2.2388, + "step": 805 + }, + { + "epoch": 0.97, + "learning_rate": 0.00019551691203437482, + "loss": 2.5662, + "step": 806 + }, + { + "epoch": 0.97, + "learning_rate": 0.00019550572611244738, + "loss": 1.9419, + "step": 807 + }, + { + "epoch": 0.97, + "learning_rate": 0.00019549452657348663, + "loss": 2.3638, + "step": 808 + }, + { + "epoch": 0.97, + "learning_rate": 0.00019548331341908947, + "loss": 2.1567, + "step": 809 + }, + { + "epoch": 0.97, + "learning_rate": 0.00019547208665085457, + "loss": 1.9697, + "step": 810 + }, + { + "epoch": 0.97, + "learning_rate": 0.00019546084627038268, + "loss": 1.9006, + "step": 811 + }, + { + "epoch": 0.97, + "learning_rate": 0.0001954495922792764, + "loss": 2.304, + "step": 812 + }, + { + "epoch": 0.98, + "learning_rate": 0.0001954383246791403, + "loss": 2.0494, + "step": 813 + }, + { + "epoch": 0.98, + "learning_rate": 0.00019542704347158093, + "loss": 1.8562, + "step": 814 + }, + { + "epoch": 0.98, + "learning_rate": 0.00019541574865820672, + "loss": 2.1041, + "step": 815 + }, + { + "epoch": 0.98, + "learning_rate": 0.00019540444024062804, + "loss": 2.22, + "step": 816 + }, + { + "epoch": 0.98, + "learning_rate": 0.00019539311822045727, + "loss": 1.9925, + "step": 817 + }, + { + "epoch": 0.98, + "learning_rate": 0.00019538178259930869, + "loss": 2.3213, + "step": 818 + }, + { + "epoch": 0.98, + "learning_rate": 0.00019537043337879845, + "loss": 2.0319, + "step": 819 + }, + { + "epoch": 0.98, + "learning_rate": 0.00019535907056054475, + "loss": 1.8578, + "step": 820 + }, + { + "epoch": 0.99, + "learning_rate": 0.00019534769414616764, + "loss": 1.4115, + "step": 821 + }, + { + "epoch": 0.99, + "learning_rate": 0.0001953363041372892, + "loss": 2.0731, + "step": 822 + }, + { + "epoch": 0.99, + "learning_rate": 0.00019532490053553335, + "loss": 2.0605, + "step": 823 + }, + { + "epoch": 0.99, + "learning_rate": 0.00019531348334252607, + "loss": 1.9044, + "step": 824 + }, + { + "epoch": 0.99, + "learning_rate": 0.0001953020525598951, + "loss": 1.7405, + "step": 825 + }, + { + "epoch": 0.99, + "learning_rate": 0.0001952906081892703, + "loss": 1.898, + "step": 826 + }, + { + "epoch": 0.99, + "learning_rate": 0.00019527915023228332, + "loss": 1.9696, + "step": 827 + }, + { + "epoch": 0.99, + "learning_rate": 0.00019526767869056788, + "loss": 2.0469, + "step": 828 + }, + { + "epoch": 1.0, + "learning_rate": 0.00019525619356575952, + "loss": 2.0307, + "step": 829 + }, + { + "epoch": 1.0, + "learning_rate": 0.00019524469485949583, + "loss": 2.002, + "step": 830 + }, + { + "epoch": 1.0, + "learning_rate": 0.00019523318257341622, + "loss": 1.9438, + "step": 831 + }, + { + "epoch": 1.0, + "learning_rate": 0.00019522165670916207, + "loss": 1.535, + "step": 832 + }, + { + "epoch": 1.0, + "learning_rate": 0.0001952101172683768, + "loss": 1.7505, + "step": 833 + }, + { + "epoch": 1.0, + "learning_rate": 0.00019519856425270562, + "loss": 2.2248, + "step": 834 + }, + { + "epoch": 1.0, + "learning_rate": 0.00019518699766379576, + "loss": 2.0669, + "step": 835 + }, + { + "epoch": 1.0, + "learning_rate": 0.00019517541750329635, + "loss": 2.0268, + "step": 836 + }, + { + "epoch": 1.0, + "eval_loss": 1.9969017505645752, + "eval_runtime": 283.3157, + "eval_samples_per_second": 0.727, + "eval_steps_per_second": 0.727, + "step": 836 + }, + { + "epoch": 1.0, + "learning_rate": 0.00019516382377285848, + "loss": 1.6712, + "step": 837 + }, + { + "epoch": 1.01, + "learning_rate": 0.0001951522164741352, + "loss": 2.1558, + "step": 838 + }, + { + "epoch": 1.01, + "learning_rate": 0.00019514059560878138, + "loss": 2.1599, + "step": 839 + }, + { + "epoch": 1.01, + "learning_rate": 0.00019512896117845392, + "loss": 1.8762, + "step": 840 + }, + { + "epoch": 1.01, + "learning_rate": 0.00019511731318481168, + "loss": 2.0189, + "step": 841 + }, + { + "epoch": 1.01, + "learning_rate": 0.00019510565162951537, + "loss": 1.9364, + "step": 842 + }, + { + "epoch": 1.01, + "learning_rate": 0.00019509397651422769, + "loss": 1.7319, + "step": 843 + }, + { + "epoch": 1.01, + "learning_rate": 0.00019508228784061326, + "loss": 1.9424, + "step": 844 + }, + { + "epoch": 1.01, + "learning_rate": 0.0001950705856103386, + "loss": 2.277, + "step": 845 + }, + { + "epoch": 1.02, + "learning_rate": 0.00019505886982507225, + "loss": 1.6511, + "step": 846 + }, + { + "epoch": 1.0, + "learning_rate": 0.0001950471404864846, + "loss": 1.9056, + "step": 847 + }, + { + "epoch": 1.0, + "learning_rate": 0.00019503539759624798, + "loss": 1.5105, + "step": 848 + }, + { + "epoch": 1.0, + "learning_rate": 0.0001950236411560367, + "loss": 1.9469, + "step": 849 + }, + { + "epoch": 1.0, + "learning_rate": 0.00019501187116752693, + "loss": 1.5012, + "step": 850 + }, + { + "epoch": 1.01, + "learning_rate": 0.00019500008763239683, + "loss": 1.7086, + "step": 851 + }, + { + "epoch": 1.01, + "learning_rate": 0.00019498829055232647, + "loss": 1.5586, + "step": 852 + }, + { + "epoch": 1.01, + "learning_rate": 0.00019497647992899788, + "loss": 1.5573, + "step": 853 + }, + { + "epoch": 1.01, + "learning_rate": 0.000194964655764095, + "loss": 2.0757, + "step": 854 + }, + { + "epoch": 1.01, + "learning_rate": 0.00019495281805930367, + "loss": 1.5478, + "step": 855 + }, + { + "epoch": 1.01, + "learning_rate": 0.00019494096681631172, + "loss": 1.7068, + "step": 856 + }, + { + "epoch": 1.01, + "learning_rate": 0.00019492910203680884, + "loss": 1.6759, + "step": 857 + }, + { + "epoch": 1.01, + "learning_rate": 0.0001949172237224867, + "loss": 1.4621, + "step": 858 + }, + { + "epoch": 1.02, + "learning_rate": 0.00019490533187503892, + "loss": 1.5359, + "step": 859 + }, + { + "epoch": 1.02, + "learning_rate": 0.000194893426496161, + "loss": 1.9365, + "step": 860 + }, + { + "epoch": 1.02, + "learning_rate": 0.00019488150758755035, + "loss": 1.7089, + "step": 861 + }, + { + "epoch": 1.02, + "learning_rate": 0.00019486957515090641, + "loss": 1.4924, + "step": 862 + }, + { + "epoch": 1.02, + "learning_rate": 0.00019485762918793046, + "loss": 1.387, + "step": 863 + }, + { + "epoch": 1.02, + "learning_rate": 0.0001948456697003257, + "loss": 1.631, + "step": 864 + }, + { + "epoch": 1.02, + "learning_rate": 0.00019483369668979732, + "loss": 1.7953, + "step": 865 + }, + { + "epoch": 1.02, + "learning_rate": 0.00019482171015805245, + "loss": 1.7552, + "step": 866 + }, + { + "epoch": 1.03, + "learning_rate": 0.00019480971010680002, + "loss": 1.8313, + "step": 867 + }, + { + "epoch": 1.03, + "learning_rate": 0.00019479769653775106, + "loss": 1.593, + "step": 868 + }, + { + "epoch": 1.03, + "learning_rate": 0.00019478566945261837, + "loss": 1.9506, + "step": 869 + }, + { + "epoch": 1.03, + "learning_rate": 0.00019477362885311682, + "loss": 1.9598, + "step": 870 + }, + { + "epoch": 1.03, + "learning_rate": 0.0001947615747409631, + "loss": 1.7324, + "step": 871 + }, + { + "epoch": 1.03, + "learning_rate": 0.00019474950711787585, + "loss": 2.1208, + "step": 872 + }, + { + "epoch": 1.03, + "learning_rate": 0.0001947374259855757, + "loss": 1.4111, + "step": 873 + }, + { + "epoch": 1.03, + "learning_rate": 0.00019472533134578507, + "loss": 1.6696, + "step": 874 + }, + { + "epoch": 1.03, + "learning_rate": 0.00019471322320022849, + "loss": 1.6999, + "step": 875 + }, + { + "epoch": 1.04, + "learning_rate": 0.00019470110155063225, + "loss": 2.1287, + "step": 876 + }, + { + "epoch": 1.04, + "learning_rate": 0.00019468896639872468, + "loss": 1.874, + "step": 877 + }, + { + "epoch": 1.04, + "learning_rate": 0.00019467681774623592, + "loss": 1.7149, + "step": 878 + }, + { + "epoch": 1.04, + "learning_rate": 0.00019466465559489816, + "loss": 1.9563, + "step": 879 + }, + { + "epoch": 1.04, + "learning_rate": 0.00019465247994644545, + "loss": 1.3504, + "step": 880 + }, + { + "epoch": 1.04, + "learning_rate": 0.00019464029080261378, + "loss": 1.6176, + "step": 881 + }, + { + "epoch": 1.04, + "learning_rate": 0.00019462808816514103, + "loss": 1.7577, + "step": 882 + }, + { + "epoch": 1.04, + "learning_rate": 0.00019461587203576706, + "loss": 1.8054, + "step": 883 + }, + { + "epoch": 1.05, + "learning_rate": 0.00019460364241623358, + "loss": 2.0246, + "step": 884 + }, + { + "epoch": 1.05, + "learning_rate": 0.00019459139930828428, + "loss": 1.7645, + "step": 885 + }, + { + "epoch": 1.05, + "learning_rate": 0.0001945791427136648, + "loss": 1.9225, + "step": 886 + }, + { + "epoch": 1.05, + "learning_rate": 0.00019456687263412262, + "loss": 1.8967, + "step": 887 + }, + { + "epoch": 1.05, + "learning_rate": 0.0001945545890714072, + "loss": 1.5287, + "step": 888 + }, + { + "epoch": 1.05, + "learning_rate": 0.0001945422920272699, + "loss": 1.5033, + "step": 889 + }, + { + "epoch": 1.05, + "learning_rate": 0.00019452998150346401, + "loss": 2.0148, + "step": 890 + }, + { + "epoch": 1.05, + "learning_rate": 0.0001945176575017448, + "loss": 1.3706, + "step": 891 + }, + { + "epoch": 1.06, + "learning_rate": 0.0001945053200238693, + "loss": 1.7603, + "step": 892 + }, + { + "epoch": 1.06, + "learning_rate": 0.00019449296907159667, + "loss": 1.9884, + "step": 893 + }, + { + "epoch": 1.06, + "learning_rate": 0.00019448060464668783, + "loss": 1.6133, + "step": 894 + }, + { + "epoch": 1.06, + "learning_rate": 0.00019446822675090565, + "loss": 1.7885, + "step": 895 + }, + { + "epoch": 1.06, + "learning_rate": 0.00019445583538601498, + "loss": 1.8573, + "step": 896 + }, + { + "epoch": 1.06, + "learning_rate": 0.0001944434305537826, + "loss": 1.7241, + "step": 897 + }, + { + "epoch": 1.06, + "learning_rate": 0.0001944310122559771, + "loss": 1.8942, + "step": 898 + }, + { + "epoch": 1.06, + "learning_rate": 0.0001944185804943691, + "loss": 1.7541, + "step": 899 + }, + { + "epoch": 1.06, + "learning_rate": 0.00019440613527073105, + "loss": 1.9608, + "step": 900 + }, + { + "epoch": 1.07, + "learning_rate": 0.00019439367658683745, + "loss": 2.0969, + "step": 901 + }, + { + "epoch": 1.07, + "learning_rate": 0.00019438120444446457, + "loss": 2.2589, + "step": 902 + }, + { + "epoch": 1.07, + "learning_rate": 0.0001943687188453907, + "loss": 1.7335, + "step": 903 + }, + { + "epoch": 1.07, + "learning_rate": 0.00019435621979139596, + "loss": 1.8663, + "step": 904 + }, + { + "epoch": 1.07, + "learning_rate": 0.00019434370728426252, + "loss": 1.5627, + "step": 905 + }, + { + "epoch": 1.07, + "learning_rate": 0.0001943311813257743, + "loss": 1.6101, + "step": 906 + }, + { + "epoch": 1.07, + "learning_rate": 0.00019431864191771732, + "loss": 1.9661, + "step": 907 + }, + { + "epoch": 1.07, + "learning_rate": 0.0001943060890618794, + "loss": 1.6487, + "step": 908 + }, + { + "epoch": 1.08, + "learning_rate": 0.00019429352276005026, + "loss": 2.1282, + "step": 909 + }, + { + "epoch": 1.08, + "learning_rate": 0.00019428094301402162, + "loss": 1.6944, + "step": 910 + }, + { + "epoch": 1.08, + "learning_rate": 0.00019426834982558705, + "loss": 1.2433, + "step": 911 + }, + { + "epoch": 1.08, + "learning_rate": 0.00019425574319654213, + "loss": 1.5735, + "step": 912 + }, + { + "epoch": 1.08, + "learning_rate": 0.00019424312312868417, + "loss": 1.6499, + "step": 913 + }, + { + "epoch": 1.08, + "learning_rate": 0.00019423048962381265, + "loss": 1.8366, + "step": 914 + }, + { + "epoch": 1.08, + "learning_rate": 0.00019421784268372876, + "loss": 1.906, + "step": 915 + }, + { + "epoch": 1.08, + "learning_rate": 0.00019420518231023568, + "loss": 1.5976, + "step": 916 + }, + { + "epoch": 1.09, + "learning_rate": 0.0001941925085051385, + "loss": 1.6722, + "step": 917 + }, + { + "epoch": 1.09, + "learning_rate": 0.00019417982127024422, + "loss": 1.8832, + "step": 918 + }, + { + "epoch": 1.09, + "learning_rate": 0.00019416712060736183, + "loss": 1.8865, + "step": 919 + }, + { + "epoch": 1.09, + "learning_rate": 0.00019415440651830208, + "loss": 1.6627, + "step": 920 + }, + { + "epoch": 1.09, + "learning_rate": 0.0001941416790048778, + "loss": 1.3598, + "step": 921 + }, + { + "epoch": 1.09, + "learning_rate": 0.00019412893806890357, + "loss": 2.0506, + "step": 922 + }, + { + "epoch": 1.09, + "learning_rate": 0.00019411618371219605, + "loss": 1.9794, + "step": 923 + }, + { + "epoch": 1.09, + "learning_rate": 0.0001941034159365737, + "loss": 1.7851, + "step": 924 + }, + { + "epoch": 1.09, + "learning_rate": 0.0001940906347438569, + "loss": 1.8312, + "step": 925 + }, + { + "epoch": 1.1, + "learning_rate": 0.00019407784013586804, + "loss": 1.5167, + "step": 926 + }, + { + "epoch": 1.1, + "learning_rate": 0.00019406503211443128, + "loss": 1.5725, + "step": 927 + }, + { + "epoch": 1.1, + "learning_rate": 0.00019405221068137277, + "loss": 1.8857, + "step": 928 + }, + { + "epoch": 1.1, + "learning_rate": 0.00019403937583852061, + "loss": 1.741, + "step": 929 + }, + { + "epoch": 1.1, + "learning_rate": 0.00019402652758770475, + "loss": 1.6748, + "step": 930 + }, + { + "epoch": 1.1, + "learning_rate": 0.00019401366593075706, + "loss": 1.7285, + "step": 931 + }, + { + "epoch": 1.1, + "learning_rate": 0.00019400079086951135, + "loss": 1.7545, + "step": 932 + }, + { + "epoch": 1.1, + "learning_rate": 0.00019398790240580333, + "loss": 1.4491, + "step": 933 + }, + { + "epoch": 1.11, + "learning_rate": 0.00019397500054147058, + "loss": 1.3359, + "step": 934 + }, + { + "epoch": 1.11, + "learning_rate": 0.00019396208527835263, + "loss": 1.9567, + "step": 935 + }, + { + "epoch": 1.11, + "learning_rate": 0.0001939491566182909, + "loss": 2.0011, + "step": 936 + }, + { + "epoch": 1.11, + "learning_rate": 0.00019393621456312881, + "loss": 1.9076, + "step": 937 + }, + { + "epoch": 1.11, + "learning_rate": 0.00019392325911471155, + "loss": 1.5388, + "step": 938 + }, + { + "epoch": 1.11, + "learning_rate": 0.00019391029027488629, + "loss": 1.2337, + "step": 939 + }, + { + "epoch": 1.11, + "learning_rate": 0.00019389730804550211, + "loss": 1.5752, + "step": 940 + }, + { + "epoch": 1.11, + "learning_rate": 0.00019388431242840998, + "loss": 1.9131, + "step": 941 + }, + { + "epoch": 1.12, + "learning_rate": 0.00019387130342546284, + "loss": 1.4177, + "step": 942 + }, + { + "epoch": 1.12, + "learning_rate": 0.00019385828103851544, + "loss": 1.5865, + "step": 943 + }, + { + "epoch": 1.12, + "learning_rate": 0.0001938452452694245, + "loss": 1.6335, + "step": 944 + }, + { + "epoch": 1.12, + "learning_rate": 0.00019383219612004865, + "loss": 1.8599, + "step": 945 + }, + { + "epoch": 1.12, + "learning_rate": 0.00019381913359224842, + "loss": 1.3035, + "step": 946 + }, + { + "epoch": 1.12, + "learning_rate": 0.00019380605768788621, + "loss": 1.7586, + "step": 947 + }, + { + "epoch": 1.12, + "learning_rate": 0.0001937929684088264, + "loss": 1.7334, + "step": 948 + }, + { + "epoch": 1.12, + "learning_rate": 0.00019377986575693518, + "loss": 1.5749, + "step": 949 + }, + { + "epoch": 1.12, + "learning_rate": 0.00019376674973408075, + "loss": 1.874, + "step": 950 + }, + { + "epoch": 1.13, + "learning_rate": 0.00019375362034213314, + "loss": 2.3055, + "step": 951 + }, + { + "epoch": 1.13, + "learning_rate": 0.00019374047758296433, + "loss": 1.5801, + "step": 952 + }, + { + "epoch": 1.13, + "learning_rate": 0.0001937273214584482, + "loss": 1.8788, + "step": 953 + }, + { + "epoch": 1.13, + "learning_rate": 0.00019371415197046052, + "loss": 2.431, + "step": 954 + }, + { + "epoch": 1.13, + "learning_rate": 0.00019370096912087897, + "loss": 1.4963, + "step": 955 + }, + { + "epoch": 1.13, + "learning_rate": 0.0001936877729115831, + "loss": 1.514, + "step": 956 + }, + { + "epoch": 1.13, + "learning_rate": 0.00019367456334445446, + "loss": 1.6099, + "step": 957 + }, + { + "epoch": 1.13, + "learning_rate": 0.00019366134042137642, + "loss": 1.9367, + "step": 958 + }, + { + "epoch": 1.14, + "learning_rate": 0.00019364810414423427, + "loss": 1.7384, + "step": 959 + }, + { + "epoch": 1.14, + "learning_rate": 0.00019363485451491524, + "loss": 1.6166, + "step": 960 + }, + { + "epoch": 1.14, + "learning_rate": 0.00019362159153530844, + "loss": 1.955, + "step": 961 + }, + { + "epoch": 1.14, + "learning_rate": 0.00019360831520730482, + "loss": 1.4189, + "step": 962 + }, + { + "epoch": 1.14, + "learning_rate": 0.00019359502553279736, + "loss": 1.4506, + "step": 963 + }, + { + "epoch": 1.14, + "learning_rate": 0.00019358172251368087, + "loss": 1.7108, + "step": 964 + }, + { + "epoch": 1.14, + "learning_rate": 0.00019356840615185203, + "loss": 1.6641, + "step": 965 + }, + { + "epoch": 1.14, + "learning_rate": 0.00019355507644920952, + "loss": 1.7506, + "step": 966 + }, + { + "epoch": 1.15, + "learning_rate": 0.00019354173340765382, + "loss": 2.0598, + "step": 967 + }, + { + "epoch": 1.15, + "learning_rate": 0.0001935283770290874, + "loss": 1.3494, + "step": 968 + }, + { + "epoch": 1.15, + "learning_rate": 0.00019351500731541453, + "loss": 1.6571, + "step": 969 + }, + { + "epoch": 1.15, + "learning_rate": 0.0001935016242685415, + "loss": 1.6403, + "step": 970 + }, + { + "epoch": 1.15, + "learning_rate": 0.00019348822789037637, + "loss": 1.7555, + "step": 971 + }, + { + "epoch": 1.15, + "learning_rate": 0.00019347481818282925, + "loss": 2.1451, + "step": 972 + }, + { + "epoch": 1.15, + "learning_rate": 0.000193461395147812, + "loss": 1.4522, + "step": 973 + }, + { + "epoch": 1.15, + "learning_rate": 0.0001934479587872385, + "loss": 1.7147, + "step": 974 + }, + { + "epoch": 1.15, + "learning_rate": 0.0001934345091030245, + "loss": 1.3909, + "step": 975 + }, + { + "epoch": 1.16, + "learning_rate": 0.00019342104609708756, + "loss": 1.8104, + "step": 976 + }, + { + "epoch": 1.16, + "learning_rate": 0.00019340756977134728, + "loss": 1.5221, + "step": 977 + }, + { + "epoch": 1.16, + "learning_rate": 0.000193394080127725, + "loss": 1.9447, + "step": 978 + }, + { + "epoch": 1.16, + "learning_rate": 0.0001933805771681442, + "loss": 1.5742, + "step": 979 + }, + { + "epoch": 1.16, + "learning_rate": 0.00019336706089452996, + "loss": 1.5312, + "step": 980 + }, + { + "epoch": 1.16, + "learning_rate": 0.00019335353130880948, + "loss": 1.4304, + "step": 981 + }, + { + "epoch": 1.16, + "learning_rate": 0.00019333998841291177, + "loss": 1.8379, + "step": 982 + }, + { + "epoch": 1.16, + "learning_rate": 0.00019332643220876773, + "loss": 1.877, + "step": 983 + }, + { + "epoch": 1.17, + "learning_rate": 0.0001933128626983102, + "loss": 1.9627, + "step": 984 + }, + { + "epoch": 1.17, + "learning_rate": 0.0001932992798834739, + "loss": 1.7857, + "step": 985 + }, + { + "epoch": 1.17, + "learning_rate": 0.00019328568376619543, + "loss": 1.3189, + "step": 986 + }, + { + "epoch": 1.17, + "learning_rate": 0.00019327207434841333, + "loss": 1.9588, + "step": 987 + }, + { + "epoch": 1.17, + "learning_rate": 0.00019325845163206795, + "loss": 1.3132, + "step": 988 + }, + { + "epoch": 1.17, + "learning_rate": 0.00019324481561910163, + "loss": 1.6304, + "step": 989 + }, + { + "epoch": 1.17, + "learning_rate": 0.0001932311663114586, + "loss": 1.8322, + "step": 990 + }, + { + "epoch": 1.17, + "learning_rate": 0.00019321750371108486, + "loss": 1.4192, + "step": 991 + }, + { + "epoch": 1.18, + "learning_rate": 0.0001932038278199285, + "loss": 1.3915, + "step": 992 + }, + { + "epoch": 1.18, + "learning_rate": 0.00019319013863993933, + "loss": 1.8433, + "step": 993 + }, + { + "epoch": 1.18, + "learning_rate": 0.0001931764361730692, + "loss": 2.1459, + "step": 994 + }, + { + "epoch": 1.18, + "learning_rate": 0.0001931627204212717, + "loss": 1.9799, + "step": 995 + }, + { + "epoch": 1.18, + "learning_rate": 0.00019314899138650243, + "loss": 1.855, + "step": 996 + }, + { + "epoch": 1.18, + "learning_rate": 0.00019313524907071887, + "loss": 1.4763, + "step": 997 + }, + { + "epoch": 1.18, + "learning_rate": 0.00019312149347588037, + "loss": 2.0128, + "step": 998 + }, + { + "epoch": 1.18, + "learning_rate": 0.00019310772460394814, + "loss": 1.6964, + "step": 999 + }, + { + "epoch": 1.18, + "learning_rate": 0.0001930939424568854, + "loss": 1.5864, + "step": 1000 + }, + { + "epoch": 1.19, + "learning_rate": 0.00019308014703665712, + "loss": 1.8437, + "step": 1001 + }, + { + "epoch": 1.19, + "learning_rate": 0.00019306633834523024, + "loss": 2.1677, + "step": 1002 + }, + { + "epoch": 1.19, + "learning_rate": 0.00019305251638457356, + "loss": 1.8872, + "step": 1003 + }, + { + "epoch": 1.19, + "learning_rate": 0.0001930386811566578, + "loss": 1.7312, + "step": 1004 + }, + { + "epoch": 1.19, + "learning_rate": 0.0001930248326634556, + "loss": 1.6772, + "step": 1005 + }, + { + "epoch": 1.19, + "learning_rate": 0.00019301097090694143, + "loss": 1.9666, + "step": 1006 + }, + { + "epoch": 1.19, + "learning_rate": 0.00019299709588909165, + "loss": 1.8946, + "step": 1007 + }, + { + "epoch": 1.19, + "learning_rate": 0.00019298320761188453, + "loss": 2.1784, + "step": 1008 + }, + { + "epoch": 1.2, + "learning_rate": 0.0001929693060773003, + "loss": 2.0249, + "step": 1009 + }, + { + "epoch": 1.2, + "learning_rate": 0.00019295539128732093, + "loss": 1.717, + "step": 1010 + }, + { + "epoch": 1.2, + "learning_rate": 0.00019294146324393046, + "loss": 1.8671, + "step": 1011 + }, + { + "epoch": 1.2, + "learning_rate": 0.00019292752194911464, + "loss": 1.8388, + "step": 1012 + }, + { + "epoch": 1.2, + "learning_rate": 0.00019291356740486123, + "loss": 1.9111, + "step": 1013 + }, + { + "epoch": 1.2, + "learning_rate": 0.00019289959961315986, + "loss": 1.5287, + "step": 1014 + }, + { + "epoch": 1.2, + "learning_rate": 0.000192885618576002, + "loss": 1.5669, + "step": 1015 + }, + { + "epoch": 1.2, + "learning_rate": 0.00019287162429538105, + "loss": 1.9095, + "step": 1016 + }, + { + "epoch": 1.21, + "learning_rate": 0.00019285761677329232, + "loss": 1.9133, + "step": 1017 + }, + { + "epoch": 1.21, + "learning_rate": 0.00019284359601173294, + "loss": 2.1099, + "step": 1018 + }, + { + "epoch": 1.21, + "learning_rate": 0.000192829562012702, + "loss": 1.6303, + "step": 1019 + }, + { + "epoch": 1.21, + "learning_rate": 0.00019281551477820036, + "loss": 1.5907, + "step": 1020 + }, + { + "epoch": 1.21, + "learning_rate": 0.00019280145431023097, + "loss": 1.4897, + "step": 1021 + }, + { + "epoch": 1.21, + "learning_rate": 0.00019278738061079845, + "loss": 1.7414, + "step": 1022 + }, + { + "epoch": 1.21, + "learning_rate": 0.00019277329368190942, + "loss": 1.816, + "step": 1023 + }, + { + "epoch": 1.21, + "learning_rate": 0.00019275919352557241, + "loss": 1.5033, + "step": 1024 + }, + { + "epoch": 1.21, + "learning_rate": 0.00019274508014379777, + "loss": 1.7923, + "step": 1025 + }, + { + "epoch": 1.22, + "learning_rate": 0.00019273095353859775, + "loss": 1.3094, + "step": 1026 + }, + { + "epoch": 1.22, + "learning_rate": 0.00019271681371198652, + "loss": 1.7689, + "step": 1027 + }, + { + "epoch": 1.22, + "learning_rate": 0.0001927026606659801, + "loss": 1.8019, + "step": 1028 + }, + { + "epoch": 1.22, + "learning_rate": 0.00019268849440259639, + "loss": 1.8818, + "step": 1029 + }, + { + "epoch": 1.22, + "learning_rate": 0.00019267431492385521, + "loss": 1.7442, + "step": 1030 + }, + { + "epoch": 1.22, + "learning_rate": 0.00019266012223177824, + "loss": 2.045, + "step": 1031 + }, + { + "epoch": 1.22, + "learning_rate": 0.00019264591632838903, + "loss": 1.7842, + "step": 1032 + }, + { + "epoch": 1.22, + "learning_rate": 0.00019263169721571308, + "loss": 1.5289, + "step": 1033 + }, + { + "epoch": 1.23, + "learning_rate": 0.00019261746489577765, + "loss": 1.6013, + "step": 1034 + }, + { + "epoch": 1.23, + "learning_rate": 0.00019260321937061202, + "loss": 1.7912, + "step": 1035 + }, + { + "epoch": 1.23, + "learning_rate": 0.0001925889606422473, + "loss": 1.7573, + "step": 1036 + }, + { + "epoch": 1.23, + "learning_rate": 0.0001925746887127164, + "loss": 1.7368, + "step": 1037 + }, + { + "epoch": 1.23, + "learning_rate": 0.00019256040358405424, + "loss": 1.7497, + "step": 1038 + }, + { + "epoch": 1.23, + "learning_rate": 0.00019254610525829758, + "loss": 2.0042, + "step": 1039 + }, + { + "epoch": 1.23, + "learning_rate": 0.00019253179373748504, + "loss": 2.0732, + "step": 1040 + }, + { + "epoch": 1.23, + "learning_rate": 0.00019251746902365708, + "loss": 1.8878, + "step": 1041 + }, + { + "epoch": 1.24, + "learning_rate": 0.00019250313111885618, + "loss": 1.9404, + "step": 1042 + }, + { + "epoch": 1.24, + "learning_rate": 0.00019248878002512654, + "loss": 1.5535, + "step": 1043 + }, + { + "epoch": 1.24, + "learning_rate": 0.00019247441574451432, + "loss": 1.9344, + "step": 1044 + }, + { + "epoch": 1.24, + "learning_rate": 0.0001924600382790676, + "loss": 1.9696, + "step": 1045 + }, + { + "epoch": 1.24, + "eval_loss": 2.064669609069824, + "eval_runtime": 283.003, + "eval_samples_per_second": 0.728, + "eval_steps_per_second": 0.728, + "step": 1045 + }, + { + "epoch": 1.24, + "learning_rate": 0.00019244564763083624, + "loss": 1.4577, + "step": 1046 + }, + { + "epoch": 1.24, + "learning_rate": 0.00019243124380187204, + "loss": 2.1324, + "step": 1047 + }, + { + "epoch": 1.24, + "learning_rate": 0.00019241682679422873, + "loss": 1.4713, + "step": 1048 + }, + { + "epoch": 1.24, + "learning_rate": 0.00019240239660996177, + "loss": 1.7455, + "step": 1049 + }, + { + "epoch": 1.24, + "learning_rate": 0.0001923879532511287, + "loss": 1.5372, + "step": 1050 + }, + { + "epoch": 1.25, + "learning_rate": 0.00019237349671978872, + "loss": 2.0984, + "step": 1051 + }, + { + "epoch": 1.25, + "learning_rate": 0.0001923590270180031, + "loss": 1.5023, + "step": 1052 + }, + { + "epoch": 1.25, + "learning_rate": 0.0001923445441478348, + "loss": 2.0826, + "step": 1053 + }, + { + "epoch": 1.25, + "learning_rate": 0.00019233004811134886, + "loss": 1.7448, + "step": 1054 + }, + { + "epoch": 1.25, + "learning_rate": 0.00019231553891061208, + "loss": 2.0249, + "step": 1055 + }, + { + "epoch": 1.25, + "learning_rate": 0.00019230101654769312, + "loss": 1.6144, + "step": 1056 + }, + { + "epoch": 1.25, + "learning_rate": 0.0001922864810246626, + "loss": 1.9193, + "step": 1057 + }, + { + "epoch": 1.25, + "learning_rate": 0.00019227193234359292, + "loss": 2.0057, + "step": 1058 + }, + { + "epoch": 1.26, + "learning_rate": 0.00019225737050655842, + "loss": 1.9493, + "step": 1059 + }, + { + "epoch": 1.26, + "learning_rate": 0.00019224279551563532, + "loss": 1.9545, + "step": 1060 + }, + { + "epoch": 1.26, + "learning_rate": 0.0001922282073729017, + "loss": 1.8983, + "step": 1061 + }, + { + "epoch": 1.26, + "learning_rate": 0.00019221360608043746, + "loss": 1.9414, + "step": 1062 + }, + { + "epoch": 1.26, + "learning_rate": 0.00019219899164032447, + "loss": 1.8471, + "step": 1063 + }, + { + "epoch": 1.26, + "learning_rate": 0.0001921843640546464, + "loss": 1.7568, + "step": 1064 + }, + { + "epoch": 1.26, + "learning_rate": 0.00019216972332548887, + "loss": 2.0737, + "step": 1065 + }, + { + "epoch": 1.26, + "learning_rate": 0.0001921550694549393, + "loss": 1.6109, + "step": 1066 + }, + { + "epoch": 1.27, + "learning_rate": 0.000192140402445087, + "loss": 1.6684, + "step": 1067 + }, + { + "epoch": 1.27, + "learning_rate": 0.0001921257222980232, + "loss": 1.5101, + "step": 1068 + }, + { + "epoch": 1.27, + "learning_rate": 0.00019211102901584094, + "loss": 1.5262, + "step": 1069 + }, + { + "epoch": 1.27, + "learning_rate": 0.0001920963226006352, + "loss": 1.9757, + "step": 1070 + }, + { + "epoch": 1.27, + "learning_rate": 0.00019208160305450272, + "loss": 2.038, + "step": 1071 + }, + { + "epoch": 1.27, + "learning_rate": 0.00019206687037954224, + "loss": 1.4755, + "step": 1072 + }, + { + "epoch": 1.27, + "learning_rate": 0.00019205212457785434, + "loss": 1.7406, + "step": 1073 + }, + { + "epoch": 1.27, + "learning_rate": 0.00019203736565154137, + "loss": 1.9564, + "step": 1074 + }, + { + "epoch": 1.27, + "learning_rate": 0.0001920225936027077, + "loss": 1.823, + "step": 1075 + }, + { + "epoch": 1.28, + "learning_rate": 0.0001920078084334595, + "loss": 1.8275, + "step": 1076 + }, + { + "epoch": 1.28, + "learning_rate": 0.0001919930101459048, + "loss": 1.7106, + "step": 1077 + }, + { + "epoch": 1.28, + "learning_rate": 0.00019197819874215347, + "loss": 1.5958, + "step": 1078 + }, + { + "epoch": 1.28, + "learning_rate": 0.00019196337422431735, + "loss": 2.1478, + "step": 1079 + }, + { + "epoch": 1.28, + "learning_rate": 0.0001919485365945101, + "loss": 1.7238, + "step": 1080 + }, + { + "epoch": 1.28, + "learning_rate": 0.00019193368585484718, + "loss": 2.0758, + "step": 1081 + }, + { + "epoch": 1.28, + "learning_rate": 0.000191918822007446, + "loss": 1.8403, + "step": 1082 + }, + { + "epoch": 1.28, + "learning_rate": 0.00019190394505442585, + "loss": 1.8286, + "step": 1083 + }, + { + "epoch": 1.29, + "learning_rate": 0.00019188905499790789, + "loss": 1.6992, + "step": 1084 + }, + { + "epoch": 1.29, + "learning_rate": 0.00019187415184001503, + "loss": 1.8512, + "step": 1085 + }, + { + "epoch": 1.29, + "learning_rate": 0.0001918592355828722, + "loss": 1.8236, + "step": 1086 + }, + { + "epoch": 1.29, + "learning_rate": 0.0001918443062286061, + "loss": 1.6173, + "step": 1087 + }, + { + "epoch": 1.29, + "learning_rate": 0.00019182936377934535, + "loss": 1.8593, + "step": 1088 + }, + { + "epoch": 1.29, + "learning_rate": 0.0001918144082372204, + "loss": 1.8184, + "step": 1089 + }, + { + "epoch": 1.29, + "learning_rate": 0.00019179943960436358, + "loss": 1.9655, + "step": 1090 + }, + { + "epoch": 1.29, + "learning_rate": 0.00019178445788290915, + "loss": 1.5858, + "step": 1091 + }, + { + "epoch": 1.3, + "learning_rate": 0.00019176946307499312, + "loss": 1.8359, + "step": 1092 + }, + { + "epoch": 1.3, + "learning_rate": 0.0001917544551827534, + "loss": 1.4354, + "step": 1093 + }, + { + "epoch": 1.3, + "learning_rate": 0.00019173943420832984, + "loss": 1.4312, + "step": 1094 + }, + { + "epoch": 1.3, + "learning_rate": 0.0001917244001538641, + "loss": 2.0024, + "step": 1095 + }, + { + "epoch": 1.3, + "learning_rate": 0.00019170935302149965, + "loss": 1.5994, + "step": 1096 + }, + { + "epoch": 1.3, + "learning_rate": 0.00019169429281338195, + "loss": 2.05, + "step": 1097 + }, + { + "epoch": 1.3, + "learning_rate": 0.00019167921953165825, + "loss": 1.8746, + "step": 1098 + }, + { + "epoch": 1.3, + "learning_rate": 0.00019166413317847763, + "loss": 2.0071, + "step": 1099 + }, + { + "epoch": 1.3, + "learning_rate": 0.00019164903375599112, + "loss": 2.0331, + "step": 1100 + }, + { + "epoch": 1.31, + "learning_rate": 0.00019163392126635154, + "loss": 1.3587, + "step": 1101 + }, + { + "epoch": 1.31, + "learning_rate": 0.00019161879571171362, + "loss": 1.6144, + "step": 1102 + }, + { + "epoch": 1.31, + "learning_rate": 0.00019160365709423388, + "loss": 1.4845, + "step": 1103 + }, + { + "epoch": 1.31, + "learning_rate": 0.00019158850541607083, + "loss": 1.4511, + "step": 1104 + }, + { + "epoch": 1.31, + "learning_rate": 0.00019157334067938474, + "loss": 1.8015, + "step": 1105 + }, + { + "epoch": 1.31, + "learning_rate": 0.00019155816288633776, + "loss": 1.5029, + "step": 1106 + }, + { + "epoch": 1.31, + "learning_rate": 0.00019154297203909394, + "loss": 1.7102, + "step": 1107 + }, + { + "epoch": 1.31, + "learning_rate": 0.00019152776813981912, + "loss": 1.6661, + "step": 1108 + }, + { + "epoch": 1.32, + "learning_rate": 0.0001915125511906811, + "loss": 1.5872, + "step": 1109 + }, + { + "epoch": 1.32, + "learning_rate": 0.00019149732119384943, + "loss": 1.7868, + "step": 1110 + }, + { + "epoch": 1.32, + "learning_rate": 0.0001914820781514956, + "loss": 1.6365, + "step": 1111 + }, + { + "epoch": 1.32, + "learning_rate": 0.0001914668220657929, + "loss": 2.3434, + "step": 1112 + }, + { + "epoch": 1.32, + "learning_rate": 0.0001914515529389166, + "loss": 1.6458, + "step": 1113 + }, + { + "epoch": 1.32, + "learning_rate": 0.0001914362707730437, + "loss": 1.7061, + "step": 1114 + }, + { + "epoch": 1.32, + "learning_rate": 0.00019142097557035308, + "loss": 1.8606, + "step": 1115 + }, + { + "epoch": 1.32, + "learning_rate": 0.00019140566733302552, + "loss": 1.9415, + "step": 1116 + }, + { + "epoch": 1.33, + "learning_rate": 0.00019139034606324362, + "loss": 1.7411, + "step": 1117 + }, + { + "epoch": 1.33, + "learning_rate": 0.00019137501176319193, + "loss": 1.9404, + "step": 1118 + }, + { + "epoch": 1.33, + "learning_rate": 0.0001913596644350567, + "loss": 1.802, + "step": 1119 + }, + { + "epoch": 1.33, + "learning_rate": 0.00019134430408102615, + "loss": 1.2244, + "step": 1120 + }, + { + "epoch": 1.33, + "learning_rate": 0.00019132893070329036, + "loss": 1.902, + "step": 1121 + }, + { + "epoch": 1.33, + "learning_rate": 0.0001913135443040412, + "loss": 1.4578, + "step": 1122 + }, + { + "epoch": 1.33, + "learning_rate": 0.00019129814488547247, + "loss": 1.6816, + "step": 1123 + }, + { + "epoch": 1.33, + "learning_rate": 0.0001912827324497798, + "loss": 1.7293, + "step": 1124 + }, + { + "epoch": 1.33, + "learning_rate": 0.00019126730699916061, + "loss": 1.6344, + "step": 1125 + }, + { + "epoch": 1.34, + "learning_rate": 0.0001912518685358143, + "loss": 1.6819, + "step": 1126 + }, + { + "epoch": 1.34, + "learning_rate": 0.00019123641706194199, + "loss": 1.6761, + "step": 1127 + }, + { + "epoch": 1.34, + "learning_rate": 0.00019122095257974677, + "loss": 1.9222, + "step": 1128 + }, + { + "epoch": 1.34, + "learning_rate": 0.00019120547509143354, + "loss": 1.6117, + "step": 1129 + }, + { + "epoch": 1.34, + "learning_rate": 0.00019118998459920902, + "loss": 1.688, + "step": 1130 + }, + { + "epoch": 1.34, + "learning_rate": 0.00019117448110528184, + "loss": 1.8383, + "step": 1131 + }, + { + "epoch": 1.34, + "learning_rate": 0.00019115896461186245, + "loss": 1.5225, + "step": 1132 + }, + { + "epoch": 1.34, + "learning_rate": 0.00019114343512116318, + "loss": 2.0376, + "step": 1133 + }, + { + "epoch": 1.35, + "learning_rate": 0.00019112789263539813, + "loss": 1.5632, + "step": 1134 + }, + { + "epoch": 1.35, + "learning_rate": 0.00019111233715678343, + "loss": 1.7049, + "step": 1135 + }, + { + "epoch": 1.35, + "learning_rate": 0.0001910967686875369, + "loss": 1.4992, + "step": 1136 + }, + { + "epoch": 1.35, + "learning_rate": 0.00019108118722987826, + "loss": 1.7949, + "step": 1137 + }, + { + "epoch": 1.35, + "learning_rate": 0.00019106559278602903, + "loss": 1.4688, + "step": 1138 + }, + { + "epoch": 1.35, + "learning_rate": 0.00019104998535821274, + "loss": 1.4031, + "step": 1139 + }, + { + "epoch": 1.35, + "learning_rate": 0.0001910343649486546, + "loss": 2.1757, + "step": 1140 + }, + { + "epoch": 1.35, + "learning_rate": 0.00019101873155958179, + "loss": 1.622, + "step": 1141 + }, + { + "epoch": 1.36, + "learning_rate": 0.00019100308519322322, + "loss": 1.9441, + "step": 1142 + }, + { + "epoch": 1.36, + "learning_rate": 0.0001909874258518098, + "loss": 1.8065, + "step": 1143 + }, + { + "epoch": 1.36, + "learning_rate": 0.00019097175353757417, + "loss": 1.8348, + "step": 1144 + }, + { + "epoch": 1.36, + "learning_rate": 0.00019095606825275083, + "loss": 2.0519, + "step": 1145 + }, + { + "epoch": 1.36, + "learning_rate": 0.00019094036999957624, + "loss": 1.9172, + "step": 1146 + }, + { + "epoch": 1.36, + "learning_rate": 0.00019092465878028854, + "loss": 1.9961, + "step": 1147 + }, + { + "epoch": 1.36, + "learning_rate": 0.00019090893459712787, + "loss": 2.1239, + "step": 1148 + }, + { + "epoch": 1.36, + "learning_rate": 0.00019089319745233611, + "loss": 1.3481, + "step": 1149 + }, + { + "epoch": 1.36, + "learning_rate": 0.00019087744734815708, + "loss": 1.5035, + "step": 1150 + }, + { + "epoch": 1.37, + "learning_rate": 0.00019086168428683638, + "loss": 1.818, + "step": 1151 + }, + { + "epoch": 1.37, + "learning_rate": 0.00019084590827062145, + "loss": 2.0481, + "step": 1152 + }, + { + "epoch": 1.37, + "learning_rate": 0.00019083011930176165, + "loss": 1.4444, + "step": 1153 + }, + { + "epoch": 1.37, + "learning_rate": 0.00019081431738250814, + "loss": 1.6059, + "step": 1154 + }, + { + "epoch": 1.37, + "learning_rate": 0.0001907985025151139, + "loss": 2.0284, + "step": 1155 + }, + { + "epoch": 1.37, + "learning_rate": 0.0001907826747018338, + "loss": 1.8603, + "step": 1156 + }, + { + "epoch": 1.37, + "learning_rate": 0.00019076683394492455, + "loss": 1.7189, + "step": 1157 + }, + { + "epoch": 1.37, + "learning_rate": 0.00019075098024664468, + "loss": 1.7497, + "step": 1158 + }, + { + "epoch": 1.38, + "learning_rate": 0.00019073511360925458, + "loss": 1.7489, + "step": 1159 + }, + { + "epoch": 1.38, + "learning_rate": 0.0001907192340350165, + "loss": 1.6059, + "step": 1160 + }, + { + "epoch": 1.38, + "learning_rate": 0.00019070334152619453, + "loss": 1.4407, + "step": 1161 + }, + { + "epoch": 1.38, + "learning_rate": 0.00019068743608505455, + "loss": 1.7025, + "step": 1162 + }, + { + "epoch": 1.38, + "learning_rate": 0.00019067151771386438, + "loss": 1.7921, + "step": 1163 + }, + { + "epoch": 1.38, + "learning_rate": 0.0001906555864148936, + "loss": 1.6147, + "step": 1164 + }, + { + "epoch": 1.38, + "learning_rate": 0.0001906396421904137, + "loss": 1.6192, + "step": 1165 + }, + { + "epoch": 1.38, + "learning_rate": 0.00019062368504269795, + "loss": 1.4341, + "step": 1166 + }, + { + "epoch": 1.39, + "learning_rate": 0.00019060771497402147, + "loss": 1.3054, + "step": 1167 + }, + { + "epoch": 1.39, + "learning_rate": 0.0001905917319866613, + "loss": 2.041, + "step": 1168 + }, + { + "epoch": 1.39, + "learning_rate": 0.00019057573608289623, + "loss": 2.004, + "step": 1169 + }, + { + "epoch": 1.39, + "learning_rate": 0.00019055972726500695, + "loss": 1.4002, + "step": 1170 + }, + { + "epoch": 1.39, + "learning_rate": 0.00019054370553527595, + "loss": 1.5554, + "step": 1171 + }, + { + "epoch": 1.39, + "learning_rate": 0.00019052767089598754, + "loss": 1.9783, + "step": 1172 + }, + { + "epoch": 1.39, + "learning_rate": 0.000190511623349428, + "loss": 1.7443, + "step": 1173 + }, + { + "epoch": 1.39, + "learning_rate": 0.00019049556289788528, + "loss": 1.6089, + "step": 1174 + }, + { + "epoch": 1.39, + "learning_rate": 0.0001904794895436493, + "loss": 1.8784, + "step": 1175 + }, + { + "epoch": 1.4, + "learning_rate": 0.0001904634032890117, + "loss": 2.0985, + "step": 1176 + }, + { + "epoch": 1.4, + "learning_rate": 0.0001904473041362661, + "loss": 1.811, + "step": 1177 + }, + { + "epoch": 1.4, + "learning_rate": 0.00019043119208770793, + "loss": 1.407, + "step": 1178 + }, + { + "epoch": 1.4, + "learning_rate": 0.0001904150671456343, + "loss": 1.7269, + "step": 1179 + }, + { + "epoch": 1.4, + "learning_rate": 0.00019039892931234435, + "loss": 1.8374, + "step": 1180 + }, + { + "epoch": 1.4, + "learning_rate": 0.00019038277859013896, + "loss": 1.583, + "step": 1181 + }, + { + "epoch": 1.4, + "learning_rate": 0.00019036661498132086, + "loss": 1.6407, + "step": 1182 + }, + { + "epoch": 1.4, + "learning_rate": 0.00019035043848819464, + "loss": 2.0828, + "step": 1183 + }, + { + "epoch": 1.41, + "learning_rate": 0.00019033424911306672, + "loss": 1.7067, + "step": 1184 + }, + { + "epoch": 1.41, + "learning_rate": 0.00019031804685824534, + "loss": 1.55, + "step": 1185 + }, + { + "epoch": 1.41, + "learning_rate": 0.0001903018317260406, + "loss": 1.7573, + "step": 1186 + }, + { + "epoch": 1.41, + "learning_rate": 0.00019028560371876446, + "loss": 1.5666, + "step": 1187 + }, + { + "epoch": 1.41, + "learning_rate": 0.0001902693628387306, + "loss": 1.5192, + "step": 1188 + }, + { + "epoch": 1.41, + "learning_rate": 0.00019025310908825466, + "loss": 2.0093, + "step": 1189 + }, + { + "epoch": 1.41, + "learning_rate": 0.00019023684246965406, + "loss": 1.8414, + "step": 1190 + }, + { + "epoch": 1.41, + "learning_rate": 0.00019022056298524808, + "loss": 1.3696, + "step": 1191 + }, + { + "epoch": 1.42, + "learning_rate": 0.00019020427063735782, + "loss": 1.6336, + "step": 1192 + }, + { + "epoch": 1.42, + "learning_rate": 0.00019018796542830617, + "loss": 1.8528, + "step": 1193 + }, + { + "epoch": 1.42, + "learning_rate": 0.00019017164736041795, + "loss": 2.0523, + "step": 1194 + }, + { + "epoch": 1.42, + "learning_rate": 0.00019015531643601973, + "loss": 1.7526, + "step": 1195 + }, + { + "epoch": 1.42, + "learning_rate": 0.00019013897265743998, + "loss": 1.8391, + "step": 1196 + }, + { + "epoch": 1.42, + "learning_rate": 0.00019012261602700892, + "loss": 1.4257, + "step": 1197 + }, + { + "epoch": 1.42, + "learning_rate": 0.00019010624654705867, + "loss": 2.0911, + "step": 1198 + }, + { + "epoch": 1.42, + "learning_rate": 0.0001900898642199232, + "loss": 1.7578, + "step": 1199 + }, + { + "epoch": 1.42, + "learning_rate": 0.00019007346904793818, + "loss": 1.9003, + "step": 1200 + }, + { + "epoch": 1.43, + "learning_rate": 0.0001900570610334413, + "loss": 1.3918, + "step": 1201 + }, + { + "epoch": 1.43, + "learning_rate": 0.0001900406401787719, + "loss": 2.0365, + "step": 1202 + }, + { + "epoch": 1.43, + "learning_rate": 0.00019002420648627131, + "loss": 1.5184, + "step": 1203 + }, + { + "epoch": 1.43, + "learning_rate": 0.00019000775995828254, + "loss": 1.6412, + "step": 1204 + }, + { + "epoch": 1.43, + "learning_rate": 0.00018999130059715058, + "loss": 1.5031, + "step": 1205 + }, + { + "epoch": 1.43, + "learning_rate": 0.00018997482840522217, + "loss": 1.4421, + "step": 1206 + }, + { + "epoch": 1.43, + "learning_rate": 0.00018995834338484584, + "loss": 1.9431, + "step": 1207 + }, + { + "epoch": 1.43, + "learning_rate": 0.000189941845538372, + "loss": 1.8141, + "step": 1208 + }, + { + "epoch": 1.44, + "learning_rate": 0.0001899253348681529, + "loss": 1.7289, + "step": 1209 + }, + { + "epoch": 1.44, + "learning_rate": 0.00018990881137654258, + "loss": 1.7217, + "step": 1210 + }, + { + "epoch": 1.44, + "learning_rate": 0.000189892275065897, + "loss": 2.3727, + "step": 1211 + }, + { + "epoch": 1.44, + "learning_rate": 0.00018987572593857381, + "loss": 1.4833, + "step": 1212 + }, + { + "epoch": 1.44, + "learning_rate": 0.00018985916399693256, + "loss": 2.13, + "step": 1213 + }, + { + "epoch": 1.44, + "learning_rate": 0.00018984258924333464, + "loss": 1.875, + "step": 1214 + }, + { + "epoch": 1.44, + "learning_rate": 0.00018982600168014323, + "loss": 1.783, + "step": 1215 + }, + { + "epoch": 1.44, + "learning_rate": 0.00018980940130972337, + "loss": 1.6815, + "step": 1216 + }, + { + "epoch": 1.45, + "learning_rate": 0.0001897927881344419, + "loss": 2.049, + "step": 1217 + }, + { + "epoch": 1.45, + "learning_rate": 0.00018977616215666752, + "loss": 1.918, + "step": 1218 + }, + { + "epoch": 1.45, + "learning_rate": 0.0001897595233787707, + "loss": 1.5824, + "step": 1219 + }, + { + "epoch": 1.45, + "learning_rate": 0.00018974287180312377, + "loss": 1.7473, + "step": 1220 + }, + { + "epoch": 1.45, + "learning_rate": 0.00018972620743210093, + "loss": 1.6915, + "step": 1221 + }, + { + "epoch": 1.45, + "learning_rate": 0.0001897095302680781, + "loss": 1.7633, + "step": 1222 + }, + { + "epoch": 1.45, + "learning_rate": 0.00018969284031343308, + "loss": 1.6921, + "step": 1223 + }, + { + "epoch": 1.45, + "learning_rate": 0.00018967613757054554, + "loss": 1.5433, + "step": 1224 + }, + { + "epoch": 1.45, + "learning_rate": 0.00018965942204179686, + "loss": 1.9389, + "step": 1225 + }, + { + "epoch": 1.46, + "learning_rate": 0.00018964269372957038, + "loss": 1.5625, + "step": 1226 + }, + { + "epoch": 1.46, + "learning_rate": 0.00018962595263625115, + "loss": 1.4835, + "step": 1227 + }, + { + "epoch": 1.46, + "learning_rate": 0.00018960919876422611, + "loss": 1.8479, + "step": 1228 + }, + { + "epoch": 1.46, + "learning_rate": 0.00018959243211588397, + "loss": 1.7861, + "step": 1229 + }, + { + "epoch": 1.46, + "learning_rate": 0.00018957565269361531, + "loss": 1.867, + "step": 1230 + }, + { + "epoch": 1.46, + "learning_rate": 0.00018955886049981245, + "loss": 1.9383, + "step": 1231 + }, + { + "epoch": 1.46, + "learning_rate": 0.0001895420555368697, + "loss": 1.755, + "step": 1232 + }, + { + "epoch": 1.46, + "learning_rate": 0.000189525237807183, + "loss": 1.5166, + "step": 1233 + }, + { + "epoch": 1.47, + "learning_rate": 0.00018950840731315024, + "loss": 1.8629, + "step": 1234 + }, + { + "epoch": 1.47, + "learning_rate": 0.000189491564057171, + "loss": 1.6845, + "step": 1235 + }, + { + "epoch": 1.47, + "learning_rate": 0.00018947470804164685, + "loss": 1.4748, + "step": 1236 + }, + { + "epoch": 1.47, + "learning_rate": 0.00018945783926898105, + "loss": 1.8907, + "step": 1237 + }, + { + "epoch": 1.47, + "learning_rate": 0.00018944095774157873, + "loss": 1.5758, + "step": 1238 + }, + { + "epoch": 1.47, + "learning_rate": 0.00018942406346184683, + "loss": 1.6367, + "step": 1239 + }, + { + "epoch": 1.47, + "learning_rate": 0.00018940715643219407, + "loss": 1.7285, + "step": 1240 + }, + { + "epoch": 1.47, + "learning_rate": 0.00018939023665503108, + "loss": 1.5714, + "step": 1241 + }, + { + "epoch": 1.48, + "learning_rate": 0.0001893733041327702, + "loss": 1.9308, + "step": 1242 + }, + { + "epoch": 1.48, + "learning_rate": 0.00018935635886782568, + "loss": 1.9153, + "step": 1243 + }, + { + "epoch": 1.48, + "learning_rate": 0.00018933940086261351, + "loss": 1.8009, + "step": 1244 + }, + { + "epoch": 1.48, + "learning_rate": 0.00018932243011955154, + "loss": 1.7392, + "step": 1245 + }, + { + "epoch": 1.48, + "learning_rate": 0.00018930544664105944, + "loss": 1.821, + "step": 1246 + }, + { + "epoch": 1.48, + "learning_rate": 0.0001892884504295587, + "loss": 1.475, + "step": 1247 + }, + { + "epoch": 1.48, + "learning_rate": 0.00018927144148747255, + "loss": 1.8937, + "step": 1248 + }, + { + "epoch": 1.48, + "learning_rate": 0.00018925441981722618, + "loss": 1.6958, + "step": 1249 + }, + { + "epoch": 1.48, + "learning_rate": 0.00018923738542124644, + "loss": 1.6836, + "step": 1250 + }, + { + "epoch": 1.49, + "learning_rate": 0.00018922033830196208, + "loss": 2.0213, + "step": 1251 + }, + { + "epoch": 1.49, + "learning_rate": 0.00018920327846180365, + "loss": 1.9572, + "step": 1252 + }, + { + "epoch": 1.49, + "learning_rate": 0.00018918620590320352, + "loss": 1.9449, + "step": 1253 + }, + { + "epoch": 1.49, + "learning_rate": 0.00018916912062859583, + "loss": 1.7297, + "step": 1254 + }, + { + "epoch": 1.49, + "eval_loss": 2.0551259517669678, + "eval_runtime": 283.8338, + "eval_samples_per_second": 0.726, + "eval_steps_per_second": 0.726, + "step": 1254 + }, + { + "epoch": 1.49, + "learning_rate": 0.00018915202264041664, + "loss": 1.8158, + "step": 1255 + }, + { + "epoch": 1.49, + "learning_rate": 0.0001891349119411037, + "loss": 1.921, + "step": 1256 + }, + { + "epoch": 1.49, + "learning_rate": 0.00018911778853309658, + "loss": 1.5726, + "step": 1257 + }, + { + "epoch": 1.49, + "learning_rate": 0.0001891006524188368, + "loss": 1.6641, + "step": 1258 + }, + { + "epoch": 1.5, + "learning_rate": 0.00018908350360076752, + "loss": 1.5841, + "step": 1259 + }, + { + "epoch": 1.5, + "learning_rate": 0.00018906634208133385, + "loss": 1.8567, + "step": 1260 + }, + { + "epoch": 1.5, + "learning_rate": 0.00018904916786298257, + "loss": 1.5584, + "step": 1261 + }, + { + "epoch": 1.5, + "learning_rate": 0.00018903198094816242, + "loss": 1.6615, + "step": 1262 + }, + { + "epoch": 1.5, + "learning_rate": 0.00018901478133932385, + "loss": 1.7477, + "step": 1263 + }, + { + "epoch": 1.5, + "learning_rate": 0.00018899756903891914, + "loss": 1.3796, + "step": 1264 + }, + { + "epoch": 1.5, + "learning_rate": 0.00018898034404940238, + "loss": 1.7991, + "step": 1265 + }, + { + "epoch": 1.5, + "learning_rate": 0.00018896310637322953, + "loss": 1.4944, + "step": 1266 + }, + { + "epoch": 1.51, + "learning_rate": 0.00018894585601285827, + "loss": 1.5719, + "step": 1267 + }, + { + "epoch": 1.51, + "learning_rate": 0.00018892859297074812, + "loss": 1.5495, + "step": 1268 + }, + { + "epoch": 1.51, + "learning_rate": 0.00018891131724936043, + "loss": 1.7611, + "step": 1269 + }, + { + "epoch": 1.51, + "learning_rate": 0.00018889402885115833, + "loss": 1.5991, + "step": 1270 + }, + { + "epoch": 1.51, + "learning_rate": 0.00018887672777860676, + "loss": 1.8849, + "step": 1271 + }, + { + "epoch": 1.51, + "learning_rate": 0.0001888594140341725, + "loss": 1.6136, + "step": 1272 + }, + { + "epoch": 1.51, + "learning_rate": 0.0001888420876203241, + "loss": 1.8288, + "step": 1273 + }, + { + "epoch": 1.51, + "learning_rate": 0.0001888247485395319, + "loss": 1.6625, + "step": 1274 + }, + { + "epoch": 1.52, + "learning_rate": 0.00018880739679426816, + "loss": 1.49, + "step": 1275 + }, + { + "epoch": 1.52, + "learning_rate": 0.00018879003238700675, + "loss": 1.874, + "step": 1276 + }, + { + "epoch": 1.52, + "learning_rate": 0.00018877265532022352, + "loss": 1.751, + "step": 1277 + }, + { + "epoch": 1.52, + "learning_rate": 0.00018875526559639604, + "loss": 1.9882, + "step": 1278 + }, + { + "epoch": 1.52, + "learning_rate": 0.00018873786321800374, + "loss": 1.5214, + "step": 1279 + }, + { + "epoch": 1.52, + "learning_rate": 0.0001887204481875278, + "loss": 1.741, + "step": 1280 + }, + { + "epoch": 1.52, + "learning_rate": 0.00018870302050745118, + "loss": 1.7798, + "step": 1281 + }, + { + "epoch": 1.52, + "learning_rate": 0.00018868558018025878, + "loss": 1.9258, + "step": 1282 + }, + { + "epoch": 1.52, + "learning_rate": 0.0001886681272084371, + "loss": 1.9096, + "step": 1283 + }, + { + "epoch": 1.53, + "learning_rate": 0.00018865066159447466, + "loss": 1.6729, + "step": 1284 + }, + { + "epoch": 1.53, + "learning_rate": 0.00018863318334086157, + "loss": 1.6239, + "step": 1285 + }, + { + "epoch": 1.53, + "learning_rate": 0.00018861569245008994, + "loss": 1.9857, + "step": 1286 + }, + { + "epoch": 1.53, + "learning_rate": 0.00018859818892465354, + "loss": 1.9905, + "step": 1287 + }, + { + "epoch": 1.53, + "learning_rate": 0.000188580672767048, + "loss": 2.0073, + "step": 1288 + }, + { + "epoch": 1.53, + "learning_rate": 0.00018856314397977075, + "loss": 1.7109, + "step": 1289 + }, + { + "epoch": 1.53, + "learning_rate": 0.000188545602565321, + "loss": 1.3727, + "step": 1290 + }, + { + "epoch": 1.53, + "learning_rate": 0.00018852804852619975, + "loss": 1.7045, + "step": 1291 + }, + { + "epoch": 1.54, + "learning_rate": 0.00018851048186490992, + "loss": 1.9042, + "step": 1292 + }, + { + "epoch": 1.54, + "learning_rate": 0.00018849290258395602, + "loss": 1.7174, + "step": 1293 + }, + { + "epoch": 1.54, + "learning_rate": 0.00018847531068584452, + "loss": 1.6502, + "step": 1294 + }, + { + "epoch": 1.54, + "learning_rate": 0.00018845770617308366, + "loss": 1.8582, + "step": 1295 + }, + { + "epoch": 1.54, + "learning_rate": 0.0001884400890481834, + "loss": 1.4846, + "step": 1296 + }, + { + "epoch": 1.54, + "learning_rate": 0.00018842245931365562, + "loss": 1.5428, + "step": 1297 + }, + { + "epoch": 1.54, + "learning_rate": 0.00018840481697201392, + "loss": 1.7266, + "step": 1298 + }, + { + "epoch": 1.54, + "learning_rate": 0.0001883871620257737, + "loss": 1.9324, + "step": 1299 + }, + { + "epoch": 1.55, + "learning_rate": 0.00018836949447745215, + "loss": 1.577, + "step": 1300 + }, + { + "epoch": 1.55, + "learning_rate": 0.0001883518143295683, + "loss": 1.6388, + "step": 1301 + }, + { + "epoch": 1.55, + "learning_rate": 0.00018833412158464298, + "loss": 1.9201, + "step": 1302 + }, + { + "epoch": 1.55, + "learning_rate": 0.00018831641624519877, + "loss": 1.6478, + "step": 1303 + }, + { + "epoch": 1.55, + "learning_rate": 0.00018829869831376005, + "loss": 1.6826, + "step": 1304 + }, + { + "epoch": 1.55, + "learning_rate": 0.00018828096779285303, + "loss": 1.8513, + "step": 1305 + }, + { + "epoch": 1.55, + "learning_rate": 0.00018826322468500566, + "loss": 1.571, + "step": 1306 + }, + { + "epoch": 1.55, + "learning_rate": 0.00018824546899274777, + "loss": 1.1602, + "step": 1307 + }, + { + "epoch": 1.55, + "learning_rate": 0.0001882277007186109, + "loss": 1.9998, + "step": 1308 + }, + { + "epoch": 1.56, + "learning_rate": 0.0001882099198651284, + "loss": 1.7034, + "step": 1309 + }, + { + "epoch": 1.56, + "learning_rate": 0.0001881921264348355, + "loss": 1.4031, + "step": 1310 + }, + { + "epoch": 1.56, + "learning_rate": 0.00018817432043026911, + "loss": 1.8413, + "step": 1311 + }, + { + "epoch": 1.56, + "learning_rate": 0.00018815650185396797, + "loss": 1.6606, + "step": 1312 + }, + { + "epoch": 1.56, + "learning_rate": 0.00018813867070847264, + "loss": 1.5792, + "step": 1313 + }, + { + "epoch": 1.56, + "learning_rate": 0.00018812082699632546, + "loss": 1.4525, + "step": 1314 + }, + { + "epoch": 1.56, + "learning_rate": 0.00018810297072007054, + "loss": 1.4906, + "step": 1315 + }, + { + "epoch": 1.56, + "learning_rate": 0.00018808510188225377, + "loss": 1.6284, + "step": 1316 + }, + { + "epoch": 1.57, + "learning_rate": 0.0001880672204854229, + "loss": 1.7281, + "step": 1317 + }, + { + "epoch": 1.57, + "learning_rate": 0.0001880493265321274, + "loss": 1.5345, + "step": 1318 + }, + { + "epoch": 1.57, + "learning_rate": 0.00018803142002491856, + "loss": 2.0933, + "step": 1319 + }, + { + "epoch": 1.57, + "learning_rate": 0.00018801350096634946, + "loss": 1.9372, + "step": 1320 + }, + { + "epoch": 1.57, + "learning_rate": 0.000187995569358975, + "loss": 1.7151, + "step": 1321 + }, + { + "epoch": 1.57, + "learning_rate": 0.00018797762520535177, + "loss": 1.4823, + "step": 1322 + }, + { + "epoch": 1.57, + "learning_rate": 0.0001879596685080383, + "loss": 2.0495, + "step": 1323 + }, + { + "epoch": 1.57, + "learning_rate": 0.00018794169926959474, + "loss": 2.2966, + "step": 1324 + }, + { + "epoch": 1.58, + "learning_rate": 0.00018792371749258314, + "loss": 1.7868, + "step": 1325 + }, + { + "epoch": 1.58, + "learning_rate": 0.00018790572317956735, + "loss": 1.9403, + "step": 1326 + }, + { + "epoch": 1.58, + "learning_rate": 0.00018788771633311292, + "loss": 1.6687, + "step": 1327 + }, + { + "epoch": 1.58, + "learning_rate": 0.00018786969695578723, + "loss": 1.8422, + "step": 1328 + }, + { + "epoch": 1.58, + "learning_rate": 0.00018785166505015948, + "loss": 1.5916, + "step": 1329 + }, + { + "epoch": 1.58, + "learning_rate": 0.00018783362061880062, + "loss": 1.9119, + "step": 1330 + }, + { + "epoch": 1.58, + "learning_rate": 0.00018781556366428336, + "loss": 1.4903, + "step": 1331 + }, + { + "epoch": 1.58, + "learning_rate": 0.00018779749418918227, + "loss": 1.9497, + "step": 1332 + }, + { + "epoch": 1.58, + "learning_rate": 0.00018777941219607364, + "loss": 1.9462, + "step": 1333 + }, + { + "epoch": 1.59, + "learning_rate": 0.00018776131768753556, + "loss": 2.0474, + "step": 1334 + }, + { + "epoch": 1.59, + "learning_rate": 0.00018774321066614795, + "loss": 1.4474, + "step": 1335 + }, + { + "epoch": 1.59, + "learning_rate": 0.00018772509113449245, + "loss": 1.8315, + "step": 1336 + }, + { + "epoch": 1.59, + "learning_rate": 0.00018770695909515247, + "loss": 1.7684, + "step": 1337 + }, + { + "epoch": 1.59, + "learning_rate": 0.00018768881455071332, + "loss": 1.2675, + "step": 1338 + }, + { + "epoch": 1.59, + "learning_rate": 0.000187670657503762, + "loss": 1.8226, + "step": 1339 + }, + { + "epoch": 1.59, + "learning_rate": 0.00018765248795688726, + "loss": 2.2112, + "step": 1340 + }, + { + "epoch": 1.59, + "learning_rate": 0.0001876343059126797, + "loss": 1.3627, + "step": 1341 + }, + { + "epoch": 1.6, + "learning_rate": 0.00018761611137373173, + "loss": 2.1488, + "step": 1342 + }, + { + "epoch": 1.6, + "learning_rate": 0.00018759790434263744, + "loss": 1.9842, + "step": 1343 + }, + { + "epoch": 1.6, + "learning_rate": 0.00018757968482199276, + "loss": 1.9775, + "step": 1344 + }, + { + "epoch": 1.6, + "learning_rate": 0.00018756145281439545, + "loss": 1.6835, + "step": 1345 + }, + { + "epoch": 1.6, + "learning_rate": 0.0001875432083224449, + "loss": 1.5272, + "step": 1346 + }, + { + "epoch": 1.6, + "learning_rate": 0.0001875249513487425, + "loss": 1.7539, + "step": 1347 + }, + { + "epoch": 1.6, + "learning_rate": 0.00018750668189589117, + "loss": 1.874, + "step": 1348 + }, + { + "epoch": 1.6, + "learning_rate": 0.00018748839996649583, + "loss": 1.5858, + "step": 1349 + }, + { + "epoch": 1.61, + "learning_rate": 0.00018747010556316305, + "loss": 1.9298, + "step": 1350 + }, + { + "epoch": 1.61, + "learning_rate": 0.0001874517986885012, + "loss": 1.5079, + "step": 1351 + }, + { + "epoch": 1.61, + "learning_rate": 0.00018743347934512046, + "loss": 1.884, + "step": 1352 + }, + { + "epoch": 1.61, + "learning_rate": 0.00018741514753563277, + "loss": 1.7978, + "step": 1353 + }, + { + "epoch": 1.61, + "learning_rate": 0.0001873968032626518, + "loss": 1.7735, + "step": 1354 + }, + { + "epoch": 1.61, + "learning_rate": 0.00018737844652879312, + "loss": 1.7227, + "step": 1355 + }, + { + "epoch": 1.61, + "learning_rate": 0.00018736007733667393, + "loss": 1.8458, + "step": 1356 + }, + { + "epoch": 1.61, + "learning_rate": 0.00018734169568891334, + "loss": 1.3268, + "step": 1357 + }, + { + "epoch": 1.61, + "learning_rate": 0.0001873233015881321, + "loss": 1.3782, + "step": 1358 + }, + { + "epoch": 1.62, + "learning_rate": 0.00018730489503695287, + "loss": 1.9614, + "step": 1359 + }, + { + "epoch": 1.62, + "learning_rate": 0.00018728647603800003, + "loss": 1.7755, + "step": 1360 + }, + { + "epoch": 1.62, + "learning_rate": 0.00018726804459389963, + "loss": 1.7961, + "step": 1361 + }, + { + "epoch": 1.62, + "learning_rate": 0.00018724960070727972, + "loss": 1.7158, + "step": 1362 + }, + { + "epoch": 1.62, + "learning_rate": 0.0001872311443807699, + "loss": 1.6303, + "step": 1363 + }, + { + "epoch": 1.62, + "learning_rate": 0.0001872126756170017, + "loss": 1.8734, + "step": 1364 + }, + { + "epoch": 1.62, + "learning_rate": 0.00018719419441860834, + "loss": 1.5143, + "step": 1365 + }, + { + "epoch": 1.62, + "learning_rate": 0.0001871757007882248, + "loss": 1.498, + "step": 1366 + }, + { + "epoch": 1.63, + "learning_rate": 0.0001871571947284879, + "loss": 1.0886, + "step": 1367 + }, + { + "epoch": 1.63, + "learning_rate": 0.00018713867624203621, + "loss": 1.6633, + "step": 1368 + }, + { + "epoch": 1.63, + "learning_rate": 0.00018712014533151008, + "loss": 1.8895, + "step": 1369 + }, + { + "epoch": 1.63, + "learning_rate": 0.00018710160199955156, + "loss": 1.4178, + "step": 1370 + }, + { + "epoch": 1.63, + "learning_rate": 0.00018708304624880456, + "loss": 1.6814, + "step": 1371 + }, + { + "epoch": 1.63, + "learning_rate": 0.0001870644780819147, + "loss": 1.8671, + "step": 1372 + }, + { + "epoch": 1.63, + "learning_rate": 0.00018704589750152944, + "loss": 1.4786, + "step": 1373 + }, + { + "epoch": 1.63, + "learning_rate": 0.00018702730451029796, + "loss": 1.8622, + "step": 1374 + }, + { + "epoch": 1.64, + "learning_rate": 0.00018700869911087115, + "loss": 1.8891, + "step": 1375 + }, + { + "epoch": 1.64, + "learning_rate": 0.0001869900813059018, + "loss": 2.0493, + "step": 1376 + }, + { + "epoch": 1.64, + "learning_rate": 0.00018697145109804436, + "loss": 1.7238, + "step": 1377 + }, + { + "epoch": 1.64, + "learning_rate": 0.00018695280848995513, + "loss": 1.7826, + "step": 1378 + }, + { + "epoch": 1.64, + "learning_rate": 0.0001869341534842921, + "loss": 1.8557, + "step": 1379 + }, + { + "epoch": 1.64, + "learning_rate": 0.0001869154860837151, + "loss": 1.7492, + "step": 1380 + }, + { + "epoch": 1.64, + "learning_rate": 0.0001868968062908857, + "loss": 1.7441, + "step": 1381 + }, + { + "epoch": 1.64, + "learning_rate": 0.0001868781141084672, + "loss": 1.8322, + "step": 1382 + }, + { + "epoch": 1.64, + "learning_rate": 0.0001868594095391247, + "loss": 1.8177, + "step": 1383 + }, + { + "epoch": 1.65, + "learning_rate": 0.00018684069258552508, + "loss": 2.0001, + "step": 1384 + }, + { + "epoch": 1.65, + "learning_rate": 0.00018682196325033696, + "loss": 1.5046, + "step": 1385 + }, + { + "epoch": 1.65, + "learning_rate": 0.00018680322153623075, + "loss": 1.6789, + "step": 1386 + }, + { + "epoch": 1.65, + "learning_rate": 0.0001867844674458786, + "loss": 1.6951, + "step": 1387 + }, + { + "epoch": 1.65, + "learning_rate": 0.00018676570098195443, + "loss": 2.0334, + "step": 1388 + }, + { + "epoch": 1.65, + "learning_rate": 0.00018674692214713388, + "loss": 1.7833, + "step": 1389 + }, + { + "epoch": 1.65, + "learning_rate": 0.0001867281309440945, + "loss": 1.82, + "step": 1390 + }, + { + "epoch": 1.65, + "learning_rate": 0.00018670932737551547, + "loss": 1.8155, + "step": 1391 + }, + { + "epoch": 1.66, + "learning_rate": 0.00018669051144407775, + "loss": 1.7912, + "step": 1392 + }, + { + "epoch": 1.66, + "learning_rate": 0.00018667168315246406, + "loss": 1.5816, + "step": 1393 + }, + { + "epoch": 1.66, + "learning_rate": 0.00018665284250335895, + "loss": 1.7521, + "step": 1394 + }, + { + "epoch": 1.66, + "learning_rate": 0.00018663398949944865, + "loss": 1.4287, + "step": 1395 + }, + { + "epoch": 1.66, + "learning_rate": 0.00018661512414342127, + "loss": 1.6026, + "step": 1396 + }, + { + "epoch": 1.66, + "learning_rate": 0.00018659624643796647, + "loss": 1.6953, + "step": 1397 + }, + { + "epoch": 1.66, + "learning_rate": 0.00018657735638577587, + "loss": 1.8515, + "step": 1398 + }, + { + "epoch": 1.66, + "learning_rate": 0.00018655845398954276, + "loss": 2.0384, + "step": 1399 + }, + { + "epoch": 1.67, + "learning_rate": 0.00018653953925196225, + "loss": 1.5458, + "step": 1400 + }, + { + "epoch": 1.67, + "learning_rate": 0.00018652061217573114, + "loss": 1.7166, + "step": 1401 + }, + { + "epoch": 1.67, + "learning_rate": 0.000186501672763548, + "loss": 1.5653, + "step": 1402 + }, + { + "epoch": 1.67, + "learning_rate": 0.00018648272101811318, + "loss": 2.0928, + "step": 1403 + }, + { + "epoch": 1.67, + "learning_rate": 0.00018646375694212884, + "loss": 1.605, + "step": 1404 + }, + { + "epoch": 1.67, + "learning_rate": 0.00018644478053829878, + "loss": 1.4734, + "step": 1405 + }, + { + "epoch": 1.67, + "learning_rate": 0.00018642579180932865, + "loss": 2.0578, + "step": 1406 + }, + { + "epoch": 1.67, + "learning_rate": 0.00018640679075792582, + "loss": 1.9823, + "step": 1407 + }, + { + "epoch": 1.67, + "learning_rate": 0.00018638777738679943, + "loss": 2.0551, + "step": 1408 + }, + { + "epoch": 1.68, + "learning_rate": 0.00018636875169866036, + "loss": 1.6315, + "step": 1409 + }, + { + "epoch": 1.68, + "learning_rate": 0.0001863497136962213, + "loss": 1.8965, + "step": 1410 + }, + { + "epoch": 1.68, + "learning_rate": 0.0001863306633821966, + "loss": 1.3584, + "step": 1411 + }, + { + "epoch": 1.68, + "learning_rate": 0.00018631160075930245, + "loss": 1.9673, + "step": 1412 + }, + { + "epoch": 1.68, + "learning_rate": 0.00018629252583025676, + "loss": 1.5277, + "step": 1413 + }, + { + "epoch": 1.68, + "learning_rate": 0.0001862734385977792, + "loss": 1.6788, + "step": 1414 + }, + { + "epoch": 1.68, + "learning_rate": 0.00018625433906459116, + "loss": 1.432, + "step": 1415 + }, + { + "epoch": 1.68, + "learning_rate": 0.00018623522723341588, + "loss": 1.8102, + "step": 1416 + }, + { + "epoch": 1.69, + "learning_rate": 0.00018621610310697823, + "loss": 1.6713, + "step": 1417 + }, + { + "epoch": 1.69, + "learning_rate": 0.00018619696668800492, + "loss": 1.6989, + "step": 1418 + }, + { + "epoch": 1.69, + "learning_rate": 0.0001861778179792244, + "loss": 1.7645, + "step": 1419 + }, + { + "epoch": 1.69, + "learning_rate": 0.00018615865698336684, + "loss": 1.594, + "step": 1420 + }, + { + "epoch": 1.69, + "learning_rate": 0.00018613948370316415, + "loss": 1.8751, + "step": 1421 + }, + { + "epoch": 1.69, + "learning_rate": 0.00018612029814135014, + "loss": 1.64, + "step": 1422 + }, + { + "epoch": 1.69, + "learning_rate": 0.00018610110030066007, + "loss": 1.5066, + "step": 1423 + }, + { + "epoch": 1.69, + "learning_rate": 0.0001860818901838313, + "loss": 1.9817, + "step": 1424 + }, + { + "epoch": 1.7, + "learning_rate": 0.00018606266779360266, + "loss": 2.056, + "step": 1425 + }, + { + "epoch": 1.7, + "learning_rate": 0.0001860434331327149, + "loss": 1.6997, + "step": 1426 + }, + { + "epoch": 1.7, + "learning_rate": 0.00018602418620391044, + "loss": 1.5573, + "step": 1427 + }, + { + "epoch": 1.7, + "learning_rate": 0.0001860049270099335, + "loss": 1.8427, + "step": 1428 + }, + { + "epoch": 1.7, + "learning_rate": 0.00018598565555353, + "loss": 2.012, + "step": 1429 + }, + { + "epoch": 1.7, + "learning_rate": 0.00018596637183744763, + "loss": 1.7976, + "step": 1430 + }, + { + "epoch": 1.7, + "learning_rate": 0.00018594707586443585, + "loss": 1.4, + "step": 1431 + }, + { + "epoch": 1.7, + "learning_rate": 0.0001859277676372458, + "loss": 1.8717, + "step": 1432 + }, + { + "epoch": 1.7, + "learning_rate": 0.00018590844715863045, + "loss": 1.4311, + "step": 1433 + }, + { + "epoch": 1.71, + "learning_rate": 0.00018588911443134448, + "loss": 1.5903, + "step": 1434 + }, + { + "epoch": 1.71, + "learning_rate": 0.00018586976945814425, + "loss": 2.0898, + "step": 1435 + }, + { + "epoch": 1.71, + "learning_rate": 0.00018585041224178803, + "loss": 1.5302, + "step": 1436 + }, + { + "epoch": 1.71, + "learning_rate": 0.00018583104278503568, + "loss": 1.9582, + "step": 1437 + }, + { + "epoch": 1.71, + "learning_rate": 0.00018581166109064886, + "loss": 1.5264, + "step": 1438 + }, + { + "epoch": 1.71, + "learning_rate": 0.00018579226716139096, + "loss": 1.6551, + "step": 1439 + }, + { + "epoch": 1.71, + "learning_rate": 0.00018577286100002723, + "loss": 1.7774, + "step": 1440 + }, + { + "epoch": 1.71, + "learning_rate": 0.00018575344260932444, + "loss": 1.8316, + "step": 1441 + }, + { + "epoch": 1.72, + "learning_rate": 0.0001857340119920513, + "loss": 1.3916, + "step": 1442 + }, + { + "epoch": 1.72, + "learning_rate": 0.00018571456915097818, + "loss": 1.6728, + "step": 1443 + }, + { + "epoch": 1.72, + "learning_rate": 0.0001856951140888772, + "loss": 1.7247, + "step": 1444 + }, + { + "epoch": 1.72, + "learning_rate": 0.00018567564680852224, + "loss": 1.4539, + "step": 1445 + }, + { + "epoch": 1.72, + "learning_rate": 0.00018565616731268888, + "loss": 1.613, + "step": 1446 + }, + { + "epoch": 1.72, + "learning_rate": 0.0001856366756041545, + "loss": 1.757, + "step": 1447 + }, + { + "epoch": 1.72, + "learning_rate": 0.00018561717168569816, + "loss": 1.6903, + "step": 1448 + }, + { + "epoch": 1.72, + "learning_rate": 0.00018559765556010072, + "loss": 1.7322, + "step": 1449 + }, + { + "epoch": 1.73, + "learning_rate": 0.00018557812723014476, + "loss": 1.5627, + "step": 1450 + }, + { + "epoch": 1.73, + "learning_rate": 0.00018555858669861458, + "loss": 1.8751, + "step": 1451 + }, + { + "epoch": 1.73, + "learning_rate": 0.00018553903396829625, + "loss": 1.2721, + "step": 1452 + }, + { + "epoch": 1.73, + "learning_rate": 0.00018551946904197752, + "loss": 1.8167, + "step": 1453 + }, + { + "epoch": 1.73, + "learning_rate": 0.00018549989192244797, + "loss": 1.6602, + "step": 1454 + }, + { + "epoch": 1.73, + "learning_rate": 0.00018548030261249885, + "loss": 1.9053, + "step": 1455 + }, + { + "epoch": 1.73, + "learning_rate": 0.00018546070111492315, + "loss": 1.7721, + "step": 1456 + }, + { + "epoch": 1.73, + "learning_rate": 0.00018544108743251566, + "loss": 2.1421, + "step": 1457 + }, + { + "epoch": 1.73, + "learning_rate": 0.00018542146156807284, + "loss": 1.5076, + "step": 1458 + }, + { + "epoch": 1.74, + "learning_rate": 0.00018540182352439288, + "loss": 1.9039, + "step": 1459 + }, + { + "epoch": 1.74, + "learning_rate": 0.00018538217330427582, + "loss": 1.9777, + "step": 1460 + }, + { + "epoch": 1.74, + "learning_rate": 0.00018536251091052323, + "loss": 1.5702, + "step": 1461 + }, + { + "epoch": 1.74, + "learning_rate": 0.00018534283634593862, + "loss": 1.851, + "step": 1462 + }, + { + "epoch": 1.74, + "learning_rate": 0.00018532314961332717, + "loss": 1.5337, + "step": 1463 + }, + { + "epoch": 1.74, + "eval_loss": 2.068387508392334, + "eval_runtime": 283.4638, + "eval_samples_per_second": 0.727, + "eval_steps_per_second": 0.727, + "step": 1463 + }, + { + "epoch": 1.74, + "learning_rate": 0.00018530345071549574, + "loss": 1.7553, + "step": 1464 + }, + { + "epoch": 1.74, + "learning_rate": 0.00018528373965525296, + "loss": 1.4175, + "step": 1465 + }, + { + "epoch": 1.74, + "learning_rate": 0.00018526401643540922, + "loss": 1.7216, + "step": 1466 + }, + { + "epoch": 1.75, + "learning_rate": 0.00018524428105877664, + "loss": 1.6415, + "step": 1467 + }, + { + "epoch": 1.75, + "learning_rate": 0.00018522453352816896, + "loss": 1.7284, + "step": 1468 + }, + { + "epoch": 1.75, + "learning_rate": 0.00018520477384640187, + "loss": 1.8314, + "step": 1469 + }, + { + "epoch": 1.75, + "learning_rate": 0.00018518500201629258, + "loss": 1.8341, + "step": 1470 + }, + { + "epoch": 1.75, + "learning_rate": 0.00018516521804066015, + "loss": 1.4129, + "step": 1471 + }, + { + "epoch": 1.75, + "learning_rate": 0.00018514542192232537, + "loss": 1.4671, + "step": 1472 + }, + { + "epoch": 1.75, + "learning_rate": 0.00018512561366411067, + "loss": 1.6665, + "step": 1473 + }, + { + "epoch": 1.75, + "learning_rate": 0.00018510579326884034, + "loss": 1.5722, + "step": 1474 + }, + { + "epoch": 1.76, + "learning_rate": 0.0001850859607393403, + "loss": 1.9348, + "step": 1475 + }, + { + "epoch": 1.76, + "learning_rate": 0.0001850661160784383, + "loss": 1.5404, + "step": 1476 + }, + { + "epoch": 1.76, + "learning_rate": 0.00018504625928896363, + "loss": 1.4769, + "step": 1477 + }, + { + "epoch": 1.76, + "learning_rate": 0.00018502639037374757, + "loss": 1.4149, + "step": 1478 + }, + { + "epoch": 1.76, + "learning_rate": 0.0001850065093356229, + "loss": 1.958, + "step": 1479 + }, + { + "epoch": 1.76, + "learning_rate": 0.00018498661617742426, + "loss": 1.8319, + "step": 1480 + }, + { + "epoch": 1.76, + "learning_rate": 0.00018496671090198797, + "loss": 1.5948, + "step": 1481 + }, + { + "epoch": 1.76, + "learning_rate": 0.0001849467935121521, + "loss": 1.8469, + "step": 1482 + }, + { + "epoch": 1.76, + "learning_rate": 0.00018492686401075644, + "loss": 1.6798, + "step": 1483 + }, + { + "epoch": 1.77, + "learning_rate": 0.0001849069224006425, + "loss": 1.8197, + "step": 1484 + }, + { + "epoch": 1.77, + "learning_rate": 0.0001848869686846535, + "loss": 1.6613, + "step": 1485 + }, + { + "epoch": 1.77, + "learning_rate": 0.0001848670028656344, + "loss": 1.7322, + "step": 1486 + }, + { + "epoch": 1.77, + "learning_rate": 0.00018484702494643188, + "loss": 2.0493, + "step": 1487 + }, + { + "epoch": 1.77, + "learning_rate": 0.00018482703492989444, + "loss": 1.7182, + "step": 1488 + }, + { + "epoch": 1.77, + "learning_rate": 0.00018480703281887215, + "loss": 1.689, + "step": 1489 + }, + { + "epoch": 1.77, + "learning_rate": 0.00018478701861621686, + "loss": 1.9477, + "step": 1490 + }, + { + "epoch": 1.77, + "learning_rate": 0.0001847669923247822, + "loss": 1.8171, + "step": 1491 + }, + { + "epoch": 1.78, + "learning_rate": 0.00018474695394742345, + "loss": 1.7337, + "step": 1492 + }, + { + "epoch": 1.78, + "learning_rate": 0.0001847269034869977, + "loss": 1.6983, + "step": 1493 + }, + { + "epoch": 1.78, + "learning_rate": 0.0001847068409463636, + "loss": 1.6445, + "step": 1494 + }, + { + "epoch": 1.78, + "learning_rate": 0.0001846867663283818, + "loss": 1.9965, + "step": 1495 + }, + { + "epoch": 1.78, + "learning_rate": 0.0001846666796359143, + "loss": 1.6775, + "step": 1496 + }, + { + "epoch": 1.78, + "learning_rate": 0.0001846465808718252, + "loss": 1.8117, + "step": 1497 + }, + { + "epoch": 1.78, + "learning_rate": 0.00018462647003898006, + "loss": 1.8803, + "step": 1498 + }, + { + "epoch": 1.78, + "learning_rate": 0.00018460634714024624, + "loss": 1.3045, + "step": 1499 + }, + { + "epoch": 1.79, + "learning_rate": 0.00018458621217849286, + "loss": 1.7768, + "step": 1500 + }, + { + "epoch": 1.79, + "learning_rate": 0.00018456606515659073, + "loss": 2.0641, + "step": 1501 + }, + { + "epoch": 1.79, + "learning_rate": 0.0001845459060774123, + "loss": 1.3804, + "step": 1502 + }, + { + "epoch": 1.79, + "learning_rate": 0.00018452573494383192, + "loss": 1.6271, + "step": 1503 + }, + { + "epoch": 1.79, + "learning_rate": 0.00018450555175872547, + "loss": 1.8525, + "step": 1504 + }, + { + "epoch": 1.79, + "learning_rate": 0.00018448535652497073, + "loss": 1.5303, + "step": 1505 + }, + { + "epoch": 1.79, + "learning_rate": 0.000184465149245447, + "loss": 2.0368, + "step": 1506 + }, + { + "epoch": 1.79, + "learning_rate": 0.00018444492992303544, + "loss": 1.9951, + "step": 1507 + }, + { + "epoch": 1.79, + "learning_rate": 0.0001844246985606189, + "loss": 1.8715, + "step": 1508 + }, + { + "epoch": 1.8, + "learning_rate": 0.00018440445516108186, + "loss": 1.7373, + "step": 1509 + }, + { + "epoch": 1.8, + "learning_rate": 0.00018438419972731067, + "loss": 1.7667, + "step": 1510 + }, + { + "epoch": 1.8, + "learning_rate": 0.00018436393226219327, + "loss": 1.5134, + "step": 1511 + }, + { + "epoch": 1.8, + "learning_rate": 0.00018434365276861938, + "loss": 1.3891, + "step": 1512 + }, + { + "epoch": 1.8, + "learning_rate": 0.0001843233612494804, + "loss": 1.7066, + "step": 1513 + }, + { + "epoch": 1.8, + "learning_rate": 0.00018430305770766948, + "loss": 1.6366, + "step": 1514 + }, + { + "epoch": 1.8, + "learning_rate": 0.0001842827421460814, + "loss": 1.7838, + "step": 1515 + }, + { + "epoch": 1.8, + "learning_rate": 0.0001842624145676128, + "loss": 1.7884, + "step": 1516 + }, + { + "epoch": 1.81, + "learning_rate": 0.0001842420749751619, + "loss": 1.8428, + "step": 1517 + }, + { + "epoch": 1.81, + "learning_rate": 0.00018422172337162867, + "loss": 1.4987, + "step": 1518 + }, + { + "epoch": 1.81, + "learning_rate": 0.00018420135975991483, + "loss": 1.7576, + "step": 1519 + }, + { + "epoch": 1.81, + "learning_rate": 0.0001841809841429238, + "loss": 1.8522, + "step": 1520 + }, + { + "epoch": 1.81, + "learning_rate": 0.00018416059652356066, + "loss": 1.9308, + "step": 1521 + }, + { + "epoch": 1.81, + "learning_rate": 0.00018414019690473227, + "loss": 1.4658, + "step": 1522 + }, + { + "epoch": 1.81, + "learning_rate": 0.00018411978528934717, + "loss": 1.7072, + "step": 1523 + }, + { + "epoch": 1.81, + "learning_rate": 0.0001840993616803156, + "loss": 1.736, + "step": 1524 + }, + { + "epoch": 1.82, + "learning_rate": 0.0001840789260805495, + "loss": 1.7712, + "step": 1525 + }, + { + "epoch": 1.82, + "learning_rate": 0.0001840584784929626, + "loss": 1.2231, + "step": 1526 + }, + { + "epoch": 1.82, + "learning_rate": 0.00018403801892047023, + "loss": 1.8421, + "step": 1527 + }, + { + "epoch": 1.82, + "learning_rate": 0.00018401754736598947, + "loss": 1.2689, + "step": 1528 + }, + { + "epoch": 1.82, + "learning_rate": 0.00018399706383243918, + "loss": 1.8062, + "step": 1529 + }, + { + "epoch": 1.82, + "learning_rate": 0.0001839765683227398, + "loss": 1.6846, + "step": 1530 + }, + { + "epoch": 1.82, + "learning_rate": 0.0001839560608398136, + "loss": 1.8201, + "step": 1531 + }, + { + "epoch": 1.82, + "learning_rate": 0.00018393554138658441, + "loss": 1.6958, + "step": 1532 + }, + { + "epoch": 1.82, + "learning_rate": 0.00018391500996597796, + "loss": 1.8487, + "step": 1533 + }, + { + "epoch": 1.83, + "learning_rate": 0.0001838944665809215, + "loss": 1.9788, + "step": 1534 + }, + { + "epoch": 1.83, + "learning_rate": 0.00018387391123434412, + "loss": 1.6002, + "step": 1535 + }, + { + "epoch": 1.83, + "learning_rate": 0.00018385334392917658, + "loss": 1.3859, + "step": 1536 + }, + { + "epoch": 1.83, + "learning_rate": 0.00018383276466835127, + "loss": 2.0743, + "step": 1537 + }, + { + "epoch": 1.83, + "learning_rate": 0.00018381217345480235, + "loss": 1.8357, + "step": 1538 + }, + { + "epoch": 1.83, + "learning_rate": 0.00018379157029146573, + "loss": 1.7002, + "step": 1539 + }, + { + "epoch": 1.83, + "learning_rate": 0.00018377095518127897, + "loss": 1.3058, + "step": 1540 + }, + { + "epoch": 1.83, + "learning_rate": 0.00018375032812718124, + "loss": 1.8745, + "step": 1541 + }, + { + "epoch": 1.84, + "learning_rate": 0.00018372968913211364, + "loss": 1.7847, + "step": 1542 + }, + { + "epoch": 1.84, + "learning_rate": 0.00018370903819901874, + "loss": 1.8156, + "step": 1543 + }, + { + "epoch": 1.84, + "learning_rate": 0.00018368837533084095, + "loss": 2.0152, + "step": 1544 + }, + { + "epoch": 1.84, + "learning_rate": 0.00018366770053052634, + "loss": 1.5656, + "step": 1545 + }, + { + "epoch": 1.84, + "learning_rate": 0.00018364701380102266, + "loss": 1.5753, + "step": 1546 + }, + { + "epoch": 1.84, + "learning_rate": 0.00018362631514527947, + "loss": 1.3938, + "step": 1547 + }, + { + "epoch": 1.84, + "learning_rate": 0.00018360560456624788, + "loss": 1.9599, + "step": 1548 + }, + { + "epoch": 1.84, + "learning_rate": 0.00018358488206688075, + "loss": 1.8641, + "step": 1549 + }, + { + "epoch": 1.85, + "learning_rate": 0.00018356414765013267, + "loss": 1.8428, + "step": 1550 + }, + { + "epoch": 1.85, + "learning_rate": 0.00018354340131895998, + "loss": 1.6016, + "step": 1551 + }, + { + "epoch": 1.85, + "learning_rate": 0.00018352264307632056, + "loss": 1.5768, + "step": 1552 + }, + { + "epoch": 1.85, + "learning_rate": 0.00018350187292517415, + "loss": 1.5369, + "step": 1553 + }, + { + "epoch": 1.85, + "learning_rate": 0.0001834810908684821, + "loss": 1.9717, + "step": 1554 + }, + { + "epoch": 1.85, + "learning_rate": 0.00018346029690920746, + "loss": 1.943, + "step": 1555 + }, + { + "epoch": 1.85, + "learning_rate": 0.00018343949105031505, + "loss": 1.8166, + "step": 1556 + }, + { + "epoch": 1.85, + "learning_rate": 0.00018341867329477125, + "loss": 1.7149, + "step": 1557 + }, + { + "epoch": 1.85, + "learning_rate": 0.00018339784364554426, + "loss": 1.4657, + "step": 1558 + }, + { + "epoch": 1.86, + "learning_rate": 0.00018337700210560397, + "loss": 1.8693, + "step": 1559 + }, + { + "epoch": 1.86, + "learning_rate": 0.00018335614867792183, + "loss": 1.7656, + "step": 1560 + }, + { + "epoch": 1.86, + "learning_rate": 0.0001833352833654712, + "loss": 1.5123, + "step": 1561 + }, + { + "epoch": 1.86, + "learning_rate": 0.00018331440617122696, + "loss": 1.7884, + "step": 1562 + }, + { + "epoch": 1.86, + "learning_rate": 0.0001832935170981657, + "loss": 1.7309, + "step": 1563 + }, + { + "epoch": 1.86, + "learning_rate": 0.00018327261614926583, + "loss": 1.9628, + "step": 1564 + }, + { + "epoch": 1.86, + "learning_rate": 0.00018325170332750732, + "loss": 1.6409, + "step": 1565 + }, + { + "epoch": 1.86, + "learning_rate": 0.0001832307786358719, + "loss": 1.6093, + "step": 1566 + }, + { + "epoch": 1.87, + "learning_rate": 0.00018320984207734298, + "loss": 1.6111, + "step": 1567 + }, + { + "epoch": 1.87, + "learning_rate": 0.00018318889365490565, + "loss": 2.0085, + "step": 1568 + }, + { + "epoch": 1.87, + "learning_rate": 0.00018316793337154664, + "loss": 2.079, + "step": 1569 + }, + { + "epoch": 1.87, + "learning_rate": 0.00018314696123025454, + "loss": 1.5466, + "step": 1570 + }, + { + "epoch": 1.87, + "learning_rate": 0.00018312597723401942, + "loss": 2.0825, + "step": 1571 + }, + { + "epoch": 1.87, + "learning_rate": 0.0001831049813858332, + "loss": 1.9748, + "step": 1572 + }, + { + "epoch": 1.87, + "learning_rate": 0.00018308397368868945, + "loss": 1.6529, + "step": 1573 + }, + { + "epoch": 1.87, + "learning_rate": 0.00018306295414558335, + "loss": 1.7119, + "step": 1574 + }, + { + "epoch": 1.88, + "learning_rate": 0.00018304192275951184, + "loss": 1.8812, + "step": 1575 + }, + { + "epoch": 1.88, + "learning_rate": 0.00018302087953347352, + "loss": 1.8676, + "step": 1576 + }, + { + "epoch": 1.88, + "learning_rate": 0.00018299982447046877, + "loss": 1.879, + "step": 1577 + }, + { + "epoch": 1.88, + "learning_rate": 0.00018297875757349952, + "loss": 1.6282, + "step": 1578 + }, + { + "epoch": 1.88, + "learning_rate": 0.00018295767884556947, + "loss": 1.735, + "step": 1579 + }, + { + "epoch": 1.88, + "learning_rate": 0.00018293658828968397, + "loss": 1.5796, + "step": 1580 + }, + { + "epoch": 1.88, + "learning_rate": 0.00018291548590885007, + "loss": 1.8258, + "step": 1581 + }, + { + "epoch": 1.88, + "learning_rate": 0.00018289437170607658, + "loss": 1.7531, + "step": 1582 + }, + { + "epoch": 1.88, + "learning_rate": 0.00018287324568437381, + "loss": 1.6265, + "step": 1583 + }, + { + "epoch": 1.89, + "learning_rate": 0.00018285210784675394, + "loss": 1.7997, + "step": 1584 + }, + { + "epoch": 1.89, + "learning_rate": 0.00018283095819623078, + "loss": 1.955, + "step": 1585 + }, + { + "epoch": 1.89, + "learning_rate": 0.00018280979673581977, + "loss": 1.6542, + "step": 1586 + }, + { + "epoch": 1.89, + "learning_rate": 0.00018278862346853808, + "loss": 1.7634, + "step": 1587 + }, + { + "epoch": 1.89, + "learning_rate": 0.00018276743839740458, + "loss": 2.0077, + "step": 1588 + }, + { + "epoch": 1.89, + "learning_rate": 0.00018274624152543977, + "loss": 2.0254, + "step": 1589 + }, + { + "epoch": 1.89, + "learning_rate": 0.00018272503285566587, + "loss": 1.4464, + "step": 1590 + }, + { + "epoch": 1.89, + "learning_rate": 0.00018270381239110677, + "loss": 1.8643, + "step": 1591 + }, + { + "epoch": 1.9, + "learning_rate": 0.00018268258013478804, + "loss": 1.3278, + "step": 1592 + }, + { + "epoch": 1.9, + "learning_rate": 0.00018266133608973696, + "loss": 1.744, + "step": 1593 + }, + { + "epoch": 1.9, + "learning_rate": 0.00018264008025898248, + "loss": 1.5079, + "step": 1594 + }, + { + "epoch": 1.9, + "learning_rate": 0.00018261881264555516, + "loss": 1.9655, + "step": 1595 + }, + { + "epoch": 1.9, + "learning_rate": 0.0001825975332524873, + "loss": 2.0557, + "step": 1596 + }, + { + "epoch": 1.9, + "learning_rate": 0.000182576242082813, + "loss": 1.7174, + "step": 1597 + }, + { + "epoch": 1.9, + "learning_rate": 0.00018255493913956774, + "loss": 1.449, + "step": 1598 + }, + { + "epoch": 1.9, + "learning_rate": 0.00018253362442578896, + "loss": 1.9058, + "step": 1599 + }, + { + "epoch": 1.91, + "learning_rate": 0.00018251229794451567, + "loss": 1.3482, + "step": 1600 + }, + { + "epoch": 1.91, + "learning_rate": 0.00018249095969878853, + "loss": 1.7906, + "step": 1601 + }, + { + "epoch": 1.91, + "learning_rate": 0.00018246960969164994, + "loss": 1.6177, + "step": 1602 + }, + { + "epoch": 1.91, + "learning_rate": 0.00018244824792614393, + "loss": 1.5786, + "step": 1603 + }, + { + "epoch": 1.91, + "learning_rate": 0.00018242687440531618, + "loss": 1.6451, + "step": 1604 + }, + { + "epoch": 1.91, + "learning_rate": 0.00018240548913221416, + "loss": 1.3695, + "step": 1605 + }, + { + "epoch": 1.91, + "learning_rate": 0.0001823840921098869, + "loss": 1.6648, + "step": 1606 + }, + { + "epoch": 1.91, + "learning_rate": 0.00018236268334138515, + "loss": 2.1548, + "step": 1607 + }, + { + "epoch": 1.91, + "learning_rate": 0.00018234126282976133, + "loss": 1.6153, + "step": 1608 + }, + { + "epoch": 1.92, + "learning_rate": 0.0001823198305780696, + "loss": 1.741, + "step": 1609 + }, + { + "epoch": 1.92, + "learning_rate": 0.00018229838658936564, + "loss": 1.7827, + "step": 1610 + }, + { + "epoch": 1.92, + "learning_rate": 0.00018227693086670697, + "loss": 1.7343, + "step": 1611 + }, + { + "epoch": 1.92, + "learning_rate": 0.00018225546341315261, + "loss": 1.8149, + "step": 1612 + }, + { + "epoch": 1.92, + "learning_rate": 0.0001822339842317635, + "loss": 1.5497, + "step": 1613 + }, + { + "epoch": 1.92, + "learning_rate": 0.00018221249332560198, + "loss": 1.7659, + "step": 1614 + }, + { + "epoch": 1.92, + "learning_rate": 0.0001821909906977322, + "loss": 1.8992, + "step": 1615 + }, + { + "epoch": 1.92, + "learning_rate": 0.00018216947635122, + "loss": 1.8682, + "step": 1616 + }, + { + "epoch": 1.93, + "learning_rate": 0.00018214795028913288, + "loss": 1.9774, + "step": 1617 + }, + { + "epoch": 1.93, + "learning_rate": 0.0001821264125145399, + "loss": 1.9441, + "step": 1618 + }, + { + "epoch": 1.93, + "learning_rate": 0.00018210486303051195, + "loss": 2.0314, + "step": 1619 + }, + { + "epoch": 1.93, + "learning_rate": 0.0001820833018401215, + "loss": 1.8234, + "step": 1620 + }, + { + "epoch": 1.93, + "learning_rate": 0.00018206172894644272, + "loss": 1.9478, + "step": 1621 + }, + { + "epoch": 1.93, + "learning_rate": 0.00018204014435255135, + "loss": 1.7894, + "step": 1622 + }, + { + "epoch": 1.93, + "learning_rate": 0.000182018548061525, + "loss": 1.5469, + "step": 1623 + }, + { + "epoch": 1.93, + "learning_rate": 0.00018199694007644277, + "loss": 1.9419, + "step": 1624 + }, + { + "epoch": 1.94, + "learning_rate": 0.00018197532040038547, + "loss": 1.6686, + "step": 1625 + }, + { + "epoch": 1.94, + "learning_rate": 0.00018195368903643563, + "loss": 2.2525, + "step": 1626 + }, + { + "epoch": 1.94, + "learning_rate": 0.00018193204598767744, + "loss": 1.8076, + "step": 1627 + }, + { + "epoch": 1.94, + "learning_rate": 0.00018191039125719662, + "loss": 1.976, + "step": 1628 + }, + { + "epoch": 1.94, + "learning_rate": 0.00018188872484808076, + "loss": 1.6896, + "step": 1629 + }, + { + "epoch": 1.94, + "learning_rate": 0.00018186704676341898, + "loss": 1.6784, + "step": 1630 + }, + { + "epoch": 1.94, + "learning_rate": 0.00018184535700630213, + "loss": 1.9634, + "step": 1631 + }, + { + "epoch": 1.94, + "learning_rate": 0.00018182365557982264, + "loss": 1.7406, + "step": 1632 + }, + { + "epoch": 1.94, + "learning_rate": 0.00018180194248707473, + "loss": 1.7492, + "step": 1633 + }, + { + "epoch": 1.95, + "learning_rate": 0.00018178021773115414, + "loss": 1.7731, + "step": 1634 + }, + { + "epoch": 1.95, + "learning_rate": 0.00018175848131515837, + "loss": 1.6232, + "step": 1635 + }, + { + "epoch": 1.95, + "learning_rate": 0.0001817367332421866, + "loss": 1.7488, + "step": 1636 + }, + { + "epoch": 1.95, + "learning_rate": 0.0001817149735153396, + "loss": 1.3398, + "step": 1637 + }, + { + "epoch": 1.95, + "learning_rate": 0.00018169320213771983, + "loss": 1.4521, + "step": 1638 + }, + { + "epoch": 1.95, + "learning_rate": 0.00018167141911243145, + "loss": 1.6311, + "step": 1639 + }, + { + "epoch": 1.95, + "learning_rate": 0.00018164962444258014, + "loss": 1.8911, + "step": 1640 + }, + { + "epoch": 1.95, + "learning_rate": 0.00018162781813127346, + "loss": 1.9879, + "step": 1641 + }, + { + "epoch": 1.96, + "learning_rate": 0.0001816060001816205, + "loss": 1.5637, + "step": 1642 + }, + { + "epoch": 1.96, + "learning_rate": 0.00018158417059673196, + "loss": 1.7461, + "step": 1643 + }, + { + "epoch": 1.96, + "learning_rate": 0.0001815623293797203, + "loss": 1.6671, + "step": 1644 + }, + { + "epoch": 1.96, + "learning_rate": 0.0001815404765336996, + "loss": 1.2124, + "step": 1645 + }, + { + "epoch": 1.96, + "learning_rate": 0.0001815186120617856, + "loss": 1.6402, + "step": 1646 + }, + { + "epoch": 1.96, + "learning_rate": 0.0001814967359670957, + "loss": 1.8837, + "step": 1647 + }, + { + "epoch": 1.96, + "learning_rate": 0.00018147484825274893, + "loss": 1.8027, + "step": 1648 + }, + { + "epoch": 1.96, + "learning_rate": 0.00018145294892186605, + "loss": 1.7684, + "step": 1649 + }, + { + "epoch": 1.97, + "learning_rate": 0.0001814310379775694, + "loss": 1.8274, + "step": 1650 + }, + { + "epoch": 1.97, + "learning_rate": 0.000181409115422983, + "loss": 1.8292, + "step": 1651 + }, + { + "epoch": 1.97, + "learning_rate": 0.00018138718126123248, + "loss": 1.3492, + "step": 1652 + }, + { + "epoch": 1.97, + "learning_rate": 0.00018136523549544523, + "loss": 1.509, + "step": 1653 + }, + { + "epoch": 1.97, + "learning_rate": 0.00018134327812875024, + "loss": 1.7415, + "step": 1654 + }, + { + "epoch": 1.97, + "learning_rate": 0.00018132130916427816, + "loss": 1.5223, + "step": 1655 + }, + { + "epoch": 1.97, + "learning_rate": 0.00018129932860516126, + "loss": 1.9294, + "step": 1656 + }, + { + "epoch": 1.97, + "learning_rate": 0.00018127733645453348, + "loss": 2.0716, + "step": 1657 + }, + { + "epoch": 1.97, + "learning_rate": 0.00018125533271553043, + "loss": 1.57, + "step": 1658 + }, + { + "epoch": 1.98, + "learning_rate": 0.00018123331739128938, + "loss": 2.2132, + "step": 1659 + }, + { + "epoch": 1.98, + "learning_rate": 0.00018121129048494922, + "loss": 1.9006, + "step": 1660 + }, + { + "epoch": 1.98, + "learning_rate": 0.00018118925199965048, + "loss": 1.9319, + "step": 1661 + }, + { + "epoch": 1.98, + "learning_rate": 0.00018116720193853543, + "loss": 1.8103, + "step": 1662 + }, + { + "epoch": 1.98, + "learning_rate": 0.00018114514030474787, + "loss": 1.7028, + "step": 1663 + }, + { + "epoch": 1.98, + "learning_rate": 0.00018112306710143334, + "loss": 1.802, + "step": 1664 + }, + { + "epoch": 1.98, + "learning_rate": 0.000181100982331739, + "loss": 1.6835, + "step": 1665 + }, + { + "epoch": 1.98, + "learning_rate": 0.0001810788859988136, + "loss": 1.7223, + "step": 1666 + }, + { + "epoch": 1.99, + "learning_rate": 0.0001810567781058077, + "loss": 1.5829, + "step": 1667 + }, + { + "epoch": 1.99, + "learning_rate": 0.00018103465865587333, + "loss": 1.9863, + "step": 1668 + }, + { + "epoch": 1.99, + "learning_rate": 0.0001810125276521642, + "loss": 1.6398, + "step": 1669 + }, + { + "epoch": 1.99, + "learning_rate": 0.00018099038509783582, + "loss": 1.9261, + "step": 1670 + }, + { + "epoch": 1.99, + "learning_rate": 0.00018096823099604517, + "loss": 1.8882, + "step": 1671 + }, + { + "epoch": 1.99, + "learning_rate": 0.00018094606534995093, + "loss": 1.6716, + "step": 1672 + }, + { + "epoch": 1.99, + "eval_loss": 2.075261354446411, + "eval_runtime": 283.9438, + "eval_samples_per_second": 0.725, + "eval_steps_per_second": 0.725, + "step": 1672 + }, + { + "epoch": 1.99, + "learning_rate": 0.00018092388816271345, + "loss": 1.6688, + "step": 1673 + }, + { + "epoch": 1.99, + "learning_rate": 0.00018090169943749476, + "loss": 1.9127, + "step": 1674 + }, + { + "epoch": 2.0, + "learning_rate": 0.0001808794991774584, + "loss": 1.7214, + "step": 1675 + }, + { + "epoch": 2.0, + "learning_rate": 0.00018085728738576973, + "loss": 1.785, + "step": 1676 + }, + { + "epoch": 2.0, + "learning_rate": 0.00018083506406559561, + "loss": 1.5287, + "step": 1677 + }, + { + "epoch": 2.0, + "learning_rate": 0.00018081282922010464, + "loss": 1.9012, + "step": 1678 + }, + { + "epoch": 2.0, + "learning_rate": 0.00018079058285246698, + "loss": 1.3094, + "step": 1679 + }, + { + "epoch": 2.0, + "learning_rate": 0.0001807683249658545, + "loss": 1.818, + "step": 1680 + }, + { + "epoch": 2.0, + "learning_rate": 0.0001807460555634407, + "loss": 1.9389, + "step": 1681 + }, + { + "epoch": 2.0, + "learning_rate": 0.0001807237746484007, + "loss": 1.4334, + "step": 1682 + }, + { + "epoch": 2.0, + "learning_rate": 0.00018070148222391126, + "loss": 1.5422, + "step": 1683 + }, + { + "epoch": 2.01, + "learning_rate": 0.0001806791782931508, + "loss": 1.7899, + "step": 1684 + }, + { + "epoch": 2.01, + "learning_rate": 0.0001806568628592994, + "loss": 1.6106, + "step": 1685 + }, + { + "epoch": 2.01, + "learning_rate": 0.00018063453592553872, + "loss": 1.9807, + "step": 1686 + }, + { + "epoch": 2.01, + "learning_rate": 0.0001806121974950521, + "loss": 1.1762, + "step": 1687 + }, + { + "epoch": 2.01, + "learning_rate": 0.00018058984757102456, + "loss": 1.8338, + "step": 1688 + }, + { + "epoch": 2.01, + "learning_rate": 0.0001805674861566426, + "loss": 1.5556, + "step": 1689 + }, + { + "epoch": 2.0, + "learning_rate": 0.0001805451132550946, + "loss": 0.87, + "step": 1690 + }, + { + "epoch": 2.0, + "learning_rate": 0.00018052272886957038, + "loss": 1.0386, + "step": 1691 + }, + { + "epoch": 2.0, + "learning_rate": 0.0001805003330032615, + "loss": 0.8153, + "step": 1692 + }, + { + "epoch": 2.0, + "learning_rate": 0.00018047792565936102, + "loss": 1.1745, + "step": 1693 + }, + { + "epoch": 2.01, + "learning_rate": 0.00018045550684106388, + "loss": 1.1584, + "step": 1694 + }, + { + "epoch": 2.01, + "learning_rate": 0.00018043307655156644, + "loss": 1.0742, + "step": 1695 + }, + { + "epoch": 2.01, + "learning_rate": 0.00018041063479406675, + "loss": 1.0537, + "step": 1696 + }, + { + "epoch": 2.01, + "learning_rate": 0.0001803881815717646, + "loss": 1.0239, + "step": 1697 + }, + { + "epoch": 2.01, + "learning_rate": 0.0001803657168878612, + "loss": 0.9182, + "step": 1698 + }, + { + "epoch": 2.01, + "learning_rate": 0.00018034324074555965, + "loss": 1.1856, + "step": 1699 + }, + { + "epoch": 2.01, + "learning_rate": 0.00018032075314806448, + "loss": 1.3285, + "step": 1700 + }, + { + "epoch": 2.01, + "learning_rate": 0.00018029825409858198, + "loss": 1.2912, + "step": 1701 + }, + { + "epoch": 2.02, + "learning_rate": 0.00018027574360032, + "loss": 1.3666, + "step": 1702 + }, + { + "epoch": 2.02, + "learning_rate": 0.00018025322165648807, + "loss": 0.9621, + "step": 1703 + }, + { + "epoch": 2.02, + "learning_rate": 0.00018023068827029723, + "loss": 0.8484, + "step": 1704 + }, + { + "epoch": 2.02, + "learning_rate": 0.00018020814344496037, + "loss": 1.2236, + "step": 1705 + }, + { + "epoch": 2.02, + "learning_rate": 0.00018018558718369186, + "loss": 0.8155, + "step": 1706 + }, + { + "epoch": 2.02, + "learning_rate": 0.0001801630194897077, + "loss": 1.2047, + "step": 1707 + }, + { + "epoch": 2.02, + "learning_rate": 0.00018014044036622555, + "loss": 1.0269, + "step": 1708 + }, + { + "epoch": 2.02, + "learning_rate": 0.00018011784981646474, + "loss": 1.0536, + "step": 1709 + }, + { + "epoch": 2.03, + "learning_rate": 0.00018009524784364615, + "loss": 1.0516, + "step": 1710 + }, + { + "epoch": 2.03, + "learning_rate": 0.00018007263445099235, + "loss": 0.9087, + "step": 1711 + }, + { + "epoch": 2.03, + "learning_rate": 0.0001800500096417275, + "loss": 1.3057, + "step": 1712 + }, + { + "epoch": 2.03, + "learning_rate": 0.00018002737341907743, + "loss": 0.8791, + "step": 1713 + }, + { + "epoch": 2.03, + "learning_rate": 0.00018000472578626956, + "loss": 1.1667, + "step": 1714 + }, + { + "epoch": 2.03, + "learning_rate": 0.00017998206674653294, + "loss": 1.1026, + "step": 1715 + }, + { + "epoch": 2.03, + "learning_rate": 0.00017995939630309826, + "loss": 1.3228, + "step": 1716 + }, + { + "epoch": 2.03, + "learning_rate": 0.0001799367144591978, + "loss": 0.9173, + "step": 1717 + }, + { + "epoch": 2.03, + "learning_rate": 0.00017991402121806557, + "loss": 1.0067, + "step": 1718 + }, + { + "epoch": 2.04, + "learning_rate": 0.0001798913165829371, + "loss": 1.0256, + "step": 1719 + }, + { + "epoch": 2.04, + "learning_rate": 0.00017986860055704953, + "loss": 0.7645, + "step": 1720 + }, + { + "epoch": 2.04, + "learning_rate": 0.0001798458731436417, + "loss": 1.0567, + "step": 1721 + }, + { + "epoch": 2.04, + "learning_rate": 0.00017982313434595406, + "loss": 0.7465, + "step": 1722 + }, + { + "epoch": 2.04, + "learning_rate": 0.00017980038416722863, + "loss": 1.3268, + "step": 1723 + }, + { + "epoch": 2.04, + "learning_rate": 0.00017977762261070916, + "loss": 0.9917, + "step": 1724 + }, + { + "epoch": 2.04, + "learning_rate": 0.00017975484967964087, + "loss": 0.8592, + "step": 1725 + }, + { + "epoch": 2.04, + "learning_rate": 0.00017973206537727073, + "loss": 1.43, + "step": 1726 + }, + { + "epoch": 2.05, + "learning_rate": 0.00017970926970684725, + "loss": 1.3679, + "step": 1727 + }, + { + "epoch": 2.05, + "learning_rate": 0.00017968646267162063, + "loss": 1.2959, + "step": 1728 + }, + { + "epoch": 2.05, + "learning_rate": 0.00017966364427484267, + "loss": 1.0674, + "step": 1729 + }, + { + "epoch": 2.05, + "learning_rate": 0.00017964081451976672, + "loss": 1.1153, + "step": 1730 + }, + { + "epoch": 2.05, + "learning_rate": 0.00017961797340964783, + "loss": 1.0586, + "step": 1731 + }, + { + "epoch": 2.05, + "learning_rate": 0.00017959512094774266, + "loss": 1.2388, + "step": 1732 + }, + { + "epoch": 2.05, + "learning_rate": 0.00017957225713730949, + "loss": 1.257, + "step": 1733 + }, + { + "epoch": 2.05, + "learning_rate": 0.0001795493819816081, + "loss": 1.099, + "step": 1734 + }, + { + "epoch": 2.06, + "learning_rate": 0.0001795264954839001, + "loss": 0.9532, + "step": 1735 + }, + { + "epoch": 2.06, + "learning_rate": 0.00017950359764744859, + "loss": 1.2553, + "step": 1736 + }, + { + "epoch": 2.06, + "learning_rate": 0.00017948068847551825, + "loss": 0.9973, + "step": 1737 + }, + { + "epoch": 2.06, + "learning_rate": 0.00017945776797137543, + "loss": 1.0637, + "step": 1738 + }, + { + "epoch": 2.06, + "learning_rate": 0.00017943483613828815, + "loss": 1.1815, + "step": 1739 + }, + { + "epoch": 2.06, + "learning_rate": 0.00017941189297952597, + "loss": 0.8378, + "step": 1740 + }, + { + "epoch": 2.06, + "learning_rate": 0.00017938893849836002, + "loss": 0.9375, + "step": 1741 + }, + { + "epoch": 2.06, + "learning_rate": 0.00017936597269806322, + "loss": 0.9653, + "step": 1742 + }, + { + "epoch": 2.06, + "learning_rate": 0.0001793429955819099, + "loss": 1.221, + "step": 1743 + }, + { + "epoch": 2.07, + "learning_rate": 0.00017932000715317612, + "loss": 1.041, + "step": 1744 + }, + { + "epoch": 2.07, + "learning_rate": 0.00017929700741513955, + "loss": 1.0724, + "step": 1745 + }, + { + "epoch": 2.07, + "learning_rate": 0.00017927399637107945, + "loss": 1.1102, + "step": 1746 + }, + { + "epoch": 2.07, + "learning_rate": 0.00017925097402427667, + "loss": 0.8542, + "step": 1747 + }, + { + "epoch": 2.07, + "learning_rate": 0.0001792279403780137, + "loss": 1.2339, + "step": 1748 + }, + { + "epoch": 2.07, + "learning_rate": 0.00017920489543557465, + "loss": 0.8671, + "step": 1749 + }, + { + "epoch": 2.07, + "learning_rate": 0.0001791818392002452, + "loss": 0.9779, + "step": 1750 + }, + { + "epoch": 2.07, + "learning_rate": 0.0001791587716753127, + "loss": 1.1242, + "step": 1751 + }, + { + "epoch": 2.08, + "learning_rate": 0.00017913569286406603, + "loss": 0.9043, + "step": 1752 + }, + { + "epoch": 2.08, + "learning_rate": 0.0001791126027697958, + "loss": 0.7996, + "step": 1753 + }, + { + "epoch": 2.08, + "learning_rate": 0.00017908950139579406, + "loss": 0.8602, + "step": 1754 + }, + { + "epoch": 2.08, + "learning_rate": 0.00017906638874535462, + "loss": 1.0161, + "step": 1755 + }, + { + "epoch": 2.08, + "learning_rate": 0.00017904326482177284, + "loss": 0.8226, + "step": 1756 + }, + { + "epoch": 2.08, + "learning_rate": 0.00017902012962834566, + "loss": 1.3885, + "step": 1757 + }, + { + "epoch": 2.08, + "learning_rate": 0.0001789969831683717, + "loss": 1.2158, + "step": 1758 + }, + { + "epoch": 2.08, + "learning_rate": 0.00017897382544515108, + "loss": 1.3261, + "step": 1759 + }, + { + "epoch": 2.09, + "learning_rate": 0.00017895065646198567, + "loss": 1.2144, + "step": 1760 + }, + { + "epoch": 2.09, + "learning_rate": 0.00017892747622217875, + "loss": 0.9881, + "step": 1761 + }, + { + "epoch": 2.09, + "learning_rate": 0.0001789042847290354, + "loss": 1.0342, + "step": 1762 + }, + { + "epoch": 2.09, + "learning_rate": 0.00017888108198586217, + "loss": 0.7883, + "step": 1763 + }, + { + "epoch": 2.09, + "learning_rate": 0.00017885786799596732, + "loss": 0.9006, + "step": 1764 + }, + { + "epoch": 2.09, + "learning_rate": 0.00017883464276266064, + "loss": 1.3695, + "step": 1765 + }, + { + "epoch": 2.09, + "learning_rate": 0.0001788114062892535, + "loss": 1.0303, + "step": 1766 + }, + { + "epoch": 2.09, + "learning_rate": 0.00017878815857905897, + "loss": 1.3816, + "step": 1767 + }, + { + "epoch": 2.09, + "learning_rate": 0.0001787648996353916, + "loss": 0.8684, + "step": 1768 + }, + { + "epoch": 2.1, + "learning_rate": 0.00017874162946156772, + "loss": 1.1157, + "step": 1769 + }, + { + "epoch": 2.1, + "learning_rate": 0.00017871834806090501, + "loss": 1.0087, + "step": 1770 + }, + { + "epoch": 2.1, + "learning_rate": 0.000178695055436723, + "loss": 0.7173, + "step": 1771 + }, + { + "epoch": 2.1, + "learning_rate": 0.00017867175159234265, + "loss": 1.4784, + "step": 1772 + }, + { + "epoch": 2.1, + "learning_rate": 0.00017864843653108662, + "loss": 1.1401, + "step": 1773 + }, + { + "epoch": 2.1, + "learning_rate": 0.0001786251102562791, + "loss": 1.0952, + "step": 1774 + }, + { + "epoch": 2.1, + "learning_rate": 0.0001786017727712459, + "loss": 0.9443, + "step": 1775 + }, + { + "epoch": 2.1, + "learning_rate": 0.00017857842407931445, + "loss": 1.0682, + "step": 1776 + }, + { + "epoch": 2.11, + "learning_rate": 0.0001785550641838138, + "loss": 0.9402, + "step": 1777 + }, + { + "epoch": 2.11, + "learning_rate": 0.00017853169308807448, + "loss": 1.0576, + "step": 1778 + }, + { + "epoch": 2.11, + "learning_rate": 0.0001785083107954288, + "loss": 1.1425, + "step": 1779 + }, + { + "epoch": 2.11, + "learning_rate": 0.00017848491730921046, + "loss": 1.1402, + "step": 1780 + }, + { + "epoch": 2.11, + "learning_rate": 0.00017846151263275494, + "loss": 1.4482, + "step": 1781 + }, + { + "epoch": 2.11, + "learning_rate": 0.00017843809676939922, + "loss": 0.7765, + "step": 1782 + }, + { + "epoch": 2.11, + "learning_rate": 0.00017841466972248188, + "loss": 1.1478, + "step": 1783 + }, + { + "epoch": 2.11, + "learning_rate": 0.0001783912314953431, + "loss": 1.1876, + "step": 1784 + }, + { + "epoch": 2.12, + "learning_rate": 0.00017836778209132464, + "loss": 1.2036, + "step": 1785 + }, + { + "epoch": 2.12, + "learning_rate": 0.0001783443215137699, + "loss": 1.0297, + "step": 1786 + }, + { + "epoch": 2.12, + "learning_rate": 0.0001783208497660239, + "loss": 0.8186, + "step": 1787 + }, + { + "epoch": 2.12, + "learning_rate": 0.00017829736685143308, + "loss": 0.7258, + "step": 1788 + }, + { + "epoch": 2.12, + "learning_rate": 0.00017827387277334568, + "loss": 0.8072, + "step": 1789 + }, + { + "epoch": 2.12, + "learning_rate": 0.00017825036753511144, + "loss": 1.0474, + "step": 1790 + }, + { + "epoch": 2.12, + "learning_rate": 0.00017822685114008167, + "loss": 1.2141, + "step": 1791 + }, + { + "epoch": 2.12, + "learning_rate": 0.00017820332359160928, + "loss": 1.1443, + "step": 1792 + }, + { + "epoch": 2.12, + "learning_rate": 0.0001781797848930488, + "loss": 0.9864, + "step": 1793 + }, + { + "epoch": 2.13, + "learning_rate": 0.00017815623504775636, + "loss": 1.2998, + "step": 1794 + }, + { + "epoch": 2.13, + "learning_rate": 0.0001781326740590896, + "loss": 1.0672, + "step": 1795 + }, + { + "epoch": 2.13, + "learning_rate": 0.00017810910193040785, + "loss": 0.9152, + "step": 1796 + }, + { + "epoch": 2.13, + "learning_rate": 0.000178085518665072, + "loss": 1.2555, + "step": 1797 + }, + { + "epoch": 2.13, + "learning_rate": 0.00017806192426644444, + "loss": 1.2085, + "step": 1798 + }, + { + "epoch": 2.13, + "learning_rate": 0.00017803831873788926, + "loss": 1.6205, + "step": 1799 + }, + { + "epoch": 2.13, + "learning_rate": 0.0001780147020827721, + "loss": 1.3382, + "step": 1800 + }, + { + "epoch": 2.13, + "learning_rate": 0.00017799107430446016, + "loss": 1.3309, + "step": 1801 + }, + { + "epoch": 2.14, + "learning_rate": 0.00017796743540632223, + "loss": 1.2556, + "step": 1802 + }, + { + "epoch": 2.14, + "learning_rate": 0.00017794378539172877, + "loss": 0.829, + "step": 1803 + }, + { + "epoch": 2.14, + "learning_rate": 0.00017792012426405166, + "loss": 1.1711, + "step": 1804 + }, + { + "epoch": 2.14, + "learning_rate": 0.00017789645202666456, + "loss": 1.0128, + "step": 1805 + }, + { + "epoch": 2.14, + "learning_rate": 0.00017787276868294253, + "loss": 1.2074, + "step": 1806 + }, + { + "epoch": 2.14, + "learning_rate": 0.00017784907423626237, + "loss": 1.0996, + "step": 1807 + }, + { + "epoch": 2.14, + "learning_rate": 0.0001778253686900023, + "loss": 0.9608, + "step": 1808 + }, + { + "epoch": 2.14, + "learning_rate": 0.0001778016520475423, + "loss": 0.827, + "step": 1809 + }, + { + "epoch": 2.15, + "learning_rate": 0.00017777792431226383, + "loss": 1.2365, + "step": 1810 + }, + { + "epoch": 2.15, + "learning_rate": 0.00017775418548754993, + "loss": 1.0276, + "step": 1811 + }, + { + "epoch": 2.15, + "learning_rate": 0.0001777304355767852, + "loss": 0.8178, + "step": 1812 + }, + { + "epoch": 2.15, + "learning_rate": 0.0001777066745833559, + "loss": 1.1297, + "step": 1813 + }, + { + "epoch": 2.15, + "learning_rate": 0.00017768290251064987, + "loss": 1.1737, + "step": 1814 + }, + { + "epoch": 2.15, + "learning_rate": 0.00017765911936205644, + "loss": 1.1606, + "step": 1815 + }, + { + "epoch": 2.15, + "learning_rate": 0.00017763532514096658, + "loss": 1.2605, + "step": 1816 + }, + { + "epoch": 2.15, + "learning_rate": 0.0001776115198507728, + "loss": 1.2271, + "step": 1817 + }, + { + "epoch": 2.15, + "learning_rate": 0.00017758770349486923, + "loss": 0.9407, + "step": 1818 + }, + { + "epoch": 2.16, + "learning_rate": 0.0001775638760766516, + "loss": 1.0273, + "step": 1819 + }, + { + "epoch": 2.16, + "learning_rate": 0.00017754003759951715, + "loss": 1.0746, + "step": 1820 + }, + { + "epoch": 2.16, + "learning_rate": 0.00017751618806686472, + "loss": 1.0091, + "step": 1821 + }, + { + "epoch": 2.16, + "learning_rate": 0.00017749232748209473, + "loss": 0.997, + "step": 1822 + }, + { + "epoch": 2.16, + "learning_rate": 0.0001774684558486092, + "loss": 1.4814, + "step": 1823 + }, + { + "epoch": 2.16, + "learning_rate": 0.00017744457316981168, + "loss": 1.1407, + "step": 1824 + }, + { + "epoch": 2.16, + "learning_rate": 0.00017742067944910737, + "loss": 0.9824, + "step": 1825 + }, + { + "epoch": 2.16, + "learning_rate": 0.00017739677468990293, + "loss": 1.2603, + "step": 1826 + }, + { + "epoch": 2.17, + "learning_rate": 0.00017737285889560668, + "loss": 1.3721, + "step": 1827 + }, + { + "epoch": 2.17, + "learning_rate": 0.00017734893206962853, + "loss": 1.1186, + "step": 1828 + }, + { + "epoch": 2.17, + "learning_rate": 0.00017732499421537984, + "loss": 0.7693, + "step": 1829 + }, + { + "epoch": 2.17, + "learning_rate": 0.0001773010453362737, + "loss": 1.0449, + "step": 1830 + }, + { + "epoch": 2.17, + "learning_rate": 0.00017727708543572467, + "loss": 0.9331, + "step": 1831 + }, + { + "epoch": 2.17, + "learning_rate": 0.0001772531145171489, + "loss": 0.739, + "step": 1832 + }, + { + "epoch": 2.17, + "learning_rate": 0.00017722913258396417, + "loss": 0.9076, + "step": 1833 + }, + { + "epoch": 2.17, + "learning_rate": 0.00017720513963958968, + "loss": 1.3464, + "step": 1834 + }, + { + "epoch": 2.18, + "learning_rate": 0.00017718113568744638, + "loss": 0.8858, + "step": 1835 + }, + { + "epoch": 2.18, + "learning_rate": 0.00017715712073095672, + "loss": 1.3204, + "step": 1836 + }, + { + "epoch": 2.18, + "learning_rate": 0.00017713309477354467, + "loss": 1.0538, + "step": 1837 + }, + { + "epoch": 2.18, + "learning_rate": 0.0001771090578186358, + "loss": 1.44, + "step": 1838 + }, + { + "epoch": 2.18, + "learning_rate": 0.0001770850098696573, + "loss": 1.0167, + "step": 1839 + }, + { + "epoch": 2.18, + "learning_rate": 0.00017706095093003785, + "loss": 0.9724, + "step": 1840 + }, + { + "epoch": 2.18, + "learning_rate": 0.00017703688100320774, + "loss": 0.8055, + "step": 1841 + }, + { + "epoch": 2.18, + "learning_rate": 0.0001770128000925988, + "loss": 0.7363, + "step": 1842 + }, + { + "epoch": 2.18, + "learning_rate": 0.00017698870820164446, + "loss": 1.1329, + "step": 1843 + }, + { + "epoch": 2.19, + "learning_rate": 0.00017696460533377968, + "loss": 0.9487, + "step": 1844 + }, + { + "epoch": 2.19, + "learning_rate": 0.00017694049149244104, + "loss": 1.2571, + "step": 1845 + }, + { + "epoch": 2.19, + "learning_rate": 0.0001769163666810666, + "loss": 0.9148, + "step": 1846 + }, + { + "epoch": 2.19, + "learning_rate": 0.00017689223090309607, + "loss": 1.4676, + "step": 1847 + }, + { + "epoch": 2.19, + "learning_rate": 0.00017686808416197072, + "loss": 0.9395, + "step": 1848 + }, + { + "epoch": 2.19, + "learning_rate": 0.00017684392646113325, + "loss": 0.9632, + "step": 1849 + }, + { + "epoch": 2.19, + "learning_rate": 0.00017681975780402807, + "loss": 1.0037, + "step": 1850 + }, + { + "epoch": 2.19, + "learning_rate": 0.0001767955781941011, + "loss": 0.9557, + "step": 1851 + }, + { + "epoch": 2.2, + "learning_rate": 0.00017677138763479985, + "loss": 1.2799, + "step": 1852 + }, + { + "epoch": 2.2, + "learning_rate": 0.00017674718612957336, + "loss": 0.8461, + "step": 1853 + }, + { + "epoch": 2.2, + "learning_rate": 0.0001767229736818722, + "loss": 1.2762, + "step": 1854 + }, + { + "epoch": 2.2, + "learning_rate": 0.00017669875029514856, + "loss": 1.4801, + "step": 1855 + }, + { + "epoch": 2.2, + "learning_rate": 0.00017667451597285617, + "loss": 0.9849, + "step": 1856 + }, + { + "epoch": 2.2, + "learning_rate": 0.0001766502707184503, + "loss": 1.0875, + "step": 1857 + }, + { + "epoch": 2.2, + "learning_rate": 0.00017662601453538783, + "loss": 0.8346, + "step": 1858 + }, + { + "epoch": 2.2, + "learning_rate": 0.0001766017474271271, + "loss": 1.1933, + "step": 1859 + }, + { + "epoch": 2.21, + "learning_rate": 0.00017657746939712815, + "loss": 0.8789, + "step": 1860 + }, + { + "epoch": 2.21, + "learning_rate": 0.00017655318044885245, + "loss": 1.0091, + "step": 1861 + }, + { + "epoch": 2.21, + "learning_rate": 0.0001765288805857631, + "loss": 0.7371, + "step": 1862 + }, + { + "epoch": 2.21, + "learning_rate": 0.00017650456981132466, + "loss": 0.8131, + "step": 1863 + }, + { + "epoch": 2.21, + "learning_rate": 0.00017648024812900342, + "loss": 1.0795, + "step": 1864 + }, + { + "epoch": 2.21, + "learning_rate": 0.000176455915542267, + "loss": 0.9882, + "step": 1865 + }, + { + "epoch": 2.21, + "learning_rate": 0.00017643157205458483, + "loss": 1.212, + "step": 1866 + }, + { + "epoch": 2.21, + "learning_rate": 0.00017640721766942768, + "loss": 1.4755, + "step": 1867 + }, + { + "epoch": 2.21, + "learning_rate": 0.00017638285239026798, + "loss": 1.0391, + "step": 1868 + }, + { + "epoch": 2.22, + "learning_rate": 0.00017635847622057965, + "loss": 1.2568, + "step": 1869 + }, + { + "epoch": 2.22, + "learning_rate": 0.00017633408916383826, + "loss": 1.2138, + "step": 1870 + }, + { + "epoch": 2.22, + "learning_rate": 0.0001763096912235208, + "loss": 1.196, + "step": 1871 + }, + { + "epoch": 2.22, + "learning_rate": 0.00017628528240310596, + "loss": 1.1476, + "step": 1872 + }, + { + "epoch": 2.22, + "learning_rate": 0.00017626086270607384, + "loss": 1.1421, + "step": 1873 + }, + { + "epoch": 2.22, + "learning_rate": 0.00017623643213590619, + "loss": 1.0711, + "step": 1874 + }, + { + "epoch": 2.22, + "learning_rate": 0.0001762119906960863, + "loss": 0.8842, + "step": 1875 + }, + { + "epoch": 2.22, + "learning_rate": 0.00017618753839009893, + "loss": 0.798, + "step": 1876 + }, + { + "epoch": 2.23, + "learning_rate": 0.0001761630752214305, + "loss": 0.8591, + "step": 1877 + }, + { + "epoch": 2.23, + "learning_rate": 0.00017613860119356883, + "loss": 0.7646, + "step": 1878 + }, + { + "epoch": 2.23, + "learning_rate": 0.0001761141163100035, + "loss": 1.4113, + "step": 1879 + }, + { + "epoch": 2.23, + "learning_rate": 0.00017608962057422549, + "loss": 0.8605, + "step": 1880 + }, + { + "epoch": 2.23, + "learning_rate": 0.00017606511398972731, + "loss": 0.6179, + "step": 1881 + }, + { + "epoch": 2.23, + "eval_loss": 2.3971996307373047, + "eval_runtime": 283.7444, + "eval_samples_per_second": 0.726, + "eval_steps_per_second": 0.726, + "step": 1881 + }, + { + "epoch": 2.23, + "learning_rate": 0.0001760405965600031, + "loss": 0.8651, + "step": 1882 + }, + { + "epoch": 2.23, + "learning_rate": 0.0001760160682885485, + "loss": 1.3178, + "step": 1883 + }, + { + "epoch": 2.23, + "learning_rate": 0.00017599152917886071, + "loss": 0.9233, + "step": 1884 + }, + { + "epoch": 2.24, + "learning_rate": 0.00017596697923443847, + "loss": 0.9126, + "step": 1885 + }, + { + "epoch": 2.24, + "learning_rate": 0.0001759424184587821, + "loss": 0.9749, + "step": 1886 + }, + { + "epoch": 2.24, + "learning_rate": 0.00017591784685539334, + "loss": 1.1929, + "step": 1887 + }, + { + "epoch": 2.24, + "learning_rate": 0.00017589326442777565, + "loss": 1.2026, + "step": 1888 + }, + { + "epoch": 2.24, + "learning_rate": 0.00017586867117943392, + "loss": 1.1162, + "step": 1889 + }, + { + "epoch": 2.24, + "learning_rate": 0.00017584406711387463, + "loss": 0.9818, + "step": 1890 + }, + { + "epoch": 2.24, + "learning_rate": 0.0001758194522346057, + "loss": 0.9802, + "step": 1891 + }, + { + "epoch": 2.24, + "learning_rate": 0.0001757948265451368, + "loss": 0.8963, + "step": 1892 + }, + { + "epoch": 2.24, + "learning_rate": 0.00017577019004897897, + "loss": 1.0359, + "step": 1893 + }, + { + "epoch": 2.25, + "learning_rate": 0.00017574554274964478, + "loss": 1.0788, + "step": 1894 + }, + { + "epoch": 2.25, + "learning_rate": 0.00017572088465064848, + "loss": 0.9415, + "step": 1895 + }, + { + "epoch": 2.25, + "learning_rate": 0.0001756962157555057, + "loss": 1.0944, + "step": 1896 + }, + { + "epoch": 2.25, + "learning_rate": 0.00017567153606773373, + "loss": 1.357, + "step": 1897 + }, + { + "epoch": 2.25, + "learning_rate": 0.00017564684559085136, + "loss": 1.0108, + "step": 1898 + }, + { + "epoch": 2.25, + "learning_rate": 0.0001756221443283789, + "loss": 0.5337, + "step": 1899 + }, + { + "epoch": 2.25, + "learning_rate": 0.0001755974322838382, + "loss": 1.4234, + "step": 1900 + }, + { + "epoch": 2.25, + "learning_rate": 0.0001755727094607527, + "loss": 0.9083, + "step": 1901 + }, + { + "epoch": 2.26, + "learning_rate": 0.00017554797586264727, + "loss": 0.9199, + "step": 1902 + }, + { + "epoch": 2.26, + "learning_rate": 0.00017552323149304844, + "loss": 1.1885, + "step": 1903 + }, + { + "epoch": 2.26, + "learning_rate": 0.0001754984763554842, + "loss": 1.276, + "step": 1904 + }, + { + "epoch": 2.26, + "learning_rate": 0.0001754737104534841, + "loss": 0.8882, + "step": 1905 + }, + { + "epoch": 2.26, + "learning_rate": 0.00017544893379057918, + "loss": 0.993, + "step": 1906 + }, + { + "epoch": 2.26, + "learning_rate": 0.0001754241463703021, + "loss": 1.261, + "step": 1907 + }, + { + "epoch": 2.26, + "learning_rate": 0.00017539934819618696, + "loss": 0.9877, + "step": 1908 + }, + { + "epoch": 2.26, + "learning_rate": 0.00017537453927176947, + "loss": 0.9991, + "step": 1909 + }, + { + "epoch": 2.27, + "learning_rate": 0.00017534971960058685, + "loss": 1.2012, + "step": 1910 + }, + { + "epoch": 2.27, + "learning_rate": 0.0001753248891861778, + "loss": 0.864, + "step": 1911 + }, + { + "epoch": 2.27, + "learning_rate": 0.00017530004803208263, + "loss": 1.0382, + "step": 1912 + }, + { + "epoch": 2.27, + "learning_rate": 0.00017527519614184316, + "loss": 1.068, + "step": 1913 + }, + { + "epoch": 2.27, + "learning_rate": 0.00017525033351900268, + "loss": 0.8687, + "step": 1914 + }, + { + "epoch": 2.27, + "learning_rate": 0.0001752254601671061, + "loss": 1.1174, + "step": 1915 + }, + { + "epoch": 2.27, + "learning_rate": 0.0001752005760896998, + "loss": 1.269, + "step": 1916 + }, + { + "epoch": 2.27, + "learning_rate": 0.0001751756812903317, + "loss": 0.7387, + "step": 1917 + }, + { + "epoch": 2.27, + "learning_rate": 0.0001751507757725513, + "loss": 0.8484, + "step": 1918 + }, + { + "epoch": 2.28, + "learning_rate": 0.0001751258595399095, + "loss": 1.0092, + "step": 1919 + }, + { + "epoch": 2.28, + "learning_rate": 0.00017510093259595885, + "loss": 1.0145, + "step": 1920 + }, + { + "epoch": 2.28, + "learning_rate": 0.00017507599494425344, + "loss": 1.2969, + "step": 1921 + }, + { + "epoch": 2.28, + "learning_rate": 0.00017505104658834875, + "loss": 0.7925, + "step": 1922 + }, + { + "epoch": 2.28, + "learning_rate": 0.00017502608753180196, + "loss": 0.8974, + "step": 1923 + }, + { + "epoch": 2.28, + "learning_rate": 0.00017500111777817164, + "loss": 0.764, + "step": 1924 + }, + { + "epoch": 2.28, + "learning_rate": 0.0001749761373310179, + "loss": 1.1057, + "step": 1925 + }, + { + "epoch": 2.28, + "learning_rate": 0.00017495114619390246, + "loss": 0.8092, + "step": 1926 + }, + { + "epoch": 2.29, + "learning_rate": 0.00017492614437038845, + "loss": 0.9553, + "step": 1927 + }, + { + "epoch": 2.29, + "learning_rate": 0.00017490113186404067, + "loss": 1.0278, + "step": 1928 + }, + { + "epoch": 2.29, + "learning_rate": 0.0001748761086784253, + "loss": 1.2152, + "step": 1929 + }, + { + "epoch": 2.29, + "learning_rate": 0.00017485107481711012, + "loss": 1.5154, + "step": 1930 + }, + { + "epoch": 2.29, + "learning_rate": 0.0001748260302836644, + "loss": 1.1973, + "step": 1931 + }, + { + "epoch": 2.29, + "learning_rate": 0.00017480097508165896, + "loss": 0.9429, + "step": 1932 + }, + { + "epoch": 2.29, + "learning_rate": 0.0001747759092146661, + "loss": 1.5453, + "step": 1933 + }, + { + "epoch": 2.29, + "learning_rate": 0.0001747508326862597, + "loss": 1.1691, + "step": 1934 + }, + { + "epoch": 2.3, + "learning_rate": 0.00017472574550001508, + "loss": 1.2094, + "step": 1935 + }, + { + "epoch": 2.3, + "learning_rate": 0.00017470064765950918, + "loss": 1.0777, + "step": 1936 + }, + { + "epoch": 2.3, + "learning_rate": 0.00017467553916832035, + "loss": 1.0883, + "step": 1937 + }, + { + "epoch": 2.3, + "learning_rate": 0.00017465042003002857, + "loss": 0.9297, + "step": 1938 + }, + { + "epoch": 2.3, + "learning_rate": 0.00017462529024821522, + "loss": 0.7814, + "step": 1939 + }, + { + "epoch": 2.3, + "learning_rate": 0.00017460014982646334, + "loss": 1.3645, + "step": 1940 + }, + { + "epoch": 2.3, + "learning_rate": 0.0001745749987683573, + "loss": 1.0604, + "step": 1941 + }, + { + "epoch": 2.3, + "learning_rate": 0.00017454983707748317, + "loss": 0.9416, + "step": 1942 + }, + { + "epoch": 2.3, + "learning_rate": 0.00017452466475742845, + "loss": 1.4187, + "step": 1943 + }, + { + "epoch": 2.31, + "learning_rate": 0.00017449948181178215, + "loss": 1.1619, + "step": 1944 + }, + { + "epoch": 2.31, + "learning_rate": 0.00017447428824413482, + "loss": 1.1381, + "step": 1945 + }, + { + "epoch": 2.31, + "learning_rate": 0.00017444908405807845, + "loss": 1.2304, + "step": 1946 + }, + { + "epoch": 2.31, + "learning_rate": 0.0001744238692572067, + "loss": 1.2149, + "step": 1947 + }, + { + "epoch": 2.31, + "learning_rate": 0.00017439864384511463, + "loss": 0.8172, + "step": 1948 + }, + { + "epoch": 2.31, + "learning_rate": 0.00017437340782539877, + "loss": 1.0783, + "step": 1949 + }, + { + "epoch": 2.31, + "learning_rate": 0.00017434816120165728, + "loss": 1.0661, + "step": 1950 + }, + { + "epoch": 2.31, + "learning_rate": 0.00017432290397748982, + "loss": 1.1959, + "step": 1951 + }, + { + "epoch": 2.32, + "learning_rate": 0.0001742976361564974, + "loss": 1.0581, + "step": 1952 + }, + { + "epoch": 2.32, + "learning_rate": 0.00017427235774228274, + "loss": 0.8948, + "step": 1953 + }, + { + "epoch": 2.32, + "learning_rate": 0.00017424706873845, + "loss": 1.2565, + "step": 1954 + }, + { + "epoch": 2.32, + "learning_rate": 0.00017422176914860476, + "loss": 0.9237, + "step": 1955 + }, + { + "epoch": 2.32, + "learning_rate": 0.00017419645897635432, + "loss": 1.219, + "step": 1956 + }, + { + "epoch": 2.32, + "learning_rate": 0.00017417113822530727, + "loss": 1.4606, + "step": 1957 + }, + { + "epoch": 2.32, + "learning_rate": 0.00017414580689907377, + "loss": 0.714, + "step": 1958 + }, + { + "epoch": 2.32, + "learning_rate": 0.0001741204650012656, + "loss": 1.2223, + "step": 1959 + }, + { + "epoch": 2.33, + "learning_rate": 0.00017409511253549593, + "loss": 0.9828, + "step": 1960 + }, + { + "epoch": 2.33, + "learning_rate": 0.00017406974950537942, + "loss": 0.9954, + "step": 1961 + }, + { + "epoch": 2.33, + "learning_rate": 0.00017404437591453235, + "loss": 1.0307, + "step": 1962 + }, + { + "epoch": 2.33, + "learning_rate": 0.0001740189917665724, + "loss": 0.9331, + "step": 1963 + }, + { + "epoch": 2.33, + "learning_rate": 0.0001739935970651188, + "loss": 1.3517, + "step": 1964 + }, + { + "epoch": 2.33, + "learning_rate": 0.00017396819181379232, + "loss": 1.2024, + "step": 1965 + }, + { + "epoch": 2.33, + "learning_rate": 0.0001739427760162151, + "loss": 0.9696, + "step": 1966 + }, + { + "epoch": 2.33, + "learning_rate": 0.00017391734967601102, + "loss": 1.1559, + "step": 1967 + }, + { + "epoch": 2.33, + "learning_rate": 0.0001738919127968052, + "loss": 1.3104, + "step": 1968 + }, + { + "epoch": 2.34, + "learning_rate": 0.00017386646538222443, + "loss": 0.9073, + "step": 1969 + }, + { + "epoch": 2.34, + "learning_rate": 0.00017384100743589697, + "loss": 1.0539, + "step": 1970 + }, + { + "epoch": 2.34, + "learning_rate": 0.00017381553896145255, + "loss": 0.9873, + "step": 1971 + }, + { + "epoch": 2.34, + "learning_rate": 0.0001737900599625224, + "loss": 0.9466, + "step": 1972 + }, + { + "epoch": 2.34, + "learning_rate": 0.0001737645704427393, + "loss": 1.0639, + "step": 1973 + }, + { + "epoch": 2.34, + "learning_rate": 0.0001737390704057375, + "loss": 0.5843, + "step": 1974 + }, + { + "epoch": 2.34, + "learning_rate": 0.00017371355985515275, + "loss": 1.1318, + "step": 1975 + }, + { + "epoch": 2.34, + "learning_rate": 0.00017368803879462227, + "loss": 1.0116, + "step": 1976 + }, + { + "epoch": 2.35, + "learning_rate": 0.0001736625072277848, + "loss": 0.8845, + "step": 1977 + }, + { + "epoch": 2.35, + "learning_rate": 0.00017363696515828062, + "loss": 0.8081, + "step": 1978 + }, + { + "epoch": 2.35, + "learning_rate": 0.00017361141258975148, + "loss": 0.8795, + "step": 1979 + }, + { + "epoch": 2.35, + "learning_rate": 0.0001735858495258406, + "loss": 0.9725, + "step": 1980 + }, + { + "epoch": 2.35, + "learning_rate": 0.0001735602759701927, + "loss": 1.0164, + "step": 1981 + }, + { + "epoch": 2.35, + "learning_rate": 0.00017353469192645405, + "loss": 1.2937, + "step": 1982 + }, + { + "epoch": 2.35, + "learning_rate": 0.0001735090973982723, + "loss": 1.0842, + "step": 1983 + }, + { + "epoch": 2.35, + "learning_rate": 0.00017348349238929678, + "loss": 1.0043, + "step": 1984 + }, + { + "epoch": 2.36, + "learning_rate": 0.00017345787690317815, + "loss": 1.1302, + "step": 1985 + }, + { + "epoch": 2.36, + "learning_rate": 0.00017343225094356855, + "loss": 1.195, + "step": 1986 + }, + { + "epoch": 2.36, + "learning_rate": 0.00017340661451412183, + "loss": 1.1449, + "step": 1987 + }, + { + "epoch": 2.36, + "learning_rate": 0.00017338096761849309, + "loss": 1.2244, + "step": 1988 + }, + { + "epoch": 2.36, + "learning_rate": 0.00017335531026033897, + "loss": 0.9273, + "step": 1989 + }, + { + "epoch": 2.36, + "learning_rate": 0.00017332964244331776, + "loss": 1.0448, + "step": 1990 + }, + { + "epoch": 2.36, + "learning_rate": 0.00017330396417108908, + "loss": 1.0074, + "step": 1991 + }, + { + "epoch": 2.36, + "learning_rate": 0.00017327827544731412, + "loss": 0.9284, + "step": 1992 + }, + { + "epoch": 2.36, + "learning_rate": 0.0001732525762756555, + "loss": 1.0307, + "step": 1993 + }, + { + "epoch": 2.37, + "learning_rate": 0.00017322686665977737, + "loss": 1.1526, + "step": 1994 + }, + { + "epoch": 2.37, + "learning_rate": 0.00017320114660334535, + "loss": 0.819, + "step": 1995 + }, + { + "epoch": 2.37, + "learning_rate": 0.00017317541611002656, + "loss": 1.1029, + "step": 1996 + }, + { + "epoch": 2.37, + "learning_rate": 0.00017314967518348962, + "loss": 1.2471, + "step": 1997 + }, + { + "epoch": 2.37, + "learning_rate": 0.00017312392382740462, + "loss": 1.0156, + "step": 1998 + }, + { + "epoch": 2.37, + "learning_rate": 0.00017309816204544317, + "loss": 1.1843, + "step": 1999 + }, + { + "epoch": 2.37, + "learning_rate": 0.00017307238984127832, + "loss": 1.1588, + "step": 2000 + }, + { + "epoch": 2.37, + "learning_rate": 0.00017304660721858457, + "loss": 1.0157, + "step": 2001 + }, + { + "epoch": 2.38, + "learning_rate": 0.000173020814181038, + "loss": 1.0563, + "step": 2002 + }, + { + "epoch": 2.38, + "learning_rate": 0.00017299501073231622, + "loss": 1.1883, + "step": 2003 + }, + { + "epoch": 2.38, + "learning_rate": 0.00017296919687609808, + "loss": 0.9404, + "step": 2004 + }, + { + "epoch": 2.38, + "learning_rate": 0.00017294337261606417, + "loss": 1.2495, + "step": 2005 + }, + { + "epoch": 2.38, + "learning_rate": 0.00017291753795589643, + "loss": 1.0074, + "step": 2006 + }, + { + "epoch": 2.38, + "learning_rate": 0.00017289169289927837, + "loss": 1.1411, + "step": 2007 + }, + { + "epoch": 2.38, + "learning_rate": 0.00017286583744989488, + "loss": 0.9942, + "step": 2008 + }, + { + "epoch": 2.38, + "learning_rate": 0.00017283997161143239, + "loss": 0.952, + "step": 2009 + }, + { + "epoch": 2.39, + "learning_rate": 0.00017281409538757883, + "loss": 1.2966, + "step": 2010 + }, + { + "epoch": 2.39, + "learning_rate": 0.00017278820878202357, + "loss": 1.0836, + "step": 2011 + }, + { + "epoch": 2.39, + "learning_rate": 0.0001727623117984575, + "loss": 1.0984, + "step": 2012 + }, + { + "epoch": 2.39, + "learning_rate": 0.0001727364044405729, + "loss": 0.8822, + "step": 2013 + }, + { + "epoch": 2.39, + "learning_rate": 0.00017271048671206366, + "loss": 1.2014, + "step": 2014 + }, + { + "epoch": 2.39, + "learning_rate": 0.00017268455861662503, + "loss": 1.1779, + "step": 2015 + }, + { + "epoch": 2.39, + "learning_rate": 0.00017265862015795384, + "loss": 0.9966, + "step": 2016 + }, + { + "epoch": 2.39, + "learning_rate": 0.00017263267133974832, + "loss": 0.9536, + "step": 2017 + }, + { + "epoch": 2.39, + "learning_rate": 0.00017260671216570822, + "loss": 0.811, + "step": 2018 + }, + { + "epoch": 2.4, + "learning_rate": 0.00017258074263953472, + "loss": 0.8241, + "step": 2019 + }, + { + "epoch": 2.4, + "learning_rate": 0.00017255476276493056, + "loss": 1.1263, + "step": 2020 + }, + { + "epoch": 2.4, + "learning_rate": 0.00017252877254559986, + "loss": 0.995, + "step": 2021 + }, + { + "epoch": 2.4, + "learning_rate": 0.0001725027719852483, + "loss": 1.1481, + "step": 2022 + }, + { + "epoch": 2.4, + "learning_rate": 0.0001724767610875829, + "loss": 1.129, + "step": 2023 + }, + { + "epoch": 2.4, + "learning_rate": 0.00017245073985631238, + "loss": 0.5928, + "step": 2024 + }, + { + "epoch": 2.4, + "learning_rate": 0.00017242470829514672, + "loss": 0.8326, + "step": 2025 + }, + { + "epoch": 2.4, + "learning_rate": 0.00017239866640779745, + "loss": 1.1092, + "step": 2026 + }, + { + "epoch": 2.41, + "learning_rate": 0.00017237261419797756, + "loss": 1.5015, + "step": 2027 + }, + { + "epoch": 2.41, + "learning_rate": 0.0001723465516694016, + "loss": 0.9775, + "step": 2028 + }, + { + "epoch": 2.41, + "learning_rate": 0.00017232047882578548, + "loss": 0.9348, + "step": 2029 + }, + { + "epoch": 2.41, + "learning_rate": 0.0001722943956708466, + "loss": 0.6199, + "step": 2030 + }, + { + "epoch": 2.41, + "learning_rate": 0.00017226830220830384, + "loss": 1.1485, + "step": 2031 + }, + { + "epoch": 2.41, + "learning_rate": 0.00017224219844187764, + "loss": 1.1195, + "step": 2032 + }, + { + "epoch": 2.41, + "learning_rate": 0.00017221608437528973, + "loss": 1.0528, + "step": 2033 + }, + { + "epoch": 2.41, + "learning_rate": 0.00017218996001226345, + "loss": 1.1058, + "step": 2034 + }, + { + "epoch": 2.42, + "learning_rate": 0.00017216382535652355, + "loss": 1.1451, + "step": 2035 + }, + { + "epoch": 2.42, + "learning_rate": 0.0001721376804117963, + "loss": 1.2251, + "step": 2036 + }, + { + "epoch": 2.42, + "learning_rate": 0.00017211152518180936, + "loss": 1.0708, + "step": 2037 + }, + { + "epoch": 2.42, + "learning_rate": 0.00017208535967029188, + "loss": 1.0746, + "step": 2038 + }, + { + "epoch": 2.42, + "learning_rate": 0.00017205918388097456, + "loss": 1.3262, + "step": 2039 + }, + { + "epoch": 2.42, + "learning_rate": 0.00017203299781758943, + "loss": 0.7619, + "step": 2040 + }, + { + "epoch": 2.42, + "learning_rate": 0.00017200680148387007, + "loss": 1.01, + "step": 2041 + }, + { + "epoch": 2.42, + "learning_rate": 0.0001719805948835515, + "loss": 1.1651, + "step": 2042 + }, + { + "epoch": 2.42, + "learning_rate": 0.00017195437802037026, + "loss": 1.4671, + "step": 2043 + }, + { + "epoch": 2.43, + "learning_rate": 0.00017192815089806424, + "loss": 0.9857, + "step": 2044 + }, + { + "epoch": 2.43, + "learning_rate": 0.0001719019135203729, + "loss": 1.2613, + "step": 2045 + }, + { + "epoch": 2.43, + "learning_rate": 0.00017187566589103704, + "loss": 1.4386, + "step": 2046 + }, + { + "epoch": 2.43, + "learning_rate": 0.0001718494080137991, + "loss": 1.0965, + "step": 2047 + }, + { + "epoch": 2.43, + "learning_rate": 0.00017182313989240285, + "loss": 0.752, + "step": 2048 + }, + { + "epoch": 2.43, + "learning_rate": 0.00017179686153059352, + "loss": 0.9126, + "step": 2049 + }, + { + "epoch": 2.43, + "learning_rate": 0.00017177057293211784, + "loss": 1.5075, + "step": 2050 + }, + { + "epoch": 2.43, + "learning_rate": 0.000171744274100724, + "loss": 1.0407, + "step": 2051 + }, + { + "epoch": 2.44, + "learning_rate": 0.00017171796504016166, + "loss": 0.8263, + "step": 2052 + }, + { + "epoch": 2.44, + "learning_rate": 0.0001716916457541819, + "loss": 0.9453, + "step": 2053 + }, + { + "epoch": 2.44, + "learning_rate": 0.00017166531624653722, + "loss": 0.9777, + "step": 2054 + }, + { + "epoch": 2.44, + "learning_rate": 0.00017163897652098172, + "loss": 1.2129, + "step": 2055 + }, + { + "epoch": 2.44, + "learning_rate": 0.00017161262658127086, + "loss": 1.3642, + "step": 2056 + }, + { + "epoch": 2.44, + "learning_rate": 0.00017158626643116152, + "loss": 0.6798, + "step": 2057 + }, + { + "epoch": 2.44, + "learning_rate": 0.00017155989607441213, + "loss": 0.874, + "step": 2058 + }, + { + "epoch": 2.44, + "learning_rate": 0.00017153351551478247, + "loss": 1.0636, + "step": 2059 + }, + { + "epoch": 2.45, + "learning_rate": 0.0001715071247560339, + "loss": 1.0563, + "step": 2060 + }, + { + "epoch": 2.45, + "learning_rate": 0.0001714807238019291, + "loss": 1.1984, + "step": 2061 + }, + { + "epoch": 2.45, + "learning_rate": 0.00017145431265623234, + "loss": 0.9444, + "step": 2062 + }, + { + "epoch": 2.45, + "learning_rate": 0.0001714278913227092, + "loss": 0.7809, + "step": 2063 + }, + { + "epoch": 2.45, + "learning_rate": 0.00017140145980512684, + "loss": 1.649, + "step": 2064 + }, + { + "epoch": 2.45, + "learning_rate": 0.0001713750181072538, + "loss": 1.0956, + "step": 2065 + }, + { + "epoch": 2.45, + "learning_rate": 0.0001713485662328601, + "loss": 1.2845, + "step": 2066 + }, + { + "epoch": 2.45, + "learning_rate": 0.00017132210418571714, + "loss": 1.0484, + "step": 2067 + }, + { + "epoch": 2.45, + "learning_rate": 0.00017129563196959793, + "loss": 1.0291, + "step": 2068 + }, + { + "epoch": 2.46, + "learning_rate": 0.00017126914958827679, + "loss": 1.1226, + "step": 2069 + }, + { + "epoch": 2.46, + "learning_rate": 0.0001712426570455295, + "loss": 1.0119, + "step": 2070 + }, + { + "epoch": 2.46, + "learning_rate": 0.00017121615434513332, + "loss": 1.1663, + "step": 2071 + }, + { + "epoch": 2.46, + "learning_rate": 0.000171189641490867, + "loss": 1.1353, + "step": 2072 + }, + { + "epoch": 2.46, + "learning_rate": 0.00017116311848651064, + "loss": 1.0761, + "step": 2073 + }, + { + "epoch": 2.46, + "learning_rate": 0.00017113658533584594, + "loss": 1.1978, + "step": 2074 + }, + { + "epoch": 2.46, + "learning_rate": 0.00017111004204265582, + "loss": 1.3881, + "step": 2075 + }, + { + "epoch": 2.46, + "learning_rate": 0.00017108348861072484, + "loss": 1.3945, + "step": 2076 + }, + { + "epoch": 2.47, + "learning_rate": 0.00017105692504383897, + "loss": 1.3796, + "step": 2077 + }, + { + "epoch": 2.47, + "learning_rate": 0.00017103035134578555, + "loss": 1.1721, + "step": 2078 + }, + { + "epoch": 2.47, + "learning_rate": 0.0001710037675203534, + "loss": 1.0061, + "step": 2079 + }, + { + "epoch": 2.47, + "learning_rate": 0.00017097717357133284, + "loss": 1.2456, + "step": 2080 + }, + { + "epoch": 2.47, + "learning_rate": 0.00017095056950251555, + "loss": 0.788, + "step": 2081 + }, + { + "epoch": 2.47, + "learning_rate": 0.0001709239553176947, + "loss": 1.16, + "step": 2082 + }, + { + "epoch": 2.47, + "learning_rate": 0.0001708973310206649, + "loss": 1.0498, + "step": 2083 + }, + { + "epoch": 2.47, + "learning_rate": 0.00017087069661522218, + "loss": 0.8993, + "step": 2084 + }, + { + "epoch": 2.48, + "learning_rate": 0.00017084405210516406, + "loss": 1.2088, + "step": 2085 + }, + { + "epoch": 2.48, + "learning_rate": 0.0001708173974942894, + "loss": 1.0897, + "step": 2086 + }, + { + "epoch": 2.48, + "learning_rate": 0.00017079073278639863, + "loss": 1.2718, + "step": 2087 + }, + { + "epoch": 2.48, + "learning_rate": 0.00017076405798529355, + "loss": 1.2325, + "step": 2088 + }, + { + "epoch": 2.48, + "learning_rate": 0.00017073737309477736, + "loss": 1.0555, + "step": 2089 + }, + { + "epoch": 2.48, + "learning_rate": 0.00017071067811865476, + "loss": 1.1428, + "step": 2090 + }, + { + "epoch": 2.48, + "eval_loss": 2.3191208839416504, + "eval_runtime": 284.1375, + "eval_samples_per_second": 0.725, + "eval_steps_per_second": 0.725, + "step": 2090 + }, + { + "epoch": 2.48, + "learning_rate": 0.0001706839730607319, + "loss": 1.0908, + "step": 2091 + }, + { + "epoch": 2.48, + "learning_rate": 0.0001706572579248163, + "loss": 1.2092, + "step": 2092 + }, + { + "epoch": 2.48, + "learning_rate": 0.000170630532714717, + "loss": 1.1735, + "step": 2093 + }, + { + "epoch": 2.49, + "learning_rate": 0.0001706037974342444, + "loss": 1.2716, + "step": 2094 + }, + { + "epoch": 2.49, + "learning_rate": 0.00017057705208721035, + "loss": 1.0095, + "step": 2095 + }, + { + "epoch": 2.49, + "learning_rate": 0.0001705502966774282, + "loss": 1.3059, + "step": 2096 + }, + { + "epoch": 2.49, + "learning_rate": 0.00017052353120871266, + "loss": 0.8269, + "step": 2097 + }, + { + "epoch": 2.49, + "learning_rate": 0.0001704967556848799, + "loss": 1.0615, + "step": 2098 + }, + { + "epoch": 2.49, + "learning_rate": 0.00017046997010974755, + "loss": 1.2709, + "step": 2099 + }, + { + "epoch": 2.49, + "learning_rate": 0.00017044317448713461, + "loss": 1.1633, + "step": 2100 + }, + { + "epoch": 2.49, + "learning_rate": 0.00017041636882086158, + "loss": 0.9273, + "step": 2101 + }, + { + "epoch": 2.5, + "learning_rate": 0.00017038955311475038, + "loss": 1.3117, + "step": 2102 + }, + { + "epoch": 2.5, + "learning_rate": 0.0001703627273726243, + "loss": 0.8883, + "step": 2103 + }, + { + "epoch": 2.5, + "learning_rate": 0.00017033589159830815, + "loss": 1.1371, + "step": 2104 + }, + { + "epoch": 2.5, + "learning_rate": 0.00017030904579562806, + "loss": 1.5402, + "step": 2105 + }, + { + "epoch": 2.5, + "learning_rate": 0.00017028218996841172, + "loss": 0.9156, + "step": 2106 + }, + { + "epoch": 2.5, + "learning_rate": 0.00017025532412048817, + "loss": 1.0962, + "step": 2107 + }, + { + "epoch": 2.5, + "learning_rate": 0.0001702284482556879, + "loss": 0.9402, + "step": 2108 + }, + { + "epoch": 2.5, + "learning_rate": 0.00017020156237784279, + "loss": 0.8146, + "step": 2109 + }, + { + "epoch": 2.51, + "learning_rate": 0.0001701746664907862, + "loss": 1.1718, + "step": 2110 + }, + { + "epoch": 2.51, + "learning_rate": 0.00017014776059835288, + "loss": 1.0618, + "step": 2111 + }, + { + "epoch": 2.51, + "learning_rate": 0.00017012084470437907, + "loss": 1.4796, + "step": 2112 + }, + { + "epoch": 2.51, + "learning_rate": 0.00017009391881270237, + "loss": 0.8402, + "step": 2113 + }, + { + "epoch": 2.51, + "learning_rate": 0.00017006698292716178, + "loss": 1.1641, + "step": 2114 + }, + { + "epoch": 2.51, + "learning_rate": 0.0001700400370515978, + "loss": 1.241, + "step": 2115 + }, + { + "epoch": 2.51, + "learning_rate": 0.00017001308118985237, + "loss": 0.8683, + "step": 2116 + }, + { + "epoch": 2.51, + "learning_rate": 0.00016998611534576873, + "loss": 1.2697, + "step": 2117 + }, + { + "epoch": 2.52, + "learning_rate": 0.00016995913952319168, + "loss": 0.9233, + "step": 2118 + }, + { + "epoch": 2.52, + "learning_rate": 0.00016993215372596737, + "loss": 1.2472, + "step": 2119 + }, + { + "epoch": 2.52, + "learning_rate": 0.00016990515795794334, + "loss": 1.2541, + "step": 2120 + }, + { + "epoch": 2.52, + "learning_rate": 0.00016987815222296865, + "loss": 1.0016, + "step": 2121 + }, + { + "epoch": 2.52, + "learning_rate": 0.00016985113652489374, + "loss": 1.0678, + "step": 2122 + }, + { + "epoch": 2.52, + "learning_rate": 0.00016982411086757037, + "loss": 1.6066, + "step": 2123 + }, + { + "epoch": 2.52, + "learning_rate": 0.00016979707525485192, + "loss": 1.229, + "step": 2124 + }, + { + "epoch": 2.52, + "learning_rate": 0.00016977002969059302, + "loss": 0.752, + "step": 2125 + }, + { + "epoch": 2.52, + "learning_rate": 0.00016974297417864977, + "loss": 0.8752, + "step": 2126 + }, + { + "epoch": 2.53, + "learning_rate": 0.0001697159087228797, + "loss": 0.8896, + "step": 2127 + }, + { + "epoch": 2.53, + "learning_rate": 0.00016968883332714186, + "loss": 0.9657, + "step": 2128 + }, + { + "epoch": 2.53, + "learning_rate": 0.0001696617479952964, + "loss": 1.3657, + "step": 2129 + }, + { + "epoch": 2.53, + "learning_rate": 0.0001696346527312053, + "loss": 0.9876, + "step": 2130 + }, + { + "epoch": 2.53, + "learning_rate": 0.00016960754753873162, + "loss": 1.0165, + "step": 2131 + }, + { + "epoch": 2.53, + "learning_rate": 0.00016958043242174003, + "loss": 1.625, + "step": 2132 + }, + { + "epoch": 2.53, + "learning_rate": 0.00016955330738409655, + "loss": 1.5502, + "step": 2133 + }, + { + "epoch": 2.53, + "learning_rate": 0.00016952617242966864, + "loss": 1.0793, + "step": 2134 + }, + { + "epoch": 2.54, + "learning_rate": 0.00016949902756232507, + "loss": 1.4425, + "step": 2135 + }, + { + "epoch": 2.54, + "learning_rate": 0.00016947187278593622, + "loss": 1.3124, + "step": 2136 + }, + { + "epoch": 2.54, + "learning_rate": 0.00016944470810437365, + "loss": 0.927, + "step": 2137 + }, + { + "epoch": 2.54, + "learning_rate": 0.00016941753352151055, + "loss": 1.1911, + "step": 2138 + }, + { + "epoch": 2.54, + "learning_rate": 0.00016939034904122138, + "loss": 1.0768, + "step": 2139 + }, + { + "epoch": 2.54, + "learning_rate": 0.00016936315466738205, + "loss": 1.1277, + "step": 2140 + }, + { + "epoch": 2.54, + "learning_rate": 0.00016933595040386984, + "loss": 0.812, + "step": 2141 + }, + { + "epoch": 2.54, + "learning_rate": 0.0001693087362545636, + "loss": 0.8299, + "step": 2142 + }, + { + "epoch": 2.55, + "learning_rate": 0.00016928151222334338, + "loss": 1.1125, + "step": 2143 + }, + { + "epoch": 2.55, + "learning_rate": 0.00016925427831409077, + "loss": 1.1835, + "step": 2144 + }, + { + "epoch": 2.55, + "learning_rate": 0.00016922703453068873, + "loss": 1.2007, + "step": 2145 + }, + { + "epoch": 2.55, + "learning_rate": 0.00016919978087702163, + "loss": 0.8524, + "step": 2146 + }, + { + "epoch": 2.55, + "learning_rate": 0.00016917251735697523, + "loss": 0.9497, + "step": 2147 + }, + { + "epoch": 2.55, + "learning_rate": 0.00016914524397443673, + "loss": 1.1004, + "step": 2148 + }, + { + "epoch": 2.55, + "learning_rate": 0.00016911796073329466, + "loss": 0.8347, + "step": 2149 + }, + { + "epoch": 2.55, + "learning_rate": 0.00016909066763743912, + "loss": 0.9492, + "step": 2150 + }, + { + "epoch": 2.55, + "learning_rate": 0.00016906336469076148, + "loss": 1.1406, + "step": 2151 + }, + { + "epoch": 2.56, + "learning_rate": 0.00016903605189715447, + "loss": 1.0137, + "step": 2152 + }, + { + "epoch": 2.56, + "learning_rate": 0.0001690087292605124, + "loss": 1.0624, + "step": 2153 + }, + { + "epoch": 2.56, + "learning_rate": 0.00016898139678473076, + "loss": 1.1767, + "step": 2154 + }, + { + "epoch": 2.56, + "learning_rate": 0.0001689540544737067, + "loss": 1.4184, + "step": 2155 + }, + { + "epoch": 2.56, + "learning_rate": 0.00016892670233133856, + "loss": 0.957, + "step": 2156 + }, + { + "epoch": 2.56, + "learning_rate": 0.00016889934036152618, + "loss": 1.0399, + "step": 2157 + }, + { + "epoch": 2.56, + "learning_rate": 0.00016887196856817073, + "loss": 1.2009, + "step": 2158 + }, + { + "epoch": 2.56, + "learning_rate": 0.00016884458695517495, + "loss": 1.3977, + "step": 2159 + }, + { + "epoch": 2.57, + "learning_rate": 0.00016881719552644273, + "loss": 1.1328, + "step": 2160 + }, + { + "epoch": 2.57, + "learning_rate": 0.00016878979428587955, + "loss": 1.5007, + "step": 2161 + }, + { + "epoch": 2.57, + "learning_rate": 0.00016876238323739221, + "loss": 1.1248, + "step": 2162 + }, + { + "epoch": 2.57, + "learning_rate": 0.00016873496238488899, + "loss": 1.0358, + "step": 2163 + }, + { + "epoch": 2.57, + "learning_rate": 0.00016870753173227945, + "loss": 1.2961, + "step": 2164 + }, + { + "epoch": 2.57, + "learning_rate": 0.00016868009128347459, + "loss": 0.9435, + "step": 2165 + }, + { + "epoch": 2.57, + "learning_rate": 0.00016865264104238683, + "loss": 0.9642, + "step": 2166 + }, + { + "epoch": 2.57, + "learning_rate": 0.00016862518101293, + "loss": 1.0169, + "step": 2167 + }, + { + "epoch": 2.58, + "learning_rate": 0.00016859771119901929, + "loss": 1.0904, + "step": 2168 + }, + { + "epoch": 2.58, + "learning_rate": 0.0001685702316045713, + "loss": 1.3178, + "step": 2169 + }, + { + "epoch": 2.58, + "learning_rate": 0.00016854274223350397, + "loss": 1.1395, + "step": 2170 + }, + { + "epoch": 2.58, + "learning_rate": 0.00016851524308973678, + "loss": 1.1207, + "step": 2171 + }, + { + "epoch": 2.58, + "learning_rate": 0.00016848773417719044, + "loss": 1.3544, + "step": 2172 + }, + { + "epoch": 2.58, + "learning_rate": 0.00016846021549978715, + "loss": 1.3503, + "step": 2173 + }, + { + "epoch": 2.58, + "learning_rate": 0.00016843268706145042, + "loss": 1.4276, + "step": 2174 + }, + { + "epoch": 2.58, + "learning_rate": 0.00016840514886610529, + "loss": 0.9888, + "step": 2175 + }, + { + "epoch": 2.58, + "learning_rate": 0.00016837760091767802, + "loss": 1.0913, + "step": 2176 + }, + { + "epoch": 2.59, + "learning_rate": 0.0001683500432200964, + "loss": 1.4781, + "step": 2177 + }, + { + "epoch": 2.59, + "learning_rate": 0.00016832247577728955, + "loss": 1.2657, + "step": 2178 + }, + { + "epoch": 2.59, + "learning_rate": 0.000168294898593188, + "loss": 0.9206, + "step": 2179 + }, + { + "epoch": 2.59, + "learning_rate": 0.0001682673116717236, + "loss": 0.9218, + "step": 2180 + }, + { + "epoch": 2.59, + "learning_rate": 0.0001682397150168297, + "loss": 1.2719, + "step": 2181 + }, + { + "epoch": 2.59, + "learning_rate": 0.00016821210863244096, + "loss": 0.984, + "step": 2182 + }, + { + "epoch": 2.59, + "learning_rate": 0.00016818449252249345, + "loss": 1.4641, + "step": 2183 + }, + { + "epoch": 2.59, + "learning_rate": 0.0001681568666909246, + "loss": 1.2571, + "step": 2184 + }, + { + "epoch": 2.6, + "learning_rate": 0.00016812923114167328, + "loss": 1.2025, + "step": 2185 + }, + { + "epoch": 2.6, + "learning_rate": 0.00016810158587867973, + "loss": 0.9621, + "step": 2186 + }, + { + "epoch": 2.6, + "learning_rate": 0.00016807393090588553, + "loss": 1.0016, + "step": 2187 + }, + { + "epoch": 2.6, + "learning_rate": 0.00016804626622723368, + "loss": 1.031, + "step": 2188 + }, + { + "epoch": 2.6, + "learning_rate": 0.00016801859184666857, + "loss": 0.7573, + "step": 2189 + }, + { + "epoch": 2.6, + "learning_rate": 0.00016799090776813597, + "loss": 1.2694, + "step": 2190 + }, + { + "epoch": 2.6, + "learning_rate": 0.000167963213995583, + "loss": 1.196, + "step": 2191 + }, + { + "epoch": 2.6, + "learning_rate": 0.00016793551053295822, + "loss": 0.8754, + "step": 2192 + }, + { + "epoch": 2.61, + "learning_rate": 0.00016790779738421152, + "loss": 1.1743, + "step": 2193 + }, + { + "epoch": 2.61, + "learning_rate": 0.0001678800745532942, + "loss": 1.0921, + "step": 2194 + }, + { + "epoch": 2.61, + "learning_rate": 0.00016785234204415888, + "loss": 0.8778, + "step": 2195 + }, + { + "epoch": 2.61, + "learning_rate": 0.0001678245998607597, + "loss": 1.0528, + "step": 2196 + }, + { + "epoch": 2.61, + "learning_rate": 0.00016779684800705203, + "loss": 1.0255, + "step": 2197 + }, + { + "epoch": 2.61, + "learning_rate": 0.0001677690864869927, + "loss": 0.6344, + "step": 2198 + }, + { + "epoch": 2.61, + "learning_rate": 0.00016774131530453992, + "loss": 0.8691, + "step": 2199 + }, + { + "epoch": 2.61, + "learning_rate": 0.00016771353446365318, + "loss": 1.2061, + "step": 2200 + }, + { + "epoch": 2.61, + "learning_rate": 0.0001676857439682935, + "loss": 1.1759, + "step": 2201 + }, + { + "epoch": 2.62, + "learning_rate": 0.00016765794382242314, + "loss": 1.1118, + "step": 2202 + }, + { + "epoch": 2.62, + "learning_rate": 0.00016763013403000584, + "loss": 1.3005, + "step": 2203 + }, + { + "epoch": 2.62, + "learning_rate": 0.00016760231459500666, + "loss": 1.0415, + "step": 2204 + }, + { + "epoch": 2.62, + "learning_rate": 0.000167574485521392, + "loss": 0.824, + "step": 2205 + }, + { + "epoch": 2.62, + "learning_rate": 0.00016754664681312975, + "loss": 0.6682, + "step": 2206 + }, + { + "epoch": 2.62, + "learning_rate": 0.00016751879847418905, + "loss": 1.9204, + "step": 2207 + }, + { + "epoch": 2.62, + "learning_rate": 0.00016749094050854047, + "loss": 0.9931, + "step": 2208 + }, + { + "epoch": 2.62, + "learning_rate": 0.00016746307292015602, + "loss": 0.8898, + "step": 2209 + }, + { + "epoch": 2.63, + "learning_rate": 0.00016743519571300888, + "loss": 1.3337, + "step": 2210 + }, + { + "epoch": 2.63, + "learning_rate": 0.00016740730889107383, + "loss": 1.2947, + "step": 2211 + }, + { + "epoch": 2.63, + "learning_rate": 0.0001673794124583269, + "loss": 1.1882, + "step": 2212 + }, + { + "epoch": 2.63, + "learning_rate": 0.0001673515064187455, + "loss": 1.5408, + "step": 2213 + }, + { + "epoch": 2.63, + "learning_rate": 0.00016732359077630847, + "loss": 1.1273, + "step": 2214 + }, + { + "epoch": 2.63, + "learning_rate": 0.0001672956655349959, + "loss": 0.8954, + "step": 2215 + }, + { + "epoch": 2.63, + "learning_rate": 0.00016726773069878934, + "loss": 1.1747, + "step": 2216 + }, + { + "epoch": 2.63, + "learning_rate": 0.00016723978627167173, + "loss": 0.807, + "step": 2217 + }, + { + "epoch": 2.64, + "learning_rate": 0.00016721183225762727, + "loss": 1.2512, + "step": 2218 + }, + { + "epoch": 2.64, + "learning_rate": 0.00016718386866064166, + "loss": 1.0796, + "step": 2219 + }, + { + "epoch": 2.64, + "learning_rate": 0.00016715589548470185, + "loss": 1.0905, + "step": 2220 + }, + { + "epoch": 2.64, + "learning_rate": 0.00016712791273379622, + "loss": 1.3779, + "step": 2221 + }, + { + "epoch": 2.64, + "learning_rate": 0.00016709992041191452, + "loss": 1.2015, + "step": 2222 + }, + { + "epoch": 2.64, + "learning_rate": 0.00016707191852304782, + "loss": 0.8612, + "step": 2223 + }, + { + "epoch": 2.64, + "learning_rate": 0.0001670439070711886, + "loss": 1.1819, + "step": 2224 + }, + { + "epoch": 2.64, + "learning_rate": 0.00016701588606033064, + "loss": 1.2715, + "step": 2225 + }, + { + "epoch": 2.64, + "learning_rate": 0.0001669878554944692, + "loss": 1.3681, + "step": 2226 + }, + { + "epoch": 2.65, + "learning_rate": 0.00016695981537760072, + "loss": 1.1254, + "step": 2227 + }, + { + "epoch": 2.65, + "learning_rate": 0.0001669317657137232, + "loss": 0.9476, + "step": 2228 + }, + { + "epoch": 2.65, + "learning_rate": 0.0001669037065068359, + "loss": 1.235, + "step": 2229 + }, + { + "epoch": 2.65, + "learning_rate": 0.00016687563776093941, + "loss": 0.7356, + "step": 2230 + }, + { + "epoch": 2.65, + "learning_rate": 0.00016684755948003573, + "loss": 0.7901, + "step": 2231 + }, + { + "epoch": 2.65, + "learning_rate": 0.00016681947166812824, + "loss": 1.317, + "step": 2232 + }, + { + "epoch": 2.65, + "learning_rate": 0.00016679137432922163, + "loss": 0.8832, + "step": 2233 + }, + { + "epoch": 2.65, + "learning_rate": 0.00016676326746732195, + "loss": 1.2776, + "step": 2234 + }, + { + "epoch": 2.66, + "learning_rate": 0.00016673515108643665, + "loss": 1.0435, + "step": 2235 + }, + { + "epoch": 2.66, + "learning_rate": 0.0001667070251905745, + "loss": 1.0957, + "step": 2236 + }, + { + "epoch": 2.66, + "learning_rate": 0.00016667888978374567, + "loss": 1.0862, + "step": 2237 + }, + { + "epoch": 2.66, + "learning_rate": 0.00016665074486996165, + "loss": 1.1112, + "step": 2238 + }, + { + "epoch": 2.66, + "learning_rate": 0.0001666225904532352, + "loss": 1.3633, + "step": 2239 + }, + { + "epoch": 2.66, + "learning_rate": 0.00016659442653758064, + "loss": 1.444, + "step": 2240 + }, + { + "epoch": 2.66, + "learning_rate": 0.00016656625312701348, + "loss": 0.8248, + "step": 2241 + }, + { + "epoch": 2.66, + "learning_rate": 0.00016653807022555067, + "loss": 1.2522, + "step": 2242 + }, + { + "epoch": 2.67, + "learning_rate": 0.0001665098778372104, + "loss": 1.2107, + "step": 2243 + }, + { + "epoch": 2.67, + "learning_rate": 0.0001664816759660124, + "loss": 1.0813, + "step": 2244 + }, + { + "epoch": 2.67, + "learning_rate": 0.00016645346461597753, + "loss": 1.1136, + "step": 2245 + }, + { + "epoch": 2.67, + "learning_rate": 0.00016642524379112817, + "loss": 1.1003, + "step": 2246 + }, + { + "epoch": 2.67, + "learning_rate": 0.000166397013495488, + "loss": 1.0635, + "step": 2247 + }, + { + "epoch": 2.67, + "learning_rate": 0.00016636877373308204, + "loss": 1.0575, + "step": 2248 + }, + { + "epoch": 2.67, + "learning_rate": 0.00016634052450793663, + "loss": 0.7693, + "step": 2249 + }, + { + "epoch": 2.67, + "learning_rate": 0.00016631226582407952, + "loss": 1.5965, + "step": 2250 + }, + { + "epoch": 2.67, + "learning_rate": 0.0001662839976855398, + "loss": 1.0989, + "step": 2251 + }, + { + "epoch": 2.68, + "learning_rate": 0.00016625572009634787, + "loss": 0.9198, + "step": 2252 + }, + { + "epoch": 2.68, + "learning_rate": 0.00016622743306053548, + "loss": 1.0896, + "step": 2253 + }, + { + "epoch": 2.68, + "learning_rate": 0.00016619913658213578, + "loss": 1.015, + "step": 2254 + }, + { + "epoch": 2.68, + "learning_rate": 0.0001661708306651832, + "loss": 0.8572, + "step": 2255 + }, + { + "epoch": 2.68, + "learning_rate": 0.00016614251531371353, + "loss": 1.1508, + "step": 2256 + }, + { + "epoch": 2.68, + "learning_rate": 0.000166114190531764, + "loss": 1.1852, + "step": 2257 + }, + { + "epoch": 2.68, + "learning_rate": 0.00016608585632337306, + "loss": 0.932, + "step": 2258 + }, + { + "epoch": 2.68, + "learning_rate": 0.00016605751269258053, + "loss": 1.2542, + "step": 2259 + }, + { + "epoch": 2.69, + "learning_rate": 0.00016602915964342757, + "loss": 0.943, + "step": 2260 + }, + { + "epoch": 2.69, + "learning_rate": 0.00016600079717995678, + "loss": 1.2438, + "step": 2261 + }, + { + "epoch": 2.69, + "learning_rate": 0.00016597242530621203, + "loss": 0.9928, + "step": 2262 + }, + { + "epoch": 2.69, + "learning_rate": 0.00016594404402623845, + "loss": 0.9516, + "step": 2263 + }, + { + "epoch": 2.69, + "learning_rate": 0.00016591565334408265, + "loss": 1.1689, + "step": 2264 + }, + { + "epoch": 2.69, + "learning_rate": 0.0001658872532637925, + "loss": 1.3155, + "step": 2265 + }, + { + "epoch": 2.69, + "learning_rate": 0.00016585884378941725, + "loss": 1.1596, + "step": 2266 + }, + { + "epoch": 2.69, + "learning_rate": 0.00016583042492500746, + "loss": 0.9956, + "step": 2267 + }, + { + "epoch": 2.7, + "learning_rate": 0.00016580199667461508, + "loss": 0.9289, + "step": 2268 + }, + { + "epoch": 2.7, + "learning_rate": 0.00016577355904229325, + "loss": 1.3225, + "step": 2269 + }, + { + "epoch": 2.7, + "learning_rate": 0.00016574511203209667, + "loss": 1.0384, + "step": 2270 + }, + { + "epoch": 2.7, + "learning_rate": 0.0001657166556480812, + "loss": 0.697, + "step": 2271 + }, + { + "epoch": 2.7, + "learning_rate": 0.00016568818989430416, + "loss": 0.7702, + "step": 2272 + }, + { + "epoch": 2.7, + "learning_rate": 0.00016565971477482404, + "loss": 1.1041, + "step": 2273 + }, + { + "epoch": 2.7, + "learning_rate": 0.00016563123029370093, + "loss": 1.0462, + "step": 2274 + }, + { + "epoch": 2.7, + "learning_rate": 0.0001656027364549959, + "loss": 1.0797, + "step": 2275 + }, + { + "epoch": 2.7, + "learning_rate": 0.0001655742332627717, + "loss": 1.3301, + "step": 2276 + }, + { + "epoch": 2.71, + "learning_rate": 0.0001655457207210922, + "loss": 1.0467, + "step": 2277 + }, + { + "epoch": 2.71, + "learning_rate": 0.00016551719883402271, + "loss": 0.9432, + "step": 2278 + }, + { + "epoch": 2.71, + "learning_rate": 0.00016548866760562978, + "loss": 1.1808, + "step": 2279 + }, + { + "epoch": 2.71, + "learning_rate": 0.00016546012703998138, + "loss": 1.1094, + "step": 2280 + }, + { + "epoch": 2.71, + "learning_rate": 0.00016543157714114673, + "loss": 1.3914, + "step": 2281 + }, + { + "epoch": 2.71, + "learning_rate": 0.00016540301791319645, + "loss": 1.0402, + "step": 2282 + }, + { + "epoch": 2.71, + "learning_rate": 0.00016537444936020246, + "loss": 0.9815, + "step": 2283 + }, + { + "epoch": 2.71, + "learning_rate": 0.000165345871486238, + "loss": 0.9722, + "step": 2284 + }, + { + "epoch": 2.72, + "learning_rate": 0.00016531728429537766, + "loss": 0.919, + "step": 2285 + }, + { + "epoch": 2.72, + "learning_rate": 0.00016528868779169738, + "loss": 1.1242, + "step": 2286 + }, + { + "epoch": 2.72, + "learning_rate": 0.00016526008197927436, + "loss": 1.1794, + "step": 2287 + }, + { + "epoch": 2.72, + "learning_rate": 0.00016523146686218718, + "loss": 1.434, + "step": 2288 + }, + { + "epoch": 2.72, + "learning_rate": 0.00016520284244451574, + "loss": 0.8463, + "step": 2289 + }, + { + "epoch": 2.72, + "learning_rate": 0.00016517420873034123, + "loss": 1.1736, + "step": 2290 + }, + { + "epoch": 2.72, + "learning_rate": 0.0001651455657237462, + "loss": 1.0431, + "step": 2291 + }, + { + "epoch": 2.72, + "learning_rate": 0.00016511691342881453, + "loss": 1.2796, + "step": 2292 + }, + { + "epoch": 2.73, + "learning_rate": 0.0001650882518496314, + "loss": 1.0578, + "step": 2293 + }, + { + "epoch": 2.73, + "learning_rate": 0.00016505958099028334, + "loss": 1.3914, + "step": 2294 + }, + { + "epoch": 2.73, + "learning_rate": 0.0001650309008548582, + "loss": 1.0046, + "step": 2295 + }, + { + "epoch": 2.73, + "learning_rate": 0.0001650022114474451, + "loss": 1.0246, + "step": 2296 + }, + { + "epoch": 2.73, + "learning_rate": 0.00016497351277213458, + "loss": 1.2789, + "step": 2297 + }, + { + "epoch": 2.73, + "learning_rate": 0.00016494480483301836, + "loss": 1.0036, + "step": 2298 + }, + { + "epoch": 2.73, + "learning_rate": 0.00016491608763418968, + "loss": 0.886, + "step": 2299 + }, + { + "epoch": 2.73, + "eval_loss": 2.3017475605010986, + "eval_runtime": 283.8846, + "eval_samples_per_second": 0.726, + "eval_steps_per_second": 0.726, + "step": 2299 + }, + { + "epoch": 2.73, + "learning_rate": 0.0001648873611797429, + "loss": 1.3953, + "step": 2300 + }, + { + "epoch": 2.73, + "learning_rate": 0.0001648586254737738, + "loss": 0.6972, + "step": 2301 + }, + { + "epoch": 2.74, + "learning_rate": 0.00016482988052037947, + "loss": 1.2311, + "step": 2302 + }, + { + "epoch": 2.74, + "learning_rate": 0.00016480112632365833, + "loss": 1.327, + "step": 2303 + }, + { + "epoch": 2.74, + "learning_rate": 0.0001647723628877101, + "loss": 0.9534, + "step": 2304 + }, + { + "epoch": 2.74, + "learning_rate": 0.0001647435902166358, + "loss": 0.9164, + "step": 2305 + }, + { + "epoch": 2.74, + "learning_rate": 0.0001647148083145378, + "loss": 1.1038, + "step": 2306 + }, + { + "epoch": 2.74, + "learning_rate": 0.00016468601718551976, + "loss": 1.0444, + "step": 2307 + }, + { + "epoch": 2.74, + "learning_rate": 0.00016465721683368666, + "loss": 1.2635, + "step": 2308 + }, + { + "epoch": 2.74, + "learning_rate": 0.00016462840726314486, + "loss": 1.1647, + "step": 2309 + }, + { + "epoch": 2.75, + "learning_rate": 0.00016459958847800187, + "loss": 1.3617, + "step": 2310 + }, + { + "epoch": 2.75, + "learning_rate": 0.00016457076048236675, + "loss": 1.2355, + "step": 2311 + }, + { + "epoch": 2.75, + "learning_rate": 0.00016454192328034962, + "loss": 0.9989, + "step": 2312 + }, + { + "epoch": 2.75, + "learning_rate": 0.00016451307687606213, + "loss": 1.1218, + "step": 2313 + }, + { + "epoch": 2.75, + "learning_rate": 0.00016448422127361706, + "loss": 0.8967, + "step": 2314 + }, + { + "epoch": 2.75, + "learning_rate": 0.0001644553564771287, + "loss": 1.159, + "step": 2315 + }, + { + "epoch": 2.75, + "learning_rate": 0.0001644264824907124, + "loss": 1.5901, + "step": 2316 + }, + { + "epoch": 2.75, + "learning_rate": 0.0001643975993184851, + "loss": 0.979, + "step": 2317 + }, + { + "epoch": 2.76, + "learning_rate": 0.00016436870696456482, + "loss": 0.8561, + "step": 2318 + }, + { + "epoch": 2.76, + "learning_rate": 0.00016433980543307107, + "loss": 0.9485, + "step": 2319 + }, + { + "epoch": 2.76, + "learning_rate": 0.00016431089472812444, + "loss": 0.7736, + "step": 2320 + }, + { + "epoch": 2.76, + "learning_rate": 0.00016428197485384707, + "loss": 1.2546, + "step": 2321 + }, + { + "epoch": 2.76, + "learning_rate": 0.00016425304581436226, + "loss": 0.9534, + "step": 2322 + }, + { + "epoch": 2.76, + "learning_rate": 0.0001642241076137947, + "loss": 0.8182, + "step": 2323 + }, + { + "epoch": 2.76, + "learning_rate": 0.0001641951602562703, + "loss": 1.1107, + "step": 2324 + }, + { + "epoch": 2.76, + "learning_rate": 0.0001641662037459164, + "loss": 1.0628, + "step": 2325 + }, + { + "epoch": 2.76, + "learning_rate": 0.00016413723808686147, + "loss": 1.6261, + "step": 2326 + }, + { + "epoch": 2.77, + "learning_rate": 0.0001641082632832354, + "loss": 1.0286, + "step": 2327 + }, + { + "epoch": 2.77, + "learning_rate": 0.0001640792793391694, + "loss": 0.5732, + "step": 2328 + }, + { + "epoch": 2.77, + "learning_rate": 0.00016405028625879594, + "loss": 1.0932, + "step": 2329 + }, + { + "epoch": 2.77, + "learning_rate": 0.00016402128404624882, + "loss": 1.2585, + "step": 2330 + }, + { + "epoch": 2.77, + "learning_rate": 0.00016399227270566308, + "loss": 0.8788, + "step": 2331 + }, + { + "epoch": 2.77, + "learning_rate": 0.0001639632522411751, + "loss": 1.1397, + "step": 2332 + }, + { + "epoch": 2.77, + "learning_rate": 0.00016393422265692262, + "loss": 1.3517, + "step": 2333 + }, + { + "epoch": 2.77, + "learning_rate": 0.0001639051839570446, + "loss": 1.1346, + "step": 2334 + }, + { + "epoch": 2.78, + "learning_rate": 0.00016387613614568126, + "loss": 0.9594, + "step": 2335 + }, + { + "epoch": 2.78, + "learning_rate": 0.0001638470792269743, + "loss": 1.0674, + "step": 2336 + }, + { + "epoch": 2.78, + "learning_rate": 0.00016381801320506653, + "loss": 0.9123, + "step": 2337 + }, + { + "epoch": 2.78, + "learning_rate": 0.00016378893808410215, + "loss": 1.1909, + "step": 2338 + }, + { + "epoch": 2.78, + "learning_rate": 0.00016375985386822664, + "loss": 1.0474, + "step": 2339 + }, + { + "epoch": 2.78, + "learning_rate": 0.00016373076056158675, + "loss": 0.8844, + "step": 2340 + }, + { + "epoch": 2.78, + "learning_rate": 0.0001637016581683306, + "loss": 1.1606, + "step": 2341 + }, + { + "epoch": 2.78, + "learning_rate": 0.00016367254669260749, + "loss": 0.6206, + "step": 2342 + }, + { + "epoch": 2.79, + "learning_rate": 0.00016364342613856816, + "loss": 0.7225, + "step": 2343 + }, + { + "epoch": 2.79, + "learning_rate": 0.00016361429651036446, + "loss": 1.1782, + "step": 2344 + }, + { + "epoch": 2.79, + "learning_rate": 0.00016358515781214977, + "loss": 1.0911, + "step": 2345 + }, + { + "epoch": 2.79, + "learning_rate": 0.00016355601004807856, + "loss": 1.2727, + "step": 2346 + }, + { + "epoch": 2.79, + "learning_rate": 0.00016352685322230663, + "loss": 0.8294, + "step": 2347 + }, + { + "epoch": 2.79, + "learning_rate": 0.00016349768733899117, + "loss": 1.1661, + "step": 2348 + }, + { + "epoch": 2.79, + "learning_rate": 0.00016346851240229057, + "loss": 0.8267, + "step": 2349 + }, + { + "epoch": 2.79, + "learning_rate": 0.00016343932841636456, + "loss": 1.2873, + "step": 2350 + }, + { + "epoch": 2.79, + "learning_rate": 0.00016341013538537412, + "loss": 1.2459, + "step": 2351 + }, + { + "epoch": 2.8, + "learning_rate": 0.00016338093331348156, + "loss": 0.8939, + "step": 2352 + }, + { + "epoch": 2.8, + "learning_rate": 0.00016335172220485042, + "loss": 1.024, + "step": 2353 + }, + { + "epoch": 2.8, + "learning_rate": 0.0001633225020636456, + "loss": 0.9981, + "step": 2354 + }, + { + "epoch": 2.8, + "learning_rate": 0.00016329327289403325, + "loss": 1.331, + "step": 2355 + }, + { + "epoch": 2.8, + "learning_rate": 0.00016326403470018084, + "loss": 0.7446, + "step": 2356 + }, + { + "epoch": 2.8, + "learning_rate": 0.00016323478748625703, + "loss": 1.1931, + "step": 2357 + }, + { + "epoch": 2.8, + "learning_rate": 0.00016320553125643187, + "loss": 1.1287, + "step": 2358 + }, + { + "epoch": 2.8, + "learning_rate": 0.00016317626601487667, + "loss": 1.109, + "step": 2359 + }, + { + "epoch": 2.81, + "learning_rate": 0.00016314699176576402, + "loss": 0.9946, + "step": 2360 + }, + { + "epoch": 2.81, + "learning_rate": 0.00016311770851326778, + "loss": 0.8347, + "step": 2361 + }, + { + "epoch": 2.81, + "learning_rate": 0.00016308841626156307, + "loss": 0.9214, + "step": 2362 + }, + { + "epoch": 2.81, + "learning_rate": 0.0001630591150148264, + "loss": 0.5907, + "step": 2363 + }, + { + "epoch": 2.81, + "learning_rate": 0.00016302980477723539, + "loss": 1.2412, + "step": 2364 + }, + { + "epoch": 2.81, + "learning_rate": 0.00016300048555296915, + "loss": 1.2908, + "step": 2365 + }, + { + "epoch": 2.81, + "learning_rate": 0.00016297115734620788, + "loss": 1.2345, + "step": 2366 + }, + { + "epoch": 2.81, + "learning_rate": 0.00016294182016113315, + "loss": 1.0418, + "step": 2367 + }, + { + "epoch": 2.82, + "learning_rate": 0.00016291247400192785, + "loss": 1.1457, + "step": 2368 + }, + { + "epoch": 2.82, + "learning_rate": 0.00016288311887277608, + "loss": 1.2529, + "step": 2369 + }, + { + "epoch": 2.82, + "learning_rate": 0.00016285375477786322, + "loss": 1.0013, + "step": 2370 + }, + { + "epoch": 2.82, + "learning_rate": 0.00016282438172137597, + "loss": 0.943, + "step": 2371 + }, + { + "epoch": 2.82, + "learning_rate": 0.00016279499970750226, + "loss": 0.7009, + "step": 2372 + }, + { + "epoch": 2.82, + "learning_rate": 0.00016276560874043137, + "loss": 0.9408, + "step": 2373 + }, + { + "epoch": 2.82, + "learning_rate": 0.0001627362088243538, + "loss": 1.1788, + "step": 2374 + }, + { + "epoch": 2.82, + "learning_rate": 0.0001627067999634613, + "loss": 0.8106, + "step": 2375 + }, + { + "epoch": 2.82, + "learning_rate": 0.00016267738216194696, + "loss": 1.1695, + "step": 2376 + }, + { + "epoch": 2.83, + "learning_rate": 0.0001626479554240051, + "loss": 0.9209, + "step": 2377 + }, + { + "epoch": 2.83, + "learning_rate": 0.00016261851975383137, + "loss": 0.9911, + "step": 2378 + }, + { + "epoch": 2.83, + "learning_rate": 0.00016258907515562262, + "loss": 1.3819, + "step": 2379 + }, + { + "epoch": 2.83, + "learning_rate": 0.000162559621633577, + "loss": 0.8926, + "step": 2380 + }, + { + "epoch": 2.83, + "learning_rate": 0.000162530159191894, + "loss": 1.0896, + "step": 2381 + }, + { + "epoch": 2.83, + "learning_rate": 0.00016250068783477424, + "loss": 0.8403, + "step": 2382 + }, + { + "epoch": 2.83, + "learning_rate": 0.00016247120756641972, + "loss": 0.7976, + "step": 2383 + }, + { + "epoch": 2.83, + "learning_rate": 0.0001624417183910337, + "loss": 0.8881, + "step": 2384 + }, + { + "epoch": 2.84, + "learning_rate": 0.0001624122203128207, + "loss": 0.8302, + "step": 2385 + }, + { + "epoch": 2.84, + "learning_rate": 0.0001623827133359865, + "loss": 1.3312, + "step": 2386 + }, + { + "epoch": 2.84, + "learning_rate": 0.0001623531974647381, + "loss": 1.003, + "step": 2387 + }, + { + "epoch": 2.84, + "learning_rate": 0.0001623236727032839, + "loss": 0.9487, + "step": 2388 + }, + { + "epoch": 2.84, + "learning_rate": 0.00016229413905583342, + "loss": 1.2259, + "step": 2389 + }, + { + "epoch": 2.84, + "learning_rate": 0.00016226459652659753, + "loss": 0.9327, + "step": 2390 + }, + { + "epoch": 2.84, + "learning_rate": 0.00016223504511978838, + "loss": 0.7336, + "step": 2391 + }, + { + "epoch": 2.84, + "learning_rate": 0.00016220548483961934, + "loss": 1.0454, + "step": 2392 + }, + { + "epoch": 2.85, + "learning_rate": 0.00016217591569030505, + "loss": 1.3371, + "step": 2393 + }, + { + "epoch": 2.85, + "learning_rate": 0.00016214633767606143, + "loss": 1.0814, + "step": 2394 + }, + { + "epoch": 2.85, + "learning_rate": 0.00016211675080110566, + "loss": 1.2274, + "step": 2395 + }, + { + "epoch": 2.85, + "learning_rate": 0.0001620871550696562, + "loss": 0.9775, + "step": 2396 + }, + { + "epoch": 2.85, + "learning_rate": 0.00016205755048593273, + "loss": 1.0323, + "step": 2397 + }, + { + "epoch": 2.85, + "learning_rate": 0.00016202793705415622, + "loss": 1.5101, + "step": 2398 + }, + { + "epoch": 2.85, + "learning_rate": 0.00016199831477854893, + "loss": 0.8118, + "step": 2399 + }, + { + "epoch": 2.85, + "learning_rate": 0.0001619686836633343, + "loss": 1.0233, + "step": 2400 + }, + { + "epoch": 2.85, + "learning_rate": 0.00016193904371273715, + "loss": 0.9038, + "step": 2401 + }, + { + "epoch": 2.86, + "learning_rate": 0.00016190939493098344, + "loss": 0.875, + "step": 2402 + }, + { + "epoch": 2.86, + "learning_rate": 0.00016187973732230038, + "loss": 1.3274, + "step": 2403 + }, + { + "epoch": 2.86, + "learning_rate": 0.00016185007089091665, + "loss": 1.081, + "step": 2404 + }, + { + "epoch": 2.86, + "learning_rate": 0.00016182039564106192, + "loss": 1.0841, + "step": 2405 + }, + { + "epoch": 2.86, + "learning_rate": 0.00016179071157696728, + "loss": 1.3208, + "step": 2406 + }, + { + "epoch": 2.86, + "learning_rate": 0.000161761018702865, + "loss": 1.1854, + "step": 2407 + }, + { + "epoch": 2.86, + "learning_rate": 0.0001617313170229887, + "loss": 1.0651, + "step": 2408 + }, + { + "epoch": 2.86, + "learning_rate": 0.0001617016065415731, + "loss": 1.1398, + "step": 2409 + }, + { + "epoch": 2.87, + "learning_rate": 0.00016167188726285434, + "loss": 1.2778, + "step": 2410 + }, + { + "epoch": 2.87, + "learning_rate": 0.00016164215919106968, + "loss": 1.6758, + "step": 2411 + }, + { + "epoch": 2.87, + "learning_rate": 0.0001616124223304577, + "loss": 0.8341, + "step": 2412 + }, + { + "epoch": 2.87, + "learning_rate": 0.00016158267668525832, + "loss": 0.9513, + "step": 2413 + }, + { + "epoch": 2.87, + "learning_rate": 0.00016155292225971253, + "loss": 0.9617, + "step": 2414 + }, + { + "epoch": 2.87, + "learning_rate": 0.00016152315905806268, + "loss": 0.8664, + "step": 2415 + }, + { + "epoch": 2.87, + "learning_rate": 0.00016149338708455237, + "loss": 1.331, + "step": 2416 + }, + { + "epoch": 2.87, + "learning_rate": 0.00016146360634342643, + "loss": 1.4212, + "step": 2417 + }, + { + "epoch": 2.88, + "learning_rate": 0.00016143381683893094, + "loss": 1.2126, + "step": 2418 + }, + { + "epoch": 2.88, + "learning_rate": 0.00016140401857531322, + "loss": 0.934, + "step": 2419 + }, + { + "epoch": 2.88, + "learning_rate": 0.00016137421155682183, + "loss": 1.2417, + "step": 2420 + }, + { + "epoch": 2.88, + "learning_rate": 0.0001613443957877067, + "loss": 1.637, + "step": 2421 + }, + { + "epoch": 2.88, + "learning_rate": 0.00016131457127221881, + "loss": 1.1456, + "step": 2422 + }, + { + "epoch": 2.88, + "learning_rate": 0.00016128473801461053, + "loss": 0.9402, + "step": 2423 + }, + { + "epoch": 2.88, + "learning_rate": 0.0001612548960191354, + "loss": 1.3797, + "step": 2424 + }, + { + "epoch": 2.88, + "learning_rate": 0.0001612250452900483, + "loss": 0.8191, + "step": 2425 + }, + { + "epoch": 2.88, + "learning_rate": 0.0001611951858316052, + "loss": 1.1725, + "step": 2426 + }, + { + "epoch": 2.89, + "learning_rate": 0.00016116531764806346, + "loss": 1.5701, + "step": 2427 + }, + { + "epoch": 2.89, + "learning_rate": 0.00016113544074368164, + "loss": 1.0591, + "step": 2428 + }, + { + "epoch": 2.89, + "learning_rate": 0.00016110555512271953, + "loss": 1.03, + "step": 2429 + }, + { + "epoch": 2.89, + "learning_rate": 0.0001610756607894382, + "loss": 1.1829, + "step": 2430 + }, + { + "epoch": 2.89, + "learning_rate": 0.00016104575774809985, + "loss": 1.2222, + "step": 2431 + }, + { + "epoch": 2.89, + "learning_rate": 0.00016101584600296804, + "loss": 1.1537, + "step": 2432 + }, + { + "epoch": 2.89, + "learning_rate": 0.00016098592555830753, + "loss": 1.0973, + "step": 2433 + }, + { + "epoch": 2.89, + "learning_rate": 0.00016095599641838436, + "loss": 1.0793, + "step": 2434 + }, + { + "epoch": 2.9, + "learning_rate": 0.00016092605858746573, + "loss": 1.3484, + "step": 2435 + }, + { + "epoch": 2.9, + "learning_rate": 0.0001608961120698201, + "loss": 1.1689, + "step": 2436 + }, + { + "epoch": 2.9, + "learning_rate": 0.00016086615686971726, + "loss": 1.0864, + "step": 2437 + }, + { + "epoch": 2.9, + "learning_rate": 0.00016083619299142813, + "loss": 1.2451, + "step": 2438 + }, + { + "epoch": 2.9, + "learning_rate": 0.0001608062204392249, + "loss": 0.9593, + "step": 2439 + }, + { + "epoch": 2.9, + "learning_rate": 0.00016077623921738102, + "loss": 0.9816, + "step": 2440 + }, + { + "epoch": 2.9, + "learning_rate": 0.00016074624933017112, + "loss": 1.0845, + "step": 2441 + }, + { + "epoch": 2.9, + "learning_rate": 0.00016071625078187114, + "loss": 0.9875, + "step": 2442 + }, + { + "epoch": 2.91, + "learning_rate": 0.0001606862435767582, + "loss": 0.8758, + "step": 2443 + }, + { + "epoch": 2.91, + "learning_rate": 0.00016065622771911067, + "loss": 0.9499, + "step": 2444 + }, + { + "epoch": 2.91, + "learning_rate": 0.00016062620321320823, + "loss": 1.1133, + "step": 2445 + }, + { + "epoch": 2.91, + "learning_rate": 0.0001605961700633316, + "loss": 0.7228, + "step": 2446 + }, + { + "epoch": 2.91, + "learning_rate": 0.00016056612827376293, + "loss": 1.2297, + "step": 2447 + }, + { + "epoch": 2.91, + "learning_rate": 0.0001605360778487855, + "loss": 1.0251, + "step": 2448 + }, + { + "epoch": 2.91, + "learning_rate": 0.00016050601879268386, + "loss": 0.8097, + "step": 2449 + }, + { + "epoch": 2.91, + "learning_rate": 0.00016047595110974376, + "loss": 0.9872, + "step": 2450 + }, + { + "epoch": 2.91, + "learning_rate": 0.0001604458748042522, + "loss": 1.1119, + "step": 2451 + }, + { + "epoch": 2.92, + "learning_rate": 0.0001604157898804974, + "loss": 0.8256, + "step": 2452 + }, + { + "epoch": 2.92, + "learning_rate": 0.00016038569634276882, + "loss": 0.9036, + "step": 2453 + }, + { + "epoch": 2.92, + "learning_rate": 0.00016035559419535716, + "loss": 1.1173, + "step": 2454 + }, + { + "epoch": 2.92, + "learning_rate": 0.00016032548344255428, + "loss": 1.3173, + "step": 2455 + }, + { + "epoch": 2.92, + "learning_rate": 0.00016029536408865337, + "loss": 0.717, + "step": 2456 + }, + { + "epoch": 2.92, + "learning_rate": 0.00016026523613794878, + "loss": 0.9806, + "step": 2457 + }, + { + "epoch": 2.92, + "learning_rate": 0.00016023509959473605, + "loss": 1.1509, + "step": 2458 + }, + { + "epoch": 2.92, + "learning_rate": 0.00016020495446331207, + "loss": 1.0454, + "step": 2459 + }, + { + "epoch": 2.93, + "learning_rate": 0.0001601748007479748, + "loss": 1.183, + "step": 2460 + }, + { + "epoch": 2.93, + "learning_rate": 0.0001601446384530236, + "loss": 1.2611, + "step": 2461 + }, + { + "epoch": 2.93, + "learning_rate": 0.00016011446758275888, + "loss": 1.0377, + "step": 2462 + }, + { + "epoch": 2.93, + "learning_rate": 0.00016008428814148236, + "loss": 1.2111, + "step": 2463 + }, + { + "epoch": 2.93, + "learning_rate": 0.00016005410013349698, + "loss": 1.0952, + "step": 2464 + }, + { + "epoch": 2.93, + "learning_rate": 0.00016002390356310685, + "loss": 0.7589, + "step": 2465 + }, + { + "epoch": 2.93, + "learning_rate": 0.00015999369843461742, + "loss": 0.8543, + "step": 2466 + }, + { + "epoch": 2.93, + "learning_rate": 0.00015996348475233525, + "loss": 1.1509, + "step": 2467 + }, + { + "epoch": 2.94, + "learning_rate": 0.0001599332625205681, + "loss": 1.287, + "step": 2468 + }, + { + "epoch": 2.94, + "learning_rate": 0.00015990303174362512, + "loss": 1.0401, + "step": 2469 + }, + { + "epoch": 2.94, + "learning_rate": 0.0001598727924258164, + "loss": 1.0247, + "step": 2470 + }, + { + "epoch": 2.94, + "learning_rate": 0.00015984254457145354, + "loss": 1.1537, + "step": 2471 + }, + { + "epoch": 2.94, + "learning_rate": 0.00015981228818484917, + "loss": 0.9606, + "step": 2472 + }, + { + "epoch": 2.94, + "learning_rate": 0.0001597820232703172, + "loss": 0.8709, + "step": 2473 + }, + { + "epoch": 2.94, + "learning_rate": 0.00015975174983217275, + "loss": 1.2827, + "step": 2474 + }, + { + "epoch": 2.94, + "learning_rate": 0.00015972146787473213, + "loss": 0.8057, + "step": 2475 + }, + { + "epoch": 2.94, + "learning_rate": 0.0001596911774023129, + "loss": 1.0857, + "step": 2476 + }, + { + "epoch": 2.95, + "learning_rate": 0.00015966087841923386, + "loss": 1.1731, + "step": 2477 + }, + { + "epoch": 2.95, + "learning_rate": 0.0001596305709298149, + "loss": 0.8871, + "step": 2478 + }, + { + "epoch": 2.95, + "learning_rate": 0.00015960025493837727, + "loss": 1.0671, + "step": 2479 + }, + { + "epoch": 2.95, + "learning_rate": 0.00015956993044924334, + "loss": 1.3735, + "step": 2480 + }, + { + "epoch": 2.95, + "learning_rate": 0.00015953959746673675, + "loss": 1.4655, + "step": 2481 + }, + { + "epoch": 2.95, + "learning_rate": 0.00015950925599518228, + "loss": 1.3975, + "step": 2482 + }, + { + "epoch": 2.95, + "learning_rate": 0.00015947890603890602, + "loss": 0.9468, + "step": 2483 + }, + { + "epoch": 2.95, + "learning_rate": 0.0001594485476022352, + "loss": 0.9976, + "step": 2484 + }, + { + "epoch": 2.96, + "learning_rate": 0.00015941818068949818, + "loss": 0.6732, + "step": 2485 + }, + { + "epoch": 2.96, + "learning_rate": 0.00015938780530502474, + "loss": 0.9848, + "step": 2486 + }, + { + "epoch": 2.96, + "learning_rate": 0.00015935742145314568, + "loss": 1.2441, + "step": 2487 + }, + { + "epoch": 2.96, + "learning_rate": 0.0001593270291381931, + "loss": 0.9631, + "step": 2488 + }, + { + "epoch": 2.96, + "learning_rate": 0.00015929662836450029, + "loss": 0.8868, + "step": 2489 + }, + { + "epoch": 2.96, + "learning_rate": 0.0001592662191364017, + "loss": 0.9063, + "step": 2490 + }, + { + "epoch": 2.96, + "learning_rate": 0.00015923580145823303, + "loss": 0.6886, + "step": 2491 + }, + { + "epoch": 2.96, + "learning_rate": 0.0001592053753343312, + "loss": 1.0702, + "step": 2492 + }, + { + "epoch": 2.97, + "learning_rate": 0.0001591749407690343, + "loss": 1.3879, + "step": 2493 + }, + { + "epoch": 2.97, + "learning_rate": 0.00015914449776668167, + "loss": 1.1048, + "step": 2494 + }, + { + "epoch": 2.97, + "learning_rate": 0.0001591140463316137, + "loss": 0.9921, + "step": 2495 + }, + { + "epoch": 2.97, + "learning_rate": 0.00015908358646817225, + "loss": 1.3042, + "step": 2496 + }, + { + "epoch": 2.97, + "learning_rate": 0.00015905311818070015, + "loss": 0.8413, + "step": 2497 + }, + { + "epoch": 2.97, + "learning_rate": 0.00015902264147354153, + "loss": 1.5201, + "step": 2498 + }, + { + "epoch": 2.97, + "learning_rate": 0.0001589921563510417, + "loss": 1.0727, + "step": 2499 + }, + { + "epoch": 2.97, + "learning_rate": 0.0001589616628175472, + "loss": 1.0439, + "step": 2500 + }, + { + "epoch": 2.97, + "learning_rate": 0.0001589311608774057, + "loss": 1.2308, + "step": 2501 + }, + { + "epoch": 2.98, + "learning_rate": 0.00015890065053496613, + "loss": 1.1155, + "step": 2502 + }, + { + "epoch": 2.98, + "learning_rate": 0.00015887013179457862, + "loss": 1.3345, + "step": 2503 + }, + { + "epoch": 2.98, + "learning_rate": 0.00015883960466059444, + "loss": 0.9551, + "step": 2504 + }, + { + "epoch": 2.98, + "learning_rate": 0.0001588090691373661, + "loss": 1.0713, + "step": 2505 + }, + { + "epoch": 2.98, + "learning_rate": 0.00015877852522924732, + "loss": 1.299, + "step": 2506 + }, + { + "epoch": 2.98, + "learning_rate": 0.000158747972940593, + "loss": 0.8535, + "step": 2507 + }, + { + "epoch": 2.98, + "learning_rate": 0.0001587174122757592, + "loss": 0.9924, + "step": 2508 + }, + { + "epoch": 2.98, + "eval_loss": 2.328662395477295, + "eval_runtime": 283.7765, + "eval_samples_per_second": 0.726, + "eval_steps_per_second": 0.726, + "step": 2508 + }, + { + "epoch": 2.98, + "learning_rate": 0.0001586868432391032, + "loss": 1.0512, + "step": 2509 + }, + { + "epoch": 2.99, + "learning_rate": 0.00015865626583498355, + "loss": 1.2775, + "step": 2510 + }, + { + "epoch": 2.99, + "learning_rate": 0.00015862568006775983, + "loss": 0.7054, + "step": 2511 + }, + { + "epoch": 2.99, + "learning_rate": 0.00015859508594179294, + "loss": 0.8524, + "step": 2512 + }, + { + "epoch": 2.99, + "learning_rate": 0.00015856448346144496, + "loss": 0.9871, + "step": 2513 + }, + { + "epoch": 2.99, + "learning_rate": 0.00015853387263107909, + "loss": 0.8642, + "step": 2514 + }, + { + "epoch": 2.99, + "learning_rate": 0.00015850325345505975, + "loss": 1.1789, + "step": 2515 + }, + { + "epoch": 2.99, + "learning_rate": 0.00015847262593775266, + "loss": 1.2765, + "step": 2516 + }, + { + "epoch": 2.99, + "learning_rate": 0.00015844199008352458, + "loss": 0.6272, + "step": 2517 + }, + { + "epoch": 3.0, + "learning_rate": 0.00015841134589674352, + "loss": 1.3037, + "step": 2518 + }, + { + "epoch": 3.0, + "learning_rate": 0.00015838069338177863, + "loss": 1.054, + "step": 2519 + }, + { + "epoch": 3.0, + "learning_rate": 0.00015835003254300039, + "loss": 1.1942, + "step": 2520 + }, + { + "epoch": 3.0, + "learning_rate": 0.00015831936338478025, + "loss": 0.8866, + "step": 2521 + }, + { + "epoch": 3.0, + "learning_rate": 0.00015828868591149104, + "loss": 1.1444, + "step": 2522 + }, + { + "epoch": 3.0, + "learning_rate": 0.00015825800012750666, + "loss": 0.8597, + "step": 2523 + }, + { + "epoch": 3.0, + "learning_rate": 0.0001582273060372023, + "loss": 0.7731, + "step": 2524 + }, + { + "epoch": 3.0, + "learning_rate": 0.00015819660364495416, + "loss": 1.1953, + "step": 2525 + }, + { + "epoch": 3.0, + "learning_rate": 0.0001581658929551398, + "loss": 1.3946, + "step": 2526 + }, + { + "epoch": 3.01, + "learning_rate": 0.00015813517397213791, + "loss": 1.0173, + "step": 2527 + }, + { + "epoch": 3.01, + "learning_rate": 0.00015810444670032831, + "loss": 1.1762, + "step": 2528 + }, + { + "epoch": 3.01, + "learning_rate": 0.00015807371114409202, + "loss": 0.7283, + "step": 2529 + }, + { + "epoch": 3.01, + "learning_rate": 0.00015804296730781135, + "loss": 1.1515, + "step": 2530 + }, + { + "epoch": 3.01, + "learning_rate": 0.00015801221519586958, + "loss": 0.9389, + "step": 2531 + }, + { + "epoch": 3.01, + "learning_rate": 0.0001579814548126514, + "loss": 1.1869, + "step": 2532 + }, + { + "epoch": 3.01, + "learning_rate": 0.00015795068616254247, + "loss": 1.2957, + "step": 2533 + }, + { + "epoch": 3.01, + "learning_rate": 0.00015791990924992981, + "loss": 1.0514, + "step": 2534 + }, + { + "epoch": 3.02, + "learning_rate": 0.00015788912407920148, + "loss": 0.6762, + "step": 2535 + }, + { + "epoch": 3.0, + "learning_rate": 0.00015785833065474683, + "loss": 0.4121, + "step": 2536 + }, + { + "epoch": 3.0, + "learning_rate": 0.00015782752898095627, + "loss": 0.4532, + "step": 2537 + }, + { + "epoch": 3.0, + "learning_rate": 0.0001577967190622215, + "loss": 0.4847, + "step": 2538 + }, + { + "epoch": 3.0, + "learning_rate": 0.0001577659009029353, + "loss": 0.8313, + "step": 2539 + }, + { + "epoch": 3.01, + "learning_rate": 0.00015773507450749172, + "loss": 0.5304, + "step": 2540 + }, + { + "epoch": 3.01, + "learning_rate": 0.00015770423988028588, + "loss": 0.6003, + "step": 2541 + }, + { + "epoch": 3.01, + "learning_rate": 0.00015767339702571414, + "loss": 0.3988, + "step": 2542 + }, + { + "epoch": 3.01, + "learning_rate": 0.00015764254594817398, + "loss": 0.6133, + "step": 2543 + }, + { + "epoch": 3.01, + "learning_rate": 0.0001576116866520642, + "loss": 0.4858, + "step": 2544 + }, + { + "epoch": 3.01, + "learning_rate": 0.00015758081914178456, + "loss": 0.3691, + "step": 2545 + }, + { + "epoch": 3.01, + "learning_rate": 0.0001575499434217361, + "loss": 0.5441, + "step": 2546 + }, + { + "epoch": 3.01, + "learning_rate": 0.0001575190594963211, + "loss": 0.4605, + "step": 2547 + }, + { + "epoch": 3.02, + "learning_rate": 0.00015748816736994284, + "loss": 0.3681, + "step": 2548 + }, + { + "epoch": 3.02, + "learning_rate": 0.00015745726704700593, + "loss": 0.4113, + "step": 2549 + }, + { + "epoch": 3.02, + "learning_rate": 0.00015742635853191608, + "loss": 0.5233, + "step": 2550 + }, + { + "epoch": 3.02, + "learning_rate": 0.00015739544182908014, + "loss": 0.356, + "step": 2551 + }, + { + "epoch": 3.02, + "learning_rate": 0.00015736451694290616, + "loss": 0.4105, + "step": 2552 + }, + { + "epoch": 3.02, + "learning_rate": 0.00015733358387780337, + "loss": 0.4451, + "step": 2553 + }, + { + "epoch": 3.02, + "learning_rate": 0.00015730264263818212, + "loss": 0.5023, + "step": 2554 + }, + { + "epoch": 3.02, + "learning_rate": 0.000157271693228454, + "loss": 0.3671, + "step": 2555 + }, + { + "epoch": 3.03, + "learning_rate": 0.0001572407356530317, + "loss": 0.7077, + "step": 2556 + }, + { + "epoch": 3.03, + "learning_rate": 0.00015720976991632913, + "loss": 0.4439, + "step": 2557 + }, + { + "epoch": 3.03, + "learning_rate": 0.00015717879602276122, + "loss": 0.5961, + "step": 2558 + }, + { + "epoch": 3.03, + "learning_rate": 0.0001571478139767443, + "loss": 0.4269, + "step": 2559 + }, + { + "epoch": 3.03, + "learning_rate": 0.00015711682378269565, + "loss": 0.3427, + "step": 2560 + }, + { + "epoch": 3.03, + "learning_rate": 0.00015708582544503386, + "loss": 0.5736, + "step": 2561 + }, + { + "epoch": 3.03, + "learning_rate": 0.00015705481896817854, + "loss": 0.3707, + "step": 2562 + }, + { + "epoch": 3.03, + "learning_rate": 0.0001570238043565506, + "loss": 0.4076, + "step": 2563 + }, + { + "epoch": 3.03, + "learning_rate": 0.000156992781614572, + "loss": 0.6514, + "step": 2564 + }, + { + "epoch": 3.04, + "learning_rate": 0.00015696175074666598, + "loss": 0.4012, + "step": 2565 + }, + { + "epoch": 3.04, + "learning_rate": 0.0001569307117572568, + "loss": 0.3492, + "step": 2566 + }, + { + "epoch": 3.04, + "learning_rate": 0.00015689966465076992, + "loss": 0.4121, + "step": 2567 + }, + { + "epoch": 3.04, + "learning_rate": 0.00015686860943163206, + "loss": 0.5769, + "step": 2568 + }, + { + "epoch": 3.04, + "learning_rate": 0.00015683754610427094, + "loss": 0.4872, + "step": 2569 + }, + { + "epoch": 3.04, + "learning_rate": 0.00015680647467311557, + "loss": 0.5518, + "step": 2570 + }, + { + "epoch": 3.04, + "learning_rate": 0.00015677539514259608, + "loss": 0.411, + "step": 2571 + }, + { + "epoch": 3.04, + "learning_rate": 0.00015674430751714361, + "loss": 0.3443, + "step": 2572 + }, + { + "epoch": 3.05, + "learning_rate": 0.00015671321180119074, + "loss": 0.3706, + "step": 2573 + }, + { + "epoch": 3.05, + "learning_rate": 0.0001566821079991709, + "loss": 0.6168, + "step": 2574 + }, + { + "epoch": 3.05, + "learning_rate": 0.0001566509961155189, + "loss": 0.3726, + "step": 2575 + }, + { + "epoch": 3.05, + "learning_rate": 0.00015661987615467058, + "loss": 0.3976, + "step": 2576 + }, + { + "epoch": 3.05, + "learning_rate": 0.00015658874812106297, + "loss": 0.3697, + "step": 2577 + }, + { + "epoch": 3.05, + "learning_rate": 0.00015655761201913425, + "loss": 0.2759, + "step": 2578 + }, + { + "epoch": 3.05, + "learning_rate": 0.00015652646785332378, + "loss": 0.3572, + "step": 2579 + }, + { + "epoch": 3.05, + "learning_rate": 0.000156495315628072, + "loss": 0.5333, + "step": 2580 + }, + { + "epoch": 3.06, + "learning_rate": 0.00015646415534782056, + "loss": 0.4004, + "step": 2581 + }, + { + "epoch": 3.06, + "learning_rate": 0.0001564329870170122, + "loss": 0.4736, + "step": 2582 + }, + { + "epoch": 3.06, + "learning_rate": 0.00015640181064009088, + "loss": 0.4814, + "step": 2583 + }, + { + "epoch": 3.06, + "learning_rate": 0.00015637062622150168, + "loss": 0.3351, + "step": 2584 + }, + { + "epoch": 3.06, + "learning_rate": 0.00015633943376569081, + "loss": 0.4497, + "step": 2585 + }, + { + "epoch": 3.06, + "learning_rate": 0.00015630823327710558, + "loss": 0.4202, + "step": 2586 + }, + { + "epoch": 3.06, + "learning_rate": 0.00015627702476019457, + "loss": 0.5934, + "step": 2587 + }, + { + "epoch": 3.06, + "learning_rate": 0.0001562458082194074, + "loss": 0.4664, + "step": 2588 + }, + { + "epoch": 3.06, + "learning_rate": 0.00015621458365919487, + "loss": 0.4077, + "step": 2589 + }, + { + "epoch": 3.07, + "learning_rate": 0.00015618335108400893, + "loss": 0.5244, + "step": 2590 + }, + { + "epoch": 3.07, + "learning_rate": 0.00015615211049830268, + "loss": 0.5042, + "step": 2591 + }, + { + "epoch": 3.07, + "learning_rate": 0.00015612086190653027, + "loss": 0.3442, + "step": 2592 + }, + { + "epoch": 3.07, + "learning_rate": 0.00015608960531314717, + "loss": 0.6337, + "step": 2593 + }, + { + "epoch": 3.07, + "learning_rate": 0.00015605834072260984, + "loss": 0.3542, + "step": 2594 + }, + { + "epoch": 3.07, + "learning_rate": 0.0001560270681393759, + "loss": 0.5113, + "step": 2595 + }, + { + "epoch": 3.07, + "learning_rate": 0.0001559957875679042, + "loss": 0.4346, + "step": 2596 + }, + { + "epoch": 3.07, + "learning_rate": 0.00015596449901265463, + "loss": 0.5231, + "step": 2597 + }, + { + "epoch": 3.08, + "learning_rate": 0.00015593320247808822, + "loss": 0.5193, + "step": 2598 + }, + { + "epoch": 3.08, + "learning_rate": 0.0001559018979686673, + "loss": 0.3575, + "step": 2599 + }, + { + "epoch": 3.08, + "learning_rate": 0.00015587058548885505, + "loss": 0.6356, + "step": 2600 + }, + { + "epoch": 3.08, + "learning_rate": 0.00015583926504311605, + "loss": 0.3313, + "step": 2601 + }, + { + "epoch": 3.08, + "learning_rate": 0.00015580793663591585, + "loss": 0.356, + "step": 2602 + }, + { + "epoch": 3.08, + "learning_rate": 0.00015577660027172127, + "loss": 0.5498, + "step": 2603 + }, + { + "epoch": 3.08, + "learning_rate": 0.0001557452559550001, + "loss": 0.3973, + "step": 2604 + }, + { + "epoch": 3.08, + "learning_rate": 0.0001557139036902215, + "loss": 0.4751, + "step": 2605 + }, + { + "epoch": 3.09, + "learning_rate": 0.00015568254348185544, + "loss": 0.4297, + "step": 2606 + }, + { + "epoch": 3.09, + "learning_rate": 0.00015565117533437335, + "loss": 0.4299, + "step": 2607 + }, + { + "epoch": 3.09, + "learning_rate": 0.00015561979925224754, + "loss": 0.4651, + "step": 2608 + }, + { + "epoch": 3.09, + "learning_rate": 0.00015558841523995162, + "loss": 0.474, + "step": 2609 + }, + { + "epoch": 3.09, + "learning_rate": 0.00015555702330196023, + "loss": 0.4143, + "step": 2610 + }, + { + "epoch": 3.09, + "learning_rate": 0.0001555256234427492, + "loss": 0.393, + "step": 2611 + }, + { + "epoch": 3.09, + "learning_rate": 0.00015549421566679546, + "loss": 0.3738, + "step": 2612 + }, + { + "epoch": 3.09, + "learning_rate": 0.00015546279997857704, + "loss": 0.4394, + "step": 2613 + }, + { + "epoch": 3.09, + "learning_rate": 0.0001554313763825732, + "loss": 0.3702, + "step": 2614 + }, + { + "epoch": 3.1, + "learning_rate": 0.00015539994488326418, + "loss": 0.4594, + "step": 2615 + }, + { + "epoch": 3.1, + "learning_rate": 0.00015536850548513147, + "loss": 0.3249, + "step": 2616 + }, + { + "epoch": 3.1, + "learning_rate": 0.00015533705819265764, + "loss": 0.3857, + "step": 2617 + }, + { + "epoch": 3.1, + "learning_rate": 0.0001553056030103264, + "loss": 0.3272, + "step": 2618 + }, + { + "epoch": 3.1, + "learning_rate": 0.00015527413994262257, + "loss": 0.5204, + "step": 2619 + }, + { + "epoch": 3.1, + "learning_rate": 0.00015524266899403206, + "loss": 0.3653, + "step": 2620 + }, + { + "epoch": 3.1, + "learning_rate": 0.000155211190169042, + "loss": 0.4698, + "step": 2621 + }, + { + "epoch": 3.1, + "learning_rate": 0.0001551797034721405, + "loss": 0.5949, + "step": 2622 + }, + { + "epoch": 3.11, + "learning_rate": 0.00015514820890781693, + "loss": 0.4074, + "step": 2623 + }, + { + "epoch": 3.11, + "learning_rate": 0.00015511670648056178, + "loss": 0.3586, + "step": 2624 + }, + { + "epoch": 3.11, + "learning_rate": 0.0001550851961948665, + "loss": 0.6494, + "step": 2625 + }, + { + "epoch": 3.11, + "learning_rate": 0.00015505367805522383, + "loss": 0.4914, + "step": 2626 + }, + { + "epoch": 3.11, + "learning_rate": 0.0001550221520661276, + "loss": 0.4594, + "step": 2627 + }, + { + "epoch": 3.11, + "learning_rate": 0.00015499061823207266, + "loss": 0.4102, + "step": 2628 + }, + { + "epoch": 3.11, + "learning_rate": 0.00015495907655755506, + "loss": 0.4229, + "step": 2629 + }, + { + "epoch": 3.11, + "learning_rate": 0.000154927527047072, + "loss": 0.7218, + "step": 2630 + }, + { + "epoch": 3.12, + "learning_rate": 0.0001548959697051217, + "loss": 0.6929, + "step": 2631 + }, + { + "epoch": 3.12, + "learning_rate": 0.00015486440453620358, + "loss": 0.3628, + "step": 2632 + }, + { + "epoch": 3.12, + "learning_rate": 0.00015483283154481815, + "loss": 0.4433, + "step": 2633 + }, + { + "epoch": 3.12, + "learning_rate": 0.00015480125073546704, + "loss": 0.3912, + "step": 2634 + }, + { + "epoch": 3.12, + "learning_rate": 0.0001547696621126529, + "loss": 0.3682, + "step": 2635 + }, + { + "epoch": 3.12, + "learning_rate": 0.00015473806568087968, + "loss": 0.354, + "step": 2636 + }, + { + "epoch": 3.12, + "learning_rate": 0.0001547064614446523, + "loss": 0.4789, + "step": 2637 + }, + { + "epoch": 3.12, + "learning_rate": 0.0001546748494084768, + "loss": 0.382, + "step": 2638 + }, + { + "epoch": 3.12, + "learning_rate": 0.00015464322957686041, + "loss": 0.4954, + "step": 2639 + }, + { + "epoch": 3.13, + "learning_rate": 0.00015461160195431148, + "loss": 0.3273, + "step": 2640 + }, + { + "epoch": 3.13, + "learning_rate": 0.0001545799665453393, + "loss": 0.3414, + "step": 2641 + }, + { + "epoch": 3.13, + "learning_rate": 0.00015454832335445447, + "loss": 0.5479, + "step": 2642 + }, + { + "epoch": 3.13, + "learning_rate": 0.0001545166723861686, + "loss": 0.4963, + "step": 2643 + }, + { + "epoch": 3.13, + "learning_rate": 0.00015448501364499445, + "loss": 0.5547, + "step": 2644 + }, + { + "epoch": 3.13, + "learning_rate": 0.0001544533471354458, + "loss": 0.4637, + "step": 2645 + }, + { + "epoch": 3.13, + "learning_rate": 0.00015442167286203767, + "loss": 0.4248, + "step": 2646 + }, + { + "epoch": 3.13, + "learning_rate": 0.00015438999082928608, + "loss": 0.4213, + "step": 2647 + }, + { + "epoch": 3.14, + "learning_rate": 0.00015435830104170822, + "loss": 0.3734, + "step": 2648 + }, + { + "epoch": 3.14, + "learning_rate": 0.00015432660350382234, + "loss": 0.4627, + "step": 2649 + }, + { + "epoch": 3.14, + "learning_rate": 0.0001542948982201479, + "loss": 0.3422, + "step": 2650 + }, + { + "epoch": 3.14, + "learning_rate": 0.00015426318519520525, + "loss": 0.4409, + "step": 2651 + }, + { + "epoch": 3.14, + "learning_rate": 0.00015423146443351607, + "loss": 0.3717, + "step": 2652 + }, + { + "epoch": 3.14, + "learning_rate": 0.00015419973593960298, + "loss": 0.4349, + "step": 2653 + }, + { + "epoch": 3.14, + "learning_rate": 0.00015416799971798985, + "loss": 0.5349, + "step": 2654 + }, + { + "epoch": 3.14, + "learning_rate": 0.0001541362557732015, + "loss": 0.4511, + "step": 2655 + }, + { + "epoch": 3.15, + "learning_rate": 0.000154104504109764, + "loss": 0.5997, + "step": 2656 + }, + { + "epoch": 3.15, + "learning_rate": 0.00015407274473220434, + "loss": 0.661, + "step": 2657 + }, + { + "epoch": 3.15, + "learning_rate": 0.00015404097764505083, + "loss": 0.3456, + "step": 2658 + }, + { + "epoch": 3.15, + "learning_rate": 0.00015400920285283268, + "loss": 0.3416, + "step": 2659 + }, + { + "epoch": 3.15, + "learning_rate": 0.00015397742036008034, + "loss": 0.4707, + "step": 2660 + }, + { + "epoch": 3.15, + "learning_rate": 0.00015394563017132526, + "loss": 0.3221, + "step": 2661 + }, + { + "epoch": 3.15, + "learning_rate": 0.00015391383229110007, + "loss": 0.6108, + "step": 2662 + }, + { + "epoch": 3.15, + "learning_rate": 0.00015388202672393834, + "loss": 0.5504, + "step": 2663 + }, + { + "epoch": 3.15, + "learning_rate": 0.00015385021347437498, + "loss": 0.3973, + "step": 2664 + }, + { + "epoch": 3.16, + "learning_rate": 0.00015381839254694583, + "loss": 0.5149, + "step": 2665 + }, + { + "epoch": 3.16, + "learning_rate": 0.00015378656394618787, + "loss": 0.5853, + "step": 2666 + }, + { + "epoch": 3.16, + "learning_rate": 0.0001537547276766391, + "loss": 0.517, + "step": 2667 + }, + { + "epoch": 3.16, + "learning_rate": 0.00015372288374283875, + "loss": 0.5485, + "step": 2668 + }, + { + "epoch": 3.16, + "learning_rate": 0.00015369103214932703, + "loss": 0.4907, + "step": 2669 + }, + { + "epoch": 3.16, + "learning_rate": 0.0001536591729006453, + "loss": 0.3169, + "step": 2670 + }, + { + "epoch": 3.16, + "learning_rate": 0.00015362730600133596, + "loss": 0.5431, + "step": 2671 + }, + { + "epoch": 3.16, + "learning_rate": 0.00015359543145594258, + "loss": 0.2586, + "step": 2672 + }, + { + "epoch": 3.17, + "learning_rate": 0.00015356354926900979, + "loss": 0.5251, + "step": 2673 + }, + { + "epoch": 3.17, + "learning_rate": 0.00015353165944508325, + "loss": 0.4104, + "step": 2674 + }, + { + "epoch": 3.17, + "learning_rate": 0.00015349976198870973, + "loss": 0.4825, + "step": 2675 + }, + { + "epoch": 3.17, + "learning_rate": 0.00015346785690443718, + "loss": 0.5274, + "step": 2676 + }, + { + "epoch": 3.17, + "learning_rate": 0.0001534359441968145, + "loss": 0.3878, + "step": 2677 + }, + { + "epoch": 3.17, + "learning_rate": 0.0001534040238703918, + "loss": 0.5132, + "step": 2678 + }, + { + "epoch": 3.17, + "learning_rate": 0.00015337209592972023, + "loss": 0.5145, + "step": 2679 + }, + { + "epoch": 3.17, + "learning_rate": 0.00015334016037935196, + "loss": 0.5548, + "step": 2680 + }, + { + "epoch": 3.18, + "learning_rate": 0.00015330821722384037, + "loss": 0.7494, + "step": 2681 + }, + { + "epoch": 3.18, + "learning_rate": 0.00015327626646773976, + "loss": 0.5569, + "step": 2682 + }, + { + "epoch": 3.18, + "learning_rate": 0.00015324430811560573, + "loss": 0.2622, + "step": 2683 + }, + { + "epoch": 3.18, + "learning_rate": 0.0001532123421719948, + "loss": 0.3749, + "step": 2684 + }, + { + "epoch": 3.18, + "learning_rate": 0.00015318036864146457, + "loss": 0.3959, + "step": 2685 + }, + { + "epoch": 3.18, + "learning_rate": 0.0001531483875285738, + "loss": 0.5243, + "step": 2686 + }, + { + "epoch": 3.18, + "learning_rate": 0.0001531163988378823, + "loss": 0.3115, + "step": 2687 + }, + { + "epoch": 3.18, + "learning_rate": 0.00015308440257395093, + "loss": 0.2385, + "step": 2688 + }, + { + "epoch": 3.18, + "learning_rate": 0.00015305239874134174, + "loss": 0.4431, + "step": 2689 + }, + { + "epoch": 3.19, + "learning_rate": 0.0001530203873446177, + "loss": 0.378, + "step": 2690 + }, + { + "epoch": 3.19, + "learning_rate": 0.00015298836838834298, + "loss": 0.4521, + "step": 2691 + }, + { + "epoch": 3.19, + "learning_rate": 0.00015295634187708279, + "loss": 0.6309, + "step": 2692 + }, + { + "epoch": 3.19, + "learning_rate": 0.00015292430781540335, + "loss": 0.4355, + "step": 2693 + }, + { + "epoch": 3.19, + "learning_rate": 0.00015289226620787208, + "loss": 0.4537, + "step": 2694 + }, + { + "epoch": 3.19, + "learning_rate": 0.0001528602170590574, + "loss": 0.4305, + "step": 2695 + }, + { + "epoch": 3.19, + "learning_rate": 0.00015282816037352878, + "loss": 0.5355, + "step": 2696 + }, + { + "epoch": 3.19, + "learning_rate": 0.00015279609615585687, + "loss": 0.5243, + "step": 2697 + }, + { + "epoch": 3.2, + "learning_rate": 0.0001527640244106133, + "loss": 0.5334, + "step": 2698 + }, + { + "epoch": 3.2, + "learning_rate": 0.00015273194514237078, + "loss": 0.5409, + "step": 2699 + }, + { + "epoch": 3.2, + "learning_rate": 0.0001526998583557031, + "loss": 0.4042, + "step": 2700 + }, + { + "epoch": 3.2, + "learning_rate": 0.00015266776405518524, + "loss": 0.5536, + "step": 2701 + }, + { + "epoch": 3.2, + "learning_rate": 0.000152635662245393, + "loss": 0.2743, + "step": 2702 + }, + { + "epoch": 3.2, + "learning_rate": 0.00015260355293090353, + "loss": 0.4762, + "step": 2703 + }, + { + "epoch": 3.2, + "learning_rate": 0.00015257143611629482, + "loss": 0.4552, + "step": 2704 + }, + { + "epoch": 3.2, + "learning_rate": 0.0001525393118061461, + "loss": 0.5395, + "step": 2705 + }, + { + "epoch": 3.21, + "learning_rate": 0.0001525071800050375, + "loss": 0.4297, + "step": 2706 + }, + { + "epoch": 3.21, + "learning_rate": 0.00015247504071755046, + "loss": 0.364, + "step": 2707 + }, + { + "epoch": 3.21, + "learning_rate": 0.00015244289394826722, + "loss": 0.9499, + "step": 2708 + }, + { + "epoch": 3.21, + "learning_rate": 0.00015241073970177126, + "loss": 0.579, + "step": 2709 + }, + { + "epoch": 3.21, + "learning_rate": 0.000152378577982647, + "loss": 0.3111, + "step": 2710 + }, + { + "epoch": 3.21, + "learning_rate": 0.0001523464087954801, + "loss": 0.3345, + "step": 2711 + }, + { + "epoch": 3.21, + "learning_rate": 0.00015231423214485715, + "loss": 0.4628, + "step": 2712 + }, + { + "epoch": 3.21, + "learning_rate": 0.00015228204803536586, + "loss": 0.4803, + "step": 2713 + }, + { + "epoch": 3.21, + "learning_rate": 0.0001522498564715949, + "loss": 0.4164, + "step": 2714 + }, + { + "epoch": 3.22, + "learning_rate": 0.00015221765745813417, + "loss": 0.6468, + "step": 2715 + }, + { + "epoch": 3.22, + "learning_rate": 0.00015218545099957449, + "loss": 0.4495, + "step": 2716 + }, + { + "epoch": 3.22, + "learning_rate": 0.00015215323710050785, + "loss": 0.4184, + "step": 2717 + }, + { + "epoch": 3.22, + "eval_loss": 2.9206559658050537, + "eval_runtime": 283.9002, + "eval_samples_per_second": 0.726, + "eval_steps_per_second": 0.726, + "step": 2717 + }, + { + "epoch": 3.22, + "learning_rate": 0.00015212101576552722, + "loss": 0.4215, + "step": 2718 + }, + { + "epoch": 3.22, + "learning_rate": 0.00015208878699922668, + "loss": 1.4488, + "step": 2719 + }, + { + "epoch": 3.22, + "learning_rate": 0.0001520565508062013, + "loss": 0.4449, + "step": 2720 + }, + { + "epoch": 3.22, + "learning_rate": 0.0001520243071910473, + "loss": 0.2853, + "step": 2721 + }, + { + "epoch": 3.22, + "learning_rate": 0.00015199205615836191, + "loss": 0.4572, + "step": 2722 + }, + { + "epoch": 3.23, + "learning_rate": 0.00015195979771274342, + "loss": 0.4436, + "step": 2723 + }, + { + "epoch": 3.23, + "learning_rate": 0.0001519275318587912, + "loss": 0.38, + "step": 2724 + }, + { + "epoch": 3.23, + "learning_rate": 0.00015189525860110563, + "loss": 0.4956, + "step": 2725 + }, + { + "epoch": 3.23, + "learning_rate": 0.00015186297794428816, + "loss": 0.8514, + "step": 2726 + }, + { + "epoch": 3.23, + "learning_rate": 0.00015183068989294132, + "loss": 0.4518, + "step": 2727 + }, + { + "epoch": 3.23, + "learning_rate": 0.00015179839445166873, + "loss": 0.5581, + "step": 2728 + }, + { + "epoch": 3.23, + "learning_rate": 0.00015176609162507497, + "loss": 0.5828, + "step": 2729 + }, + { + "epoch": 3.23, + "learning_rate": 0.00015173378141776568, + "loss": 0.4109, + "step": 2730 + }, + { + "epoch": 3.24, + "learning_rate": 0.00015170146383434768, + "loss": 0.5762, + "step": 2731 + }, + { + "epoch": 3.24, + "learning_rate": 0.00015166913887942868, + "loss": 0.4502, + "step": 2732 + }, + { + "epoch": 3.24, + "learning_rate": 0.00015163680655761757, + "loss": 0.3736, + "step": 2733 + }, + { + "epoch": 3.24, + "learning_rate": 0.00015160446687352417, + "loss": 0.3771, + "step": 2734 + }, + { + "epoch": 3.24, + "learning_rate": 0.00015157211983175947, + "loss": 0.469, + "step": 2735 + }, + { + "epoch": 3.24, + "learning_rate": 0.00015153976543693542, + "loss": 0.665, + "step": 2736 + }, + { + "epoch": 3.24, + "learning_rate": 0.00015150740369366508, + "loss": 0.3495, + "step": 2737 + }, + { + "epoch": 3.24, + "learning_rate": 0.0001514750346065625, + "loss": 0.4513, + "step": 2738 + }, + { + "epoch": 3.24, + "learning_rate": 0.0001514426581802428, + "loss": 0.4571, + "step": 2739 + }, + { + "epoch": 3.25, + "learning_rate": 0.00015141027441932216, + "loss": 0.4197, + "step": 2740 + }, + { + "epoch": 3.25, + "learning_rate": 0.00015137788332841785, + "loss": 0.3396, + "step": 2741 + }, + { + "epoch": 3.25, + "learning_rate": 0.00015134548491214806, + "loss": 0.3547, + "step": 2742 + }, + { + "epoch": 3.25, + "learning_rate": 0.00015131307917513214, + "loss": 0.3073, + "step": 2743 + }, + { + "epoch": 3.25, + "learning_rate": 0.00015128066612199044, + "loss": 0.7091, + "step": 2744 + }, + { + "epoch": 3.25, + "learning_rate": 0.00015124824575734428, + "loss": 0.2845, + "step": 2745 + }, + { + "epoch": 3.25, + "learning_rate": 0.00015121581808581622, + "loss": 0.2903, + "step": 2746 + }, + { + "epoch": 3.25, + "learning_rate": 0.00015118338311202964, + "loss": 0.4065, + "step": 2747 + }, + { + "epoch": 3.26, + "learning_rate": 0.00015115094084060916, + "loss": 0.6152, + "step": 2748 + }, + { + "epoch": 3.26, + "learning_rate": 0.00015111849127618022, + "loss": 0.5352, + "step": 2749 + }, + { + "epoch": 3.26, + "learning_rate": 0.0001510860344233695, + "loss": 0.414, + "step": 2750 + }, + { + "epoch": 3.26, + "learning_rate": 0.00015105357028680457, + "loss": 0.4756, + "step": 2751 + }, + { + "epoch": 3.26, + "learning_rate": 0.00015102109887111422, + "loss": 0.4644, + "step": 2752 + }, + { + "epoch": 3.26, + "learning_rate": 0.00015098862018092808, + "loss": 0.4231, + "step": 2753 + }, + { + "epoch": 3.26, + "learning_rate": 0.00015095613422087692, + "loss": 0.4617, + "step": 2754 + }, + { + "epoch": 3.26, + "learning_rate": 0.0001509236409955925, + "loss": 0.5876, + "step": 2755 + }, + { + "epoch": 3.27, + "learning_rate": 0.0001508911405097077, + "loss": 0.5696, + "step": 2756 + }, + { + "epoch": 3.27, + "learning_rate": 0.00015085863276785637, + "loss": 0.3826, + "step": 2757 + }, + { + "epoch": 3.27, + "learning_rate": 0.0001508261177746734, + "loss": 0.4338, + "step": 2758 + }, + { + "epoch": 3.27, + "learning_rate": 0.0001507935955347947, + "loss": 0.3546, + "step": 2759 + }, + { + "epoch": 3.27, + "learning_rate": 0.00015076106605285724, + "loss": 0.413, + "step": 2760 + }, + { + "epoch": 3.27, + "learning_rate": 0.000150728529333499, + "loss": 0.3954, + "step": 2761 + }, + { + "epoch": 3.27, + "learning_rate": 0.00015069598538135906, + "loss": 0.5214, + "step": 2762 + }, + { + "epoch": 3.27, + "learning_rate": 0.0001506634342010774, + "loss": 0.5239, + "step": 2763 + }, + { + "epoch": 3.27, + "learning_rate": 0.00015063087579729519, + "loss": 0.8681, + "step": 2764 + }, + { + "epoch": 3.28, + "learning_rate": 0.00015059831017465449, + "loss": 0.4616, + "step": 2765 + }, + { + "epoch": 3.28, + "learning_rate": 0.00015056573733779848, + "loss": 0.4721, + "step": 2766 + }, + { + "epoch": 3.28, + "learning_rate": 0.00015053315729137128, + "loss": 0.4449, + "step": 2767 + }, + { + "epoch": 3.28, + "learning_rate": 0.0001505005700400182, + "loss": 0.569, + "step": 2768 + }, + { + "epoch": 3.28, + "learning_rate": 0.00015046797558838535, + "loss": 0.4926, + "step": 2769 + }, + { + "epoch": 3.28, + "learning_rate": 0.00015043537394112007, + "loss": 0.462, + "step": 2770 + }, + { + "epoch": 3.28, + "learning_rate": 0.00015040276510287063, + "loss": 0.6983, + "step": 2771 + }, + { + "epoch": 3.28, + "learning_rate": 0.00015037014907828632, + "loss": 0.4644, + "step": 2772 + }, + { + "epoch": 3.29, + "learning_rate": 0.0001503375258720175, + "loss": 0.5924, + "step": 2773 + }, + { + "epoch": 3.29, + "learning_rate": 0.00015030489548871544, + "loss": 0.5282, + "step": 2774 + }, + { + "epoch": 3.29, + "learning_rate": 0.00015027225793303264, + "loss": 0.4757, + "step": 2775 + }, + { + "epoch": 3.29, + "learning_rate": 0.00015023961320962247, + "loss": 0.5014, + "step": 2776 + }, + { + "epoch": 3.29, + "learning_rate": 0.0001502069613231393, + "loss": 0.3455, + "step": 2777 + }, + { + "epoch": 3.29, + "learning_rate": 0.00015017430227823864, + "loss": 0.4525, + "step": 2778 + }, + { + "epoch": 3.29, + "learning_rate": 0.0001501416360795769, + "loss": 0.51, + "step": 2779 + }, + { + "epoch": 3.29, + "learning_rate": 0.00015010896273181165, + "loss": 0.3766, + "step": 2780 + }, + { + "epoch": 3.3, + "learning_rate": 0.0001500762822396013, + "loss": 0.3162, + "step": 2781 + }, + { + "epoch": 3.3, + "learning_rate": 0.00015004359460760546, + "loss": 0.406, + "step": 2782 + }, + { + "epoch": 3.3, + "learning_rate": 0.00015001089984048463, + "loss": 0.4671, + "step": 2783 + }, + { + "epoch": 3.3, + "learning_rate": 0.00014997819794290034, + "loss": 0.4299, + "step": 2784 + }, + { + "epoch": 3.3, + "learning_rate": 0.00014994548891951524, + "loss": 0.5494, + "step": 2785 + }, + { + "epoch": 3.3, + "learning_rate": 0.0001499127727749929, + "loss": 0.351, + "step": 2786 + }, + { + "epoch": 3.3, + "learning_rate": 0.00014988004951399785, + "loss": 0.3807, + "step": 2787 + }, + { + "epoch": 3.3, + "learning_rate": 0.00014984731914119586, + "loss": 0.3999, + "step": 2788 + }, + { + "epoch": 3.3, + "learning_rate": 0.0001498145816612534, + "loss": 0.7609, + "step": 2789 + }, + { + "epoch": 3.31, + "learning_rate": 0.00014978183707883827, + "loss": 0.4466, + "step": 2790 + }, + { + "epoch": 3.31, + "learning_rate": 0.00014974908539861905, + "loss": 0.592, + "step": 2791 + }, + { + "epoch": 3.31, + "learning_rate": 0.00014971632662526545, + "loss": 0.4786, + "step": 2792 + }, + { + "epoch": 3.31, + "learning_rate": 0.00014968356076344814, + "loss": 0.4087, + "step": 2793 + }, + { + "epoch": 3.31, + "learning_rate": 0.0001496507878178388, + "loss": 0.3811, + "step": 2794 + }, + { + "epoch": 3.31, + "learning_rate": 0.00014961800779311014, + "loss": 0.4091, + "step": 2795 + }, + { + "epoch": 3.31, + "learning_rate": 0.00014958522069393593, + "loss": 0.6861, + "step": 2796 + }, + { + "epoch": 3.31, + "learning_rate": 0.00014955242652499084, + "loss": 0.3346, + "step": 2797 + }, + { + "epoch": 3.32, + "learning_rate": 0.00014951962529095064, + "loss": 0.5417, + "step": 2798 + }, + { + "epoch": 3.32, + "learning_rate": 0.000149486816996492, + "loss": 0.7325, + "step": 2799 + }, + { + "epoch": 3.32, + "learning_rate": 0.00014945400164629278, + "loss": 0.5007, + "step": 2800 + }, + { + "epoch": 3.32, + "learning_rate": 0.00014942117924503164, + "loss": 0.4217, + "step": 2801 + }, + { + "epoch": 3.32, + "learning_rate": 0.00014938834979738835, + "loss": 0.5265, + "step": 2802 + }, + { + "epoch": 3.32, + "learning_rate": 0.00014935551330804372, + "loss": 0.4376, + "step": 2803 + }, + { + "epoch": 3.32, + "learning_rate": 0.0001493226697816795, + "loss": 0.5068, + "step": 2804 + }, + { + "epoch": 3.32, + "learning_rate": 0.00014928981922297842, + "loss": 0.6248, + "step": 2805 + }, + { + "epoch": 3.33, + "learning_rate": 0.0001492569616366243, + "loss": 0.593, + "step": 2806 + }, + { + "epoch": 3.33, + "learning_rate": 0.0001492240970273019, + "loss": 0.6713, + "step": 2807 + }, + { + "epoch": 3.33, + "learning_rate": 0.00014919122539969697, + "loss": 0.5736, + "step": 2808 + }, + { + "epoch": 3.33, + "learning_rate": 0.00014915834675849633, + "loss": 0.3006, + "step": 2809 + }, + { + "epoch": 3.33, + "learning_rate": 0.00014912546110838775, + "loss": 0.5175, + "step": 2810 + }, + { + "epoch": 3.33, + "learning_rate": 0.00014909256845405998, + "loss": 0.52, + "step": 2811 + }, + { + "epoch": 3.33, + "learning_rate": 0.00014905966880020282, + "loss": 0.5491, + "step": 2812 + }, + { + "epoch": 3.33, + "learning_rate": 0.00014902676215150702, + "loss": 0.6007, + "step": 2813 + }, + { + "epoch": 3.33, + "learning_rate": 0.0001489938485126644, + "loss": 0.6552, + "step": 2814 + }, + { + "epoch": 3.34, + "learning_rate": 0.00014896092788836763, + "loss": 0.3624, + "step": 2815 + }, + { + "epoch": 3.34, + "learning_rate": 0.0001489280002833106, + "loss": 0.2626, + "step": 2816 + }, + { + "epoch": 3.34, + "learning_rate": 0.00014889506570218796, + "loss": 0.409, + "step": 2817 + }, + { + "epoch": 3.34, + "learning_rate": 0.00014886212414969553, + "loss": 0.473, + "step": 2818 + }, + { + "epoch": 3.34, + "learning_rate": 0.00014882917563052998, + "loss": 0.4205, + "step": 2819 + }, + { + "epoch": 3.34, + "learning_rate": 0.00014879622014938915, + "loss": 0.4603, + "step": 2820 + }, + { + "epoch": 3.34, + "learning_rate": 0.0001487632577109717, + "loss": 0.3522, + "step": 2821 + }, + { + "epoch": 3.34, + "learning_rate": 0.0001487302883199774, + "loss": 0.3787, + "step": 2822 + }, + { + "epoch": 3.35, + "learning_rate": 0.00014869731198110695, + "loss": 0.6, + "step": 2823 + }, + { + "epoch": 3.35, + "learning_rate": 0.000148664328699062, + "loss": 0.4291, + "step": 2824 + }, + { + "epoch": 3.35, + "learning_rate": 0.00014863133847854533, + "loss": 0.4358, + "step": 2825 + }, + { + "epoch": 3.35, + "learning_rate": 0.0001485983413242606, + "loss": 0.4144, + "step": 2826 + }, + { + "epoch": 3.35, + "learning_rate": 0.0001485653372409125, + "loss": 0.842, + "step": 2827 + }, + { + "epoch": 3.35, + "learning_rate": 0.00014853232623320662, + "loss": 0.3398, + "step": 2828 + }, + { + "epoch": 3.35, + "learning_rate": 0.00014849930830584972, + "loss": 0.5005, + "step": 2829 + }, + { + "epoch": 3.35, + "learning_rate": 0.00014846628346354933, + "loss": 0.5777, + "step": 2830 + }, + { + "epoch": 3.36, + "learning_rate": 0.00014843325171101413, + "loss": 0.3953, + "step": 2831 + }, + { + "epoch": 3.36, + "learning_rate": 0.00014840021305295373, + "loss": 0.4056, + "step": 2832 + }, + { + "epoch": 3.36, + "learning_rate": 0.00014836716749407872, + "loss": 0.7682, + "step": 2833 + }, + { + "epoch": 3.36, + "learning_rate": 0.0001483341150391006, + "loss": 0.3208, + "step": 2834 + }, + { + "epoch": 3.36, + "learning_rate": 0.00014830105569273204, + "loss": 0.4317, + "step": 2835 + }, + { + "epoch": 3.36, + "learning_rate": 0.00014826798945968654, + "loss": 0.363, + "step": 2836 + }, + { + "epoch": 3.36, + "learning_rate": 0.00014823491634467862, + "loss": 0.3784, + "step": 2837 + }, + { + "epoch": 3.36, + "learning_rate": 0.00014820183635242374, + "loss": 0.9267, + "step": 2838 + }, + { + "epoch": 3.36, + "learning_rate": 0.0001481687494876385, + "loss": 0.4245, + "step": 2839 + }, + { + "epoch": 3.37, + "learning_rate": 0.00014813565575504022, + "loss": 0.3929, + "step": 2840 + }, + { + "epoch": 3.37, + "learning_rate": 0.00014810255515934747, + "loss": 0.5171, + "step": 2841 + }, + { + "epoch": 3.37, + "learning_rate": 0.00014806944770527958, + "loss": 0.5181, + "step": 2842 + }, + { + "epoch": 3.37, + "learning_rate": 0.00014803633339755703, + "loss": 0.4765, + "step": 2843 + }, + { + "epoch": 3.37, + "learning_rate": 0.00014800321224090114, + "loss": 0.4433, + "step": 2844 + }, + { + "epoch": 3.37, + "learning_rate": 0.00014797008424003428, + "loss": 0.461, + "step": 2845 + }, + { + "epoch": 3.37, + "learning_rate": 0.0001479369493996798, + "loss": 0.5688, + "step": 2846 + }, + { + "epoch": 3.37, + "learning_rate": 0.00014790380772456197, + "loss": 0.4822, + "step": 2847 + }, + { + "epoch": 3.38, + "learning_rate": 0.0001478706592194061, + "loss": 0.4993, + "step": 2848 + }, + { + "epoch": 3.38, + "learning_rate": 0.00014783750388893842, + "loss": 0.3967, + "step": 2849 + }, + { + "epoch": 3.38, + "learning_rate": 0.00014780434173788617, + "loss": 0.4708, + "step": 2850 + }, + { + "epoch": 3.38, + "learning_rate": 0.00014777117277097758, + "loss": 0.5721, + "step": 2851 + }, + { + "epoch": 3.38, + "learning_rate": 0.00014773799699294176, + "loss": 0.5276, + "step": 2852 + }, + { + "epoch": 3.38, + "learning_rate": 0.00014770481440850891, + "loss": 0.4135, + "step": 2853 + }, + { + "epoch": 3.38, + "learning_rate": 0.0001476716250224101, + "loss": 0.716, + "step": 2854 + }, + { + "epoch": 3.38, + "learning_rate": 0.00014763842883937743, + "loss": 0.3663, + "step": 2855 + }, + { + "epoch": 3.39, + "learning_rate": 0.00014760522586414396, + "loss": 0.4105, + "step": 2856 + }, + { + "epoch": 3.39, + "learning_rate": 0.00014757201610144372, + "loss": 0.4554, + "step": 2857 + }, + { + "epoch": 3.39, + "learning_rate": 0.00014753879955601163, + "loss": 0.4366, + "step": 2858 + }, + { + "epoch": 3.39, + "learning_rate": 0.0001475055762325837, + "loss": 0.3752, + "step": 2859 + }, + { + "epoch": 3.39, + "learning_rate": 0.00014747234613589685, + "loss": 0.3747, + "step": 2860 + }, + { + "epoch": 3.39, + "learning_rate": 0.000147439109270689, + "loss": 0.5533, + "step": 2861 + }, + { + "epoch": 3.39, + "learning_rate": 0.00014740586564169892, + "loss": 0.4962, + "step": 2862 + }, + { + "epoch": 3.39, + "learning_rate": 0.00014737261525366648, + "loss": 0.5318, + "step": 2863 + }, + { + "epoch": 3.39, + "learning_rate": 0.00014733935811133244, + "loss": 0.4592, + "step": 2864 + }, + { + "epoch": 3.4, + "learning_rate": 0.00014730609421943855, + "loss": 0.429, + "step": 2865 + }, + { + "epoch": 3.4, + "learning_rate": 0.00014727282358272754, + "loss": 0.4163, + "step": 2866 + }, + { + "epoch": 3.4, + "learning_rate": 0.00014723954620594304, + "loss": 0.4811, + "step": 2867 + }, + { + "epoch": 3.4, + "learning_rate": 0.0001472062620938297, + "loss": 0.4662, + "step": 2868 + }, + { + "epoch": 3.4, + "learning_rate": 0.00014717297125113311, + "loss": 0.531, + "step": 2869 + }, + { + "epoch": 3.4, + "learning_rate": 0.0001471396736825998, + "loss": 0.3233, + "step": 2870 + }, + { + "epoch": 3.4, + "learning_rate": 0.00014710636939297724, + "loss": 0.4171, + "step": 2871 + }, + { + "epoch": 3.4, + "learning_rate": 0.000147073058387014, + "loss": 0.5412, + "step": 2872 + }, + { + "epoch": 3.41, + "learning_rate": 0.00014703974066945943, + "loss": 0.4357, + "step": 2873 + }, + { + "epoch": 3.41, + "learning_rate": 0.00014700641624506392, + "loss": 0.3889, + "step": 2874 + }, + { + "epoch": 3.41, + "learning_rate": 0.0001469730851185788, + "loss": 0.456, + "step": 2875 + }, + { + "epoch": 3.41, + "learning_rate": 0.00014693974729475636, + "loss": 0.4365, + "step": 2876 + }, + { + "epoch": 3.41, + "learning_rate": 0.0001469064027783499, + "loss": 0.3947, + "step": 2877 + }, + { + "epoch": 3.41, + "learning_rate": 0.00014687305157411355, + "loss": 0.5718, + "step": 2878 + }, + { + "epoch": 3.41, + "learning_rate": 0.0001468396936868025, + "loss": 0.4652, + "step": 2879 + }, + { + "epoch": 3.41, + "learning_rate": 0.00014680632912117286, + "loss": 0.4242, + "step": 2880 + }, + { + "epoch": 3.42, + "learning_rate": 0.0001467729578819817, + "loss": 0.5045, + "step": 2881 + }, + { + "epoch": 3.42, + "learning_rate": 0.00014673957997398695, + "loss": 0.4098, + "step": 2882 + }, + { + "epoch": 3.42, + "learning_rate": 0.00014670619540194766, + "loss": 0.597, + "step": 2883 + }, + { + "epoch": 3.42, + "learning_rate": 0.00014667280417062374, + "loss": 0.5208, + "step": 2884 + }, + { + "epoch": 3.42, + "learning_rate": 0.00014663940628477598, + "loss": 0.4881, + "step": 2885 + }, + { + "epoch": 3.42, + "learning_rate": 0.00014660600174916627, + "loss": 0.5234, + "step": 2886 + }, + { + "epoch": 3.42, + "learning_rate": 0.0001465725905685573, + "loss": 0.439, + "step": 2887 + }, + { + "epoch": 3.42, + "learning_rate": 0.00014653917274771284, + "loss": 0.4498, + "step": 2888 + }, + { + "epoch": 3.42, + "learning_rate": 0.00014650574829139747, + "loss": 0.4837, + "step": 2889 + }, + { + "epoch": 3.43, + "learning_rate": 0.00014647231720437686, + "loss": 0.4232, + "step": 2890 + }, + { + "epoch": 3.43, + "learning_rate": 0.00014643887949141753, + "loss": 0.4467, + "step": 2891 + }, + { + "epoch": 3.43, + "learning_rate": 0.00014640543515728695, + "loss": 0.3566, + "step": 2892 + }, + { + "epoch": 3.43, + "learning_rate": 0.00014637198420675354, + "loss": 0.3888, + "step": 2893 + }, + { + "epoch": 3.43, + "learning_rate": 0.00014633852664458673, + "loss": 0.326, + "step": 2894 + }, + { + "epoch": 3.43, + "learning_rate": 0.0001463050624755568, + "loss": 0.3608, + "step": 2895 + }, + { + "epoch": 3.43, + "learning_rate": 0.00014627159170443502, + "loss": 0.5326, + "step": 2896 + }, + { + "epoch": 3.43, + "learning_rate": 0.00014623811433599359, + "loss": 0.3171, + "step": 2897 + }, + { + "epoch": 3.44, + "learning_rate": 0.00014620463037500568, + "loss": 0.4619, + "step": 2898 + }, + { + "epoch": 3.44, + "learning_rate": 0.00014617113982624526, + "loss": 0.7739, + "step": 2899 + }, + { + "epoch": 3.44, + "learning_rate": 0.00014613764269448751, + "loss": 0.4327, + "step": 2900 + }, + { + "epoch": 3.44, + "learning_rate": 0.0001461041389845083, + "loss": 0.6078, + "step": 2901 + }, + { + "epoch": 3.44, + "learning_rate": 0.00014607062870108456, + "loss": 0.3863, + "step": 2902 + }, + { + "epoch": 3.44, + "learning_rate": 0.00014603711184899408, + "loss": 0.4787, + "step": 2903 + }, + { + "epoch": 3.44, + "learning_rate": 0.00014600358843301568, + "loss": 0.2997, + "step": 2904 + }, + { + "epoch": 3.44, + "learning_rate": 0.00014597005845792905, + "loss": 0.3657, + "step": 2905 + }, + { + "epoch": 3.45, + "learning_rate": 0.00014593652192851486, + "loss": 0.334, + "step": 2906 + }, + { + "epoch": 3.45, + "learning_rate": 0.00014590297884955463, + "loss": 0.6809, + "step": 2907 + }, + { + "epoch": 3.45, + "learning_rate": 0.0001458694292258309, + "loss": 0.4739, + "step": 2908 + }, + { + "epoch": 3.45, + "learning_rate": 0.00014583587306212713, + "loss": 0.4139, + "step": 2909 + }, + { + "epoch": 3.45, + "learning_rate": 0.00014580231036322768, + "loss": 0.3307, + "step": 2910 + }, + { + "epoch": 3.45, + "learning_rate": 0.00014576874113391789, + "loss": 0.4155, + "step": 2911 + }, + { + "epoch": 3.45, + "learning_rate": 0.00014573516537898394, + "loss": 0.4461, + "step": 2912 + }, + { + "epoch": 3.45, + "learning_rate": 0.00014570158310321305, + "loss": 0.4775, + "step": 2913 + }, + { + "epoch": 3.45, + "learning_rate": 0.0001456679943113933, + "loss": 0.344, + "step": 2914 + }, + { + "epoch": 3.46, + "learning_rate": 0.00014563439900831373, + "loss": 0.3568, + "step": 2915 + }, + { + "epoch": 3.46, + "learning_rate": 0.00014560079719876424, + "loss": 0.3808, + "step": 2916 + }, + { + "epoch": 3.46, + "learning_rate": 0.0001455671888875358, + "loss": 0.5467, + "step": 2917 + }, + { + "epoch": 3.46, + "learning_rate": 0.00014553357407942022, + "loss": 0.5267, + "step": 2918 + }, + { + "epoch": 3.46, + "learning_rate": 0.00014549995277921015, + "loss": 0.4476, + "step": 2919 + }, + { + "epoch": 3.46, + "learning_rate": 0.00014546632499169937, + "loss": 0.4463, + "step": 2920 + }, + { + "epoch": 3.46, + "learning_rate": 0.00014543269072168235, + "loss": 0.5553, + "step": 2921 + }, + { + "epoch": 3.46, + "learning_rate": 0.00014539904997395468, + "loss": 0.5476, + "step": 2922 + }, + { + "epoch": 3.47, + "learning_rate": 0.0001453654027533128, + "loss": 0.4443, + "step": 2923 + }, + { + "epoch": 3.47, + "learning_rate": 0.00014533174906455404, + "loss": 0.4353, + "step": 2924 + }, + { + "epoch": 3.47, + "learning_rate": 0.00014529808891247667, + "loss": 0.4479, + "step": 2925 + }, + { + "epoch": 3.47, + "learning_rate": 0.00014526442230187995, + "loss": 0.3951, + "step": 2926 + }, + { + "epoch": 3.47, + "eval_loss": 2.882225751876831, + "eval_runtime": 283.9462, + "eval_samples_per_second": 0.725, + "eval_steps_per_second": 0.725, + "step": 2926 + }, + { + "epoch": 3.47, + "learning_rate": 0.00014523074923756394, + "loss": 0.679, + "step": 2927 + }, + { + "epoch": 3.47, + "learning_rate": 0.0001451970697243297, + "loss": 0.4178, + "step": 2928 + }, + { + "epoch": 3.47, + "learning_rate": 0.0001451633837669792, + "loss": 0.4121, + "step": 2929 + }, + { + "epoch": 3.47, + "learning_rate": 0.00014512969137031538, + "loss": 0.3929, + "step": 2930 + }, + { + "epoch": 3.48, + "learning_rate": 0.00014509599253914195, + "loss": 0.366, + "step": 2931 + }, + { + "epoch": 3.48, + "learning_rate": 0.0001450622872782637, + "loss": 0.3528, + "step": 2932 + }, + { + "epoch": 3.48, + "learning_rate": 0.00014502857559248617, + "loss": 0.5003, + "step": 2933 + }, + { + "epoch": 3.48, + "learning_rate": 0.00014499485748661604, + "loss": 0.4901, + "step": 2934 + }, + { + "epoch": 3.48, + "learning_rate": 0.00014496113296546067, + "loss": 0.4538, + "step": 2935 + }, + { + "epoch": 3.48, + "learning_rate": 0.00014492740203382847, + "loss": 0.4549, + "step": 2936 + }, + { + "epoch": 3.48, + "learning_rate": 0.0001448936646965288, + "loss": 0.5464, + "step": 2937 + }, + { + "epoch": 3.48, + "learning_rate": 0.00014485992095837177, + "loss": 0.43, + "step": 2938 + }, + { + "epoch": 3.48, + "learning_rate": 0.00014482617082416858, + "loss": 0.5893, + "step": 2939 + }, + { + "epoch": 3.49, + "learning_rate": 0.0001447924142987312, + "loss": 0.4947, + "step": 2940 + }, + { + "epoch": 3.49, + "learning_rate": 0.00014475865138687262, + "loss": 0.4903, + "step": 2941 + }, + { + "epoch": 3.49, + "learning_rate": 0.0001447248820934067, + "loss": 0.4933, + "step": 2942 + }, + { + "epoch": 3.49, + "learning_rate": 0.00014469110642314817, + "loss": 0.4516, + "step": 2943 + }, + { + "epoch": 3.49, + "learning_rate": 0.0001446573243809127, + "loss": 0.469, + "step": 2944 + }, + { + "epoch": 3.49, + "learning_rate": 0.00014462353597151684, + "loss": 0.6531, + "step": 2945 + }, + { + "epoch": 3.49, + "learning_rate": 0.00014458974119977818, + "loss": 0.2754, + "step": 2946 + }, + { + "epoch": 3.49, + "learning_rate": 0.0001445559400705151, + "loss": 0.5676, + "step": 2947 + }, + { + "epoch": 3.5, + "learning_rate": 0.00014452213258854684, + "loss": 0.5903, + "step": 2948 + }, + { + "epoch": 3.5, + "learning_rate": 0.00014448831875869364, + "loss": 0.5022, + "step": 2949 + }, + { + "epoch": 3.5, + "learning_rate": 0.0001444544985857766, + "loss": 0.3509, + "step": 2950 + }, + { + "epoch": 3.5, + "learning_rate": 0.00014442067207461775, + "loss": 0.3921, + "step": 2951 + }, + { + "epoch": 3.5, + "learning_rate": 0.00014438683923004005, + "loss": 0.4997, + "step": 2952 + }, + { + "epoch": 3.5, + "learning_rate": 0.00014435300005686728, + "loss": 0.6218, + "step": 2953 + }, + { + "epoch": 3.5, + "learning_rate": 0.00014431915455992414, + "loss": 0.4097, + "step": 2954 + }, + { + "epoch": 3.5, + "learning_rate": 0.00014428530274403632, + "loss": 0.3478, + "step": 2955 + }, + { + "epoch": 3.51, + "learning_rate": 0.00014425144461403035, + "loss": 0.4506, + "step": 2956 + }, + { + "epoch": 3.51, + "learning_rate": 0.00014421758017473362, + "loss": 0.4025, + "step": 2957 + }, + { + "epoch": 3.51, + "learning_rate": 0.00014418370943097448, + "loss": 0.3838, + "step": 2958 + }, + { + "epoch": 3.51, + "learning_rate": 0.00014414983238758217, + "loss": 0.6366, + "step": 2959 + }, + { + "epoch": 3.51, + "learning_rate": 0.00014411594904938682, + "loss": 0.4649, + "step": 2960 + }, + { + "epoch": 3.51, + "learning_rate": 0.00014408205942121943, + "loss": 0.3361, + "step": 2961 + }, + { + "epoch": 3.51, + "learning_rate": 0.00014404816350791188, + "loss": 0.3692, + "step": 2962 + }, + { + "epoch": 3.51, + "learning_rate": 0.0001440142613142971, + "loss": 0.6162, + "step": 2963 + }, + { + "epoch": 3.52, + "learning_rate": 0.00014398035284520874, + "loss": 0.5935, + "step": 2964 + }, + { + "epoch": 3.52, + "learning_rate": 0.0001439464381054814, + "loss": 0.545, + "step": 2965 + }, + { + "epoch": 3.52, + "learning_rate": 0.00014391251709995061, + "loss": 0.4178, + "step": 2966 + }, + { + "epoch": 3.52, + "learning_rate": 0.00014387858983345276, + "loss": 0.5552, + "step": 2967 + }, + { + "epoch": 3.52, + "learning_rate": 0.0001438446563108251, + "loss": 0.4506, + "step": 2968 + }, + { + "epoch": 3.52, + "learning_rate": 0.00014381071653690587, + "loss": 0.429, + "step": 2969 + }, + { + "epoch": 3.52, + "learning_rate": 0.00014377677051653404, + "loss": 0.3897, + "step": 2970 + }, + { + "epoch": 3.52, + "learning_rate": 0.0001437428182545497, + "loss": 0.4663, + "step": 2971 + }, + { + "epoch": 3.52, + "learning_rate": 0.00014370885975579364, + "loss": 0.4643, + "step": 2972 + }, + { + "epoch": 3.53, + "learning_rate": 0.0001436748950251076, + "loss": 0.5433, + "step": 2973 + }, + { + "epoch": 3.53, + "learning_rate": 0.0001436409240673342, + "loss": 0.4967, + "step": 2974 + }, + { + "epoch": 3.53, + "learning_rate": 0.000143606946887317, + "loss": 0.3717, + "step": 2975 + }, + { + "epoch": 3.53, + "learning_rate": 0.00014357296348990037, + "loss": 0.4166, + "step": 2976 + }, + { + "epoch": 3.53, + "learning_rate": 0.0001435389738799296, + "loss": 0.455, + "step": 2977 + }, + { + "epoch": 3.53, + "learning_rate": 0.00014350497806225087, + "loss": 0.4603, + "step": 2978 + }, + { + "epoch": 3.53, + "learning_rate": 0.00014347097604171127, + "loss": 0.4325, + "step": 2979 + }, + { + "epoch": 3.53, + "learning_rate": 0.0001434369678231587, + "loss": 0.4375, + "step": 2980 + }, + { + "epoch": 3.54, + "learning_rate": 0.00014340295341144202, + "loss": 0.4932, + "step": 2981 + }, + { + "epoch": 3.54, + "learning_rate": 0.00014336893281141096, + "loss": 0.5264, + "step": 2982 + }, + { + "epoch": 3.54, + "learning_rate": 0.00014333490602791608, + "loss": 0.4677, + "step": 2983 + }, + { + "epoch": 3.54, + "learning_rate": 0.00014330087306580887, + "loss": 0.6505, + "step": 2984 + }, + { + "epoch": 3.54, + "learning_rate": 0.00014326683392994167, + "loss": 0.4451, + "step": 2985 + }, + { + "epoch": 3.54, + "learning_rate": 0.00014323278862516775, + "loss": 0.4025, + "step": 2986 + }, + { + "epoch": 3.54, + "learning_rate": 0.0001431987371563412, + "loss": 0.5084, + "step": 2987 + }, + { + "epoch": 3.54, + "learning_rate": 0.000143164679528317, + "loss": 0.4806, + "step": 2988 + }, + { + "epoch": 3.55, + "learning_rate": 0.00014313061574595115, + "loss": 0.3954, + "step": 2989 + }, + { + "epoch": 3.55, + "learning_rate": 0.00014309654581410024, + "loss": 0.4339, + "step": 2990 + }, + { + "epoch": 3.55, + "learning_rate": 0.000143062469737622, + "loss": 0.6739, + "step": 2991 + }, + { + "epoch": 3.55, + "learning_rate": 0.00014302838752137487, + "loss": 0.6414, + "step": 2992 + }, + { + "epoch": 3.55, + "learning_rate": 0.00014299429917021827, + "loss": 0.5075, + "step": 2993 + }, + { + "epoch": 3.55, + "learning_rate": 0.00014296020468901246, + "loss": 0.4105, + "step": 2994 + }, + { + "epoch": 3.55, + "learning_rate": 0.00014292610408261856, + "loss": 0.7371, + "step": 2995 + }, + { + "epoch": 3.55, + "learning_rate": 0.00014289199735589852, + "loss": 0.7485, + "step": 2996 + }, + { + "epoch": 3.55, + "learning_rate": 0.00014285788451371534, + "loss": 0.7629, + "step": 2997 + }, + { + "epoch": 3.56, + "learning_rate": 0.00014282376556093264, + "loss": 0.3849, + "step": 2998 + }, + { + "epoch": 3.56, + "learning_rate": 0.00014278964050241512, + "loss": 0.5355, + "step": 2999 + }, + { + "epoch": 3.56, + "learning_rate": 0.00014275550934302823, + "loss": 0.4077, + "step": 3000 + }, + { + "epoch": 3.56, + "learning_rate": 0.00014272137208763832, + "loss": 0.5352, + "step": 3001 + }, + { + "epoch": 3.56, + "learning_rate": 0.00014268722874111265, + "loss": 0.5257, + "step": 3002 + }, + { + "epoch": 3.56, + "learning_rate": 0.00014265307930831932, + "loss": 0.4265, + "step": 3003 + }, + { + "epoch": 3.56, + "learning_rate": 0.00014261892379412728, + "loss": 0.5776, + "step": 3004 + }, + { + "epoch": 3.56, + "learning_rate": 0.0001425847622034063, + "loss": 0.3521, + "step": 3005 + }, + { + "epoch": 3.57, + "learning_rate": 0.00014255059454102722, + "loss": 0.6203, + "step": 3006 + }, + { + "epoch": 3.57, + "learning_rate": 0.00014251642081186146, + "loss": 0.5238, + "step": 3007 + }, + { + "epoch": 3.57, + "learning_rate": 0.00014248224102078152, + "loss": 0.3887, + "step": 3008 + }, + { + "epoch": 3.57, + "learning_rate": 0.00014244805517266067, + "loss": 0.5001, + "step": 3009 + }, + { + "epoch": 3.57, + "learning_rate": 0.0001424138632723731, + "loss": 0.555, + "step": 3010 + }, + { + "epoch": 3.57, + "learning_rate": 0.0001423796653247938, + "loss": 0.6137, + "step": 3011 + }, + { + "epoch": 3.57, + "learning_rate": 0.00014234546133479867, + "loss": 0.8052, + "step": 3012 + }, + { + "epoch": 3.57, + "learning_rate": 0.0001423112513072644, + "loss": 0.5392, + "step": 3013 + }, + { + "epoch": 3.58, + "learning_rate": 0.00014227703524706867, + "loss": 0.5067, + "step": 3014 + }, + { + "epoch": 3.58, + "learning_rate": 0.0001422428131590899, + "loss": 0.4016, + "step": 3015 + }, + { + "epoch": 3.58, + "learning_rate": 0.00014220858504820742, + "loss": 0.4165, + "step": 3016 + }, + { + "epoch": 3.58, + "learning_rate": 0.00014217435091930141, + "loss": 0.7395, + "step": 3017 + }, + { + "epoch": 3.58, + "learning_rate": 0.00014214011077725292, + "loss": 0.4985, + "step": 3018 + }, + { + "epoch": 3.58, + "learning_rate": 0.00014210586462694384, + "loss": 0.4821, + "step": 3019 + }, + { + "epoch": 3.58, + "learning_rate": 0.00014207161247325691, + "loss": 0.6046, + "step": 3020 + }, + { + "epoch": 3.58, + "learning_rate": 0.00014203735432107576, + "loss": 0.568, + "step": 3021 + }, + { + "epoch": 3.58, + "learning_rate": 0.00014200309017528486, + "loss": 0.7383, + "step": 3022 + }, + { + "epoch": 3.59, + "learning_rate": 0.0001419688200407695, + "loss": 0.5296, + "step": 3023 + }, + { + "epoch": 3.59, + "learning_rate": 0.00014193454392241592, + "loss": 0.6391, + "step": 3024 + }, + { + "epoch": 3.59, + "learning_rate": 0.00014190026182511102, + "loss": 0.4523, + "step": 3025 + }, + { + "epoch": 3.59, + "learning_rate": 0.0001418659737537428, + "loss": 0.482, + "step": 3026 + }, + { + "epoch": 3.59, + "learning_rate": 0.00014183167971319998, + "loss": 0.4519, + "step": 3027 + }, + { + "epoch": 3.59, + "learning_rate": 0.00014179737970837207, + "loss": 0.4156, + "step": 3028 + }, + { + "epoch": 3.59, + "learning_rate": 0.00014176307374414956, + "loss": 0.5142, + "step": 3029 + }, + { + "epoch": 3.59, + "learning_rate": 0.00014172876182542372, + "loss": 0.4068, + "step": 3030 + }, + { + "epoch": 3.6, + "learning_rate": 0.00014169444395708666, + "loss": 0.5908, + "step": 3031 + }, + { + "epoch": 3.6, + "learning_rate": 0.0001416601201440314, + "loss": 0.511, + "step": 3032 + }, + { + "epoch": 3.6, + "learning_rate": 0.00014162579039115174, + "loss": 0.5165, + "step": 3033 + }, + { + "epoch": 3.6, + "learning_rate": 0.00014159145470334235, + "loss": 0.4449, + "step": 3034 + }, + { + "epoch": 3.6, + "learning_rate": 0.00014155711308549878, + "loss": 0.4808, + "step": 3035 + }, + { + "epoch": 3.6, + "learning_rate": 0.00014152276554251736, + "loss": 0.5365, + "step": 3036 + }, + { + "epoch": 3.6, + "learning_rate": 0.00014148841207929527, + "loss": 0.6016, + "step": 3037 + }, + { + "epoch": 3.6, + "learning_rate": 0.0001414540527007307, + "loss": 0.379, + "step": 3038 + }, + { + "epoch": 3.61, + "learning_rate": 0.00014141968741172238, + "loss": 0.6687, + "step": 3039 + }, + { + "epoch": 3.61, + "learning_rate": 0.00014138531621717018, + "loss": 0.6219, + "step": 3040 + }, + { + "epoch": 3.61, + "learning_rate": 0.0001413509391219746, + "loss": 0.3408, + "step": 3041 + }, + { + "epoch": 3.61, + "learning_rate": 0.00014131655613103708, + "loss": 0.5148, + "step": 3042 + }, + { + "epoch": 3.61, + "learning_rate": 0.0001412821672492599, + "loss": 0.3811, + "step": 3043 + }, + { + "epoch": 3.61, + "learning_rate": 0.0001412477724815462, + "loss": 0.4691, + "step": 3044 + }, + { + "epoch": 3.61, + "learning_rate": 0.00014121337183279988, + "loss": 0.6919, + "step": 3045 + }, + { + "epoch": 3.61, + "learning_rate": 0.0001411789653079257, + "loss": 0.5804, + "step": 3046 + }, + { + "epoch": 3.61, + "learning_rate": 0.00014114455291182933, + "loss": 0.418, + "step": 3047 + }, + { + "epoch": 3.62, + "learning_rate": 0.0001411101346494172, + "loss": 0.4422, + "step": 3048 + }, + { + "epoch": 3.62, + "learning_rate": 0.0001410757105255966, + "loss": 0.389, + "step": 3049 + }, + { + "epoch": 3.62, + "learning_rate": 0.0001410412805452757, + "loss": 0.4083, + "step": 3050 + }, + { + "epoch": 3.62, + "learning_rate": 0.0001410068447133634, + "loss": 0.8703, + "step": 3051 + }, + { + "epoch": 3.62, + "learning_rate": 0.00014097240303476954, + "loss": 0.4724, + "step": 3052 + }, + { + "epoch": 3.62, + "learning_rate": 0.00014093795551440474, + "loss": 0.6257, + "step": 3053 + }, + { + "epoch": 3.62, + "learning_rate": 0.00014090350215718048, + "loss": 0.5212, + "step": 3054 + }, + { + "epoch": 3.62, + "learning_rate": 0.00014086904296800902, + "loss": 0.4429, + "step": 3055 + }, + { + "epoch": 3.63, + "learning_rate": 0.00014083457795180355, + "loss": 0.3496, + "step": 3056 + }, + { + "epoch": 3.63, + "learning_rate": 0.00014080010711347798, + "loss": 0.3402, + "step": 3057 + }, + { + "epoch": 3.63, + "learning_rate": 0.0001407656304579471, + "loss": 0.4783, + "step": 3058 + }, + { + "epoch": 3.63, + "learning_rate": 0.00014073114799012653, + "loss": 0.3987, + "step": 3059 + }, + { + "epoch": 3.63, + "learning_rate": 0.00014069665971493274, + "loss": 0.4755, + "step": 3060 + }, + { + "epoch": 3.63, + "learning_rate": 0.00014066216563728303, + "loss": 0.4792, + "step": 3061 + }, + { + "epoch": 3.63, + "learning_rate": 0.00014062766576209546, + "loss": 0.4275, + "step": 3062 + }, + { + "epoch": 3.63, + "learning_rate": 0.00014059316009428893, + "loss": 0.3598, + "step": 3063 + }, + { + "epoch": 3.64, + "learning_rate": 0.00014055864863878325, + "loss": 0.4887, + "step": 3064 + }, + { + "epoch": 3.64, + "learning_rate": 0.000140524131400499, + "loss": 0.5421, + "step": 3065 + }, + { + "epoch": 3.64, + "learning_rate": 0.00014048960838435753, + "loss": 0.352, + "step": 3066 + }, + { + "epoch": 3.64, + "learning_rate": 0.00014045507959528118, + "loss": 0.3124, + "step": 3067 + }, + { + "epoch": 3.64, + "learning_rate": 0.00014042054503819287, + "loss": 0.3955, + "step": 3068 + }, + { + "epoch": 3.64, + "learning_rate": 0.00014038600471801658, + "loss": 0.455, + "step": 3069 + }, + { + "epoch": 3.64, + "learning_rate": 0.00014035145863967692, + "loss": 0.5177, + "step": 3070 + }, + { + "epoch": 3.64, + "learning_rate": 0.00014031690680809945, + "loss": 0.4205, + "step": 3071 + }, + { + "epoch": 3.64, + "learning_rate": 0.00014028234922821054, + "loss": 0.4832, + "step": 3072 + }, + { + "epoch": 3.65, + "learning_rate": 0.0001402477859049373, + "loss": 0.3496, + "step": 3073 + }, + { + "epoch": 3.65, + "learning_rate": 0.0001402132168432077, + "loss": 0.5404, + "step": 3074 + }, + { + "epoch": 3.65, + "learning_rate": 0.00014017864204795058, + "loss": 0.5106, + "step": 3075 + }, + { + "epoch": 3.65, + "learning_rate": 0.0001401440615240955, + "loss": 0.6611, + "step": 3076 + }, + { + "epoch": 3.65, + "learning_rate": 0.00014010947527657295, + "loss": 0.3879, + "step": 3077 + }, + { + "epoch": 3.65, + "learning_rate": 0.0001400748833103141, + "loss": 0.3054, + "step": 3078 + }, + { + "epoch": 3.65, + "learning_rate": 0.00014004028563025108, + "loss": 0.3461, + "step": 3079 + }, + { + "epoch": 3.65, + "learning_rate": 0.0001400056822413167, + "loss": 0.482, + "step": 3080 + }, + { + "epoch": 3.66, + "learning_rate": 0.0001399710731484447, + "loss": 0.3285, + "step": 3081 + }, + { + "epoch": 3.66, + "learning_rate": 0.00013993645835656953, + "loss": 0.363, + "step": 3082 + }, + { + "epoch": 3.66, + "learning_rate": 0.00013990183787062661, + "loss": 0.5092, + "step": 3083 + }, + { + "epoch": 3.66, + "learning_rate": 0.00013986721169555194, + "loss": 0.3009, + "step": 3084 + }, + { + "epoch": 3.66, + "learning_rate": 0.00013983257983628253, + "loss": 0.3831, + "step": 3085 + }, + { + "epoch": 3.66, + "learning_rate": 0.0001397979422977561, + "loss": 0.3718, + "step": 3086 + }, + { + "epoch": 3.66, + "learning_rate": 0.00013976329908491118, + "loss": 0.3401, + "step": 3087 + }, + { + "epoch": 3.66, + "learning_rate": 0.00013972865020268722, + "loss": 0.5294, + "step": 3088 + }, + { + "epoch": 3.67, + "learning_rate": 0.00013969399565602435, + "loss": 0.5054, + "step": 3089 + }, + { + "epoch": 3.67, + "learning_rate": 0.0001396593354498635, + "loss": 0.4247, + "step": 3090 + }, + { + "epoch": 3.67, + "learning_rate": 0.00013962466958914658, + "loss": 0.431, + "step": 3091 + }, + { + "epoch": 3.67, + "learning_rate": 0.00013958999807881604, + "loss": 0.6341, + "step": 3092 + }, + { + "epoch": 3.67, + "learning_rate": 0.0001395553209238154, + "loss": 0.5126, + "step": 3093 + }, + { + "epoch": 3.67, + "learning_rate": 0.00013952063812908881, + "loss": 0.3775, + "step": 3094 + }, + { + "epoch": 3.67, + "learning_rate": 0.0001394859496995813, + "loss": 0.5149, + "step": 3095 + }, + { + "epoch": 3.67, + "learning_rate": 0.00013945125564023868, + "loss": 0.2879, + "step": 3096 + }, + { + "epoch": 3.67, + "learning_rate": 0.00013941655595600756, + "loss": 0.5621, + "step": 3097 + }, + { + "epoch": 3.68, + "learning_rate": 0.00013938185065183532, + "loss": 0.408, + "step": 3098 + }, + { + "epoch": 3.68, + "learning_rate": 0.00013934713973267024, + "loss": 0.4247, + "step": 3099 + }, + { + "epoch": 3.68, + "learning_rate": 0.0001393124232034613, + "loss": 0.4224, + "step": 3100 + }, + { + "epoch": 3.68, + "learning_rate": 0.0001392777010691584, + "loss": 0.4142, + "step": 3101 + }, + { + "epoch": 3.68, + "learning_rate": 0.00013924297333471204, + "loss": 0.6004, + "step": 3102 + }, + { + "epoch": 3.68, + "learning_rate": 0.00013920824000507374, + "loss": 0.6016, + "step": 3103 + }, + { + "epoch": 3.68, + "learning_rate": 0.0001391735010851956, + "loss": 0.4669, + "step": 3104 + }, + { + "epoch": 3.68, + "learning_rate": 0.00013913875658003074, + "loss": 0.3987, + "step": 3105 + }, + { + "epoch": 3.69, + "learning_rate": 0.0001391040064945329, + "loss": 0.471, + "step": 3106 + }, + { + "epoch": 3.69, + "learning_rate": 0.0001390692508336568, + "loss": 0.6135, + "step": 3107 + }, + { + "epoch": 3.69, + "learning_rate": 0.00013903448960235766, + "loss": 0.5369, + "step": 3108 + }, + { + "epoch": 3.69, + "learning_rate": 0.00013899972280559183, + "loss": 0.3295, + "step": 3109 + }, + { + "epoch": 3.69, + "learning_rate": 0.0001389649504483162, + "loss": 0.309, + "step": 3110 + }, + { + "epoch": 3.69, + "learning_rate": 0.00013893017253548858, + "loss": 0.4026, + "step": 3111 + }, + { + "epoch": 3.69, + "learning_rate": 0.00013889538907206755, + "loss": 0.4724, + "step": 3112 + }, + { + "epoch": 3.69, + "learning_rate": 0.0001388606000630125, + "loss": 0.3606, + "step": 3113 + }, + { + "epoch": 3.7, + "learning_rate": 0.0001388258055132835, + "loss": 0.4894, + "step": 3114 + }, + { + "epoch": 3.7, + "learning_rate": 0.0001387910054278416, + "loss": 0.4832, + "step": 3115 + }, + { + "epoch": 3.7, + "learning_rate": 0.0001387561998116484, + "loss": 0.4604, + "step": 3116 + }, + { + "epoch": 3.7, + "learning_rate": 0.00013872138866966656, + "loss": 0.4377, + "step": 3117 + }, + { + "epoch": 3.7, + "learning_rate": 0.00013868657200685934, + "loss": 0.3965, + "step": 3118 + }, + { + "epoch": 3.7, + "learning_rate": 0.0001386517498281908, + "loss": 0.7653, + "step": 3119 + }, + { + "epoch": 3.7, + "learning_rate": 0.00013861692213862584, + "loss": 0.5213, + "step": 3120 + }, + { + "epoch": 3.7, + "learning_rate": 0.00013858208894313017, + "loss": 0.9296, + "step": 3121 + }, + { + "epoch": 3.7, + "learning_rate": 0.00013854725024667016, + "loss": 0.7738, + "step": 3122 + }, + { + "epoch": 3.71, + "learning_rate": 0.00013851240605421315, + "loss": 0.5826, + "step": 3123 + }, + { + "epoch": 3.71, + "learning_rate": 0.0001384775563707271, + "loss": 0.5502, + "step": 3124 + }, + { + "epoch": 3.71, + "learning_rate": 0.00013844270120118085, + "loss": 0.3535, + "step": 3125 + }, + { + "epoch": 3.71, + "learning_rate": 0.0001384078405505439, + "loss": 0.4853, + "step": 3126 + }, + { + "epoch": 3.71, + "learning_rate": 0.00013837297442378675, + "loss": 0.5819, + "step": 3127 + }, + { + "epoch": 3.71, + "learning_rate": 0.00013833810282588044, + "loss": 0.3728, + "step": 3128 + }, + { + "epoch": 3.71, + "learning_rate": 0.00013830322576179697, + "loss": 0.3327, + "step": 3129 + }, + { + "epoch": 3.71, + "learning_rate": 0.000138268343236509, + "loss": 0.4618, + "step": 3130 + }, + { + "epoch": 3.72, + "learning_rate": 0.00013823345525499004, + "loss": 0.3377, + "step": 3131 + }, + { + "epoch": 3.72, + "learning_rate": 0.00013819856182221434, + "loss": 0.3154, + "step": 3132 + }, + { + "epoch": 3.72, + "learning_rate": 0.00013816366294315695, + "loss": 0.5116, + "step": 3133 + }, + { + "epoch": 3.72, + "learning_rate": 0.0001381287586227937, + "loss": 0.4987, + "step": 3134 + }, + { + "epoch": 3.72, + "learning_rate": 0.00013809384886610118, + "loss": 0.5596, + "step": 3135 + }, + { + "epoch": 3.72, + "eval_loss": 2.939779281616211, + "eval_runtime": 283.9953, + "eval_samples_per_second": 0.725, + "eval_steps_per_second": 0.725, + "step": 3135 + }, + { + "epoch": 3.72, + "learning_rate": 0.00013805893367805678, + "loss": 0.5128, + "step": 3136 + }, + { + "epoch": 3.72, + "learning_rate": 0.0001380240130636386, + "loss": 0.3149, + "step": 3137 + }, + { + "epoch": 3.72, + "learning_rate": 0.00013798908702782558, + "loss": 0.4984, + "step": 3138 + }, + { + "epoch": 3.73, + "learning_rate": 0.0001379541555755974, + "loss": 0.626, + "step": 3139 + }, + { + "epoch": 3.73, + "learning_rate": 0.00013791921871193457, + "loss": 0.4949, + "step": 3140 + }, + { + "epoch": 3.73, + "learning_rate": 0.00013788427644181823, + "loss": 0.5654, + "step": 3141 + }, + { + "epoch": 3.73, + "learning_rate": 0.0001378493287702305, + "loss": 0.4197, + "step": 3142 + }, + { + "epoch": 3.73, + "learning_rate": 0.00013781437570215406, + "loss": 0.4341, + "step": 3143 + }, + { + "epoch": 3.73, + "learning_rate": 0.00013777941724257253, + "loss": 0.3576, + "step": 3144 + }, + { + "epoch": 3.73, + "learning_rate": 0.00013774445339647014, + "loss": 0.3098, + "step": 3145 + }, + { + "epoch": 3.73, + "learning_rate": 0.00013770948416883205, + "loss": 0.6052, + "step": 3146 + }, + { + "epoch": 3.73, + "learning_rate": 0.00013767450956464407, + "loss": 0.4327, + "step": 3147 + }, + { + "epoch": 3.74, + "learning_rate": 0.00013763952958889287, + "loss": 0.4717, + "step": 3148 + }, + { + "epoch": 3.74, + "learning_rate": 0.0001376045442465657, + "loss": 0.5263, + "step": 3149 + }, + { + "epoch": 3.74, + "learning_rate": 0.00013756955354265085, + "loss": 0.5021, + "step": 3150 + }, + { + "epoch": 3.74, + "learning_rate": 0.00013753455748213714, + "loss": 0.4066, + "step": 3151 + }, + { + "epoch": 3.74, + "learning_rate": 0.00013749955607001433, + "loss": 0.3461, + "step": 3152 + }, + { + "epoch": 3.74, + "learning_rate": 0.00013746454931127278, + "loss": 0.4318, + "step": 3153 + }, + { + "epoch": 3.74, + "learning_rate": 0.00013742953721090372, + "loss": 0.4195, + "step": 3154 + }, + { + "epoch": 3.74, + "learning_rate": 0.0001373945197738991, + "loss": 0.3862, + "step": 3155 + }, + { + "epoch": 3.75, + "learning_rate": 0.00013735949700525163, + "loss": 0.5916, + "step": 3156 + }, + { + "epoch": 3.75, + "learning_rate": 0.00013732446890995484, + "loss": 0.5336, + "step": 3157 + }, + { + "epoch": 3.75, + "learning_rate": 0.00013728943549300295, + "loss": 0.4104, + "step": 3158 + }, + { + "epoch": 3.75, + "learning_rate": 0.00013725439675939095, + "loss": 0.541, + "step": 3159 + }, + { + "epoch": 3.75, + "learning_rate": 0.00013721935271411464, + "loss": 0.5173, + "step": 3160 + }, + { + "epoch": 3.75, + "learning_rate": 0.00013718430336217045, + "loss": 0.3866, + "step": 3161 + }, + { + "epoch": 3.75, + "learning_rate": 0.00013714924870855571, + "loss": 0.6113, + "step": 3162 + }, + { + "epoch": 3.75, + "learning_rate": 0.00013711418875826846, + "loss": 0.5817, + "step": 3163 + }, + { + "epoch": 3.76, + "learning_rate": 0.0001370791235163075, + "loss": 0.5331, + "step": 3164 + }, + { + "epoch": 3.76, + "learning_rate": 0.00013704405298767229, + "loss": 0.5744, + "step": 3165 + }, + { + "epoch": 3.76, + "learning_rate": 0.0001370089771773632, + "loss": 0.494, + "step": 3166 + }, + { + "epoch": 3.76, + "learning_rate": 0.00013697389609038124, + "loss": 0.4537, + "step": 3167 + }, + { + "epoch": 3.76, + "learning_rate": 0.00013693880973172822, + "loss": 0.5494, + "step": 3168 + }, + { + "epoch": 3.76, + "learning_rate": 0.00013690371810640665, + "loss": 0.537, + "step": 3169 + }, + { + "epoch": 3.76, + "learning_rate": 0.0001368686212194199, + "loss": 0.4698, + "step": 3170 + }, + { + "epoch": 3.76, + "learning_rate": 0.00013683351907577194, + "loss": 0.5254, + "step": 3171 + }, + { + "epoch": 3.76, + "learning_rate": 0.00013679841168046767, + "loss": 0.3857, + "step": 3172 + }, + { + "epoch": 3.77, + "learning_rate": 0.00013676329903851254, + "loss": 0.4464, + "step": 3173 + }, + { + "epoch": 3.77, + "learning_rate": 0.0001367281811549129, + "loss": 0.5651, + "step": 3174 + }, + { + "epoch": 3.77, + "learning_rate": 0.0001366930580346758, + "loss": 0.4192, + "step": 3175 + }, + { + "epoch": 3.77, + "learning_rate": 0.000136657929682809, + "loss": 0.3364, + "step": 3176 + }, + { + "epoch": 3.77, + "learning_rate": 0.00013662279610432104, + "loss": 0.3539, + "step": 3177 + }, + { + "epoch": 3.77, + "learning_rate": 0.00013658765730422125, + "loss": 0.6074, + "step": 3178 + }, + { + "epoch": 3.77, + "learning_rate": 0.00013655251328751957, + "loss": 0.5322, + "step": 3179 + }, + { + "epoch": 3.77, + "learning_rate": 0.00013651736405922686, + "loss": 0.4176, + "step": 3180 + }, + { + "epoch": 3.78, + "learning_rate": 0.00013648220962435458, + "loss": 0.4878, + "step": 3181 + }, + { + "epoch": 3.78, + "learning_rate": 0.000136447049987915, + "loss": 0.6351, + "step": 3182 + }, + { + "epoch": 3.78, + "learning_rate": 0.00013641188515492109, + "loss": 0.4487, + "step": 3183 + }, + { + "epoch": 3.78, + "learning_rate": 0.0001363767151303866, + "loss": 0.4451, + "step": 3184 + }, + { + "epoch": 3.78, + "learning_rate": 0.00013634153991932607, + "loss": 0.4944, + "step": 3185 + }, + { + "epoch": 3.78, + "learning_rate": 0.0001363063595267547, + "loss": 0.5932, + "step": 3186 + }, + { + "epoch": 3.78, + "learning_rate": 0.00013627117395768833, + "loss": 0.4964, + "step": 3187 + }, + { + "epoch": 3.78, + "learning_rate": 0.0001362359832171438, + "loss": 0.6795, + "step": 3188 + }, + { + "epoch": 3.79, + "learning_rate": 0.00013620078731013845, + "loss": 0.3862, + "step": 3189 + }, + { + "epoch": 3.79, + "learning_rate": 0.0001361655862416905, + "loss": 0.3425, + "step": 3190 + }, + { + "epoch": 3.79, + "learning_rate": 0.0001361303800168188, + "loss": 0.4361, + "step": 3191 + }, + { + "epoch": 3.79, + "learning_rate": 0.0001360951686405431, + "loss": 0.5774, + "step": 3192 + }, + { + "epoch": 3.79, + "learning_rate": 0.00013605995211788365, + "loss": 0.4044, + "step": 3193 + }, + { + "epoch": 3.79, + "learning_rate": 0.00013602473045386165, + "loss": 0.3858, + "step": 3194 + }, + { + "epoch": 3.79, + "learning_rate": 0.00013598950365349883, + "loss": 0.6136, + "step": 3195 + }, + { + "epoch": 3.79, + "learning_rate": 0.00013595427172181785, + "loss": 0.329, + "step": 3196 + }, + { + "epoch": 3.79, + "learning_rate": 0.00013591903466384203, + "loss": 0.3898, + "step": 3197 + }, + { + "epoch": 3.8, + "learning_rate": 0.00013588379248459536, + "loss": 0.4809, + "step": 3198 + }, + { + "epoch": 3.8, + "learning_rate": 0.00013584854518910262, + "loss": 0.4108, + "step": 3199 + }, + { + "epoch": 3.8, + "learning_rate": 0.00013581329278238927, + "loss": 0.4655, + "step": 3200 + }, + { + "epoch": 3.8, + "learning_rate": 0.00013577803526948162, + "loss": 0.4657, + "step": 3201 + }, + { + "epoch": 3.8, + "learning_rate": 0.00013574277265540654, + "loss": 0.4842, + "step": 3202 + }, + { + "epoch": 3.8, + "learning_rate": 0.00013570750494519175, + "loss": 0.4593, + "step": 3203 + }, + { + "epoch": 3.8, + "learning_rate": 0.00013567223214386564, + "loss": 0.435, + "step": 3204 + }, + { + "epoch": 3.8, + "learning_rate": 0.00013563695425645737, + "loss": 0.7146, + "step": 3205 + }, + { + "epoch": 3.81, + "learning_rate": 0.00013560167128799674, + "loss": 0.5027, + "step": 3206 + }, + { + "epoch": 3.81, + "learning_rate": 0.00013556638324351442, + "loss": 0.4844, + "step": 3207 + }, + { + "epoch": 3.81, + "learning_rate": 0.00013553109012804163, + "loss": 0.7605, + "step": 3208 + }, + { + "epoch": 3.81, + "learning_rate": 0.00013549579194661044, + "loss": 0.396, + "step": 3209 + }, + { + "epoch": 3.81, + "learning_rate": 0.00013546048870425356, + "loss": 0.5178, + "step": 3210 + }, + { + "epoch": 3.81, + "learning_rate": 0.00013542518040600453, + "loss": 0.6946, + "step": 3211 + }, + { + "epoch": 3.81, + "learning_rate": 0.0001353898670568975, + "loss": 0.5054, + "step": 3212 + }, + { + "epoch": 3.81, + "learning_rate": 0.00013535454866196739, + "loss": 0.4495, + "step": 3213 + }, + { + "epoch": 3.82, + "learning_rate": 0.00013531922522624982, + "loss": 0.5138, + "step": 3214 + }, + { + "epoch": 3.82, + "learning_rate": 0.0001352838967547812, + "loss": 0.4706, + "step": 3215 + }, + { + "epoch": 3.82, + "learning_rate": 0.00013524856325259848, + "loss": 0.5193, + "step": 3216 + }, + { + "epoch": 3.82, + "learning_rate": 0.0001352132247247396, + "loss": 0.4436, + "step": 3217 + }, + { + "epoch": 3.82, + "learning_rate": 0.00013517788117624292, + "loss": 0.4139, + "step": 3218 + }, + { + "epoch": 3.82, + "learning_rate": 0.0001351425326121478, + "loss": 0.5937, + "step": 3219 + }, + { + "epoch": 3.82, + "learning_rate": 0.000135107179037494, + "loss": 0.3375, + "step": 3220 + }, + { + "epoch": 3.82, + "learning_rate": 0.00013507182045732234, + "loss": 0.3712, + "step": 3221 + }, + { + "epoch": 3.82, + "learning_rate": 0.00013503645687667408, + "loss": 0.3424, + "step": 3222 + }, + { + "epoch": 3.83, + "learning_rate": 0.00013500108830059133, + "loss": 0.3333, + "step": 3223 + }, + { + "epoch": 3.83, + "learning_rate": 0.00013496571473411688, + "loss": 0.4042, + "step": 3224 + }, + { + "epoch": 3.83, + "learning_rate": 0.00013493033618229417, + "loss": 0.4963, + "step": 3225 + }, + { + "epoch": 3.83, + "learning_rate": 0.0001348949526501675, + "loss": 0.3946, + "step": 3226 + }, + { + "epoch": 3.83, + "learning_rate": 0.00013485956414278178, + "loss": 0.5807, + "step": 3227 + }, + { + "epoch": 3.83, + "learning_rate": 0.00013482417066518256, + "loss": 0.4561, + "step": 3228 + }, + { + "epoch": 3.83, + "learning_rate": 0.00013478877222241627, + "loss": 0.4964, + "step": 3229 + }, + { + "epoch": 3.83, + "learning_rate": 0.00013475336881952986, + "loss": 0.6429, + "step": 3230 + }, + { + "epoch": 3.84, + "learning_rate": 0.00013471796046157116, + "loss": 0.5466, + "step": 3231 + }, + { + "epoch": 3.84, + "learning_rate": 0.00013468254715358861, + "loss": 0.3882, + "step": 3232 + }, + { + "epoch": 3.84, + "learning_rate": 0.00013464712890063138, + "loss": 0.5006, + "step": 3233 + }, + { + "epoch": 3.84, + "learning_rate": 0.0001346117057077493, + "loss": 0.494, + "step": 3234 + }, + { + "epoch": 3.84, + "learning_rate": 0.00013457627757999303, + "loss": 0.5444, + "step": 3235 + }, + { + "epoch": 3.84, + "learning_rate": 0.00013454084452241372, + "loss": 0.3714, + "step": 3236 + }, + { + "epoch": 3.84, + "learning_rate": 0.00013450540654006348, + "loss": 0.3335, + "step": 3237 + }, + { + "epoch": 3.84, + "learning_rate": 0.0001344699636379949, + "loss": 0.4771, + "step": 3238 + }, + { + "epoch": 3.85, + "learning_rate": 0.00013443451582126144, + "loss": 0.466, + "step": 3239 + }, + { + "epoch": 3.85, + "learning_rate": 0.00013439906309491712, + "loss": 0.5537, + "step": 3240 + }, + { + "epoch": 3.85, + "learning_rate": 0.00013436360546401676, + "loss": 0.5899, + "step": 3241 + }, + { + "epoch": 3.85, + "learning_rate": 0.00013432814293361584, + "loss": 0.443, + "step": 3242 + }, + { + "epoch": 3.85, + "learning_rate": 0.00013429267550877055, + "loss": 0.4238, + "step": 3243 + }, + { + "epoch": 3.85, + "learning_rate": 0.00013425720319453773, + "loss": 0.6529, + "step": 3244 + }, + { + "epoch": 3.85, + "learning_rate": 0.00013422172599597505, + "loss": 0.6163, + "step": 3245 + }, + { + "epoch": 3.85, + "learning_rate": 0.00013418624391814068, + "loss": 0.5183, + "step": 3246 + }, + { + "epoch": 3.85, + "learning_rate": 0.00013415075696609363, + "loss": 0.7659, + "step": 3247 + }, + { + "epoch": 3.86, + "learning_rate": 0.0001341152651448936, + "loss": 0.3717, + "step": 3248 + }, + { + "epoch": 3.86, + "learning_rate": 0.0001340797684596009, + "loss": 0.6885, + "step": 3249 + }, + { + "epoch": 3.86, + "learning_rate": 0.0001340442669152766, + "loss": 0.4483, + "step": 3250 + }, + { + "epoch": 3.86, + "learning_rate": 0.0001340087605169825, + "loss": 0.3417, + "step": 3251 + }, + { + "epoch": 3.86, + "learning_rate": 0.00013397324926978094, + "loss": 0.4751, + "step": 3252 + }, + { + "epoch": 3.86, + "learning_rate": 0.00013393773317873508, + "loss": 0.4448, + "step": 3253 + }, + { + "epoch": 3.86, + "learning_rate": 0.00013390221224890878, + "loss": 0.6278, + "step": 3254 + }, + { + "epoch": 3.86, + "learning_rate": 0.00013386668648536655, + "loss": 0.2995, + "step": 3255 + }, + { + "epoch": 3.87, + "learning_rate": 0.00013383115589317353, + "loss": 0.535, + "step": 3256 + }, + { + "epoch": 3.87, + "learning_rate": 0.00013379562047739568, + "loss": 0.4972, + "step": 3257 + }, + { + "epoch": 3.87, + "learning_rate": 0.00013376008024309948, + "loss": 0.4821, + "step": 3258 + }, + { + "epoch": 3.87, + "learning_rate": 0.0001337245351953523, + "loss": 0.392, + "step": 3259 + }, + { + "epoch": 3.87, + "learning_rate": 0.000133688985339222, + "loss": 0.413, + "step": 3260 + }, + { + "epoch": 3.87, + "learning_rate": 0.00013365343067977726, + "loss": 0.4689, + "step": 3261 + }, + { + "epoch": 3.87, + "learning_rate": 0.00013361787122208744, + "loss": 0.4737, + "step": 3262 + }, + { + "epoch": 3.87, + "learning_rate": 0.00013358230697122246, + "loss": 0.5033, + "step": 3263 + }, + { + "epoch": 3.88, + "learning_rate": 0.00013354673793225302, + "loss": 0.4901, + "step": 3264 + }, + { + "epoch": 3.88, + "learning_rate": 0.00013351116411025054, + "loss": 0.5776, + "step": 3265 + }, + { + "epoch": 3.88, + "learning_rate": 0.00013347558551028702, + "loss": 0.5005, + "step": 3266 + }, + { + "epoch": 3.88, + "learning_rate": 0.00013344000213743522, + "loss": 0.6475, + "step": 3267 + }, + { + "epoch": 3.88, + "learning_rate": 0.00013340441399676856, + "loss": 0.4394, + "step": 3268 + }, + { + "epoch": 3.88, + "learning_rate": 0.0001333688210933611, + "loss": 0.4351, + "step": 3269 + }, + { + "epoch": 3.88, + "learning_rate": 0.0001333332234322876, + "loss": 0.4526, + "step": 3270 + }, + { + "epoch": 3.88, + "learning_rate": 0.0001332976210186236, + "loss": 0.3006, + "step": 3271 + }, + { + "epoch": 3.88, + "learning_rate": 0.00013326201385744518, + "loss": 0.382, + "step": 3272 + }, + { + "epoch": 3.89, + "learning_rate": 0.00013322640195382907, + "loss": 0.3488, + "step": 3273 + }, + { + "epoch": 3.89, + "learning_rate": 0.00013319078531285285, + "loss": 0.5538, + "step": 3274 + }, + { + "epoch": 3.89, + "learning_rate": 0.00013315516393959463, + "loss": 0.5328, + "step": 3275 + }, + { + "epoch": 3.89, + "learning_rate": 0.00013311953783913324, + "loss": 0.5216, + "step": 3276 + }, + { + "epoch": 3.89, + "learning_rate": 0.0001330839070165482, + "loss": 0.3845, + "step": 3277 + }, + { + "epoch": 3.89, + "learning_rate": 0.0001330482714769197, + "loss": 0.5293, + "step": 3278 + }, + { + "epoch": 3.89, + "learning_rate": 0.00013301263122532855, + "loss": 0.5415, + "step": 3279 + }, + { + "epoch": 3.89, + "learning_rate": 0.0001329769862668563, + "loss": 0.5309, + "step": 3280 + }, + { + "epoch": 3.9, + "learning_rate": 0.00013294133660658516, + "loss": 0.4629, + "step": 3281 + }, + { + "epoch": 3.9, + "learning_rate": 0.00013290568224959794, + "loss": 0.4329, + "step": 3282 + }, + { + "epoch": 3.9, + "learning_rate": 0.00013287002320097821, + "loss": 0.3973, + "step": 3283 + }, + { + "epoch": 3.9, + "learning_rate": 0.0001328343594658102, + "loss": 0.3417, + "step": 3284 + }, + { + "epoch": 3.9, + "learning_rate": 0.00013279869104917873, + "loss": 0.4784, + "step": 3285 + }, + { + "epoch": 3.9, + "learning_rate": 0.00013276301795616936, + "loss": 0.3668, + "step": 3286 + }, + { + "epoch": 3.9, + "learning_rate": 0.0001327273401918683, + "loss": 0.3726, + "step": 3287 + }, + { + "epoch": 3.9, + "learning_rate": 0.00013269165776136238, + "loss": 0.518, + "step": 3288 + }, + { + "epoch": 3.91, + "learning_rate": 0.00013265597066973922, + "loss": 0.3864, + "step": 3289 + }, + { + "epoch": 3.91, + "learning_rate": 0.00013262027892208694, + "loss": 0.4249, + "step": 3290 + }, + { + "epoch": 3.91, + "learning_rate": 0.00013258458252349444, + "loss": 0.395, + "step": 3291 + }, + { + "epoch": 3.91, + "learning_rate": 0.00013254888147905126, + "loss": 0.8359, + "step": 3292 + }, + { + "epoch": 3.91, + "learning_rate": 0.00013251317579384756, + "loss": 0.5028, + "step": 3293 + }, + { + "epoch": 3.91, + "learning_rate": 0.0001324774654729742, + "loss": 0.4216, + "step": 3294 + }, + { + "epoch": 3.91, + "learning_rate": 0.0001324417505215227, + "loss": 0.6145, + "step": 3295 + }, + { + "epoch": 3.91, + "learning_rate": 0.00013240603094458522, + "loss": 0.6158, + "step": 3296 + }, + { + "epoch": 3.91, + "learning_rate": 0.00013237030674725464, + "loss": 0.5101, + "step": 3297 + }, + { + "epoch": 3.92, + "learning_rate": 0.0001323345779346244, + "loss": 0.6933, + "step": 3298 + }, + { + "epoch": 3.92, + "learning_rate": 0.0001322988445117886, + "loss": 0.4192, + "step": 3299 + }, + { + "epoch": 3.92, + "learning_rate": 0.0001322631064838422, + "loss": 0.4549, + "step": 3300 + }, + { + "epoch": 3.92, + "learning_rate": 0.00013222736385588054, + "loss": 0.4947, + "step": 3301 + }, + { + "epoch": 3.92, + "learning_rate": 0.00013219161663299982, + "loss": 0.5383, + "step": 3302 + }, + { + "epoch": 3.92, + "learning_rate": 0.00013215586482029669, + "loss": 0.4919, + "step": 3303 + }, + { + "epoch": 3.92, + "learning_rate": 0.0001321201084228687, + "loss": 0.4603, + "step": 3304 + }, + { + "epoch": 3.92, + "learning_rate": 0.00013208434744581385, + "loss": 0.3127, + "step": 3305 + }, + { + "epoch": 3.93, + "learning_rate": 0.00013204858189423097, + "loss": 0.754, + "step": 3306 + }, + { + "epoch": 3.93, + "learning_rate": 0.00013201281177321935, + "loss": 0.3746, + "step": 3307 + }, + { + "epoch": 3.93, + "learning_rate": 0.00013197703708787913, + "loss": 0.5576, + "step": 3308 + }, + { + "epoch": 3.93, + "learning_rate": 0.0001319412578433109, + "loss": 0.4992, + "step": 3309 + }, + { + "epoch": 3.93, + "learning_rate": 0.00013190547404461598, + "loss": 0.4533, + "step": 3310 + }, + { + "epoch": 3.93, + "learning_rate": 0.0001318696856968965, + "loss": 0.4155, + "step": 3311 + }, + { + "epoch": 3.93, + "learning_rate": 0.00013183389280525497, + "loss": 0.3661, + "step": 3312 + }, + { + "epoch": 3.93, + "learning_rate": 0.00013179809537479476, + "loss": 0.4512, + "step": 3313 + }, + { + "epoch": 3.94, + "learning_rate": 0.00013176229341061975, + "loss": 0.5895, + "step": 3314 + }, + { + "epoch": 3.94, + "learning_rate": 0.00013172648691783454, + "loss": 0.3308, + "step": 3315 + }, + { + "epoch": 3.94, + "learning_rate": 0.00013169067590154432, + "loss": 0.4128, + "step": 3316 + }, + { + "epoch": 3.94, + "learning_rate": 0.00013165486036685503, + "loss": 0.5432, + "step": 3317 + }, + { + "epoch": 3.94, + "learning_rate": 0.0001316190403188731, + "loss": 0.4297, + "step": 3318 + }, + { + "epoch": 3.94, + "learning_rate": 0.00013158321576270575, + "loss": 0.4259, + "step": 3319 + }, + { + "epoch": 3.94, + "learning_rate": 0.0001315473867034608, + "loss": 0.4428, + "step": 3320 + }, + { + "epoch": 3.94, + "learning_rate": 0.0001315115531462466, + "loss": 0.6495, + "step": 3321 + }, + { + "epoch": 3.94, + "learning_rate": 0.00013147571509617228, + "loss": 0.5706, + "step": 3322 + }, + { + "epoch": 3.95, + "learning_rate": 0.0001314398725583476, + "loss": 0.3647, + "step": 3323 + }, + { + "epoch": 3.95, + "learning_rate": 0.0001314040255378829, + "loss": 0.4864, + "step": 3324 + }, + { + "epoch": 3.95, + "learning_rate": 0.00013136817403988917, + "loss": 0.4197, + "step": 3325 + }, + { + "epoch": 3.95, + "learning_rate": 0.00013133231806947805, + "loss": 0.4818, + "step": 3326 + }, + { + "epoch": 3.95, + "learning_rate": 0.00013129645763176184, + "loss": 0.4201, + "step": 3327 + }, + { + "epoch": 3.95, + "learning_rate": 0.0001312605927318534, + "loss": 0.4352, + "step": 3328 + }, + { + "epoch": 3.95, + "learning_rate": 0.0001312247233748664, + "loss": 0.2785, + "step": 3329 + }, + { + "epoch": 3.95, + "learning_rate": 0.0001311888495659149, + "loss": 0.4424, + "step": 3330 + }, + { + "epoch": 3.96, + "learning_rate": 0.00013115297131011382, + "loss": 0.4258, + "step": 3331 + }, + { + "epoch": 3.96, + "learning_rate": 0.00013111708861257855, + "loss": 0.4332, + "step": 3332 + }, + { + "epoch": 3.96, + "learning_rate": 0.00013108120147842519, + "loss": 0.3578, + "step": 3333 + }, + { + "epoch": 3.96, + "learning_rate": 0.0001310453099127705, + "loss": 0.4219, + "step": 3334 + }, + { + "epoch": 3.96, + "learning_rate": 0.0001310094139207318, + "loss": 0.5837, + "step": 3335 + }, + { + "epoch": 3.96, + "learning_rate": 0.0001309735135074271, + "loss": 0.3965, + "step": 3336 + }, + { + "epoch": 3.96, + "learning_rate": 0.00013093760867797502, + "loss": 0.4764, + "step": 3337 + }, + { + "epoch": 3.96, + "learning_rate": 0.00013090169943749476, + "loss": 0.4933, + "step": 3338 + }, + { + "epoch": 3.97, + "learning_rate": 0.00013086578579110623, + "loss": 0.3434, + "step": 3339 + }, + { + "epoch": 3.97, + "learning_rate": 0.0001308298677439299, + "loss": 0.5931, + "step": 3340 + }, + { + "epoch": 3.97, + "learning_rate": 0.00013079394530108695, + "loss": 0.442, + "step": 3341 + }, + { + "epoch": 3.97, + "learning_rate": 0.0001307580184676991, + "loss": 0.3229, + "step": 3342 + }, + { + "epoch": 3.97, + "learning_rate": 0.0001307220872488888, + "loss": 0.4567, + "step": 3343 + }, + { + "epoch": 3.97, + "learning_rate": 0.00013068615164977895, + "loss": 0.6224, + "step": 3344 + }, + { + "epoch": 3.97, + "eval_loss": 2.954587936401367, + "eval_runtime": 283.9817, + "eval_samples_per_second": 0.725, + "eval_steps_per_second": 0.725, + "step": 3344 + }, + { + "epoch": 3.97, + "learning_rate": 0.00013065021167549322, + "loss": 0.6767, + "step": 3345 + }, + { + "epoch": 3.97, + "learning_rate": 0.0001306142673311559, + "loss": 0.4809, + "step": 3346 + }, + { + "epoch": 3.97, + "learning_rate": 0.00013057831862189187, + "loss": 0.4563, + "step": 3347 + }, + { + "epoch": 3.98, + "learning_rate": 0.00013054236555282657, + "loss": 0.4674, + "step": 3348 + }, + { + "epoch": 3.98, + "learning_rate": 0.00013050640812908623, + "loss": 0.6636, + "step": 3349 + }, + { + "epoch": 3.98, + "learning_rate": 0.00013047044635579747, + "loss": 0.4652, + "step": 3350 + }, + { + "epoch": 3.98, + "learning_rate": 0.00013043448023808773, + "loss": 0.3912, + "step": 3351 + }, + { + "epoch": 3.98, + "learning_rate": 0.000130398509781085, + "loss": 0.6064, + "step": 3352 + }, + { + "epoch": 3.98, + "learning_rate": 0.00013036253498991787, + "loss": 0.5975, + "step": 3353 + }, + { + "epoch": 3.98, + "learning_rate": 0.00013032655586971552, + "loss": 0.7249, + "step": 3354 + }, + { + "epoch": 3.98, + "learning_rate": 0.00013029057242560784, + "loss": 0.4604, + "step": 3355 + }, + { + "epoch": 3.99, + "learning_rate": 0.00013025458466272525, + "loss": 0.4895, + "step": 3356 + }, + { + "epoch": 3.99, + "learning_rate": 0.0001302185925861988, + "loss": 0.3628, + "step": 3357 + }, + { + "epoch": 3.99, + "learning_rate": 0.00013018259620116025, + "loss": 0.4798, + "step": 3358 + }, + { + "epoch": 3.99, + "learning_rate": 0.00013014659551274189, + "loss": 0.663, + "step": 3359 + }, + { + "epoch": 3.99, + "learning_rate": 0.00013011059052607656, + "loss": 0.3923, + "step": 3360 + }, + { + "epoch": 3.99, + "learning_rate": 0.00013007458124629785, + "loss": 0.5601, + "step": 3361 + }, + { + "epoch": 3.99, + "learning_rate": 0.00013003856767853983, + "loss": 0.67, + "step": 3362 + }, + { + "epoch": 3.99, + "learning_rate": 0.00013000254982793735, + "loss": 0.5059, + "step": 3363 + }, + { + "epoch": 4.0, + "learning_rate": 0.00012996652769962566, + "loss": 0.4992, + "step": 3364 + }, + { + "epoch": 4.0, + "learning_rate": 0.00012993050129874082, + "loss": 0.6196, + "step": 3365 + }, + { + "epoch": 4.0, + "learning_rate": 0.00012989447063041935, + "loss": 0.4157, + "step": 3366 + }, + { + "epoch": 4.0, + "learning_rate": 0.00012985843569979848, + "loss": 0.5714, + "step": 3367 + }, + { + "epoch": 4.0, + "learning_rate": 0.000129822396512016, + "loss": 0.7484, + "step": 3368 + }, + { + "epoch": 4.0, + "learning_rate": 0.00012978635307221026, + "loss": 0.3928, + "step": 3369 + }, + { + "epoch": 4.0, + "learning_rate": 0.00012975030538552032, + "loss": 0.4129, + "step": 3370 + }, + { + "epoch": 4.0, + "learning_rate": 0.0001297142534570858, + "loss": 0.5407, + "step": 3371 + }, + { + "epoch": 4.0, + "learning_rate": 0.00012967819729204684, + "loss": 0.479, + "step": 3372 + }, + { + "epoch": 4.01, + "learning_rate": 0.00012964213689554437, + "loss": 0.4492, + "step": 3373 + }, + { + "epoch": 4.01, + "learning_rate": 0.00012960607227271973, + "loss": 0.4574, + "step": 3374 + }, + { + "epoch": 4.01, + "learning_rate": 0.00012957000342871502, + "loss": 0.7554, + "step": 3375 + }, + { + "epoch": 4.01, + "learning_rate": 0.00012953393036867282, + "loss": 0.3038, + "step": 3376 + }, + { + "epoch": 4.01, + "learning_rate": 0.0001294978530977364, + "loss": 0.5125, + "step": 3377 + }, + { + "epoch": 4.01, + "learning_rate": 0.0001294617716210495, + "loss": 0.7192, + "step": 3378 + }, + { + "epoch": 4.01, + "learning_rate": 0.00012942568594375667, + "loss": 0.4371, + "step": 3379 + }, + { + "epoch": 4.01, + "learning_rate": 0.00012938959607100288, + "loss": 0.3672, + "step": 3380 + }, + { + "epoch": 4.0, + "learning_rate": 0.00012935350200793378, + "loss": 0.4752, + "step": 3381 + }, + { + "epoch": 4.0, + "learning_rate": 0.0001293174037596956, + "loss": 0.225, + "step": 3382 + }, + { + "epoch": 4.0, + "learning_rate": 0.00012928130133143512, + "loss": 0.2106, + "step": 3383 + }, + { + "epoch": 4.0, + "learning_rate": 0.00012924519472829978, + "loss": 0.213, + "step": 3384 + }, + { + "epoch": 4.01, + "learning_rate": 0.0001292090839554376, + "loss": 0.2775, + "step": 3385 + }, + { + "epoch": 4.01, + "learning_rate": 0.0001291729690179972, + "loss": 0.2417, + "step": 3386 + }, + { + "epoch": 4.01, + "learning_rate": 0.0001291368499211278, + "loss": 0.2212, + "step": 3387 + }, + { + "epoch": 4.01, + "learning_rate": 0.00012910072666997912, + "loss": 0.2644, + "step": 3388 + }, + { + "epoch": 4.01, + "learning_rate": 0.00012906459926970162, + "loss": 0.2206, + "step": 3389 + }, + { + "epoch": 4.01, + "learning_rate": 0.00012902846772544624, + "loss": 0.2238, + "step": 3390 + }, + { + "epoch": 4.01, + "learning_rate": 0.00012899233204236455, + "loss": 0.2212, + "step": 3391 + }, + { + "epoch": 4.01, + "learning_rate": 0.00012895619222560878, + "loss": 0.2082, + "step": 3392 + }, + { + "epoch": 4.02, + "learning_rate": 0.00012892004828033156, + "loss": 0.2896, + "step": 3393 + }, + { + "epoch": 4.02, + "learning_rate": 0.00012888390021168636, + "loss": 0.2351, + "step": 3394 + }, + { + "epoch": 4.02, + "learning_rate": 0.00012884774802482697, + "loss": 0.2263, + "step": 3395 + }, + { + "epoch": 4.02, + "learning_rate": 0.000128811591724908, + "loss": 0.2243, + "step": 3396 + }, + { + "epoch": 4.02, + "learning_rate": 0.0001287754313170845, + "loss": 0.2433, + "step": 3397 + }, + { + "epoch": 4.02, + "learning_rate": 0.00012873926680651222, + "loss": 0.2566, + "step": 3398 + }, + { + "epoch": 4.02, + "learning_rate": 0.00012870309819834735, + "loss": 0.2537, + "step": 3399 + }, + { + "epoch": 4.02, + "learning_rate": 0.00012866692549774682, + "loss": 0.298, + "step": 3400 + }, + { + "epoch": 4.03, + "learning_rate": 0.000128630748709868, + "loss": 0.2246, + "step": 3401 + }, + { + "epoch": 4.03, + "learning_rate": 0.00012859456783986893, + "loss": 0.2179, + "step": 3402 + }, + { + "epoch": 4.03, + "learning_rate": 0.00012855838289290821, + "loss": 0.2394, + "step": 3403 + }, + { + "epoch": 4.03, + "learning_rate": 0.0001285221938741451, + "loss": 0.2068, + "step": 3404 + }, + { + "epoch": 4.03, + "learning_rate": 0.00012848600078873925, + "loss": 0.1961, + "step": 3405 + }, + { + "epoch": 4.03, + "learning_rate": 0.00012844980364185108, + "loss": 0.2719, + "step": 3406 + }, + { + "epoch": 4.03, + "learning_rate": 0.00012841360243864147, + "loss": 0.2009, + "step": 3407 + }, + { + "epoch": 4.03, + "learning_rate": 0.00012837739718427196, + "loss": 0.2343, + "step": 3408 + }, + { + "epoch": 4.03, + "learning_rate": 0.00012834118788390456, + "loss": 0.3161, + "step": 3409 + }, + { + "epoch": 4.04, + "learning_rate": 0.00012830497454270205, + "loss": 0.1992, + "step": 3410 + }, + { + "epoch": 4.04, + "learning_rate": 0.00012826875716582755, + "loss": 0.261, + "step": 3411 + }, + { + "epoch": 4.04, + "learning_rate": 0.00012823253575844495, + "loss": 0.2403, + "step": 3412 + }, + { + "epoch": 4.04, + "learning_rate": 0.00012819631032571854, + "loss": 0.2271, + "step": 3413 + }, + { + "epoch": 4.04, + "learning_rate": 0.00012816008087281332, + "loss": 0.2062, + "step": 3414 + }, + { + "epoch": 4.04, + "learning_rate": 0.00012812384740489485, + "loss": 0.2133, + "step": 3415 + }, + { + "epoch": 4.04, + "learning_rate": 0.00012808760992712924, + "loss": 0.2372, + "step": 3416 + }, + { + "epoch": 4.04, + "learning_rate": 0.00012805136844468309, + "loss": 0.2466, + "step": 3417 + }, + { + "epoch": 4.05, + "learning_rate": 0.00012801512296272368, + "loss": 0.2456, + "step": 3418 + }, + { + "epoch": 4.05, + "learning_rate": 0.00012797887348641883, + "loss": 0.2171, + "step": 3419 + }, + { + "epoch": 4.05, + "learning_rate": 0.00012794262002093697, + "loss": 0.3038, + "step": 3420 + }, + { + "epoch": 4.05, + "learning_rate": 0.000127906362571447, + "loss": 0.1868, + "step": 3421 + }, + { + "epoch": 4.05, + "learning_rate": 0.00012787010114311844, + "loss": 0.2611, + "step": 3422 + }, + { + "epoch": 4.05, + "learning_rate": 0.00012783383574112138, + "loss": 0.2131, + "step": 3423 + }, + { + "epoch": 4.05, + "learning_rate": 0.0001277975663706265, + "loss": 0.2005, + "step": 3424 + }, + { + "epoch": 4.05, + "learning_rate": 0.000127761293036805, + "loss": 0.2455, + "step": 3425 + }, + { + "epoch": 4.06, + "learning_rate": 0.0001277250157448287, + "loss": 0.2837, + "step": 3426 + }, + { + "epoch": 4.06, + "learning_rate": 0.00012768873449986988, + "loss": 0.2252, + "step": 3427 + }, + { + "epoch": 4.06, + "learning_rate": 0.00012765244930710155, + "loss": 0.211, + "step": 3428 + }, + { + "epoch": 4.06, + "learning_rate": 0.00012761616017169708, + "loss": 0.1831, + "step": 3429 + }, + { + "epoch": 4.06, + "learning_rate": 0.0001275798670988306, + "loss": 0.1985, + "step": 3430 + }, + { + "epoch": 4.06, + "learning_rate": 0.00012754357009367665, + "loss": 0.2341, + "step": 3431 + }, + { + "epoch": 4.06, + "learning_rate": 0.00012750726916141046, + "loss": 0.2395, + "step": 3432 + }, + { + "epoch": 4.06, + "learning_rate": 0.00012747096430720765, + "loss": 0.2183, + "step": 3433 + }, + { + "epoch": 4.06, + "learning_rate": 0.0001274346555362446, + "loss": 0.2698, + "step": 3434 + }, + { + "epoch": 4.07, + "learning_rate": 0.00012739834285369807, + "loss": 0.2104, + "step": 3435 + }, + { + "epoch": 4.07, + "learning_rate": 0.0001273620262647455, + "loss": 0.2395, + "step": 3436 + }, + { + "epoch": 4.07, + "learning_rate": 0.00012732570577456484, + "loss": 0.2218, + "step": 3437 + }, + { + "epoch": 4.07, + "learning_rate": 0.00012728938138833462, + "loss": 0.2337, + "step": 3438 + }, + { + "epoch": 4.07, + "learning_rate": 0.00012725305311123386, + "loss": 0.1958, + "step": 3439 + }, + { + "epoch": 4.07, + "learning_rate": 0.0001272167209484422, + "loss": 0.2767, + "step": 3440 + }, + { + "epoch": 4.07, + "learning_rate": 0.00012718038490513984, + "loss": 0.2238, + "step": 3441 + }, + { + "epoch": 4.07, + "learning_rate": 0.00012714404498650743, + "loss": 0.2931, + "step": 3442 + }, + { + "epoch": 4.08, + "learning_rate": 0.00012710770119772632, + "loss": 0.3166, + "step": 3443 + }, + { + "epoch": 4.08, + "learning_rate": 0.00012707135354397836, + "loss": 0.1985, + "step": 3444 + }, + { + "epoch": 4.08, + "learning_rate": 0.00012703500203044586, + "loss": 0.2208, + "step": 3445 + }, + { + "epoch": 4.08, + "learning_rate": 0.0001269986466623118, + "loss": 0.2279, + "step": 3446 + }, + { + "epoch": 4.08, + "learning_rate": 0.00012696228744475964, + "loss": 0.2656, + "step": 3447 + }, + { + "epoch": 4.08, + "learning_rate": 0.00012692592438297341, + "loss": 0.2181, + "step": 3448 + }, + { + "epoch": 4.08, + "learning_rate": 0.00012688955748213772, + "loss": 0.1994, + "step": 3449 + }, + { + "epoch": 4.08, + "learning_rate": 0.0001268531867474377, + "loss": 0.2818, + "step": 3450 + }, + { + "epoch": 4.09, + "learning_rate": 0.00012681681218405897, + "loss": 0.2277, + "step": 3451 + }, + { + "epoch": 4.09, + "learning_rate": 0.00012678043379718782, + "loss": 0.2692, + "step": 3452 + }, + { + "epoch": 4.09, + "learning_rate": 0.00012674405159201091, + "loss": 0.2664, + "step": 3453 + }, + { + "epoch": 4.09, + "learning_rate": 0.00012670766557371565, + "loss": 0.2008, + "step": 3454 + }, + { + "epoch": 4.09, + "learning_rate": 0.00012667127574748986, + "loss": 0.2382, + "step": 3455 + }, + { + "epoch": 4.09, + "learning_rate": 0.0001266348821185219, + "loss": 0.2454, + "step": 3456 + }, + { + "epoch": 4.09, + "learning_rate": 0.0001265984846920008, + "loss": 0.3547, + "step": 3457 + }, + { + "epoch": 4.09, + "learning_rate": 0.00012656208347311594, + "loss": 0.2115, + "step": 3458 + }, + { + "epoch": 4.09, + "learning_rate": 0.00012652567846705736, + "loss": 0.1929, + "step": 3459 + }, + { + "epoch": 4.1, + "learning_rate": 0.00012648926967901567, + "loss": 0.2076, + "step": 3460 + }, + { + "epoch": 4.1, + "learning_rate": 0.00012645285711418194, + "loss": 0.2045, + "step": 3461 + }, + { + "epoch": 4.1, + "learning_rate": 0.00012641644077774776, + "loss": 0.2378, + "step": 3462 + }, + { + "epoch": 4.1, + "learning_rate": 0.0001263800206749054, + "loss": 0.2674, + "step": 3463 + }, + { + "epoch": 4.1, + "learning_rate": 0.00012634359681084752, + "loss": 0.2125, + "step": 3464 + }, + { + "epoch": 4.1, + "learning_rate": 0.00012630716919076736, + "loss": 0.2097, + "step": 3465 + }, + { + "epoch": 4.1, + "learning_rate": 0.0001262707378198587, + "loss": 0.2352, + "step": 3466 + }, + { + "epoch": 4.1, + "learning_rate": 0.0001262343027033159, + "loss": 0.2105, + "step": 3467 + }, + { + "epoch": 4.11, + "learning_rate": 0.00012619786384633375, + "loss": 0.2207, + "step": 3468 + }, + { + "epoch": 4.11, + "learning_rate": 0.0001261614212541077, + "loss": 0.304, + "step": 3469 + }, + { + "epoch": 4.11, + "learning_rate": 0.00012612497493183364, + "loss": 0.2239, + "step": 3470 + }, + { + "epoch": 4.11, + "learning_rate": 0.00012608852488470802, + "loss": 0.2875, + "step": 3471 + }, + { + "epoch": 4.11, + "learning_rate": 0.0001260520711179278, + "loss": 0.3197, + "step": 3472 + }, + { + "epoch": 4.11, + "learning_rate": 0.00012601561363669058, + "loss": 0.1942, + "step": 3473 + }, + { + "epoch": 4.11, + "learning_rate": 0.00012597915244619428, + "loss": 0.2117, + "step": 3474 + }, + { + "epoch": 4.11, + "learning_rate": 0.00012594268755163754, + "loss": 0.2222, + "step": 3475 + }, + { + "epoch": 4.12, + "learning_rate": 0.00012590621895821943, + "loss": 0.1871, + "step": 3476 + }, + { + "epoch": 4.12, + "learning_rate": 0.0001258697466711396, + "loss": 0.2146, + "step": 3477 + }, + { + "epoch": 4.12, + "learning_rate": 0.0001258332706955982, + "loss": 0.3307, + "step": 3478 + }, + { + "epoch": 4.12, + "learning_rate": 0.00012579679103679592, + "loss": 0.2175, + "step": 3479 + }, + { + "epoch": 4.12, + "learning_rate": 0.00012576030769993393, + "loss": 0.2976, + "step": 3480 + }, + { + "epoch": 4.12, + "learning_rate": 0.000125723820690214, + "loss": 0.2031, + "step": 3481 + }, + { + "epoch": 4.12, + "learning_rate": 0.00012568733001283827, + "loss": 0.2046, + "step": 3482 + }, + { + "epoch": 4.12, + "learning_rate": 0.0001256508356730097, + "loss": 0.2642, + "step": 3483 + }, + { + "epoch": 4.12, + "learning_rate": 0.00012561433767593145, + "loss": 0.2088, + "step": 3484 + }, + { + "epoch": 4.13, + "learning_rate": 0.0001255778360268074, + "loss": 0.2458, + "step": 3485 + }, + { + "epoch": 4.13, + "learning_rate": 0.0001255413307308418, + "loss": 0.2237, + "step": 3486 + }, + { + "epoch": 4.13, + "learning_rate": 0.00012550482179323963, + "loss": 0.2696, + "step": 3487 + }, + { + "epoch": 4.13, + "learning_rate": 0.00012546830921920617, + "loss": 0.2078, + "step": 3488 + }, + { + "epoch": 4.13, + "learning_rate": 0.00012543179301394744, + "loss": 0.2199, + "step": 3489 + }, + { + "epoch": 4.13, + "learning_rate": 0.0001253952731826697, + "loss": 0.2258, + "step": 3490 + }, + { + "epoch": 4.13, + "learning_rate": 0.00012535874973057997, + "loss": 0.1981, + "step": 3491 + }, + { + "epoch": 4.13, + "learning_rate": 0.0001253222226628857, + "loss": 0.3252, + "step": 3492 + }, + { + "epoch": 4.14, + "learning_rate": 0.00012528569198479481, + "loss": 0.2717, + "step": 3493 + }, + { + "epoch": 4.14, + "learning_rate": 0.0001252491577015158, + "loss": 0.248, + "step": 3494 + }, + { + "epoch": 4.14, + "learning_rate": 0.00012521261981825768, + "loss": 0.2725, + "step": 3495 + }, + { + "epoch": 4.14, + "learning_rate": 0.00012517607834022993, + "loss": 0.2203, + "step": 3496 + }, + { + "epoch": 4.14, + "learning_rate": 0.0001251395332726426, + "loss": 0.2461, + "step": 3497 + }, + { + "epoch": 4.14, + "learning_rate": 0.00012510298462070619, + "loss": 0.3018, + "step": 3498 + }, + { + "epoch": 4.14, + "learning_rate": 0.0001250664323896317, + "loss": 0.2329, + "step": 3499 + }, + { + "epoch": 4.14, + "learning_rate": 0.00012502987658463075, + "loss": 0.221, + "step": 3500 + }, + { + "epoch": 4.15, + "learning_rate": 0.00012499331721091544, + "loss": 0.2812, + "step": 3501 + }, + { + "epoch": 4.15, + "learning_rate": 0.00012495675427369823, + "loss": 0.2846, + "step": 3502 + }, + { + "epoch": 4.15, + "learning_rate": 0.00012492018777819226, + "loss": 0.2447, + "step": 3503 + }, + { + "epoch": 4.15, + "learning_rate": 0.0001248836177296111, + "loss": 0.1969, + "step": 3504 + }, + { + "epoch": 4.15, + "learning_rate": 0.00012484704413316878, + "loss": 0.2045, + "step": 3505 + }, + { + "epoch": 4.15, + "learning_rate": 0.00012481046699408004, + "loss": 0.1862, + "step": 3506 + }, + { + "epoch": 4.15, + "learning_rate": 0.00012477388631755985, + "loss": 0.23, + "step": 3507 + }, + { + "epoch": 4.15, + "learning_rate": 0.0001247373021088239, + "loss": 0.2972, + "step": 3508 + }, + { + "epoch": 4.15, + "learning_rate": 0.00012470071437308827, + "loss": 0.2222, + "step": 3509 + }, + { + "epoch": 4.16, + "learning_rate": 0.00012466412311556952, + "loss": 0.2262, + "step": 3510 + }, + { + "epoch": 4.16, + "learning_rate": 0.00012462752834148486, + "loss": 0.3642, + "step": 3511 + }, + { + "epoch": 4.16, + "learning_rate": 0.0001245909300560518, + "loss": 0.2221, + "step": 3512 + }, + { + "epoch": 4.16, + "learning_rate": 0.00012455432826448862, + "loss": 0.2607, + "step": 3513 + }, + { + "epoch": 4.16, + "learning_rate": 0.00012451772297201376, + "loss": 0.2396, + "step": 3514 + }, + { + "epoch": 4.16, + "learning_rate": 0.00012448111418384645, + "loss": 0.2034, + "step": 3515 + }, + { + "epoch": 4.16, + "learning_rate": 0.00012444450190520623, + "loss": 0.2404, + "step": 3516 + }, + { + "epoch": 4.16, + "learning_rate": 0.00012440788614131329, + "loss": 0.2029, + "step": 3517 + }, + { + "epoch": 4.17, + "learning_rate": 0.00012437126689738816, + "loss": 0.2128, + "step": 3518 + }, + { + "epoch": 4.17, + "learning_rate": 0.00012433464417865202, + "loss": 0.2857, + "step": 3519 + }, + { + "epoch": 4.17, + "learning_rate": 0.0001242980179903264, + "loss": 0.2931, + "step": 3520 + }, + { + "epoch": 4.17, + "learning_rate": 0.00012426138833763342, + "loss": 0.2319, + "step": 3521 + }, + { + "epoch": 4.17, + "learning_rate": 0.00012422475522579573, + "loss": 0.2272, + "step": 3522 + }, + { + "epoch": 4.17, + "learning_rate": 0.00012418811866003632, + "loss": 0.2498, + "step": 3523 + }, + { + "epoch": 4.17, + "learning_rate": 0.00012415147864557884, + "loss": 0.1993, + "step": 3524 + }, + { + "epoch": 4.17, + "learning_rate": 0.0001241148351876473, + "loss": 0.2329, + "step": 3525 + }, + { + "epoch": 4.18, + "learning_rate": 0.0001240781882914663, + "loss": 0.2228, + "step": 3526 + }, + { + "epoch": 4.18, + "learning_rate": 0.00012404153796226087, + "loss": 0.2228, + "step": 3527 + }, + { + "epoch": 4.18, + "learning_rate": 0.00012400488420525653, + "loss": 0.2277, + "step": 3528 + }, + { + "epoch": 4.18, + "learning_rate": 0.0001239682270256793, + "loss": 0.2344, + "step": 3529 + }, + { + "epoch": 4.18, + "learning_rate": 0.0001239315664287558, + "loss": 0.2043, + "step": 3530 + }, + { + "epoch": 4.18, + "learning_rate": 0.0001238949024197129, + "loss": 0.2143, + "step": 3531 + }, + { + "epoch": 4.18, + "learning_rate": 0.00012385823500377812, + "loss": 0.2054, + "step": 3532 + }, + { + "epoch": 4.18, + "learning_rate": 0.00012382156418617947, + "loss": 0.2191, + "step": 3533 + }, + { + "epoch": 4.18, + "learning_rate": 0.0001237848899721454, + "loss": 0.2199, + "step": 3534 + }, + { + "epoch": 4.19, + "learning_rate": 0.00012374821236690482, + "loss": 0.1899, + "step": 3535 + }, + { + "epoch": 4.19, + "learning_rate": 0.0001237115313756872, + "loss": 0.2206, + "step": 3536 + }, + { + "epoch": 4.19, + "learning_rate": 0.00012367484700372242, + "loss": 0.2107, + "step": 3537 + }, + { + "epoch": 4.19, + "learning_rate": 0.00012363815925624087, + "loss": 0.1904, + "step": 3538 + }, + { + "epoch": 4.19, + "learning_rate": 0.00012360146813847345, + "loss": 0.2259, + "step": 3539 + }, + { + "epoch": 4.19, + "learning_rate": 0.00012356477365565148, + "loss": 0.2488, + "step": 3540 + }, + { + "epoch": 4.19, + "learning_rate": 0.00012352807581300678, + "loss": 0.3026, + "step": 3541 + }, + { + "epoch": 4.19, + "learning_rate": 0.00012349137461577174, + "loss": 0.2141, + "step": 3542 + }, + { + "epoch": 4.2, + "learning_rate": 0.00012345467006917907, + "loss": 0.2183, + "step": 3543 + }, + { + "epoch": 4.2, + "learning_rate": 0.00012341796217846208, + "loss": 0.2978, + "step": 3544 + }, + { + "epoch": 4.2, + "learning_rate": 0.0001233812509488545, + "loss": 0.2255, + "step": 3545 + }, + { + "epoch": 4.2, + "learning_rate": 0.00012334453638559057, + "loss": 0.2209, + "step": 3546 + }, + { + "epoch": 4.2, + "learning_rate": 0.00012330781849390494, + "loss": 0.2464, + "step": 3547 + }, + { + "epoch": 4.2, + "learning_rate": 0.00012327109727903283, + "loss": 0.2259, + "step": 3548 + }, + { + "epoch": 4.2, + "learning_rate": 0.00012323437274620983, + "loss": 0.209, + "step": 3549 + }, + { + "epoch": 4.2, + "learning_rate": 0.0001231976449006721, + "loss": 0.2424, + "step": 3550 + }, + { + "epoch": 4.21, + "learning_rate": 0.00012316091374765624, + "loss": 0.2162, + "step": 3551 + }, + { + "epoch": 4.21, + "learning_rate": 0.0001231241792923993, + "loss": 0.2442, + "step": 3552 + }, + { + "epoch": 4.21, + "learning_rate": 0.00012308744154013878, + "loss": 0.2061, + "step": 3553 + }, + { + "epoch": 4.21, + "eval_loss": 3.390720844268799, + "eval_runtime": 283.8935, + "eval_samples_per_second": 0.726, + "eval_steps_per_second": 0.726, + "step": 3553 + }, + { + "epoch": 4.21, + "learning_rate": 0.00012305070049611273, + "loss": 0.1838, + "step": 3554 + }, + { + "epoch": 4.21, + "learning_rate": 0.00012301395616555957, + "loss": 0.197, + "step": 3555 + }, + { + "epoch": 4.21, + "learning_rate": 0.0001229772085537183, + "loss": 0.2479, + "step": 3556 + }, + { + "epoch": 4.21, + "learning_rate": 0.00012294045766582823, + "loss": 0.3272, + "step": 3557 + }, + { + "epoch": 4.21, + "learning_rate": 0.00012290370350712937, + "loss": 0.2301, + "step": 3558 + }, + { + "epoch": 4.21, + "learning_rate": 0.00012286694608286197, + "loss": 0.2367, + "step": 3559 + }, + { + "epoch": 4.22, + "learning_rate": 0.00012283018539826685, + "loss": 0.2419, + "step": 3560 + }, + { + "epoch": 4.22, + "learning_rate": 0.0001227934214585853, + "loss": 0.2605, + "step": 3561 + }, + { + "epoch": 4.22, + "learning_rate": 0.000122756654269059, + "loss": 0.2084, + "step": 3562 + }, + { + "epoch": 4.22, + "learning_rate": 0.00012271988383493024, + "loss": 0.2414, + "step": 3563 + }, + { + "epoch": 4.22, + "learning_rate": 0.00012268311016144163, + "loss": 0.2206, + "step": 3564 + }, + { + "epoch": 4.22, + "learning_rate": 0.0001226463332538363, + "loss": 0.2012, + "step": 3565 + }, + { + "epoch": 4.22, + "learning_rate": 0.00012260955311735786, + "loss": 0.1884, + "step": 3566 + }, + { + "epoch": 4.22, + "learning_rate": 0.00012257276975725028, + "loss": 0.2155, + "step": 3567 + }, + { + "epoch": 4.23, + "learning_rate": 0.0001225359831787581, + "loss": 0.2375, + "step": 3568 + }, + { + "epoch": 4.23, + "learning_rate": 0.00012249919338712636, + "loss": 0.2713, + "step": 3569 + }, + { + "epoch": 4.23, + "learning_rate": 0.00012246240038760043, + "loss": 0.2414, + "step": 3570 + }, + { + "epoch": 4.23, + "learning_rate": 0.00012242560418542612, + "loss": 0.2209, + "step": 3571 + }, + { + "epoch": 4.23, + "learning_rate": 0.00012238880478584985, + "loss": 0.2318, + "step": 3572 + }, + { + "epoch": 4.23, + "learning_rate": 0.00012235200219411836, + "loss": 0.2858, + "step": 3573 + }, + { + "epoch": 4.23, + "learning_rate": 0.000122315196415479, + "loss": 0.2468, + "step": 3574 + }, + { + "epoch": 4.23, + "learning_rate": 0.00012227838745517932, + "loss": 0.2166, + "step": 3575 + }, + { + "epoch": 4.24, + "learning_rate": 0.0001222415753184676, + "loss": 0.2349, + "step": 3576 + }, + { + "epoch": 4.24, + "learning_rate": 0.00012220476001059238, + "loss": 0.2486, + "step": 3577 + }, + { + "epoch": 4.24, + "learning_rate": 0.00012216794153680274, + "loss": 0.234, + "step": 3578 + }, + { + "epoch": 4.24, + "learning_rate": 0.00012213111990234815, + "loss": 0.2008, + "step": 3579 + }, + { + "epoch": 4.24, + "learning_rate": 0.00012209429511247864, + "loss": 0.2548, + "step": 3580 + }, + { + "epoch": 4.24, + "learning_rate": 0.0001220574671724446, + "loss": 0.2562, + "step": 3581 + }, + { + "epoch": 4.24, + "learning_rate": 0.0001220206360874969, + "loss": 0.2586, + "step": 3582 + }, + { + "epoch": 4.24, + "learning_rate": 0.0001219838018628868, + "loss": 0.2428, + "step": 3583 + }, + { + "epoch": 4.24, + "learning_rate": 0.00012194696450386608, + "loss": 0.2159, + "step": 3584 + }, + { + "epoch": 4.25, + "learning_rate": 0.00012191012401568698, + "loss": 0.2544, + "step": 3585 + }, + { + "epoch": 4.25, + "learning_rate": 0.0001218732804036021, + "loss": 0.2396, + "step": 3586 + }, + { + "epoch": 4.25, + "learning_rate": 0.00012183643367286462, + "loss": 0.2335, + "step": 3587 + }, + { + "epoch": 4.25, + "learning_rate": 0.00012179958382872796, + "loss": 0.2275, + "step": 3588 + }, + { + "epoch": 4.25, + "learning_rate": 0.00012176273087644619, + "loss": 0.2291, + "step": 3589 + }, + { + "epoch": 4.25, + "learning_rate": 0.0001217258748212737, + "loss": 0.2272, + "step": 3590 + }, + { + "epoch": 4.25, + "learning_rate": 0.00012168901566846535, + "loss": 0.2135, + "step": 3591 + }, + { + "epoch": 4.25, + "learning_rate": 0.00012165215342327648, + "loss": 0.22, + "step": 3592 + }, + { + "epoch": 4.26, + "learning_rate": 0.00012161528809096285, + "loss": 0.2577, + "step": 3593 + }, + { + "epoch": 4.26, + "learning_rate": 0.00012157841967678063, + "loss": 0.2006, + "step": 3594 + }, + { + "epoch": 4.26, + "learning_rate": 0.00012154154818598647, + "loss": 0.2322, + "step": 3595 + }, + { + "epoch": 4.26, + "learning_rate": 0.0001215046736238374, + "loss": 0.196, + "step": 3596 + }, + { + "epoch": 4.26, + "learning_rate": 0.00012146779599559095, + "loss": 0.2267, + "step": 3597 + }, + { + "epoch": 4.26, + "learning_rate": 0.00012143091530650508, + "loss": 0.2416, + "step": 3598 + }, + { + "epoch": 4.26, + "learning_rate": 0.00012139403156183817, + "loss": 0.2585, + "step": 3599 + }, + { + "epoch": 4.26, + "learning_rate": 0.00012135714476684903, + "loss": 0.2644, + "step": 3600 + }, + { + "epoch": 4.27, + "learning_rate": 0.00012132025492679693, + "loss": 0.2355, + "step": 3601 + }, + { + "epoch": 4.27, + "learning_rate": 0.00012128336204694148, + "loss": 0.2363, + "step": 3602 + }, + { + "epoch": 4.27, + "learning_rate": 0.00012124646613254291, + "loss": 0.2476, + "step": 3603 + }, + { + "epoch": 4.27, + "learning_rate": 0.0001212095671888617, + "loss": 0.2185, + "step": 3604 + }, + { + "epoch": 4.27, + "learning_rate": 0.00012117266522115889, + "loss": 0.2233, + "step": 3605 + }, + { + "epoch": 4.27, + "learning_rate": 0.00012113576023469582, + "loss": 0.2084, + "step": 3606 + }, + { + "epoch": 4.27, + "learning_rate": 0.00012109885223473439, + "loss": 0.2439, + "step": 3607 + }, + { + "epoch": 4.27, + "learning_rate": 0.00012106194122653684, + "loss": 0.2409, + "step": 3608 + }, + { + "epoch": 4.27, + "learning_rate": 0.00012102502721536595, + "loss": 0.2183, + "step": 3609 + }, + { + "epoch": 4.28, + "learning_rate": 0.00012098811020648475, + "loss": 0.2595, + "step": 3610 + }, + { + "epoch": 4.28, + "learning_rate": 0.00012095119020515691, + "loss": 0.2135, + "step": 3611 + }, + { + "epoch": 4.28, + "learning_rate": 0.0001209142672166463, + "loss": 0.2125, + "step": 3612 + }, + { + "epoch": 4.28, + "learning_rate": 0.00012087734124621742, + "loss": 0.2017, + "step": 3613 + }, + { + "epoch": 4.28, + "learning_rate": 0.00012084041229913505, + "loss": 0.2163, + "step": 3614 + }, + { + "epoch": 4.28, + "learning_rate": 0.00012080348038066452, + "loss": 0.2198, + "step": 3615 + }, + { + "epoch": 4.28, + "learning_rate": 0.00012076654549607145, + "loss": 0.2234, + "step": 3616 + }, + { + "epoch": 4.28, + "learning_rate": 0.00012072960765062197, + "loss": 0.2201, + "step": 3617 + }, + { + "epoch": 4.29, + "learning_rate": 0.00012069266684958265, + "loss": 0.245, + "step": 3618 + }, + { + "epoch": 4.29, + "learning_rate": 0.00012065572309822037, + "loss": 0.2067, + "step": 3619 + }, + { + "epoch": 4.29, + "learning_rate": 0.00012061877640180255, + "loss": 0.2284, + "step": 3620 + }, + { + "epoch": 4.29, + "learning_rate": 0.000120581826765597, + "loss": 0.2323, + "step": 3621 + }, + { + "epoch": 4.29, + "learning_rate": 0.00012054487419487188, + "loss": 0.2162, + "step": 3622 + }, + { + "epoch": 4.29, + "learning_rate": 0.00012050791869489586, + "loss": 0.2131, + "step": 3623 + }, + { + "epoch": 4.29, + "learning_rate": 0.00012047096027093798, + "loss": 0.2168, + "step": 3624 + }, + { + "epoch": 4.29, + "learning_rate": 0.00012043399892826768, + "loss": 0.2293, + "step": 3625 + }, + { + "epoch": 4.3, + "learning_rate": 0.00012039703467215488, + "loss": 0.2202, + "step": 3626 + }, + { + "epoch": 4.3, + "learning_rate": 0.00012036006750786985, + "loss": 0.2288, + "step": 3627 + }, + { + "epoch": 4.3, + "learning_rate": 0.00012032309744068334, + "loss": 0.2606, + "step": 3628 + }, + { + "epoch": 4.3, + "learning_rate": 0.00012028612447586643, + "loss": 0.2754, + "step": 3629 + }, + { + "epoch": 4.3, + "learning_rate": 0.00012024914861869063, + "loss": 0.239, + "step": 3630 + }, + { + "epoch": 4.3, + "learning_rate": 0.00012021216987442798, + "loss": 0.2312, + "step": 3631 + }, + { + "epoch": 4.3, + "learning_rate": 0.00012017518824835077, + "loss": 0.2299, + "step": 3632 + }, + { + "epoch": 4.3, + "learning_rate": 0.00012013820374573184, + "loss": 0.2214, + "step": 3633 + }, + { + "epoch": 4.3, + "learning_rate": 0.00012010121637184428, + "loss": 0.2492, + "step": 3634 + }, + { + "epoch": 4.31, + "learning_rate": 0.00012006422613196178, + "loss": 0.2659, + "step": 3635 + }, + { + "epoch": 4.31, + "learning_rate": 0.00012002723303135826, + "loss": 0.23, + "step": 3636 + }, + { + "epoch": 4.31, + "learning_rate": 0.00011999023707530819, + "loss": 0.287, + "step": 3637 + }, + { + "epoch": 4.31, + "learning_rate": 0.00011995323826908635, + "loss": 0.2204, + "step": 3638 + }, + { + "epoch": 4.31, + "learning_rate": 0.00011991623661796798, + "loss": 0.2277, + "step": 3639 + }, + { + "epoch": 4.31, + "learning_rate": 0.00011987923212722872, + "loss": 0.2436, + "step": 3640 + }, + { + "epoch": 4.31, + "learning_rate": 0.00011984222480214456, + "loss": 0.2074, + "step": 3641 + }, + { + "epoch": 4.31, + "learning_rate": 0.00011980521464799198, + "loss": 0.2212, + "step": 3642 + }, + { + "epoch": 4.32, + "learning_rate": 0.00011976820167004779, + "loss": 0.2147, + "step": 3643 + }, + { + "epoch": 4.32, + "learning_rate": 0.00011973118587358928, + "loss": 0.2271, + "step": 3644 + }, + { + "epoch": 4.32, + "learning_rate": 0.00011969416726389404, + "loss": 0.2498, + "step": 3645 + }, + { + "epoch": 4.32, + "learning_rate": 0.00011965714584624012, + "loss": 0.2171, + "step": 3646 + }, + { + "epoch": 4.32, + "learning_rate": 0.00011962012162590601, + "loss": 0.2276, + "step": 3647 + }, + { + "epoch": 4.32, + "learning_rate": 0.00011958309460817052, + "loss": 0.2089, + "step": 3648 + }, + { + "epoch": 4.32, + "learning_rate": 0.00011954606479831291, + "loss": 0.2691, + "step": 3649 + }, + { + "epoch": 4.32, + "learning_rate": 0.00011950903220161285, + "loss": 0.2229, + "step": 3650 + }, + { + "epoch": 4.33, + "learning_rate": 0.00011947199682335031, + "loss": 0.2315, + "step": 3651 + }, + { + "epoch": 4.33, + "learning_rate": 0.0001194349586688058, + "loss": 0.2208, + "step": 3652 + }, + { + "epoch": 4.33, + "learning_rate": 0.0001193979177432601, + "loss": 0.2159, + "step": 3653 + }, + { + "epoch": 4.33, + "learning_rate": 0.00011936087405199446, + "loss": 0.2781, + "step": 3654 + }, + { + "epoch": 4.33, + "learning_rate": 0.00011932382760029049, + "loss": 0.2142, + "step": 3655 + }, + { + "epoch": 4.33, + "learning_rate": 0.00011928677839343026, + "loss": 0.2275, + "step": 3656 + }, + { + "epoch": 4.33, + "learning_rate": 0.0001192497264366961, + "loss": 0.2718, + "step": 3657 + }, + { + "epoch": 4.33, + "learning_rate": 0.00011921267173537086, + "loss": 0.1947, + "step": 3658 + }, + { + "epoch": 4.33, + "learning_rate": 0.00011917561429473771, + "loss": 0.2361, + "step": 3659 + }, + { + "epoch": 4.34, + "learning_rate": 0.00011913855412008023, + "loss": 0.1999, + "step": 3660 + }, + { + "epoch": 4.34, + "learning_rate": 0.00011910149121668241, + "loss": 0.2199, + "step": 3661 + }, + { + "epoch": 4.34, + "learning_rate": 0.00011906442558982865, + "loss": 0.2217, + "step": 3662 + }, + { + "epoch": 4.34, + "learning_rate": 0.0001190273572448036, + "loss": 0.2263, + "step": 3663 + }, + { + "epoch": 4.34, + "learning_rate": 0.00011899028618689247, + "loss": 0.2216, + "step": 3664 + }, + { + "epoch": 4.34, + "learning_rate": 0.00011895321242138075, + "loss": 0.2298, + "step": 3665 + }, + { + "epoch": 4.34, + "learning_rate": 0.0001189161359535544, + "loss": 0.2332, + "step": 3666 + }, + { + "epoch": 4.34, + "learning_rate": 0.00011887905678869966, + "loss": 0.2955, + "step": 3667 + }, + { + "epoch": 4.35, + "learning_rate": 0.00011884197493210328, + "loss": 0.2352, + "step": 3668 + }, + { + "epoch": 4.35, + "learning_rate": 0.00011880489038905223, + "loss": 0.2104, + "step": 3669 + }, + { + "epoch": 4.35, + "learning_rate": 0.00011876780316483401, + "loss": 0.2897, + "step": 3670 + }, + { + "epoch": 4.35, + "learning_rate": 0.00011873071326473644, + "loss": 0.2041, + "step": 3671 + }, + { + "epoch": 4.35, + "learning_rate": 0.00011869362069404775, + "loss": 0.2242, + "step": 3672 + }, + { + "epoch": 4.35, + "learning_rate": 0.0001186565254580565, + "loss": 0.2015, + "step": 3673 + }, + { + "epoch": 4.35, + "learning_rate": 0.00011861942756205169, + "loss": 0.2716, + "step": 3674 + }, + { + "epoch": 4.35, + "learning_rate": 0.00011858232701132264, + "loss": 0.2504, + "step": 3675 + }, + { + "epoch": 4.36, + "learning_rate": 0.00011854522381115908, + "loss": 0.1846, + "step": 3676 + }, + { + "epoch": 4.36, + "learning_rate": 0.00011850811796685117, + "loss": 0.207, + "step": 3677 + }, + { + "epoch": 4.36, + "learning_rate": 0.00011847100948368937, + "loss": 0.2228, + "step": 3678 + }, + { + "epoch": 4.36, + "learning_rate": 0.00011843389836696447, + "loss": 0.2365, + "step": 3679 + }, + { + "epoch": 4.36, + "learning_rate": 0.00011839678462196784, + "loss": 0.2159, + "step": 3680 + }, + { + "epoch": 4.36, + "learning_rate": 0.00011835966825399096, + "loss": 0.2413, + "step": 3681 + }, + { + "epoch": 4.36, + "learning_rate": 0.00011832254926832586, + "loss": 0.2596, + "step": 3682 + }, + { + "epoch": 4.36, + "learning_rate": 0.00011828542767026493, + "loss": 0.2041, + "step": 3683 + }, + { + "epoch": 4.36, + "learning_rate": 0.00011824830346510089, + "loss": 0.2512, + "step": 3684 + }, + { + "epoch": 4.37, + "learning_rate": 0.00011821117665812682, + "loss": 0.2165, + "step": 3685 + }, + { + "epoch": 4.37, + "learning_rate": 0.00011817404725463618, + "loss": 0.2125, + "step": 3686 + }, + { + "epoch": 4.37, + "learning_rate": 0.00011813691525992286, + "loss": 0.2557, + "step": 3687 + }, + { + "epoch": 4.37, + "learning_rate": 0.00011809978067928102, + "loss": 0.2088, + "step": 3688 + }, + { + "epoch": 4.37, + "learning_rate": 0.00011806264351800526, + "loss": 0.2093, + "step": 3689 + }, + { + "epoch": 4.37, + "learning_rate": 0.0001180255037813906, + "loss": 0.2217, + "step": 3690 + }, + { + "epoch": 4.37, + "learning_rate": 0.00011798836147473225, + "loss": 0.2681, + "step": 3691 + }, + { + "epoch": 4.37, + "learning_rate": 0.00011795121660332593, + "loss": 0.2257, + "step": 3692 + }, + { + "epoch": 4.38, + "learning_rate": 0.0001179140691724677, + "loss": 0.2422, + "step": 3693 + }, + { + "epoch": 4.38, + "learning_rate": 0.00011787691918745396, + "loss": 0.3328, + "step": 3694 + }, + { + "epoch": 4.38, + "learning_rate": 0.0001178397666535815, + "loss": 0.233, + "step": 3695 + }, + { + "epoch": 4.38, + "learning_rate": 0.00011780261157614747, + "loss": 0.243, + "step": 3696 + }, + { + "epoch": 4.38, + "learning_rate": 0.00011776545396044936, + "loss": 0.2089, + "step": 3697 + }, + { + "epoch": 4.38, + "learning_rate": 0.00011772829381178502, + "loss": 0.2143, + "step": 3698 + }, + { + "epoch": 4.38, + "learning_rate": 0.00011769113113545267, + "loss": 0.2135, + "step": 3699 + }, + { + "epoch": 4.38, + "learning_rate": 0.00011765396593675097, + "loss": 0.2403, + "step": 3700 + }, + { + "epoch": 4.39, + "learning_rate": 0.00011761679822097877, + "loss": 0.2182, + "step": 3701 + }, + { + "epoch": 4.39, + "learning_rate": 0.00011757962799343547, + "loss": 0.2159, + "step": 3702 + }, + { + "epoch": 4.39, + "learning_rate": 0.00011754245525942065, + "loss": 0.2098, + "step": 3703 + }, + { + "epoch": 4.39, + "learning_rate": 0.00011750528002423437, + "loss": 0.2264, + "step": 3704 + }, + { + "epoch": 4.39, + "learning_rate": 0.000117468102293177, + "loss": 0.2023, + "step": 3705 + }, + { + "epoch": 4.39, + "learning_rate": 0.00011743092207154929, + "loss": 0.2978, + "step": 3706 + }, + { + "epoch": 4.39, + "learning_rate": 0.0001173937393646523, + "loss": 0.2311, + "step": 3707 + }, + { + "epoch": 4.39, + "learning_rate": 0.0001173565541777875, + "loss": 0.244, + "step": 3708 + }, + { + "epoch": 4.39, + "learning_rate": 0.00011731936651625668, + "loss": 0.2058, + "step": 3709 + }, + { + "epoch": 4.4, + "learning_rate": 0.00011728217638536197, + "loss": 0.3039, + "step": 3710 + }, + { + "epoch": 4.4, + "learning_rate": 0.00011724498379040587, + "loss": 0.2142, + "step": 3711 + }, + { + "epoch": 4.4, + "learning_rate": 0.0001172077887366913, + "loss": 0.2262, + "step": 3712 + }, + { + "epoch": 4.4, + "learning_rate": 0.00011717059122952136, + "loss": 0.2304, + "step": 3713 + }, + { + "epoch": 4.4, + "learning_rate": 0.00011713339127419969, + "loss": 0.2093, + "step": 3714 + }, + { + "epoch": 4.4, + "learning_rate": 0.00011709618887603014, + "loss": 0.2083, + "step": 3715 + }, + { + "epoch": 4.4, + "learning_rate": 0.00011705898404031697, + "loss": 0.3559, + "step": 3716 + }, + { + "epoch": 4.4, + "learning_rate": 0.00011702177677236479, + "loss": 0.2728, + "step": 3717 + }, + { + "epoch": 4.41, + "learning_rate": 0.00011698456707747854, + "loss": 0.246, + "step": 3718 + }, + { + "epoch": 4.41, + "learning_rate": 0.00011694735496096354, + "loss": 0.2031, + "step": 3719 + }, + { + "epoch": 4.41, + "learning_rate": 0.00011691014042812536, + "loss": 0.2049, + "step": 3720 + }, + { + "epoch": 4.41, + "learning_rate": 0.00011687292348427004, + "loss": 0.248, + "step": 3721 + }, + { + "epoch": 4.41, + "learning_rate": 0.00011683570413470383, + "loss": 0.2189, + "step": 3722 + }, + { + "epoch": 4.41, + "learning_rate": 0.00011679848238473352, + "loss": 0.2302, + "step": 3723 + }, + { + "epoch": 4.41, + "learning_rate": 0.00011676125823966602, + "loss": 0.2839, + "step": 3724 + }, + { + "epoch": 4.41, + "learning_rate": 0.00011672403170480872, + "loss": 0.2359, + "step": 3725 + }, + { + "epoch": 4.42, + "learning_rate": 0.00011668680278546929, + "loss": 0.2288, + "step": 3726 + }, + { + "epoch": 4.42, + "learning_rate": 0.0001166495714869558, + "loss": 0.2718, + "step": 3727 + }, + { + "epoch": 4.42, + "learning_rate": 0.00011661233781457654, + "loss": 0.1967, + "step": 3728 + }, + { + "epoch": 4.42, + "learning_rate": 0.00011657510177364032, + "loss": 0.2098, + "step": 3729 + }, + { + "epoch": 4.42, + "learning_rate": 0.00011653786336945614, + "loss": 0.2466, + "step": 3730 + }, + { + "epoch": 4.42, + "learning_rate": 0.00011650062260733339, + "loss": 0.2207, + "step": 3731 + }, + { + "epoch": 4.42, + "learning_rate": 0.00011646337949258175, + "loss": 0.2124, + "step": 3732 + }, + { + "epoch": 4.42, + "learning_rate": 0.00011642613403051133, + "loss": 0.213, + "step": 3733 + }, + { + "epoch": 4.42, + "learning_rate": 0.00011638888622643249, + "loss": 0.2276, + "step": 3734 + }, + { + "epoch": 4.43, + "learning_rate": 0.000116351636085656, + "loss": 0.2206, + "step": 3735 + }, + { + "epoch": 4.43, + "learning_rate": 0.00011631438361349287, + "loss": 0.2382, + "step": 3736 + }, + { + "epoch": 4.43, + "learning_rate": 0.00011627712881525452, + "loss": 0.2264, + "step": 3737 + }, + { + "epoch": 4.43, + "learning_rate": 0.00011623987169625261, + "loss": 0.2392, + "step": 3738 + }, + { + "epoch": 4.43, + "learning_rate": 0.00011620261226179927, + "loss": 0.2139, + "step": 3739 + }, + { + "epoch": 4.43, + "learning_rate": 0.00011616535051720685, + "loss": 0.2103, + "step": 3740 + }, + { + "epoch": 4.43, + "learning_rate": 0.00011612808646778806, + "loss": 0.211, + "step": 3741 + }, + { + "epoch": 4.43, + "learning_rate": 0.00011609082011885592, + "loss": 0.2227, + "step": 3742 + }, + { + "epoch": 4.44, + "learning_rate": 0.00011605355147572387, + "loss": 0.2459, + "step": 3743 + }, + { + "epoch": 4.44, + "learning_rate": 0.00011601628054370553, + "loss": 0.2312, + "step": 3744 + }, + { + "epoch": 4.44, + "learning_rate": 0.00011597900732811496, + "loss": 0.2244, + "step": 3745 + }, + { + "epoch": 4.44, + "learning_rate": 0.00011594173183426647, + "loss": 0.2168, + "step": 3746 + }, + { + "epoch": 4.44, + "learning_rate": 0.00011590445406747479, + "loss": 0.2711, + "step": 3747 + }, + { + "epoch": 4.44, + "learning_rate": 0.00011586717403305487, + "loss": 0.1865, + "step": 3748 + }, + { + "epoch": 4.44, + "learning_rate": 0.00011582989173632206, + "loss": 0.3104, + "step": 3749 + }, + { + "epoch": 4.44, + "learning_rate": 0.00011579260718259197, + "loss": 0.2245, + "step": 3750 + }, + { + "epoch": 4.45, + "learning_rate": 0.00011575532037718057, + "loss": 0.2316, + "step": 3751 + }, + { + "epoch": 4.45, + "learning_rate": 0.00011571803132540418, + "loss": 0.2328, + "step": 3752 + }, + { + "epoch": 4.45, + "learning_rate": 0.00011568074003257938, + "loss": 0.267, + "step": 3753 + }, + { + "epoch": 4.45, + "learning_rate": 0.0001156434465040231, + "loss": 0.2131, + "step": 3754 + }, + { + "epoch": 4.45, + "learning_rate": 0.0001156061507450526, + "loss": 0.1945, + "step": 3755 + }, + { + "epoch": 4.45, + "learning_rate": 0.00011556885276098536, + "loss": 0.2344, + "step": 3756 + }, + { + "epoch": 4.45, + "learning_rate": 0.00011553155255713937, + "loss": 0.2221, + "step": 3757 + }, + { + "epoch": 4.45, + "learning_rate": 0.00011549425013883275, + "loss": 0.2098, + "step": 3758 + }, + { + "epoch": 4.45, + "learning_rate": 0.00011545694551138409, + "loss": 0.2329, + "step": 3759 + }, + { + "epoch": 4.46, + "learning_rate": 0.00011541963868011212, + "loss": 0.2187, + "step": 3760 + }, + { + "epoch": 4.46, + "learning_rate": 0.00011538232965033601, + "loss": 0.1928, + "step": 3761 + }, + { + "epoch": 4.46, + "learning_rate": 0.00011534501842737527, + "loss": 0.2103, + "step": 3762 + }, + { + "epoch": 4.46, + "eval_loss": 3.44382643699646, + "eval_runtime": 283.899, + "eval_samples_per_second": 0.726, + "eval_steps_per_second": 0.726, + "step": 3762 + }, + { + "epoch": 4.46, + "learning_rate": 0.00011530770501654959, + "loss": 0.2563, + "step": 3763 + }, + { + "epoch": 4.46, + "learning_rate": 0.00011527038942317911, + "loss": 0.1922, + "step": 3764 + }, + { + "epoch": 4.46, + "learning_rate": 0.00011523307165258419, + "loss": 0.2246, + "step": 3765 + }, + { + "epoch": 4.46, + "learning_rate": 0.00011519575171008552, + "loss": 0.2243, + "step": 3766 + }, + { + "epoch": 4.46, + "learning_rate": 0.00011515842960100411, + "loss": 0.2481, + "step": 3767 + }, + { + "epoch": 4.47, + "learning_rate": 0.00011512110533066132, + "loss": 0.2135, + "step": 3768 + }, + { + "epoch": 4.47, + "learning_rate": 0.00011508377890437874, + "loss": 0.2019, + "step": 3769 + }, + { + "epoch": 4.47, + "learning_rate": 0.00011504645032747832, + "loss": 0.2537, + "step": 3770 + }, + { + "epoch": 4.47, + "learning_rate": 0.00011500911960528229, + "loss": 0.2131, + "step": 3771 + }, + { + "epoch": 4.47, + "learning_rate": 0.00011497178674311317, + "loss": 0.2421, + "step": 3772 + }, + { + "epoch": 4.47, + "learning_rate": 0.00011493445174629386, + "loss": 0.2012, + "step": 3773 + }, + { + "epoch": 4.47, + "learning_rate": 0.00011489711462014751, + "loss": 0.2144, + "step": 3774 + }, + { + "epoch": 4.47, + "learning_rate": 0.00011485977536999757, + "loss": 0.2411, + "step": 3775 + }, + { + "epoch": 4.48, + "learning_rate": 0.00011482243400116779, + "loss": 0.192, + "step": 3776 + }, + { + "epoch": 4.48, + "learning_rate": 0.00011478509051898225, + "loss": 0.2245, + "step": 3777 + }, + { + "epoch": 4.48, + "learning_rate": 0.00011474774492876532, + "loss": 0.241, + "step": 3778 + }, + { + "epoch": 4.48, + "learning_rate": 0.00011471039723584162, + "loss": 0.2172, + "step": 3779 + }, + { + "epoch": 4.48, + "learning_rate": 0.00011467304744553618, + "loss": 0.2308, + "step": 3780 + }, + { + "epoch": 4.48, + "learning_rate": 0.00011463569556317424, + "loss": 0.2523, + "step": 3781 + }, + { + "epoch": 4.48, + "learning_rate": 0.00011459834159408137, + "loss": 0.216, + "step": 3782 + }, + { + "epoch": 4.48, + "learning_rate": 0.00011456098554358342, + "loss": 0.2098, + "step": 3783 + }, + { + "epoch": 4.48, + "learning_rate": 0.00011452362741700655, + "loss": 0.2101, + "step": 3784 + }, + { + "epoch": 4.49, + "learning_rate": 0.00011448626721967717, + "loss": 0.3598, + "step": 3785 + }, + { + "epoch": 4.49, + "learning_rate": 0.00011444890495692213, + "loss": 0.2131, + "step": 3786 + }, + { + "epoch": 4.49, + "learning_rate": 0.00011441154063406841, + "loss": 0.3067, + "step": 3787 + }, + { + "epoch": 4.49, + "learning_rate": 0.00011437417425644337, + "loss": 0.2866, + "step": 3788 + }, + { + "epoch": 4.49, + "learning_rate": 0.00011433680582937461, + "loss": 0.2688, + "step": 3789 + }, + { + "epoch": 4.49, + "learning_rate": 0.00011429943535819005, + "loss": 0.2286, + "step": 3790 + }, + { + "epoch": 4.49, + "learning_rate": 0.00011426206284821792, + "loss": 0.215, + "step": 3791 + }, + { + "epoch": 4.49, + "learning_rate": 0.00011422468830478679, + "loss": 0.2293, + "step": 3792 + }, + { + "epoch": 4.5, + "learning_rate": 0.00011418731173322532, + "loss": 0.2614, + "step": 3793 + }, + { + "epoch": 4.5, + "learning_rate": 0.00011414993313886272, + "loss": 0.2223, + "step": 3794 + }, + { + "epoch": 4.5, + "learning_rate": 0.00011411255252702829, + "loss": 0.2415, + "step": 3795 + }, + { + "epoch": 4.5, + "learning_rate": 0.00011407516990305169, + "loss": 0.2429, + "step": 3796 + }, + { + "epoch": 4.5, + "learning_rate": 0.0001140377852722629, + "loss": 0.2862, + "step": 3797 + }, + { + "epoch": 4.5, + "learning_rate": 0.00011400039863999214, + "loss": 0.2399, + "step": 3798 + }, + { + "epoch": 4.5, + "learning_rate": 0.00011396301001156992, + "loss": 0.915, + "step": 3799 + }, + { + "epoch": 4.5, + "learning_rate": 0.00011392561939232706, + "loss": 0.2398, + "step": 3800 + }, + { + "epoch": 4.51, + "learning_rate": 0.00011388822678759464, + "loss": 0.2817, + "step": 3801 + }, + { + "epoch": 4.51, + "learning_rate": 0.00011385083220270401, + "loss": 0.2224, + "step": 3802 + }, + { + "epoch": 4.51, + "learning_rate": 0.00011381343564298683, + "loss": 0.2319, + "step": 3803 + }, + { + "epoch": 4.51, + "learning_rate": 0.00011377603711377504, + "loss": 0.2269, + "step": 3804 + }, + { + "epoch": 4.51, + "learning_rate": 0.00011373863662040087, + "loss": 0.2552, + "step": 3805 + }, + { + "epoch": 4.51, + "learning_rate": 0.00011370123416819682, + "loss": 0.2335, + "step": 3806 + }, + { + "epoch": 4.51, + "learning_rate": 0.00011366382976249564, + "loss": 0.2197, + "step": 3807 + }, + { + "epoch": 4.51, + "learning_rate": 0.00011362642340863034, + "loss": 0.2433, + "step": 3808 + }, + { + "epoch": 4.52, + "learning_rate": 0.00011358901511193431, + "loss": 0.2135, + "step": 3809 + }, + { + "epoch": 4.52, + "learning_rate": 0.0001135516048777412, + "loss": 0.2488, + "step": 3810 + }, + { + "epoch": 4.52, + "learning_rate": 0.0001135141927113848, + "loss": 0.2426, + "step": 3811 + }, + { + "epoch": 4.52, + "learning_rate": 0.0001134767786181993, + "loss": 0.247, + "step": 3812 + }, + { + "epoch": 4.52, + "learning_rate": 0.00011343936260351913, + "loss": 0.2235, + "step": 3813 + }, + { + "epoch": 4.52, + "learning_rate": 0.00011340194467267901, + "loss": 0.2109, + "step": 3814 + }, + { + "epoch": 4.52, + "learning_rate": 0.00011336452483101394, + "loss": 0.2545, + "step": 3815 + }, + { + "epoch": 4.52, + "learning_rate": 0.00011332710308385914, + "loss": 0.2104, + "step": 3816 + }, + { + "epoch": 4.52, + "learning_rate": 0.00011328967943655016, + "loss": 0.2089, + "step": 3817 + }, + { + "epoch": 4.53, + "learning_rate": 0.00011325225389442277, + "loss": 0.2658, + "step": 3818 + }, + { + "epoch": 4.53, + "learning_rate": 0.00011321482646281301, + "loss": 0.2736, + "step": 3819 + }, + { + "epoch": 4.53, + "learning_rate": 0.00011317739714705731, + "loss": 0.2562, + "step": 3820 + }, + { + "epoch": 4.53, + "learning_rate": 0.00011313996595249219, + "loss": 0.2223, + "step": 3821 + }, + { + "epoch": 4.53, + "learning_rate": 0.00011310253288445456, + "loss": 0.2212, + "step": 3822 + }, + { + "epoch": 4.53, + "learning_rate": 0.00011306509794828153, + "loss": 0.2217, + "step": 3823 + }, + { + "epoch": 4.53, + "learning_rate": 0.00011302766114931054, + "loss": 0.2321, + "step": 3824 + }, + { + "epoch": 4.53, + "learning_rate": 0.00011299022249287922, + "loss": 0.2423, + "step": 3825 + }, + { + "epoch": 4.54, + "learning_rate": 0.00011295278198432557, + "loss": 0.2651, + "step": 3826 + }, + { + "epoch": 4.54, + "learning_rate": 0.0001129153396289877, + "loss": 0.2256, + "step": 3827 + }, + { + "epoch": 4.54, + "learning_rate": 0.00011287789543220417, + "loss": 0.2656, + "step": 3828 + }, + { + "epoch": 4.54, + "learning_rate": 0.00011284044939931364, + "loss": 0.2332, + "step": 3829 + }, + { + "epoch": 4.54, + "learning_rate": 0.0001128030015356551, + "loss": 0.2121, + "step": 3830 + }, + { + "epoch": 4.54, + "learning_rate": 0.00011276555184656783, + "loss": 0.2148, + "step": 3831 + }, + { + "epoch": 4.54, + "learning_rate": 0.00011272810033739135, + "loss": 0.234, + "step": 3832 + }, + { + "epoch": 4.54, + "learning_rate": 0.00011269064701346534, + "loss": 0.2466, + "step": 3833 + }, + { + "epoch": 4.55, + "learning_rate": 0.00011265319188012994, + "loss": 0.2008, + "step": 3834 + }, + { + "epoch": 4.55, + "learning_rate": 0.00011261573494272538, + "loss": 0.1905, + "step": 3835 + }, + { + "epoch": 4.55, + "learning_rate": 0.00011257827620659216, + "loss": 0.2515, + "step": 3836 + }, + { + "epoch": 4.55, + "learning_rate": 0.00011254081567707115, + "loss": 0.2579, + "step": 3837 + }, + { + "epoch": 4.55, + "learning_rate": 0.00011250335335950342, + "loss": 0.2598, + "step": 3838 + }, + { + "epoch": 4.55, + "learning_rate": 0.00011246588925923018, + "loss": 0.2399, + "step": 3839 + }, + { + "epoch": 4.55, + "learning_rate": 0.00011242842338159309, + "loss": 0.2181, + "step": 3840 + }, + { + "epoch": 4.55, + "learning_rate": 0.0001123909557319339, + "loss": 0.2744, + "step": 3841 + }, + { + "epoch": 4.55, + "learning_rate": 0.00011235348631559473, + "loss": 0.2149, + "step": 3842 + }, + { + "epoch": 4.56, + "learning_rate": 0.00011231601513791786, + "loss": 0.2184, + "step": 3843 + }, + { + "epoch": 4.56, + "learning_rate": 0.0001122785422042459, + "loss": 0.2098, + "step": 3844 + }, + { + "epoch": 4.56, + "learning_rate": 0.00011224106751992163, + "loss": 0.2277, + "step": 3845 + }, + { + "epoch": 4.56, + "learning_rate": 0.00011220359109028815, + "loss": 0.2571, + "step": 3846 + }, + { + "epoch": 4.56, + "learning_rate": 0.00011216611292068881, + "loss": 0.2087, + "step": 3847 + }, + { + "epoch": 4.56, + "learning_rate": 0.0001121286330164671, + "loss": 0.2497, + "step": 3848 + }, + { + "epoch": 4.56, + "learning_rate": 0.00011209115138296693, + "loss": 0.1869, + "step": 3849 + }, + { + "epoch": 4.56, + "learning_rate": 0.0001120536680255323, + "loss": 0.239, + "step": 3850 + }, + { + "epoch": 4.57, + "learning_rate": 0.00011201618294950756, + "loss": 0.2018, + "step": 3851 + }, + { + "epoch": 4.57, + "learning_rate": 0.00011197869616023722, + "loss": 0.2751, + "step": 3852 + }, + { + "epoch": 4.57, + "learning_rate": 0.00011194120766306611, + "loss": 0.2526, + "step": 3853 + }, + { + "epoch": 4.57, + "learning_rate": 0.00011190371746333923, + "loss": 0.2657, + "step": 3854 + }, + { + "epoch": 4.57, + "learning_rate": 0.00011186622556640194, + "loss": 0.2659, + "step": 3855 + }, + { + "epoch": 4.57, + "learning_rate": 0.00011182873197759971, + "loss": 0.2401, + "step": 3856 + }, + { + "epoch": 4.57, + "learning_rate": 0.00011179123670227833, + "loss": 0.2299, + "step": 3857 + }, + { + "epoch": 4.57, + "learning_rate": 0.00011175373974578378, + "loss": 0.2249, + "step": 3858 + }, + { + "epoch": 4.58, + "learning_rate": 0.00011171624111346232, + "loss": 0.2457, + "step": 3859 + }, + { + "epoch": 4.58, + "learning_rate": 0.00011167874081066045, + "loss": 0.192, + "step": 3860 + }, + { + "epoch": 4.58, + "learning_rate": 0.00011164123884272493, + "loss": 0.2591, + "step": 3861 + }, + { + "epoch": 4.58, + "learning_rate": 0.00011160373521500264, + "loss": 0.2632, + "step": 3862 + }, + { + "epoch": 4.58, + "learning_rate": 0.00011156622993284084, + "loss": 0.248, + "step": 3863 + }, + { + "epoch": 4.58, + "learning_rate": 0.00011152872300158694, + "loss": 0.2071, + "step": 3864 + }, + { + "epoch": 4.58, + "learning_rate": 0.00011149121442658861, + "loss": 0.2935, + "step": 3865 + }, + { + "epoch": 4.58, + "learning_rate": 0.00011145370421319377, + "loss": 0.2191, + "step": 3866 + }, + { + "epoch": 4.58, + "learning_rate": 0.00011141619236675056, + "loss": 0.2737, + "step": 3867 + }, + { + "epoch": 4.59, + "learning_rate": 0.00011137867889260734, + "loss": 0.2281, + "step": 3868 + }, + { + "epoch": 4.59, + "learning_rate": 0.00011134116379611273, + "loss": 0.2083, + "step": 3869 + }, + { + "epoch": 4.59, + "learning_rate": 0.00011130364708261552, + "loss": 0.2079, + "step": 3870 + }, + { + "epoch": 4.59, + "learning_rate": 0.00011126612875746479, + "loss": 0.2423, + "step": 3871 + }, + { + "epoch": 4.59, + "learning_rate": 0.00011122860882600986, + "loss": 0.1903, + "step": 3872 + }, + { + "epoch": 4.59, + "learning_rate": 0.00011119108729360026, + "loss": 0.1995, + "step": 3873 + }, + { + "epoch": 4.59, + "learning_rate": 0.0001111535641655857, + "loss": 0.2479, + "step": 3874 + }, + { + "epoch": 4.59, + "learning_rate": 0.00011111603944731623, + "loss": 0.198, + "step": 3875 + }, + { + "epoch": 4.6, + "learning_rate": 0.00011107851314414197, + "loss": 0.2242, + "step": 3876 + }, + { + "epoch": 4.6, + "learning_rate": 0.0001110409852614134, + "loss": 0.29, + "step": 3877 + }, + { + "epoch": 4.6, + "learning_rate": 0.00011100345580448118, + "loss": 0.1931, + "step": 3878 + }, + { + "epoch": 4.6, + "learning_rate": 0.00011096592477869616, + "loss": 0.2195, + "step": 3879 + }, + { + "epoch": 4.6, + "learning_rate": 0.0001109283921894095, + "loss": 0.2383, + "step": 3880 + }, + { + "epoch": 4.6, + "learning_rate": 0.00011089085804197248, + "loss": 0.2729, + "step": 3881 + }, + { + "epoch": 4.6, + "learning_rate": 0.00011085332234173664, + "loss": 0.1836, + "step": 3882 + }, + { + "epoch": 4.6, + "learning_rate": 0.00011081578509405382, + "loss": 0.2724, + "step": 3883 + }, + { + "epoch": 4.61, + "learning_rate": 0.00011077824630427594, + "loss": 0.2027, + "step": 3884 + }, + { + "epoch": 4.61, + "learning_rate": 0.00011074070597775527, + "loss": 0.2681, + "step": 3885 + }, + { + "epoch": 4.61, + "learning_rate": 0.00011070316411984421, + "loss": 0.205, + "step": 3886 + }, + { + "epoch": 4.61, + "learning_rate": 0.0001106656207358954, + "loss": 0.3106, + "step": 3887 + }, + { + "epoch": 4.61, + "learning_rate": 0.00011062807583126172, + "loss": 0.2126, + "step": 3888 + }, + { + "epoch": 4.61, + "learning_rate": 0.00011059052941129628, + "loss": 0.4017, + "step": 3889 + }, + { + "epoch": 4.61, + "learning_rate": 0.00011055298148135236, + "loss": 0.2406, + "step": 3890 + }, + { + "epoch": 4.61, + "learning_rate": 0.00011051543204678348, + "loss": 0.2833, + "step": 3891 + }, + { + "epoch": 4.61, + "learning_rate": 0.00011047788111294333, + "loss": 0.2224, + "step": 3892 + }, + { + "epoch": 4.62, + "learning_rate": 0.0001104403286851859, + "loss": 0.3536, + "step": 3893 + }, + { + "epoch": 4.62, + "learning_rate": 0.00011040277476886533, + "loss": 0.2373, + "step": 3894 + }, + { + "epoch": 4.62, + "learning_rate": 0.00011036521936933604, + "loss": 0.2297, + "step": 3895 + }, + { + "epoch": 4.62, + "learning_rate": 0.00011032766249195252, + "loss": 0.1979, + "step": 3896 + }, + { + "epoch": 4.62, + "learning_rate": 0.00011029010414206965, + "loss": 0.2434, + "step": 3897 + }, + { + "epoch": 4.62, + "learning_rate": 0.00011025254432504233, + "loss": 0.2897, + "step": 3898 + }, + { + "epoch": 4.62, + "learning_rate": 0.00011021498304622586, + "loss": 0.2121, + "step": 3899 + }, + { + "epoch": 4.62, + "learning_rate": 0.00011017742031097563, + "loss": 0.3021, + "step": 3900 + }, + { + "epoch": 4.63, + "learning_rate": 0.00011013985612464726, + "loss": 0.2463, + "step": 3901 + }, + { + "epoch": 4.63, + "learning_rate": 0.0001101022904925966, + "loss": 0.3078, + "step": 3902 + }, + { + "epoch": 4.63, + "learning_rate": 0.00011006472342017966, + "loss": 0.3664, + "step": 3903 + }, + { + "epoch": 4.63, + "learning_rate": 0.0001100271549127527, + "loss": 0.2176, + "step": 3904 + }, + { + "epoch": 4.63, + "learning_rate": 0.0001099895849756722, + "loss": 0.2137, + "step": 3905 + }, + { + "epoch": 4.63, + "learning_rate": 0.00010995201361429474, + "loss": 0.2588, + "step": 3906 + }, + { + "epoch": 4.63, + "learning_rate": 0.00010991444083397728, + "loss": 0.2686, + "step": 3907 + }, + { + "epoch": 4.63, + "learning_rate": 0.00010987686664007679, + "loss": 0.2235, + "step": 3908 + }, + { + "epoch": 4.64, + "learning_rate": 0.00010983929103795059, + "loss": 0.2602, + "step": 3909 + }, + { + "epoch": 4.64, + "learning_rate": 0.0001098017140329561, + "loss": 0.1857, + "step": 3910 + }, + { + "epoch": 4.64, + "learning_rate": 0.00010976413563045094, + "loss": 0.2307, + "step": 3911 + }, + { + "epoch": 4.64, + "learning_rate": 0.00010972655583579308, + "loss": 0.2658, + "step": 3912 + }, + { + "epoch": 4.64, + "learning_rate": 0.00010968897465434051, + "loss": 0.2106, + "step": 3913 + }, + { + "epoch": 4.64, + "learning_rate": 0.00010965139209145152, + "loss": 0.2122, + "step": 3914 + }, + { + "epoch": 4.64, + "learning_rate": 0.00010961380815248454, + "loss": 0.2433, + "step": 3915 + }, + { + "epoch": 4.64, + "learning_rate": 0.0001095762228427982, + "loss": 0.2032, + "step": 3916 + }, + { + "epoch": 4.64, + "learning_rate": 0.00010953863616775138, + "loss": 0.3393, + "step": 3917 + }, + { + "epoch": 4.65, + "learning_rate": 0.00010950104813270314, + "loss": 0.2476, + "step": 3918 + }, + { + "epoch": 4.65, + "learning_rate": 0.00010946345874301264, + "loss": 0.1929, + "step": 3919 + }, + { + "epoch": 4.65, + "learning_rate": 0.0001094258680040394, + "loss": 0.2509, + "step": 3920 + }, + { + "epoch": 4.65, + "learning_rate": 0.00010938827592114294, + "loss": 0.2103, + "step": 3921 + }, + { + "epoch": 4.65, + "learning_rate": 0.00010935068249968314, + "loss": 0.2297, + "step": 3922 + }, + { + "epoch": 4.65, + "learning_rate": 0.00010931308774501998, + "loss": 0.2259, + "step": 3923 + }, + { + "epoch": 4.65, + "learning_rate": 0.00010927549166251368, + "loss": 0.238, + "step": 3924 + }, + { + "epoch": 4.65, + "learning_rate": 0.00010923789425752456, + "loss": 0.3147, + "step": 3925 + }, + { + "epoch": 4.66, + "learning_rate": 0.00010920029553541326, + "loss": 0.2753, + "step": 3926 + }, + { + "epoch": 4.66, + "learning_rate": 0.00010916269550154048, + "loss": 0.2399, + "step": 3927 + }, + { + "epoch": 4.66, + "learning_rate": 0.0001091250941612672, + "loss": 0.2196, + "step": 3928 + }, + { + "epoch": 4.66, + "learning_rate": 0.00010908749151995452, + "loss": 0.2326, + "step": 3929 + }, + { + "epoch": 4.66, + "learning_rate": 0.0001090498875829638, + "loss": 0.2217, + "step": 3930 + }, + { + "epoch": 4.66, + "learning_rate": 0.00010901228235565651, + "loss": 0.2012, + "step": 3931 + }, + { + "epoch": 4.66, + "learning_rate": 0.00010897467584339434, + "loss": 0.2018, + "step": 3932 + }, + { + "epoch": 4.66, + "learning_rate": 0.00010893706805153915, + "loss": 0.2382, + "step": 3933 + }, + { + "epoch": 4.67, + "learning_rate": 0.000108899458985453, + "loss": 0.2202, + "step": 3934 + }, + { + "epoch": 4.67, + "learning_rate": 0.00010886184865049813, + "loss": 0.2038, + "step": 3935 + }, + { + "epoch": 4.67, + "learning_rate": 0.00010882423705203698, + "loss": 0.2406, + "step": 3936 + }, + { + "epoch": 4.67, + "learning_rate": 0.00010878662419543206, + "loss": 0.2393, + "step": 3937 + }, + { + "epoch": 4.67, + "learning_rate": 0.00010874901008604623, + "loss": 0.2626, + "step": 3938 + }, + { + "epoch": 4.67, + "learning_rate": 0.00010871139472924237, + "loss": 0.246, + "step": 3939 + }, + { + "epoch": 4.67, + "learning_rate": 0.00010867377813038366, + "loss": 0.2228, + "step": 3940 + }, + { + "epoch": 4.67, + "learning_rate": 0.00010863616029483339, + "loss": 0.2091, + "step": 3941 + }, + { + "epoch": 4.67, + "learning_rate": 0.00010859854122795508, + "loss": 0.215, + "step": 3942 + }, + { + "epoch": 4.68, + "learning_rate": 0.0001085609209351123, + "loss": 0.2071, + "step": 3943 + }, + { + "epoch": 4.68, + "learning_rate": 0.00010852329942166894, + "loss": 0.2208, + "step": 3944 + }, + { + "epoch": 4.68, + "learning_rate": 0.00010848567669298901, + "loss": 0.1988, + "step": 3945 + }, + { + "epoch": 4.68, + "learning_rate": 0.00010844805275443673, + "loss": 0.2129, + "step": 3946 + }, + { + "epoch": 4.68, + "learning_rate": 0.00010841042761137634, + "loss": 0.3038, + "step": 3947 + }, + { + "epoch": 4.68, + "learning_rate": 0.00010837280126917248, + "loss": 0.206, + "step": 3948 + }, + { + "epoch": 4.68, + "learning_rate": 0.00010833517373318975, + "loss": 0.2648, + "step": 3949 + }, + { + "epoch": 4.68, + "learning_rate": 0.00010829754500879308, + "loss": 0.2136, + "step": 3950 + }, + { + "epoch": 4.69, + "learning_rate": 0.0001082599151013475, + "loss": 0.3746, + "step": 3951 + }, + { + "epoch": 4.69, + "learning_rate": 0.00010822228401621819, + "loss": 0.2403, + "step": 3952 + }, + { + "epoch": 4.69, + "learning_rate": 0.00010818465175877052, + "loss": 0.2288, + "step": 3953 + }, + { + "epoch": 4.69, + "learning_rate": 0.0001081470183343701, + "loss": 0.2099, + "step": 3954 + }, + { + "epoch": 4.69, + "learning_rate": 0.00010810938374838251, + "loss": 0.1992, + "step": 3955 + }, + { + "epoch": 4.69, + "learning_rate": 0.0001080717480061737, + "loss": 0.2337, + "step": 3956 + }, + { + "epoch": 4.69, + "learning_rate": 0.00010803411111310971, + "loss": 0.2127, + "step": 3957 + }, + { + "epoch": 4.69, + "learning_rate": 0.00010799647307455674, + "loss": 0.2936, + "step": 3958 + }, + { + "epoch": 4.7, + "learning_rate": 0.00010795883389588111, + "loss": 0.3019, + "step": 3959 + }, + { + "epoch": 4.7, + "learning_rate": 0.00010792119358244939, + "loss": 0.2262, + "step": 3960 + }, + { + "epoch": 4.7, + "learning_rate": 0.00010788355213962825, + "loss": 0.2561, + "step": 3961 + }, + { + "epoch": 4.7, + "learning_rate": 0.0001078459095727845, + "loss": 0.2992, + "step": 3962 + }, + { + "epoch": 4.7, + "learning_rate": 0.0001078082658872852, + "loss": 0.2081, + "step": 3963 + }, + { + "epoch": 4.7, + "learning_rate": 0.00010777062108849756, + "loss": 0.2089, + "step": 3964 + }, + { + "epoch": 4.7, + "learning_rate": 0.00010773297518178881, + "loss": 0.2112, + "step": 3965 + }, + { + "epoch": 4.7, + "learning_rate": 0.00010769532817252653, + "loss": 0.1898, + "step": 3966 + }, + { + "epoch": 4.7, + "learning_rate": 0.00010765768006607826, + "loss": 0.3229, + "step": 3967 + }, + { + "epoch": 4.71, + "learning_rate": 0.00010762003086781185, + "loss": 0.2241, + "step": 3968 + }, + { + "epoch": 4.71, + "learning_rate": 0.00010758238058309527, + "loss": 0.2814, + "step": 3969 + }, + { + "epoch": 4.71, + "learning_rate": 0.00010754472921729661, + "loss": 0.2403, + "step": 3970 + }, + { + "epoch": 4.71, + "learning_rate": 0.00010750707677578413, + "loss": 0.2715, + "step": 3971 + }, + { + "epoch": 4.71, + "eval_loss": 3.3954412937164307, + "eval_runtime": 283.9122, + "eval_samples_per_second": 0.726, + "eval_steps_per_second": 0.726, + "step": 3971 + }, + { + "epoch": 4.71, + "learning_rate": 0.00010746942326392628, + "loss": 0.2263, + "step": 3972 + }, + { + "epoch": 4.71, + "learning_rate": 0.00010743176868709157, + "loss": 0.2433, + "step": 3973 + }, + { + "epoch": 4.71, + "learning_rate": 0.0001073941130506488, + "loss": 0.2871, + "step": 3974 + }, + { + "epoch": 4.71, + "learning_rate": 0.00010735645635996676, + "loss": 0.2416, + "step": 3975 + }, + { + "epoch": 4.72, + "learning_rate": 0.0001073187986204145, + "loss": 0.2563, + "step": 3976 + }, + { + "epoch": 4.72, + "learning_rate": 0.00010728113983736126, + "loss": 0.2502, + "step": 3977 + }, + { + "epoch": 4.72, + "learning_rate": 0.00010724348001617625, + "loss": 0.2145, + "step": 3978 + }, + { + "epoch": 4.72, + "learning_rate": 0.000107205819162229, + "loss": 0.2639, + "step": 3979 + }, + { + "epoch": 4.72, + "learning_rate": 0.00010716815728088912, + "loss": 0.2279, + "step": 3980 + }, + { + "epoch": 4.72, + "learning_rate": 0.0001071304943775264, + "loss": 0.2086, + "step": 3981 + }, + { + "epoch": 4.72, + "learning_rate": 0.00010709283045751069, + "loss": 0.2142, + "step": 3982 + }, + { + "epoch": 4.72, + "learning_rate": 0.0001070551655262121, + "loss": 0.2381, + "step": 3983 + }, + { + "epoch": 4.73, + "learning_rate": 0.00010701749958900078, + "loss": 0.2313, + "step": 3984 + }, + { + "epoch": 4.73, + "learning_rate": 0.0001069798326512471, + "loss": 0.1954, + "step": 3985 + }, + { + "epoch": 4.73, + "learning_rate": 0.00010694216471832152, + "loss": 0.2253, + "step": 3986 + }, + { + "epoch": 4.73, + "learning_rate": 0.00010690449579559469, + "loss": 0.2104, + "step": 3987 + }, + { + "epoch": 4.73, + "learning_rate": 0.00010686682588843737, + "loss": 0.2172, + "step": 3988 + }, + { + "epoch": 4.73, + "learning_rate": 0.00010682915500222051, + "loss": 0.2094, + "step": 3989 + }, + { + "epoch": 4.73, + "learning_rate": 0.00010679148314231504, + "loss": 0.2885, + "step": 3990 + }, + { + "epoch": 4.73, + "learning_rate": 0.00010675381031409225, + "loss": 0.3085, + "step": 3991 + }, + { + "epoch": 4.73, + "learning_rate": 0.00010671613652292343, + "loss": 0.2515, + "step": 3992 + }, + { + "epoch": 4.74, + "learning_rate": 0.00010667846177418003, + "loss": 0.2314, + "step": 3993 + }, + { + "epoch": 4.74, + "learning_rate": 0.00010664078607323367, + "loss": 0.2473, + "step": 3994 + }, + { + "epoch": 4.74, + "learning_rate": 0.00010660310942545608, + "loss": 0.2283, + "step": 3995 + }, + { + "epoch": 4.74, + "learning_rate": 0.00010656543183621912, + "loss": 0.226, + "step": 3996 + }, + { + "epoch": 4.74, + "learning_rate": 0.00010652775331089477, + "loss": 0.2169, + "step": 3997 + }, + { + "epoch": 4.74, + "learning_rate": 0.00010649007385485519, + "loss": 0.2079, + "step": 3998 + }, + { + "epoch": 4.74, + "learning_rate": 0.00010645239347347269, + "loss": 0.2437, + "step": 3999 + }, + { + "epoch": 4.74, + "learning_rate": 0.00010641471217211958, + "loss": 0.2127, + "step": 4000 + }, + { + "epoch": 4.75, + "learning_rate": 0.00010637702995616847, + "loss": 0.2527, + "step": 4001 + }, + { + "epoch": 4.75, + "learning_rate": 0.00010633934683099196, + "loss": 0.2193, + "step": 4002 + }, + { + "epoch": 4.75, + "learning_rate": 0.0001063016628019629, + "loss": 0.2744, + "step": 4003 + }, + { + "epoch": 4.75, + "learning_rate": 0.00010626397787445416, + "loss": 0.2592, + "step": 4004 + }, + { + "epoch": 4.75, + "learning_rate": 0.00010622629205383885, + "loss": 0.2107, + "step": 4005 + }, + { + "epoch": 4.75, + "learning_rate": 0.00010618860534549006, + "loss": 0.1956, + "step": 4006 + }, + { + "epoch": 4.75, + "learning_rate": 0.00010615091775478117, + "loss": 0.2546, + "step": 4007 + }, + { + "epoch": 4.75, + "learning_rate": 0.00010611322928708555, + "loss": 0.2376, + "step": 4008 + }, + { + "epoch": 4.76, + "learning_rate": 0.00010607553994777684, + "loss": 0.2359, + "step": 4009 + }, + { + "epoch": 4.76, + "learning_rate": 0.00010603784974222861, + "loss": 0.2631, + "step": 4010 + }, + { + "epoch": 4.76, + "learning_rate": 0.00010600015867581474, + "loss": 0.2602, + "step": 4011 + }, + { + "epoch": 4.76, + "learning_rate": 0.00010596246675390911, + "loss": 0.2043, + "step": 4012 + }, + { + "epoch": 4.76, + "learning_rate": 0.00010592477398188575, + "loss": 0.2325, + "step": 4013 + }, + { + "epoch": 4.76, + "learning_rate": 0.0001058870803651189, + "loss": 0.2395, + "step": 4014 + }, + { + "epoch": 4.76, + "learning_rate": 0.00010584938590898281, + "loss": 0.2205, + "step": 4015 + }, + { + "epoch": 4.76, + "learning_rate": 0.00010581169061885185, + "loss": 0.2169, + "step": 4016 + }, + { + "epoch": 4.76, + "learning_rate": 0.00010577399450010062, + "loss": 0.1986, + "step": 4017 + }, + { + "epoch": 4.77, + "learning_rate": 0.0001057362975581037, + "loss": 0.2011, + "step": 4018 + }, + { + "epoch": 4.77, + "learning_rate": 0.00010569859979823586, + "loss": 0.2208, + "step": 4019 + }, + { + "epoch": 4.77, + "learning_rate": 0.000105660901225872, + "loss": 0.2478, + "step": 4020 + }, + { + "epoch": 4.77, + "learning_rate": 0.00010562320184638714, + "loss": 0.1936, + "step": 4021 + }, + { + "epoch": 4.77, + "learning_rate": 0.00010558550166515633, + "loss": 0.2719, + "step": 4022 + }, + { + "epoch": 4.77, + "learning_rate": 0.00010554780068755483, + "loss": 0.2873, + "step": 4023 + }, + { + "epoch": 4.77, + "learning_rate": 0.00010551009891895796, + "loss": 0.1993, + "step": 4024 + }, + { + "epoch": 4.77, + "learning_rate": 0.00010547239636474115, + "loss": 0.2174, + "step": 4025 + }, + { + "epoch": 4.78, + "learning_rate": 0.00010543469303028002, + "loss": 0.2009, + "step": 4026 + }, + { + "epoch": 4.78, + "learning_rate": 0.00010539698892095021, + "loss": 0.2038, + "step": 4027 + }, + { + "epoch": 4.78, + "learning_rate": 0.0001053592840421275, + "loss": 0.2119, + "step": 4028 + }, + { + "epoch": 4.78, + "learning_rate": 0.00010532157839918779, + "loss": 0.242, + "step": 4029 + }, + { + "epoch": 4.78, + "learning_rate": 0.00010528387199750707, + "loss": 0.2026, + "step": 4030 + }, + { + "epoch": 4.78, + "learning_rate": 0.00010524616484246146, + "loss": 0.2445, + "step": 4031 + }, + { + "epoch": 4.78, + "learning_rate": 0.00010520845693942719, + "loss": 0.2793, + "step": 4032 + }, + { + "epoch": 4.78, + "learning_rate": 0.00010517074829378057, + "loss": 0.2658, + "step": 4033 + }, + { + "epoch": 4.79, + "learning_rate": 0.00010513303891089803, + "loss": 0.2069, + "step": 4034 + }, + { + "epoch": 4.79, + "learning_rate": 0.00010509532879615614, + "loss": 0.2211, + "step": 4035 + }, + { + "epoch": 4.79, + "learning_rate": 0.00010505761795493145, + "loss": 0.2078, + "step": 4036 + }, + { + "epoch": 4.79, + "learning_rate": 0.00010501990639260079, + "loss": 0.2796, + "step": 4037 + }, + { + "epoch": 4.79, + "learning_rate": 0.00010498219411454098, + "loss": 0.2201, + "step": 4038 + }, + { + "epoch": 4.79, + "learning_rate": 0.000104944481126129, + "loss": 0.198, + "step": 4039 + }, + { + "epoch": 4.79, + "learning_rate": 0.00010490676743274181, + "loss": 0.2182, + "step": 4040 + }, + { + "epoch": 4.79, + "learning_rate": 0.00010486905303975664, + "loss": 0.216, + "step": 4041 + }, + { + "epoch": 4.79, + "learning_rate": 0.00010483133795255071, + "loss": 0.2365, + "step": 4042 + }, + { + "epoch": 4.8, + "learning_rate": 0.00010479362217650137, + "loss": 0.2472, + "step": 4043 + }, + { + "epoch": 4.8, + "learning_rate": 0.0001047559057169861, + "loss": 0.2259, + "step": 4044 + }, + { + "epoch": 4.8, + "learning_rate": 0.00010471818857938238, + "loss": 0.2306, + "step": 4045 + }, + { + "epoch": 4.8, + "learning_rate": 0.00010468047076906793, + "loss": 0.2689, + "step": 4046 + }, + { + "epoch": 4.8, + "learning_rate": 0.0001046427522914204, + "loss": 0.2361, + "step": 4047 + }, + { + "epoch": 4.8, + "learning_rate": 0.00010460503315181768, + "loss": 0.2919, + "step": 4048 + }, + { + "epoch": 4.8, + "learning_rate": 0.00010456731335563769, + "loss": 0.2397, + "step": 4049 + }, + { + "epoch": 4.8, + "learning_rate": 0.00010452959290825846, + "loss": 0.2144, + "step": 4050 + }, + { + "epoch": 4.81, + "learning_rate": 0.00010449187181505804, + "loss": 0.258, + "step": 4051 + }, + { + "epoch": 4.81, + "learning_rate": 0.00010445415008141473, + "loss": 0.2199, + "step": 4052 + }, + { + "epoch": 4.81, + "learning_rate": 0.00010441642771270675, + "loss": 0.1817, + "step": 4053 + }, + { + "epoch": 4.81, + "learning_rate": 0.00010437870471431251, + "loss": 0.2089, + "step": 4054 + }, + { + "epoch": 4.81, + "learning_rate": 0.00010434098109161051, + "loss": 0.2047, + "step": 4055 + }, + { + "epoch": 4.81, + "learning_rate": 0.00010430325684997928, + "loss": 0.2067, + "step": 4056 + }, + { + "epoch": 4.81, + "learning_rate": 0.00010426553199479749, + "loss": 0.1996, + "step": 4057 + }, + { + "epoch": 4.81, + "learning_rate": 0.0001042278065314439, + "loss": 0.2205, + "step": 4058 + }, + { + "epoch": 4.82, + "learning_rate": 0.0001041900804652973, + "loss": 0.2508, + "step": 4059 + }, + { + "epoch": 4.82, + "learning_rate": 0.00010415235380173662, + "loss": 0.2562, + "step": 4060 + }, + { + "epoch": 4.82, + "learning_rate": 0.00010411462654614088, + "loss": 0.2199, + "step": 4061 + }, + { + "epoch": 4.82, + "learning_rate": 0.00010407689870388916, + "loss": 0.2718, + "step": 4062 + }, + { + "epoch": 4.82, + "learning_rate": 0.00010403917028036058, + "loss": 0.2292, + "step": 4063 + }, + { + "epoch": 4.82, + "learning_rate": 0.00010400144128093448, + "loss": 0.3123, + "step": 4064 + }, + { + "epoch": 4.82, + "learning_rate": 0.00010396371171099006, + "loss": 0.2814, + "step": 4065 + }, + { + "epoch": 4.82, + "learning_rate": 0.00010392598157590688, + "loss": 0.231, + "step": 4066 + }, + { + "epoch": 4.82, + "learning_rate": 0.00010388825088106433, + "loss": 0.2242, + "step": 4067 + }, + { + "epoch": 4.83, + "learning_rate": 0.00010385051963184202, + "loss": 0.1998, + "step": 4068 + }, + { + "epoch": 4.83, + "learning_rate": 0.0001038127878336196, + "loss": 0.1902, + "step": 4069 + }, + { + "epoch": 4.83, + "learning_rate": 0.00010377505549177682, + "loss": 0.2198, + "step": 4070 + }, + { + "epoch": 4.83, + "learning_rate": 0.00010373732261169346, + "loss": 0.2537, + "step": 4071 + }, + { + "epoch": 4.83, + "learning_rate": 0.00010369958919874943, + "loss": 0.2267, + "step": 4072 + }, + { + "epoch": 4.83, + "learning_rate": 0.00010366185525832467, + "loss": 0.2376, + "step": 4073 + }, + { + "epoch": 4.83, + "learning_rate": 0.00010362412079579924, + "loss": 0.2076, + "step": 4074 + }, + { + "epoch": 4.83, + "learning_rate": 0.00010358638581655322, + "loss": 0.2507, + "step": 4075 + }, + { + "epoch": 4.84, + "learning_rate": 0.00010354865032596682, + "loss": 0.2077, + "step": 4076 + }, + { + "epoch": 4.84, + "learning_rate": 0.00010351091432942029, + "loss": 0.2762, + "step": 4077 + }, + { + "epoch": 4.84, + "learning_rate": 0.00010347317783229398, + "loss": 0.2232, + "step": 4078 + }, + { + "epoch": 4.84, + "learning_rate": 0.00010343544083996824, + "loss": 0.2475, + "step": 4079 + }, + { + "epoch": 4.84, + "learning_rate": 0.00010339770335782359, + "loss": 0.2108, + "step": 4080 + }, + { + "epoch": 4.84, + "learning_rate": 0.00010335996539124055, + "loss": 0.2544, + "step": 4081 + }, + { + "epoch": 4.84, + "learning_rate": 0.00010332222694559975, + "loss": 0.2253, + "step": 4082 + }, + { + "epoch": 4.84, + "learning_rate": 0.00010328448802628183, + "loss": 0.2324, + "step": 4083 + }, + { + "epoch": 4.85, + "learning_rate": 0.00010324674863866759, + "loss": 0.287, + "step": 4084 + }, + { + "epoch": 4.85, + "learning_rate": 0.0001032090087881378, + "loss": 0.3515, + "step": 4085 + }, + { + "epoch": 4.85, + "learning_rate": 0.00010317126848007337, + "loss": 0.2242, + "step": 4086 + }, + { + "epoch": 4.85, + "learning_rate": 0.0001031335277198552, + "loss": 0.2242, + "step": 4087 + }, + { + "epoch": 4.85, + "learning_rate": 0.00010309578651286436, + "loss": 0.1879, + "step": 4088 + }, + { + "epoch": 4.85, + "learning_rate": 0.00010305804486448186, + "loss": 0.2261, + "step": 4089 + }, + { + "epoch": 4.85, + "learning_rate": 0.0001030203027800889, + "loss": 0.2415, + "step": 4090 + }, + { + "epoch": 4.85, + "learning_rate": 0.00010298256026506662, + "loss": 0.2141, + "step": 4091 + }, + { + "epoch": 4.85, + "learning_rate": 0.00010294481732479635, + "loss": 0.2015, + "step": 4092 + }, + { + "epoch": 4.86, + "learning_rate": 0.0001029070739646593, + "loss": 0.206, + "step": 4093 + }, + { + "epoch": 4.86, + "learning_rate": 0.00010286933019003697, + "loss": 0.2598, + "step": 4094 + }, + { + "epoch": 4.86, + "learning_rate": 0.00010283158600631072, + "loss": 0.2561, + "step": 4095 + }, + { + "epoch": 4.86, + "learning_rate": 0.00010279384141886208, + "loss": 0.1914, + "step": 4096 + }, + { + "epoch": 4.86, + "learning_rate": 0.00010275609643307258, + "loss": 0.2416, + "step": 4097 + }, + { + "epoch": 4.86, + "learning_rate": 0.00010271835105432388, + "loss": 0.2012, + "step": 4098 + }, + { + "epoch": 4.86, + "learning_rate": 0.00010268060528799754, + "loss": 0.3043, + "step": 4099 + }, + { + "epoch": 4.86, + "learning_rate": 0.00010264285913947545, + "loss": 0.2331, + "step": 4100 + }, + { + "epoch": 4.87, + "learning_rate": 0.00010260511261413923, + "loss": 0.237, + "step": 4101 + }, + { + "epoch": 4.87, + "learning_rate": 0.00010256736571737083, + "loss": 0.2776, + "step": 4102 + }, + { + "epoch": 4.87, + "learning_rate": 0.00010252961845455205, + "loss": 0.1938, + "step": 4103 + }, + { + "epoch": 4.87, + "learning_rate": 0.00010249187083106486, + "loss": 0.2596, + "step": 4104 + }, + { + "epoch": 4.87, + "learning_rate": 0.00010245412285229124, + "loss": 0.196, + "step": 4105 + }, + { + "epoch": 4.87, + "learning_rate": 0.00010241637452361323, + "loss": 0.2369, + "step": 4106 + }, + { + "epoch": 4.87, + "learning_rate": 0.00010237862585041293, + "loss": 0.2091, + "step": 4107 + }, + { + "epoch": 4.87, + "learning_rate": 0.00010234087683807247, + "loss": 0.2273, + "step": 4108 + }, + { + "epoch": 4.88, + "learning_rate": 0.00010230312749197406, + "loss": 0.3996, + "step": 4109 + }, + { + "epoch": 4.88, + "learning_rate": 0.00010226537781749987, + "loss": 0.2382, + "step": 4110 + }, + { + "epoch": 4.88, + "learning_rate": 0.00010222762782003223, + "loss": 0.2174, + "step": 4111 + }, + { + "epoch": 4.88, + "learning_rate": 0.00010218987750495343, + "loss": 0.2569, + "step": 4112 + }, + { + "epoch": 4.88, + "learning_rate": 0.00010215212687764593, + "loss": 0.2239, + "step": 4113 + }, + { + "epoch": 4.88, + "learning_rate": 0.00010211437594349203, + "loss": 0.3192, + "step": 4114 + }, + { + "epoch": 4.88, + "learning_rate": 0.00010207662470787427, + "loss": 0.2347, + "step": 4115 + }, + { + "epoch": 4.88, + "learning_rate": 0.00010203887317617511, + "loss": 0.2461, + "step": 4116 + }, + { + "epoch": 4.88, + "learning_rate": 0.00010200112135377709, + "loss": 0.2826, + "step": 4117 + }, + { + "epoch": 4.89, + "learning_rate": 0.00010196336924606283, + "loss": 0.3531, + "step": 4118 + }, + { + "epoch": 4.89, + "learning_rate": 0.00010192561685841496, + "loss": 0.2104, + "step": 4119 + }, + { + "epoch": 4.89, + "learning_rate": 0.00010188786419621612, + "loss": 0.2257, + "step": 4120 + }, + { + "epoch": 4.89, + "learning_rate": 0.00010185011126484903, + "loss": 0.2096, + "step": 4121 + }, + { + "epoch": 4.89, + "learning_rate": 0.0001018123580696964, + "loss": 0.2009, + "step": 4122 + }, + { + "epoch": 4.89, + "learning_rate": 0.00010177460461614108, + "loss": 0.3198, + "step": 4123 + }, + { + "epoch": 4.89, + "learning_rate": 0.00010173685090956582, + "loss": 0.1979, + "step": 4124 + }, + { + "epoch": 4.89, + "learning_rate": 0.00010169909695535354, + "loss": 0.2507, + "step": 4125 + }, + { + "epoch": 4.9, + "learning_rate": 0.00010166134275888708, + "loss": 0.2295, + "step": 4126 + }, + { + "epoch": 4.9, + "learning_rate": 0.00010162358832554937, + "loss": 0.2355, + "step": 4127 + }, + { + "epoch": 4.9, + "learning_rate": 0.00010158583366072338, + "loss": 0.2253, + "step": 4128 + }, + { + "epoch": 4.9, + "learning_rate": 0.00010154807876979213, + "loss": 0.3306, + "step": 4129 + }, + { + "epoch": 4.9, + "learning_rate": 0.00010151032365813859, + "loss": 0.2265, + "step": 4130 + }, + { + "epoch": 4.9, + "learning_rate": 0.00010147256833114586, + "loss": 0.2176, + "step": 4131 + }, + { + "epoch": 4.9, + "learning_rate": 0.000101434812794197, + "loss": 0.2631, + "step": 4132 + }, + { + "epoch": 4.9, + "learning_rate": 0.00010139705705267513, + "loss": 0.2241, + "step": 4133 + }, + { + "epoch": 4.91, + "learning_rate": 0.00010135930111196338, + "loss": 0.2317, + "step": 4134 + }, + { + "epoch": 4.91, + "learning_rate": 0.000101321544977445, + "loss": 0.2325, + "step": 4135 + }, + { + "epoch": 4.91, + "learning_rate": 0.00010128378865450307, + "loss": 0.2011, + "step": 4136 + }, + { + "epoch": 4.91, + "learning_rate": 0.00010124603214852093, + "loss": 0.232, + "step": 4137 + }, + { + "epoch": 4.91, + "learning_rate": 0.00010120827546488174, + "loss": 0.2624, + "step": 4138 + }, + { + "epoch": 4.91, + "learning_rate": 0.00010117051860896885, + "loss": 0.2452, + "step": 4139 + }, + { + "epoch": 4.91, + "learning_rate": 0.00010113276158616553, + "loss": 0.2261, + "step": 4140 + }, + { + "epoch": 4.91, + "learning_rate": 0.00010109500440185514, + "loss": 0.2378, + "step": 4141 + }, + { + "epoch": 4.91, + "learning_rate": 0.000101057247061421, + "loss": 0.2172, + "step": 4142 + }, + { + "epoch": 4.92, + "learning_rate": 0.00010101948957024647, + "loss": 0.2539, + "step": 4143 + }, + { + "epoch": 4.92, + "learning_rate": 0.00010098173193371499, + "loss": 0.2178, + "step": 4144 + }, + { + "epoch": 4.92, + "learning_rate": 0.00010094397415720991, + "loss": 0.2545, + "step": 4145 + }, + { + "epoch": 4.92, + "learning_rate": 0.00010090621624611474, + "loss": 0.2233, + "step": 4146 + }, + { + "epoch": 4.92, + "learning_rate": 0.0001008684582058129, + "loss": 0.2547, + "step": 4147 + }, + { + "epoch": 4.92, + "learning_rate": 0.00010083070004168786, + "loss": 0.232, + "step": 4148 + }, + { + "epoch": 4.92, + "learning_rate": 0.00010079294175912313, + "loss": 0.313, + "step": 4149 + }, + { + "epoch": 4.92, + "learning_rate": 0.00010075518336350218, + "loss": 0.2234, + "step": 4150 + }, + { + "epoch": 4.93, + "learning_rate": 0.00010071742486020854, + "loss": 0.2447, + "step": 4151 + }, + { + "epoch": 4.93, + "learning_rate": 0.00010067966625462577, + "loss": 0.246, + "step": 4152 + }, + { + "epoch": 4.93, + "learning_rate": 0.00010064190755213745, + "loss": 0.1836, + "step": 4153 + }, + { + "epoch": 4.93, + "learning_rate": 0.00010060414875812709, + "loss": 0.2655, + "step": 4154 + }, + { + "epoch": 4.93, + "learning_rate": 0.00010056638987797833, + "loss": 0.2338, + "step": 4155 + }, + { + "epoch": 4.93, + "learning_rate": 0.00010052863091707467, + "loss": 0.2014, + "step": 4156 + }, + { + "epoch": 4.93, + "learning_rate": 0.00010049087188079983, + "loss": 0.2492, + "step": 4157 + }, + { + "epoch": 4.93, + "learning_rate": 0.0001004531127745373, + "loss": 0.2547, + "step": 4158 + }, + { + "epoch": 4.94, + "learning_rate": 0.00010041535360367085, + "loss": 0.2837, + "step": 4159 + }, + { + "epoch": 4.94, + "learning_rate": 0.00010037759437358398, + "loss": 0.2598, + "step": 4160 + }, + { + "epoch": 4.94, + "learning_rate": 0.0001003398350896604, + "loss": 0.2047, + "step": 4161 + }, + { + "epoch": 4.94, + "learning_rate": 0.00010030207575728374, + "loss": 0.2006, + "step": 4162 + }, + { + "epoch": 4.94, + "learning_rate": 0.00010026431638183771, + "loss": 0.2399, + "step": 4163 + }, + { + "epoch": 4.94, + "learning_rate": 0.00010022655696870588, + "loss": 0.2508, + "step": 4164 + }, + { + "epoch": 4.94, + "learning_rate": 0.00010018879752327202, + "loss": 0.2217, + "step": 4165 + } + ], + "logging_steps": 1, + "max_steps": 8330, + "num_input_tokens_seen": 0, + "num_train_epochs": 10, + "save_steps": 833, + "total_flos": 1.4598637075499581e+19, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-4165/trainer_state.json:com.dropbox.attrs b/checkpoint-4165/trainer_state.json:com.dropbox.attrs new file mode 100644 index 0000000000000000000000000000000000000000..32786d8313a4d06b0c34c1121e3039aa8c883306 Binary files /dev/null and b/checkpoint-4165/trainer_state.json:com.dropbox.attrs differ diff --git a/checkpoint-4165/training_args.bin b/checkpoint-4165/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..b74ebd11d7429fe3b4fc4524a3b2d80be486b207 --- /dev/null +++ b/checkpoint-4165/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:008c2f6eb84a5df4b149629ed295f775de2745857ece42b151bce88afb911869 +size 4859 diff --git a/checkpoint-4165/training_args.bin:com.dropbox.attrs b/checkpoint-4165/training_args.bin:com.dropbox.attrs new file mode 100644 index 0000000000000000000000000000000000000000..a91b0f6ae12e6c785b4300279c92a84d88fcd8ef Binary files /dev/null and b/checkpoint-4165/training_args.bin:com.dropbox.attrs differ diff --git a/checkpoint-4998/README.md b/checkpoint-4998/README.md new file mode 100644 index 0000000000000000000000000000000000000000..bd5a5d669d6a6bdd984240b8e8bb0a3445b36cda --- /dev/null +++ b/checkpoint-4998/README.md @@ -0,0 +1,218 @@ +--- +library_name: peft +base_model: mistralai/Mixtral-8x7B-v0.1 +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] + + +## Training procedure + +The following `bitsandbytes` quantization config was used during training: +- quant_method: bitsandbytes +- load_in_8bit: False +- load_in_4bit: True +- llm_int8_threshold: 6.0 +- llm_int8_skip_modules: None +- llm_int8_enable_fp32_cpu_offload: False +- llm_int8_has_fp16_weight: False +- bnb_4bit_quant_type: nf4 +- bnb_4bit_use_double_quant: True +- bnb_4bit_compute_dtype: bfloat16 + +### Framework versions + +- PEFT 0.7.0 \ No newline at end of file diff --git a/checkpoint-4998/README.md:com.dropbox.attrs b/checkpoint-4998/README.md:com.dropbox.attrs new file mode 100644 index 0000000000000000000000000000000000000000..27def3893eec6cec258b41a0dceb195b8f9a217c Binary files /dev/null and b/checkpoint-4998/README.md:com.dropbox.attrs differ diff --git a/checkpoint-4998/adapter_config.json b/checkpoint-4998/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..1c60bdd91f1a6b73161ce005f7160d2490fd5c8a --- /dev/null +++ b/checkpoint-4998/adapter_config.json @@ -0,0 +1,32 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "mistralai/Mixtral-8x7B-v0.1", + "bias": "none", + "fan_in_fan_out": null, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 64, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "k_proj", + "w1", + "gate", + "w2", + "q_proj", + "w3", + "o_proj", + "v_proj" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-4998/adapter_config.json:com.dropbox.attrs b/checkpoint-4998/adapter_config.json:com.dropbox.attrs new file mode 100644 index 0000000000000000000000000000000000000000..4493853892c5a03db5492e4e8cd18957f5fbe36b Binary files /dev/null and b/checkpoint-4998/adapter_config.json:com.dropbox.attrs differ diff --git a/checkpoint-4998/adapter_model.safetensors b/checkpoint-4998/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..77acba980210f7b490312f30eb63496d0e4de3d1 --- /dev/null +++ b/checkpoint-4998/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:909de69236e61c484cbf6caa4ba219b395bc1dc00a842d28a4b0f9b60da0ea89 +size 3875879784 diff --git a/checkpoint-4998/adapter_model.safetensors:com.dropbox.attrs b/checkpoint-4998/adapter_model.safetensors:com.dropbox.attrs new file mode 100644 index 0000000000000000000000000000000000000000..1518b19b8f13479185c729190828ae3f9a1f07f5 Binary files /dev/null and b/checkpoint-4998/adapter_model.safetensors:com.dropbox.attrs differ diff --git a/checkpoint-4998/optimizer.pt b/checkpoint-4998/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..3c00f5f315153ff5085f45b6821d180efcb2d1a2 --- /dev/null +++ b/checkpoint-4998/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c9f3156d0bd06702e3c7446fc2a1546a9e5281333f6573eeddf03d18286b1efe +size 1943844127 diff --git a/checkpoint-4998/optimizer.pt:com.dropbox.attrs b/checkpoint-4998/optimizer.pt:com.dropbox.attrs new file mode 100644 index 0000000000000000000000000000000000000000..0a2aec84a95eef460302e3406a3f5aa188e99d8a Binary files /dev/null and b/checkpoint-4998/optimizer.pt:com.dropbox.attrs differ diff --git a/checkpoint-4998/rng_state.pth b/checkpoint-4998/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..bc4eaae57c8679ef7d42f823db407c87f09332ff --- /dev/null +++ b/checkpoint-4998/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e45216804b71992ea0913e7ffea6dccebecd90e7c55671ba4e91a8c3b8b8ad5f +size 14575 diff --git a/checkpoint-4998/rng_state.pth:com.dropbox.attrs b/checkpoint-4998/rng_state.pth:com.dropbox.attrs new file mode 100644 index 0000000000000000000000000000000000000000..e9a3653fbb8cc62699d9dd3313b5032c23fe173a Binary files /dev/null and b/checkpoint-4998/rng_state.pth:com.dropbox.attrs differ diff --git a/checkpoint-4998/scheduler.pt b/checkpoint-4998/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..f65da8843bdc761fdb378c9899aa3b235d782366 --- /dev/null +++ b/checkpoint-4998/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3918a3cb0b71ac100385ce1eef20ea8ad28fb8150d1e4e96f5c061a6884160ae +size 627 diff --git a/checkpoint-4998/scheduler.pt:com.dropbox.attrs b/checkpoint-4998/scheduler.pt:com.dropbox.attrs new file mode 100644 index 0000000000000000000000000000000000000000..b2178948b50537034baa78a63d66d299e08accb8 Binary files /dev/null and b/checkpoint-4998/scheduler.pt:com.dropbox.attrs differ diff --git a/checkpoint-4998/trainer_state.json b/checkpoint-4998/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..062b17aa12e04ff2b22b5ad5a7144331236505d0 --- /dev/null +++ b/checkpoint-4998/trainer_state.json @@ -0,0 +1,30201 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 5.925570228091236, + "eval_steps": 209, + "global_step": 4998, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 2e-05, + "loss": 2.1426, + "step": 1 + }, + { + "epoch": 0.0, + "eval_loss": 2.071432113647461, + "eval_runtime": 279.6718, + "eval_samples_per_second": 0.737, + "eval_steps_per_second": 0.737, + "step": 1 + }, + { + "epoch": 0.0, + "learning_rate": 4e-05, + "loss": 2.4033, + "step": 2 + }, + { + "epoch": 0.0, + "learning_rate": 6e-05, + "loss": 2.1893, + "step": 3 + }, + { + "epoch": 0.0, + "learning_rate": 8e-05, + "loss": 2.3226, + "step": 4 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001, + "loss": 2.2485, + "step": 5 + }, + { + "epoch": 0.01, + "learning_rate": 0.00012, + "loss": 1.9704, + "step": 6 + }, + { + "epoch": 0.01, + "learning_rate": 0.00014, + "loss": 1.6929, + "step": 7 + }, + { + "epoch": 0.01, + "learning_rate": 0.00016, + "loss": 2.2957, + "step": 8 + }, + { + "epoch": 0.01, + "learning_rate": 0.00018, + "loss": 1.9907, + "step": 9 + }, + { + "epoch": 0.01, + "learning_rate": 0.0002, + "loss": 2.1295, + "step": 10 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999999287109068, + "loss": 2.2249, + "step": 11 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019999997148436365, + "loss": 2.1733, + "step": 12 + }, + { + "epoch": 0.02, + "learning_rate": 0.000199999935839822, + "loss": 2.1404, + "step": 13 + }, + { + "epoch": 0.02, + "learning_rate": 0.00019999988593747084, + "loss": 2.0236, + "step": 14 + }, + { + "epoch": 0.02, + "learning_rate": 0.00019999982177731722, + "loss": 1.9639, + "step": 15 + }, + { + "epoch": 0.02, + "learning_rate": 0.00019999974335937034, + "loss": 1.692, + "step": 16 + }, + { + "epoch": 0.02, + "learning_rate": 0.00019999965068364137, + "loss": 2.3609, + "step": 17 + }, + { + "epoch": 0.02, + "learning_rate": 0.00019999954375014348, + "loss": 2.3553, + "step": 18 + }, + { + "epoch": 0.02, + "learning_rate": 0.00019999942255889198, + "loss": 1.5733, + "step": 19 + }, + { + "epoch": 0.02, + "learning_rate": 0.00019999928710990412, + "loss": 1.7505, + "step": 20 + }, + { + "epoch": 0.03, + "learning_rate": 0.00019999913740319922, + "loss": 2.3068, + "step": 21 + }, + { + "epoch": 0.03, + "learning_rate": 0.00019999897343879862, + "loss": 1.8371, + "step": 22 + }, + { + "epoch": 0.03, + "learning_rate": 0.0001999987952167257, + "loss": 1.9852, + "step": 23 + }, + { + "epoch": 0.03, + "learning_rate": 0.00019999860273700585, + "loss": 1.9625, + "step": 24 + }, + { + "epoch": 0.03, + "learning_rate": 0.00019999839599966655, + "loss": 2.1089, + "step": 25 + }, + { + "epoch": 0.03, + "learning_rate": 0.00019999817500473724, + "loss": 2.1086, + "step": 26 + }, + { + "epoch": 0.03, + "learning_rate": 0.00019999793975224945, + "loss": 2.0284, + "step": 27 + }, + { + "epoch": 0.03, + "learning_rate": 0.00019999769024223673, + "loss": 2.3641, + "step": 28 + }, + { + "epoch": 0.03, + "learning_rate": 0.00019999742647473464, + "loss": 1.963, + "step": 29 + }, + { + "epoch": 0.04, + "learning_rate": 0.00019999714844978078, + "loss": 2.0635, + "step": 30 + }, + { + "epoch": 0.04, + "learning_rate": 0.0001999968561674148, + "loss": 1.9304, + "step": 31 + }, + { + "epoch": 0.04, + "learning_rate": 0.00019999654962767839, + "loss": 1.4124, + "step": 32 + }, + { + "epoch": 0.04, + "learning_rate": 0.00019999622883061518, + "loss": 2.1444, + "step": 33 + }, + { + "epoch": 0.04, + "learning_rate": 0.00019999589377627102, + "loss": 1.6477, + "step": 34 + }, + { + "epoch": 0.04, + "learning_rate": 0.0001999955444646936, + "loss": 2.2601, + "step": 35 + }, + { + "epoch": 0.04, + "learning_rate": 0.00019999518089593282, + "loss": 1.6256, + "step": 36 + }, + { + "epoch": 0.04, + "learning_rate": 0.0001999948030700404, + "loss": 1.9155, + "step": 37 + }, + { + "epoch": 0.05, + "learning_rate": 0.00019999441098707025, + "loss": 2.1408, + "step": 38 + }, + { + "epoch": 0.05, + "learning_rate": 0.00019999400464707832, + "loss": 2.104, + "step": 39 + }, + { + "epoch": 0.05, + "learning_rate": 0.0001999935840501225, + "loss": 1.9841, + "step": 40 + }, + { + "epoch": 0.05, + "learning_rate": 0.00019999314919626272, + "loss": 1.5924, + "step": 41 + }, + { + "epoch": 0.05, + "learning_rate": 0.00019999270008556108, + "loss": 1.9956, + "step": 42 + }, + { + "epoch": 0.05, + "learning_rate": 0.00019999223671808154, + "loss": 1.4673, + "step": 43 + }, + { + "epoch": 0.05, + "learning_rate": 0.00019999175909389018, + "loss": 2.1595, + "step": 44 + }, + { + "epoch": 0.05, + "learning_rate": 0.00019999126721305513, + "loss": 1.8439, + "step": 45 + }, + { + "epoch": 0.06, + "learning_rate": 0.00019999076107564648, + "loss": 1.9961, + "step": 46 + }, + { + "epoch": 0.06, + "learning_rate": 0.00019999024068173638, + "loss": 2.1504, + "step": 47 + }, + { + "epoch": 0.06, + "learning_rate": 0.00019998970603139912, + "loss": 2.2907, + "step": 48 + }, + { + "epoch": 0.06, + "learning_rate": 0.0001999891571247108, + "loss": 1.5709, + "step": 49 + }, + { + "epoch": 0.06, + "learning_rate": 0.0001999885939617498, + "loss": 2.4504, + "step": 50 + }, + { + "epoch": 0.06, + "learning_rate": 0.00019998801654259632, + "loss": 2.3787, + "step": 51 + }, + { + "epoch": 0.06, + "learning_rate": 0.0001999874248673328, + "loss": 2.0434, + "step": 52 + }, + { + "epoch": 0.06, + "learning_rate": 0.00019998681893604347, + "loss": 2.1671, + "step": 53 + }, + { + "epoch": 0.06, + "learning_rate": 0.0001999861987488148, + "loss": 1.7432, + "step": 54 + }, + { + "epoch": 0.07, + "learning_rate": 0.00019998556430573521, + "loss": 1.7737, + "step": 55 + }, + { + "epoch": 0.07, + "learning_rate": 0.00019998491560689513, + "loss": 2.0122, + "step": 56 + }, + { + "epoch": 0.07, + "learning_rate": 0.0001999842526523871, + "loss": 1.7545, + "step": 57 + }, + { + "epoch": 0.07, + "learning_rate": 0.00019998357544230558, + "loss": 2.201, + "step": 58 + }, + { + "epoch": 0.07, + "learning_rate": 0.00019998288397674716, + "loss": 2.0396, + "step": 59 + }, + { + "epoch": 0.07, + "learning_rate": 0.0001999821782558104, + "loss": 1.9275, + "step": 60 + }, + { + "epoch": 0.07, + "learning_rate": 0.00019998145827959598, + "loss": 1.7797, + "step": 61 + }, + { + "epoch": 0.07, + "learning_rate": 0.0001999807240482065, + "loss": 2.1463, + "step": 62 + }, + { + "epoch": 0.08, + "learning_rate": 0.00019997997556174665, + "loss": 1.935, + "step": 63 + }, + { + "epoch": 0.08, + "learning_rate": 0.0001999792128203232, + "loss": 2.1182, + "step": 64 + }, + { + "epoch": 0.08, + "learning_rate": 0.0001999784358240448, + "loss": 2.2297, + "step": 65 + }, + { + "epoch": 0.08, + "learning_rate": 0.00019997764457302234, + "loss": 2.1052, + "step": 66 + }, + { + "epoch": 0.08, + "learning_rate": 0.0001999768390673686, + "loss": 2.0777, + "step": 67 + }, + { + "epoch": 0.08, + "learning_rate": 0.00019997601930719835, + "loss": 2.1419, + "step": 68 + }, + { + "epoch": 0.08, + "learning_rate": 0.0001999751852926286, + "loss": 2.2586, + "step": 69 + }, + { + "epoch": 0.08, + "learning_rate": 0.00019997433702377817, + "loss": 1.9089, + "step": 70 + }, + { + "epoch": 0.09, + "learning_rate": 0.00019997347450076801, + "loss": 2.0587, + "step": 71 + }, + { + "epoch": 0.09, + "learning_rate": 0.00019997259772372116, + "loss": 2.4143, + "step": 72 + }, + { + "epoch": 0.09, + "learning_rate": 0.00019997170669276256, + "loss": 1.947, + "step": 73 + }, + { + "epoch": 0.09, + "learning_rate": 0.00019997080140801932, + "loss": 2.008, + "step": 74 + }, + { + "epoch": 0.09, + "learning_rate": 0.00019996988186962041, + "loss": 2.4912, + "step": 75 + }, + { + "epoch": 0.09, + "learning_rate": 0.00019996894807769707, + "loss": 2.0279, + "step": 76 + }, + { + "epoch": 0.09, + "learning_rate": 0.00019996800003238232, + "loss": 1.9914, + "step": 77 + }, + { + "epoch": 0.09, + "learning_rate": 0.0001999670377338114, + "loss": 1.9091, + "step": 78 + }, + { + "epoch": 0.09, + "learning_rate": 0.00019996606118212148, + "loss": 1.8038, + "step": 79 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019996507037745183, + "loss": 2.3573, + "step": 80 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019996406531994364, + "loss": 2.3204, + "step": 81 + }, + { + "epoch": 0.1, + "learning_rate": 0.0001999630460097403, + "loss": 2.1619, + "step": 82 + }, + { + "epoch": 0.1, + "learning_rate": 0.0001999620124469871, + "loss": 1.9977, + "step": 83 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019996096463183142, + "loss": 2.195, + "step": 84 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019995990256442263, + "loss": 1.9909, + "step": 85 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019995882624491217, + "loss": 2.2001, + "step": 86 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019995773567345354, + "loss": 1.5795, + "step": 87 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019995663085020212, + "loss": 2.174, + "step": 88 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019995551177531557, + "loss": 1.9605, + "step": 89 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019995437844895334, + "loss": 2.1768, + "step": 90 + }, + { + "epoch": 0.11, + "learning_rate": 0.0001999532308712771, + "loss": 1.6906, + "step": 91 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019995206904245037, + "loss": 2.1029, + "step": 92 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019995089296263893, + "loss": 2.0652, + "step": 93 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019994970263201035, + "loss": 2.1733, + "step": 94 + }, + { + "epoch": 0.11, + "learning_rate": 0.0001999484980507344, + "loss": 1.9413, + "step": 95 + }, + { + "epoch": 0.12, + "learning_rate": 0.0001999472792189828, + "loss": 1.9538, + "step": 96 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019994604613692935, + "loss": 2.4158, + "step": 97 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019994479880474988, + "loss": 1.8964, + "step": 98 + }, + { + "epoch": 0.12, + "learning_rate": 0.0001999435372226222, + "loss": 2.3135, + "step": 99 + }, + { + "epoch": 0.12, + "learning_rate": 0.0001999422613907262, + "loss": 2.127, + "step": 100 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019994097130924374, + "loss": 1.9954, + "step": 101 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019993966697835883, + "loss": 2.1363, + "step": 102 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019993834839825738, + "loss": 1.7779, + "step": 103 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019993701556912742, + "loss": 2.0923, + "step": 104 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019993566849115898, + "loss": 1.9183, + "step": 105 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019993430716454413, + "loss": 1.7894, + "step": 106 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019993293158947694, + "loss": 2.0094, + "step": 107 + }, + { + "epoch": 0.13, + "learning_rate": 0.0001999315417661536, + "loss": 2.1469, + "step": 108 + }, + { + "epoch": 0.13, + "learning_rate": 0.0001999301376947722, + "loss": 1.6924, + "step": 109 + }, + { + "epoch": 0.13, + "learning_rate": 0.0001999287193755329, + "loss": 2.1794, + "step": 110 + }, + { + "epoch": 0.13, + "learning_rate": 0.000199927286808638, + "loss": 2.1338, + "step": 111 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019992583999429178, + "loss": 1.9988, + "step": 112 + }, + { + "epoch": 0.14, + "learning_rate": 0.0001999243789327004, + "loss": 2.0735, + "step": 113 + }, + { + "epoch": 0.14, + "learning_rate": 0.0001999229036240723, + "loss": 2.0521, + "step": 114 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019992141406861776, + "loss": 1.9441, + "step": 115 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019991991026654918, + "loss": 2.1244, + "step": 116 + }, + { + "epoch": 0.14, + "learning_rate": 0.0001999183922180809, + "loss": 1.7937, + "step": 117 + }, + { + "epoch": 0.14, + "learning_rate": 0.0001999168599234295, + "loss": 2.2603, + "step": 118 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019991531338281332, + "loss": 2.1846, + "step": 119 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019991375259645293, + "loss": 2.3241, + "step": 120 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019991217756457085, + "loss": 2.0926, + "step": 121 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019991058828739165, + "loss": 2.0092, + "step": 122 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019990898476514193, + "loss": 1.8076, + "step": 123 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019990736699805029, + "loss": 2.0369, + "step": 124 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019990573498634742, + "loss": 2.0488, + "step": 125 + }, + { + "epoch": 0.15, + "learning_rate": 0.000199904088730266, + "loss": 2.1534, + "step": 126 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019990242823004074, + "loss": 2.1406, + "step": 127 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019990075348590839, + "loss": 1.9379, + "step": 128 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019989906449810775, + "loss": 1.9781, + "step": 129 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019989736126687963, + "loss": 1.973, + "step": 130 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019989564379246683, + "loss": 1.6825, + "step": 131 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019989391207511428, + "loss": 2.0843, + "step": 132 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019989216611506887, + "loss": 1.8547, + "step": 133 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019989040591257952, + "loss": 1.7626, + "step": 134 + }, + { + "epoch": 0.16, + "learning_rate": 0.0001998886314678972, + "loss": 2.0531, + "step": 135 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019988684278127497, + "loss": 2.0031, + "step": 136 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019988503985296773, + "loss": 1.9342, + "step": 137 + }, + { + "epoch": 0.17, + "learning_rate": 0.00019988322268323268, + "loss": 2.3297, + "step": 138 + }, + { + "epoch": 0.17, + "learning_rate": 0.00019988139127232878, + "loss": 2.3401, + "step": 139 + }, + { + "epoch": 0.17, + "learning_rate": 0.00019987954562051725, + "loss": 1.8983, + "step": 140 + }, + { + "epoch": 0.17, + "learning_rate": 0.0001998776857280612, + "loss": 2.0621, + "step": 141 + }, + { + "epoch": 0.17, + "learning_rate": 0.00019987581159522578, + "loss": 2.0574, + "step": 142 + }, + { + "epoch": 0.17, + "learning_rate": 0.00019987392322227824, + "loss": 1.9516, + "step": 143 + }, + { + "epoch": 0.17, + "learning_rate": 0.00019987202060948783, + "loss": 2.1402, + "step": 144 + }, + { + "epoch": 0.17, + "learning_rate": 0.00019987010375712577, + "loss": 1.8903, + "step": 145 + }, + { + "epoch": 0.18, + "learning_rate": 0.00019986817266546539, + "loss": 1.8248, + "step": 146 + }, + { + "epoch": 0.18, + "learning_rate": 0.00019986622733478204, + "loss": 1.9877, + "step": 147 + }, + { + "epoch": 0.18, + "learning_rate": 0.00019986426776535306, + "loss": 1.6272, + "step": 148 + }, + { + "epoch": 0.18, + "learning_rate": 0.00019986229395745785, + "loss": 1.8605, + "step": 149 + }, + { + "epoch": 0.18, + "learning_rate": 0.00019986030591137783, + "loss": 1.6848, + "step": 150 + }, + { + "epoch": 0.18, + "learning_rate": 0.00019985830362739647, + "loss": 2.1922, + "step": 151 + }, + { + "epoch": 0.18, + "learning_rate": 0.0001998562871057992, + "loss": 2.0238, + "step": 152 + }, + { + "epoch": 0.18, + "learning_rate": 0.0001998542563468736, + "loss": 2.2246, + "step": 153 + }, + { + "epoch": 0.18, + "learning_rate": 0.00019985221135090914, + "loss": 1.9438, + "step": 154 + }, + { + "epoch": 0.19, + "learning_rate": 0.00019985015211819744, + "loss": 2.2136, + "step": 155 + }, + { + "epoch": 0.19, + "learning_rate": 0.0001998480786490321, + "loss": 2.4563, + "step": 156 + }, + { + "epoch": 0.19, + "learning_rate": 0.00019984599094370874, + "loss": 2.2138, + "step": 157 + }, + { + "epoch": 0.19, + "learning_rate": 0.00019984388900252503, + "loss": 2.2679, + "step": 158 + }, + { + "epoch": 0.19, + "learning_rate": 0.00019984177282578064, + "loss": 1.9537, + "step": 159 + }, + { + "epoch": 0.19, + "learning_rate": 0.0001998396424137773, + "loss": 2.0803, + "step": 160 + }, + { + "epoch": 0.19, + "learning_rate": 0.0001998374977668188, + "loss": 2.0282, + "step": 161 + }, + { + "epoch": 0.19, + "learning_rate": 0.00019983533888521087, + "loss": 2.0157, + "step": 162 + }, + { + "epoch": 0.2, + "learning_rate": 0.0001998331657692613, + "loss": 1.7837, + "step": 163 + }, + { + "epoch": 0.2, + "learning_rate": 0.00019983097841928, + "loss": 2.1556, + "step": 164 + }, + { + "epoch": 0.2, + "learning_rate": 0.00019982877683557879, + "loss": 2.1447, + "step": 165 + }, + { + "epoch": 0.2, + "learning_rate": 0.00019982656101847162, + "loss": 2.4139, + "step": 166 + }, + { + "epoch": 0.2, + "learning_rate": 0.0001998243309682743, + "loss": 1.6788, + "step": 167 + }, + { + "epoch": 0.2, + "learning_rate": 0.00019982208668530493, + "loss": 1.9008, + "step": 168 + }, + { + "epoch": 0.2, + "learning_rate": 0.0001998198281698834, + "loss": 2.173, + "step": 169 + }, + { + "epoch": 0.2, + "learning_rate": 0.00019981755542233177, + "loss": 2.1837, + "step": 170 + }, + { + "epoch": 0.21, + "learning_rate": 0.00019981526844297404, + "loss": 2.0639, + "step": 171 + }, + { + "epoch": 0.21, + "learning_rate": 0.00019981296723213632, + "loss": 2.3864, + "step": 172 + }, + { + "epoch": 0.21, + "learning_rate": 0.00019981065179014673, + "loss": 1.923, + "step": 173 + }, + { + "epoch": 0.21, + "learning_rate": 0.00019980832211733535, + "loss": 1.9192, + "step": 174 + }, + { + "epoch": 0.21, + "learning_rate": 0.00019980597821403438, + "loss": 2.0335, + "step": 175 + }, + { + "epoch": 0.21, + "learning_rate": 0.000199803620080578, + "loss": 1.8172, + "step": 176 + }, + { + "epoch": 0.21, + "learning_rate": 0.0001998012477173024, + "loss": 2.0294, + "step": 177 + }, + { + "epoch": 0.21, + "learning_rate": 0.00019979886112454586, + "loss": 2.2889, + "step": 178 + }, + { + "epoch": 0.21, + "learning_rate": 0.00019979646030264867, + "loss": 1.8498, + "step": 179 + }, + { + "epoch": 0.22, + "learning_rate": 0.0001997940452519531, + "loss": 2.0797, + "step": 180 + }, + { + "epoch": 0.22, + "learning_rate": 0.0001997916159728035, + "loss": 2.2356, + "step": 181 + }, + { + "epoch": 0.22, + "learning_rate": 0.0001997891724655462, + "loss": 2.1187, + "step": 182 + }, + { + "epoch": 0.22, + "learning_rate": 0.00019978671473052964, + "loss": 1.9301, + "step": 183 + }, + { + "epoch": 0.22, + "learning_rate": 0.00019978424276810423, + "loss": 1.8582, + "step": 184 + }, + { + "epoch": 0.22, + "learning_rate": 0.0001997817565786224, + "loss": 2.144, + "step": 185 + }, + { + "epoch": 0.22, + "learning_rate": 0.00019977925616243862, + "loss": 2.0595, + "step": 186 + }, + { + "epoch": 0.22, + "learning_rate": 0.00019977674151990945, + "loss": 1.9104, + "step": 187 + }, + { + "epoch": 0.23, + "learning_rate": 0.00019977421265139332, + "loss": 1.9727, + "step": 188 + }, + { + "epoch": 0.23, + "learning_rate": 0.00019977166955725088, + "loss": 1.8727, + "step": 189 + }, + { + "epoch": 0.23, + "learning_rate": 0.0001997691122378447, + "loss": 2.0611, + "step": 190 + }, + { + "epoch": 0.23, + "learning_rate": 0.0001997665406935394, + "loss": 2.0745, + "step": 191 + }, + { + "epoch": 0.23, + "learning_rate": 0.0001997639549247016, + "loss": 1.9974, + "step": 192 + }, + { + "epoch": 0.23, + "learning_rate": 0.00019976135493169996, + "loss": 1.9856, + "step": 193 + }, + { + "epoch": 0.23, + "learning_rate": 0.00019975874071490526, + "loss": 1.778, + "step": 194 + }, + { + "epoch": 0.23, + "learning_rate": 0.00019975611227469016, + "loss": 1.8347, + "step": 195 + }, + { + "epoch": 0.24, + "learning_rate": 0.0001997534696114294, + "loss": 1.5555, + "step": 196 + }, + { + "epoch": 0.24, + "learning_rate": 0.00019975081272549989, + "loss": 1.5625, + "step": 197 + }, + { + "epoch": 0.24, + "learning_rate": 0.00019974814161728032, + "loss": 1.9997, + "step": 198 + }, + { + "epoch": 0.24, + "learning_rate": 0.00019974545628715157, + "loss": 1.9523, + "step": 199 + }, + { + "epoch": 0.24, + "learning_rate": 0.00019974275673549654, + "loss": 2.1557, + "step": 200 + }, + { + "epoch": 0.24, + "learning_rate": 0.00019974004296270006, + "loss": 1.8306, + "step": 201 + }, + { + "epoch": 0.24, + "learning_rate": 0.00019973731496914914, + "loss": 2.0051, + "step": 202 + }, + { + "epoch": 0.24, + "learning_rate": 0.00019973457275523264, + "loss": 2.201, + "step": 203 + }, + { + "epoch": 0.24, + "learning_rate": 0.0001997318163213416, + "loss": 2.2446, + "step": 204 + }, + { + "epoch": 0.25, + "learning_rate": 0.00019972904566786903, + "loss": 2.1172, + "step": 205 + }, + { + "epoch": 0.25, + "learning_rate": 0.00019972626079520995, + "loss": 1.9849, + "step": 206 + }, + { + "epoch": 0.25, + "learning_rate": 0.00019972346170376142, + "loss": 1.9774, + "step": 207 + }, + { + "epoch": 0.25, + "learning_rate": 0.0001997206483939225, + "loss": 1.7625, + "step": 208 + }, + { + "epoch": 0.25, + "learning_rate": 0.00019971782086609436, + "loss": 2.2346, + "step": 209 + }, + { + "epoch": 0.25, + "eval_loss": 2.00066876411438, + "eval_runtime": 282.7648, + "eval_samples_per_second": 0.729, + "eval_steps_per_second": 0.729, + "step": 209 + }, + { + "epoch": 0.25, + "learning_rate": 0.00019971497912068013, + "loss": 2.4185, + "step": 210 + }, + { + "epoch": 0.25, + "learning_rate": 0.00019971212315808497, + "loss": 1.946, + "step": 211 + }, + { + "epoch": 0.25, + "learning_rate": 0.00019970925297871605, + "loss": 2.0049, + "step": 212 + }, + { + "epoch": 0.26, + "learning_rate": 0.00019970636858298267, + "loss": 1.9545, + "step": 213 + }, + { + "epoch": 0.26, + "learning_rate": 0.00019970346997129598, + "loss": 1.9636, + "step": 214 + }, + { + "epoch": 0.26, + "learning_rate": 0.00019970055714406938, + "loss": 1.9068, + "step": 215 + }, + { + "epoch": 0.26, + "learning_rate": 0.00019969763010171807, + "loss": 1.5749, + "step": 216 + }, + { + "epoch": 0.26, + "learning_rate": 0.00019969468884465942, + "loss": 1.7676, + "step": 217 + }, + { + "epoch": 0.26, + "learning_rate": 0.0001996917333733128, + "loss": 2.0329, + "step": 218 + }, + { + "epoch": 0.26, + "learning_rate": 0.0001996887636880996, + "loss": 1.9307, + "step": 219 + }, + { + "epoch": 0.26, + "learning_rate": 0.00019968577978944323, + "loss": 2.134, + "step": 220 + }, + { + "epoch": 0.27, + "learning_rate": 0.00019968278167776908, + "loss": 2.0911, + "step": 221 + }, + { + "epoch": 0.27, + "learning_rate": 0.00019967976935350467, + "loss": 2.5057, + "step": 222 + }, + { + "epoch": 0.27, + "learning_rate": 0.0001996767428170795, + "loss": 1.9267, + "step": 223 + }, + { + "epoch": 0.27, + "learning_rate": 0.00019967370206892503, + "loss": 2.3569, + "step": 224 + }, + { + "epoch": 0.27, + "learning_rate": 0.00019967064710947488, + "loss": 1.992, + "step": 225 + }, + { + "epoch": 0.27, + "learning_rate": 0.00019966757793916454, + "loss": 2.01, + "step": 226 + }, + { + "epoch": 0.27, + "learning_rate": 0.00019966449455843165, + "loss": 1.8037, + "step": 227 + }, + { + "epoch": 0.27, + "learning_rate": 0.00019966139696771587, + "loss": 2.2498, + "step": 228 + }, + { + "epoch": 0.27, + "learning_rate": 0.00019965828516745876, + "loss": 1.6563, + "step": 229 + }, + { + "epoch": 0.28, + "learning_rate": 0.0001996551591581041, + "loss": 1.979, + "step": 230 + }, + { + "epoch": 0.28, + "learning_rate": 0.0001996520189400975, + "loss": 2.1553, + "step": 231 + }, + { + "epoch": 0.28, + "learning_rate": 0.0001996488645138867, + "loss": 1.8743, + "step": 232 + }, + { + "epoch": 0.28, + "learning_rate": 0.00019964569587992148, + "loss": 2.1907, + "step": 233 + }, + { + "epoch": 0.28, + "learning_rate": 0.00019964251303865362, + "loss": 2.0644, + "step": 234 + }, + { + "epoch": 0.28, + "learning_rate": 0.00019963931599053692, + "loss": 2.1721, + "step": 235 + }, + { + "epoch": 0.28, + "learning_rate": 0.0001996361047360272, + "loss": 2.2267, + "step": 236 + }, + { + "epoch": 0.28, + "learning_rate": 0.0001996328792755823, + "loss": 1.9445, + "step": 237 + }, + { + "epoch": 0.29, + "learning_rate": 0.00019962963960966213, + "loss": 2.2003, + "step": 238 + }, + { + "epoch": 0.29, + "learning_rate": 0.0001996263857387286, + "loss": 2.3114, + "step": 239 + }, + { + "epoch": 0.29, + "learning_rate": 0.0001996231176632456, + "loss": 1.8553, + "step": 240 + }, + { + "epoch": 0.29, + "learning_rate": 0.00019961983538367914, + "loss": 2.1349, + "step": 241 + }, + { + "epoch": 0.29, + "learning_rate": 0.00019961653890049715, + "loss": 1.8784, + "step": 242 + }, + { + "epoch": 0.29, + "learning_rate": 0.0001996132282141697, + "loss": 2.0118, + "step": 243 + }, + { + "epoch": 0.29, + "learning_rate": 0.00019960990332516874, + "loss": 1.9938, + "step": 244 + }, + { + "epoch": 0.29, + "learning_rate": 0.00019960656423396834, + "loss": 2.2582, + "step": 245 + }, + { + "epoch": 0.3, + "learning_rate": 0.00019960321094104465, + "loss": 2.1807, + "step": 246 + }, + { + "epoch": 0.3, + "learning_rate": 0.00019959984344687578, + "loss": 1.9084, + "step": 247 + }, + { + "epoch": 0.3, + "learning_rate": 0.00019959646175194174, + "loss": 2.2879, + "step": 248 + }, + { + "epoch": 0.3, + "learning_rate": 0.0001995930658567248, + "loss": 1.942, + "step": 249 + }, + { + "epoch": 0.3, + "learning_rate": 0.00019958965576170908, + "loss": 2.1313, + "step": 250 + }, + { + "epoch": 0.3, + "learning_rate": 0.00019958623146738088, + "loss": 2.3202, + "step": 251 + }, + { + "epoch": 0.3, + "learning_rate": 0.0001995827929742283, + "loss": 1.7832, + "step": 252 + }, + { + "epoch": 0.3, + "learning_rate": 0.00019957934028274162, + "loss": 1.7103, + "step": 253 + }, + { + "epoch": 0.3, + "learning_rate": 0.00019957587339341321, + "loss": 1.9912, + "step": 254 + }, + { + "epoch": 0.31, + "learning_rate": 0.0001995723923067373, + "loss": 1.6686, + "step": 255 + }, + { + "epoch": 0.31, + "learning_rate": 0.00019956889702321023, + "loss": 1.966, + "step": 256 + }, + { + "epoch": 0.31, + "learning_rate": 0.00019956538754333034, + "loss": 2.2287, + "step": 257 + }, + { + "epoch": 0.31, + "learning_rate": 0.00019956186386759804, + "loss": 1.4866, + "step": 258 + }, + { + "epoch": 0.31, + "learning_rate": 0.0001995583259965157, + "loss": 1.9599, + "step": 259 + }, + { + "epoch": 0.31, + "learning_rate": 0.00019955477393058773, + "loss": 1.9273, + "step": 260 + }, + { + "epoch": 0.31, + "learning_rate": 0.0001995512076703206, + "loss": 1.847, + "step": 261 + }, + { + "epoch": 0.31, + "learning_rate": 0.00019954762721622279, + "loss": 2.0535, + "step": 262 + }, + { + "epoch": 0.32, + "learning_rate": 0.0001995440325688048, + "loss": 2.4403, + "step": 263 + }, + { + "epoch": 0.32, + "learning_rate": 0.00019954042372857908, + "loss": 1.8712, + "step": 264 + }, + { + "epoch": 0.32, + "learning_rate": 0.00019953680069606026, + "loss": 2.1837, + "step": 265 + }, + { + "epoch": 0.32, + "learning_rate": 0.00019953316347176488, + "loss": 2.0398, + "step": 266 + }, + { + "epoch": 0.32, + "learning_rate": 0.0001995295120562115, + "loss": 2.1135, + "step": 267 + }, + { + "epoch": 0.32, + "learning_rate": 0.00019952584644992075, + "loss": 2.0358, + "step": 268 + }, + { + "epoch": 0.32, + "learning_rate": 0.00019952216665341526, + "loss": 2.3282, + "step": 269 + }, + { + "epoch": 0.32, + "learning_rate": 0.0001995184726672197, + "loss": 1.9741, + "step": 270 + }, + { + "epoch": 0.33, + "learning_rate": 0.00019951476449186074, + "loss": 1.7523, + "step": 271 + }, + { + "epoch": 0.33, + "learning_rate": 0.00019951104212786712, + "loss": 2.1509, + "step": 272 + }, + { + "epoch": 0.33, + "learning_rate": 0.0001995073055757695, + "loss": 2.0865, + "step": 273 + }, + { + "epoch": 0.33, + "learning_rate": 0.00019950355483610067, + "loss": 1.8972, + "step": 274 + }, + { + "epoch": 0.33, + "learning_rate": 0.00019949978990939542, + "loss": 2.4693, + "step": 275 + }, + { + "epoch": 0.33, + "learning_rate": 0.0001994960107961905, + "loss": 1.9307, + "step": 276 + }, + { + "epoch": 0.33, + "learning_rate": 0.0001994922174970248, + "loss": 2.0097, + "step": 277 + }, + { + "epoch": 0.33, + "learning_rate": 0.0001994884100124391, + "loss": 1.6561, + "step": 278 + }, + { + "epoch": 0.33, + "learning_rate": 0.0001994845883429763, + "loss": 2.3069, + "step": 279 + }, + { + "epoch": 0.34, + "learning_rate": 0.00019948075248918124, + "loss": 2.0134, + "step": 280 + }, + { + "epoch": 0.34, + "learning_rate": 0.00019947690245160091, + "loss": 2.1061, + "step": 281 + }, + { + "epoch": 0.34, + "learning_rate": 0.00019947303823078416, + "loss": 2.0855, + "step": 282 + }, + { + "epoch": 0.34, + "learning_rate": 0.00019946915982728197, + "loss": 1.5672, + "step": 283 + }, + { + "epoch": 0.34, + "learning_rate": 0.0001994652672416473, + "loss": 1.7289, + "step": 284 + }, + { + "epoch": 0.34, + "learning_rate": 0.00019946136047443522, + "loss": 1.9013, + "step": 285 + }, + { + "epoch": 0.34, + "learning_rate": 0.00019945743952620268, + "loss": 2.3105, + "step": 286 + }, + { + "epoch": 0.34, + "learning_rate": 0.00019945350439750872, + "loss": 2.341, + "step": 287 + }, + { + "epoch": 0.35, + "learning_rate": 0.00019944955508891443, + "loss": 1.88, + "step": 288 + }, + { + "epoch": 0.35, + "learning_rate": 0.0001994455916009829, + "loss": 1.913, + "step": 289 + }, + { + "epoch": 0.35, + "learning_rate": 0.00019944161393427922, + "loss": 1.9513, + "step": 290 + }, + { + "epoch": 0.35, + "learning_rate": 0.00019943762208937053, + "loss": 2.3331, + "step": 291 + }, + { + "epoch": 0.35, + "learning_rate": 0.00019943361606682597, + "loss": 2.3024, + "step": 292 + }, + { + "epoch": 0.35, + "learning_rate": 0.00019942959586721672, + "loss": 2.2222, + "step": 293 + }, + { + "epoch": 0.35, + "learning_rate": 0.00019942556149111598, + "loss": 2.1003, + "step": 294 + }, + { + "epoch": 0.35, + "learning_rate": 0.0001994215129390989, + "loss": 1.9038, + "step": 295 + }, + { + "epoch": 0.36, + "learning_rate": 0.00019941745021174282, + "loss": 1.6068, + "step": 296 + }, + { + "epoch": 0.36, + "learning_rate": 0.00019941337330962693, + "loss": 1.8894, + "step": 297 + }, + { + "epoch": 0.36, + "learning_rate": 0.00019940928223333252, + "loss": 2.3158, + "step": 298 + }, + { + "epoch": 0.36, + "learning_rate": 0.0001994051769834429, + "loss": 2.1015, + "step": 299 + }, + { + "epoch": 0.36, + "learning_rate": 0.00019940105756054337, + "loss": 2.1519, + "step": 300 + }, + { + "epoch": 0.36, + "learning_rate": 0.00019939692396522127, + "loss": 1.7233, + "step": 301 + }, + { + "epoch": 0.36, + "learning_rate": 0.00019939277619806598, + "loss": 1.85, + "step": 302 + }, + { + "epoch": 0.36, + "learning_rate": 0.00019938861425966887, + "loss": 2.2368, + "step": 303 + }, + { + "epoch": 0.36, + "learning_rate": 0.00019938443815062335, + "loss": 1.765, + "step": 304 + }, + { + "epoch": 0.37, + "learning_rate": 0.0001993802478715248, + "loss": 1.6333, + "step": 305 + }, + { + "epoch": 0.37, + "learning_rate": 0.00019937604342297073, + "loss": 2.191, + "step": 306 + }, + { + "epoch": 0.37, + "learning_rate": 0.00019937182480556055, + "loss": 2.2402, + "step": 307 + }, + { + "epoch": 0.37, + "learning_rate": 0.00019936759201989577, + "loss": 2.0568, + "step": 308 + }, + { + "epoch": 0.37, + "learning_rate": 0.0001993633450665799, + "loss": 2.4314, + "step": 309 + }, + { + "epoch": 0.37, + "learning_rate": 0.00019935908394621844, + "loss": 2.0556, + "step": 310 + }, + { + "epoch": 0.37, + "learning_rate": 0.00019935480865941894, + "loss": 2.0988, + "step": 311 + }, + { + "epoch": 0.37, + "learning_rate": 0.00019935051920679094, + "loss": 2.0964, + "step": 312 + }, + { + "epoch": 0.38, + "learning_rate": 0.00019934621558894607, + "loss": 1.9365, + "step": 313 + }, + { + "epoch": 0.38, + "learning_rate": 0.0001993418978064979, + "loss": 1.6224, + "step": 314 + }, + { + "epoch": 0.38, + "learning_rate": 0.00019933756586006202, + "loss": 2.144, + "step": 315 + }, + { + "epoch": 0.38, + "learning_rate": 0.00019933321975025616, + "loss": 2.2899, + "step": 316 + }, + { + "epoch": 0.38, + "learning_rate": 0.00019932885947769992, + "loss": 1.8865, + "step": 317 + }, + { + "epoch": 0.38, + "learning_rate": 0.000199324485043015, + "loss": 2.3996, + "step": 318 + }, + { + "epoch": 0.38, + "learning_rate": 0.0001993200964468251, + "loss": 1.3858, + "step": 319 + }, + { + "epoch": 0.38, + "learning_rate": 0.00019931569368975588, + "loss": 2.2231, + "step": 320 + }, + { + "epoch": 0.39, + "learning_rate": 0.00019931127677243516, + "loss": 2.0537, + "step": 321 + }, + { + "epoch": 0.39, + "learning_rate": 0.00019930684569549264, + "loss": 2.1381, + "step": 322 + }, + { + "epoch": 0.39, + "learning_rate": 0.00019930240045956012, + "loss": 2.0152, + "step": 323 + }, + { + "epoch": 0.39, + "learning_rate": 0.0001992979410652714, + "loss": 2.0293, + "step": 324 + }, + { + "epoch": 0.39, + "learning_rate": 0.00019929346751326228, + "loss": 1.7457, + "step": 325 + }, + { + "epoch": 0.39, + "learning_rate": 0.00019928897980417057, + "loss": 1.987, + "step": 326 + }, + { + "epoch": 0.39, + "learning_rate": 0.00019928447793863616, + "loss": 2.2451, + "step": 327 + }, + { + "epoch": 0.39, + "learning_rate": 0.00019927996191730093, + "loss": 2.3312, + "step": 328 + }, + { + "epoch": 0.39, + "learning_rate": 0.0001992754317408087, + "loss": 1.8771, + "step": 329 + }, + { + "epoch": 0.4, + "learning_rate": 0.0001992708874098054, + "loss": 1.833, + "step": 330 + }, + { + "epoch": 0.4, + "learning_rate": 0.00019926632892493896, + "loss": 1.9343, + "step": 331 + }, + { + "epoch": 0.4, + "learning_rate": 0.00019926175628685937, + "loss": 2.2328, + "step": 332 + }, + { + "epoch": 0.4, + "learning_rate": 0.0001992571694962185, + "loss": 1.9916, + "step": 333 + }, + { + "epoch": 0.4, + "learning_rate": 0.0001992525685536704, + "loss": 1.9497, + "step": 334 + }, + { + "epoch": 0.4, + "learning_rate": 0.000199247953459871, + "loss": 2.029, + "step": 335 + }, + { + "epoch": 0.4, + "learning_rate": 0.00019924332421547835, + "loss": 2.0326, + "step": 336 + }, + { + "epoch": 0.4, + "learning_rate": 0.0001992386808211525, + "loss": 2.6406, + "step": 337 + }, + { + "epoch": 0.41, + "learning_rate": 0.00019923402327755546, + "loss": 2.3811, + "step": 338 + }, + { + "epoch": 0.41, + "learning_rate": 0.00019922935158535129, + "loss": 1.6143, + "step": 339 + }, + { + "epoch": 0.41, + "learning_rate": 0.00019922466574520608, + "loss": 2.2182, + "step": 340 + }, + { + "epoch": 0.41, + "learning_rate": 0.00019921996575778794, + "loss": 2.218, + "step": 341 + }, + { + "epoch": 0.41, + "learning_rate": 0.000199215251623767, + "loss": 1.8615, + "step": 342 + }, + { + "epoch": 0.41, + "learning_rate": 0.00019921052334381534, + "loss": 2.165, + "step": 343 + }, + { + "epoch": 0.41, + "learning_rate": 0.00019920578091860716, + "loss": 2.1627, + "step": 344 + }, + { + "epoch": 0.41, + "learning_rate": 0.0001992010243488186, + "loss": 2.154, + "step": 345 + }, + { + "epoch": 0.42, + "learning_rate": 0.00019919625363512786, + "loss": 1.5966, + "step": 346 + }, + { + "epoch": 0.42, + "learning_rate": 0.00019919146877821512, + "loss": 2.0903, + "step": 347 + }, + { + "epoch": 0.42, + "learning_rate": 0.0001991866697787626, + "loss": 2.2322, + "step": 348 + }, + { + "epoch": 0.42, + "learning_rate": 0.00019918185663745456, + "loss": 1.9319, + "step": 349 + }, + { + "epoch": 0.42, + "learning_rate": 0.00019917702935497725, + "loss": 2.1367, + "step": 350 + }, + { + "epoch": 0.42, + "learning_rate": 0.00019917218793201886, + "loss": 2.1767, + "step": 351 + }, + { + "epoch": 0.42, + "learning_rate": 0.00019916733236926976, + "loss": 2.1009, + "step": 352 + }, + { + "epoch": 0.42, + "learning_rate": 0.0001991624626674222, + "loss": 2.1286, + "step": 353 + }, + { + "epoch": 0.42, + "learning_rate": 0.0001991575788271705, + "loss": 2.181, + "step": 354 + }, + { + "epoch": 0.43, + "learning_rate": 0.00019915268084921101, + "loss": 2.12, + "step": 355 + }, + { + "epoch": 0.43, + "learning_rate": 0.00019914776873424206, + "loss": 1.9895, + "step": 356 + }, + { + "epoch": 0.43, + "learning_rate": 0.000199142842482964, + "loss": 1.9285, + "step": 357 + }, + { + "epoch": 0.43, + "learning_rate": 0.0001991379020960792, + "loss": 2.2376, + "step": 358 + }, + { + "epoch": 0.43, + "learning_rate": 0.0001991329475742921, + "loss": 2.1274, + "step": 359 + }, + { + "epoch": 0.43, + "learning_rate": 0.00019912797891830908, + "loss": 2.0043, + "step": 360 + }, + { + "epoch": 0.43, + "learning_rate": 0.00019912299612883852, + "loss": 2.022, + "step": 361 + }, + { + "epoch": 0.43, + "learning_rate": 0.00019911799920659093, + "loss": 1.7343, + "step": 362 + }, + { + "epoch": 0.44, + "learning_rate": 0.0001991129881522787, + "loss": 2.0621, + "step": 363 + }, + { + "epoch": 0.44, + "learning_rate": 0.00019910796296661632, + "loss": 1.5116, + "step": 364 + }, + { + "epoch": 0.44, + "learning_rate": 0.0001991029236503203, + "loss": 2.0485, + "step": 365 + }, + { + "epoch": 0.44, + "learning_rate": 0.00019909787020410907, + "loss": 1.971, + "step": 366 + }, + { + "epoch": 0.44, + "learning_rate": 0.00019909280262870324, + "loss": 1.9724, + "step": 367 + }, + { + "epoch": 0.44, + "learning_rate": 0.00019908772092482524, + "loss": 1.318, + "step": 368 + }, + { + "epoch": 0.44, + "learning_rate": 0.00019908262509319964, + "loss": 2.0539, + "step": 369 + }, + { + "epoch": 0.44, + "learning_rate": 0.00019907751513455302, + "loss": 2.1097, + "step": 370 + }, + { + "epoch": 0.45, + "learning_rate": 0.00019907239104961392, + "loss": 2.0632, + "step": 371 + }, + { + "epoch": 0.45, + "learning_rate": 0.00019906725283911296, + "loss": 2.1897, + "step": 372 + }, + { + "epoch": 0.45, + "learning_rate": 0.00019906210050378266, + "loss": 2.2002, + "step": 373 + }, + { + "epoch": 0.45, + "learning_rate": 0.00019905693404435773, + "loss": 1.9005, + "step": 374 + }, + { + "epoch": 0.45, + "learning_rate": 0.00019905175346157474, + "loss": 1.9873, + "step": 375 + }, + { + "epoch": 0.45, + "learning_rate": 0.00019904655875617233, + "loss": 1.7215, + "step": 376 + }, + { + "epoch": 0.45, + "learning_rate": 0.00019904134992889113, + "loss": 2.0434, + "step": 377 + }, + { + "epoch": 0.45, + "learning_rate": 0.00019903612698047383, + "loss": 2.4223, + "step": 378 + }, + { + "epoch": 0.45, + "learning_rate": 0.00019903088991166513, + "loss": 2.0837, + "step": 379 + }, + { + "epoch": 0.46, + "learning_rate": 0.00019902563872321172, + "loss": 2.2389, + "step": 380 + }, + { + "epoch": 0.46, + "learning_rate": 0.00019902037341586225, + "loss": 1.7205, + "step": 381 + }, + { + "epoch": 0.46, + "learning_rate": 0.0001990150939903675, + "loss": 1.9577, + "step": 382 + }, + { + "epoch": 0.46, + "learning_rate": 0.00019900980044748015, + "loss": 1.8778, + "step": 383 + }, + { + "epoch": 0.46, + "learning_rate": 0.000199004492787955, + "loss": 2.2213, + "step": 384 + }, + { + "epoch": 0.46, + "learning_rate": 0.00019899917101254874, + "loss": 2.0927, + "step": 385 + }, + { + "epoch": 0.46, + "learning_rate": 0.00019899383512202019, + "loss": 2.2921, + "step": 386 + }, + { + "epoch": 0.46, + "learning_rate": 0.0001989884851171301, + "loss": 2.2983, + "step": 387 + }, + { + "epoch": 0.47, + "learning_rate": 0.0001989831209986413, + "loss": 1.8052, + "step": 388 + }, + { + "epoch": 0.47, + "learning_rate": 0.00019897774276731857, + "loss": 1.7741, + "step": 389 + }, + { + "epoch": 0.47, + "learning_rate": 0.00019897235042392873, + "loss": 1.779, + "step": 390 + }, + { + "epoch": 0.47, + "learning_rate": 0.00019896694396924063, + "loss": 1.6924, + "step": 391 + }, + { + "epoch": 0.47, + "learning_rate": 0.00019896152340402509, + "loss": 2.036, + "step": 392 + }, + { + "epoch": 0.47, + "learning_rate": 0.00019895608872905494, + "loss": 2.04, + "step": 393 + }, + { + "epoch": 0.47, + "learning_rate": 0.0001989506399451051, + "loss": 2.1702, + "step": 394 + }, + { + "epoch": 0.47, + "learning_rate": 0.00019894517705295245, + "loss": 1.9429, + "step": 395 + }, + { + "epoch": 0.48, + "learning_rate": 0.00019893970005337584, + "loss": 2.0528, + "step": 396 + }, + { + "epoch": 0.48, + "learning_rate": 0.00019893420894715618, + "loss": 1.7906, + "step": 397 + }, + { + "epoch": 0.48, + "learning_rate": 0.0001989287037350764, + "loss": 2.3494, + "step": 398 + }, + { + "epoch": 0.48, + "learning_rate": 0.00019892318441792138, + "loss": 1.7415, + "step": 399 + }, + { + "epoch": 0.48, + "learning_rate": 0.0001989176509964781, + "loss": 2.0184, + "step": 400 + }, + { + "epoch": 0.48, + "learning_rate": 0.0001989121034715355, + "loss": 1.9277, + "step": 401 + }, + { + "epoch": 0.48, + "learning_rate": 0.0001989065418438845, + "loss": 2.2168, + "step": 402 + }, + { + "epoch": 0.48, + "learning_rate": 0.00019890096611431814, + "loss": 2.6114, + "step": 403 + }, + { + "epoch": 0.48, + "learning_rate": 0.00019889537628363133, + "loss": 2.0713, + "step": 404 + }, + { + "epoch": 0.49, + "learning_rate": 0.00019888977235262104, + "loss": 2.2966, + "step": 405 + }, + { + "epoch": 0.49, + "learning_rate": 0.00019888415432208636, + "loss": 2.5206, + "step": 406 + }, + { + "epoch": 0.49, + "learning_rate": 0.00019887852219282822, + "loss": 2.4503, + "step": 407 + }, + { + "epoch": 0.49, + "learning_rate": 0.00019887287596564966, + "loss": 2.102, + "step": 408 + }, + { + "epoch": 0.49, + "learning_rate": 0.00019886721564135572, + "loss": 2.3275, + "step": 409 + }, + { + "epoch": 0.49, + "learning_rate": 0.00019886154122075343, + "loss": 2.0481, + "step": 410 + }, + { + "epoch": 0.49, + "learning_rate": 0.00019885585270465182, + "loss": 1.8395, + "step": 411 + }, + { + "epoch": 0.49, + "learning_rate": 0.00019885015009386202, + "loss": 2.3535, + "step": 412 + }, + { + "epoch": 0.5, + "learning_rate": 0.000198844433389197, + "loss": 2.0147, + "step": 413 + }, + { + "epoch": 0.5, + "learning_rate": 0.0001988387025914719, + "loss": 2.1919, + "step": 414 + }, + { + "epoch": 0.5, + "learning_rate": 0.0001988329577015038, + "loss": 2.156, + "step": 415 + }, + { + "epoch": 0.5, + "learning_rate": 0.00019882719872011176, + "loss": 2.2672, + "step": 416 + }, + { + "epoch": 0.5, + "learning_rate": 0.00019882142564811694, + "loss": 2.3242, + "step": 417 + }, + { + "epoch": 0.5, + "learning_rate": 0.0001988156384863424, + "loss": 2.0259, + "step": 418 + }, + { + "epoch": 0.5, + "eval_loss": 1.9941134452819824, + "eval_runtime": 282.533, + "eval_samples_per_second": 0.729, + "eval_steps_per_second": 0.729, + "step": 418 + }, + { + "epoch": 0.5, + "learning_rate": 0.00019880983723561332, + "loss": 1.7039, + "step": 419 + }, + { + "epoch": 0.5, + "learning_rate": 0.00019880402189675678, + "loss": 2.1007, + "step": 420 + }, + { + "epoch": 0.51, + "learning_rate": 0.00019879819247060193, + "loss": 2.2297, + "step": 421 + }, + { + "epoch": 0.51, + "learning_rate": 0.00019879234895797996, + "loss": 1.6166, + "step": 422 + }, + { + "epoch": 0.51, + "learning_rate": 0.000198786491359724, + "loss": 2.408, + "step": 423 + }, + { + "epoch": 0.51, + "learning_rate": 0.00019878061967666915, + "loss": 1.686, + "step": 424 + }, + { + "epoch": 0.51, + "learning_rate": 0.0001987747339096527, + "loss": 2.0492, + "step": 425 + }, + { + "epoch": 0.51, + "learning_rate": 0.00019876883405951377, + "loss": 2.2179, + "step": 426 + }, + { + "epoch": 0.51, + "learning_rate": 0.00019876292012709356, + "loss": 1.8812, + "step": 427 + }, + { + "epoch": 0.51, + "learning_rate": 0.00019875699211323528, + "loss": 2.2888, + "step": 428 + }, + { + "epoch": 0.52, + "learning_rate": 0.00019875105001878409, + "loss": 2.0561, + "step": 429 + }, + { + "epoch": 0.52, + "learning_rate": 0.00019874509384458725, + "loss": 1.9299, + "step": 430 + }, + { + "epoch": 0.52, + "learning_rate": 0.00019873912359149397, + "loss": 2.1999, + "step": 431 + }, + { + "epoch": 0.52, + "learning_rate": 0.00019873313926035548, + "loss": 1.8509, + "step": 432 + }, + { + "epoch": 0.52, + "learning_rate": 0.00019872714085202503, + "loss": 1.8281, + "step": 433 + }, + { + "epoch": 0.52, + "learning_rate": 0.0001987211283673578, + "loss": 1.8359, + "step": 434 + }, + { + "epoch": 0.52, + "learning_rate": 0.0001987151018072111, + "loss": 2.2844, + "step": 435 + }, + { + "epoch": 0.52, + "learning_rate": 0.00019870906117244416, + "loss": 1.9397, + "step": 436 + }, + { + "epoch": 0.52, + "learning_rate": 0.00019870300646391824, + "loss": 2.302, + "step": 437 + }, + { + "epoch": 0.53, + "learning_rate": 0.00019869693768249661, + "loss": 2.1176, + "step": 438 + }, + { + "epoch": 0.53, + "learning_rate": 0.00019869085482904458, + "loss": 2.1909, + "step": 439 + }, + { + "epoch": 0.53, + "learning_rate": 0.0001986847579044294, + "loss": 2.2382, + "step": 440 + }, + { + "epoch": 0.53, + "learning_rate": 0.00019867864690952035, + "loss": 2.0988, + "step": 441 + }, + { + "epoch": 0.53, + "learning_rate": 0.00019867252184518878, + "loss": 2.2136, + "step": 442 + }, + { + "epoch": 0.53, + "learning_rate": 0.0001986663827123079, + "loss": 1.9324, + "step": 443 + }, + { + "epoch": 0.53, + "learning_rate": 0.00019866022951175308, + "loss": 2.1274, + "step": 444 + }, + { + "epoch": 0.53, + "learning_rate": 0.00019865406224440165, + "loss": 1.8625, + "step": 445 + }, + { + "epoch": 0.54, + "learning_rate": 0.00019864788091113287, + "loss": 2.0009, + "step": 446 + }, + { + "epoch": 0.54, + "learning_rate": 0.0001986416855128281, + "loss": 2.2245, + "step": 447 + }, + { + "epoch": 0.54, + "learning_rate": 0.00019863547605037063, + "loss": 2.0654, + "step": 448 + }, + { + "epoch": 0.54, + "learning_rate": 0.00019862925252464586, + "loss": 1.4339, + "step": 449 + }, + { + "epoch": 0.54, + "learning_rate": 0.00019862301493654108, + "loss": 2.1347, + "step": 450 + }, + { + "epoch": 0.54, + "learning_rate": 0.00019861676328694562, + "loss": 1.7029, + "step": 451 + }, + { + "epoch": 0.54, + "learning_rate": 0.00019861049757675088, + "loss": 2.0081, + "step": 452 + }, + { + "epoch": 0.54, + "learning_rate": 0.00019860421780685018, + "loss": 1.9994, + "step": 453 + }, + { + "epoch": 0.55, + "learning_rate": 0.0001985979239781389, + "loss": 1.9325, + "step": 454 + }, + { + "epoch": 0.55, + "learning_rate": 0.00019859161609151436, + "loss": 1.8502, + "step": 455 + }, + { + "epoch": 0.55, + "learning_rate": 0.000198585294147876, + "loss": 2.3779, + "step": 456 + }, + { + "epoch": 0.55, + "learning_rate": 0.00019857895814812509, + "loss": 2.0303, + "step": 457 + }, + { + "epoch": 0.55, + "learning_rate": 0.0001985726080931651, + "loss": 1.9898, + "step": 458 + }, + { + "epoch": 0.55, + "learning_rate": 0.00019856624398390137, + "loss": 1.7648, + "step": 459 + }, + { + "epoch": 0.55, + "learning_rate": 0.00019855986582124126, + "loss": 1.7822, + "step": 460 + }, + { + "epoch": 0.55, + "learning_rate": 0.0001985534736060942, + "loss": 1.9219, + "step": 461 + }, + { + "epoch": 0.55, + "learning_rate": 0.00019854706733937155, + "loss": 2.1789, + "step": 462 + }, + { + "epoch": 0.56, + "learning_rate": 0.00019854064702198675, + "loss": 1.9091, + "step": 463 + }, + { + "epoch": 0.56, + "learning_rate": 0.00019853421265485514, + "loss": 1.9941, + "step": 464 + }, + { + "epoch": 0.56, + "learning_rate": 0.0001985277642388941, + "loss": 1.904, + "step": 465 + }, + { + "epoch": 0.56, + "learning_rate": 0.00019852130177502316, + "loss": 1.6299, + "step": 466 + }, + { + "epoch": 0.56, + "learning_rate": 0.0001985148252641636, + "loss": 1.7712, + "step": 467 + }, + { + "epoch": 0.56, + "learning_rate": 0.00019850833470723886, + "loss": 1.6825, + "step": 468 + }, + { + "epoch": 0.56, + "learning_rate": 0.0001985018301051744, + "loss": 1.7408, + "step": 469 + }, + { + "epoch": 0.56, + "learning_rate": 0.00019849531145889758, + "loss": 2.0622, + "step": 470 + }, + { + "epoch": 0.57, + "learning_rate": 0.00019848877876933784, + "loss": 1.5699, + "step": 471 + }, + { + "epoch": 0.57, + "learning_rate": 0.0001984822320374266, + "loss": 2.0253, + "step": 472 + }, + { + "epoch": 0.57, + "learning_rate": 0.00019847567126409724, + "loss": 2.2186, + "step": 473 + }, + { + "epoch": 0.57, + "learning_rate": 0.00019846909645028523, + "loss": 2.0872, + "step": 474 + }, + { + "epoch": 0.57, + "learning_rate": 0.000198462507596928, + "loss": 1.9362, + "step": 475 + }, + { + "epoch": 0.57, + "learning_rate": 0.00019845590470496497, + "loss": 2.4109, + "step": 476 + }, + { + "epoch": 0.57, + "learning_rate": 0.00019844928777533753, + "loss": 2.2626, + "step": 477 + }, + { + "epoch": 0.57, + "learning_rate": 0.00019844265680898918, + "loss": 2.0874, + "step": 478 + }, + { + "epoch": 0.58, + "learning_rate": 0.0001984360118068653, + "loss": 2.1606, + "step": 479 + }, + { + "epoch": 0.58, + "learning_rate": 0.0001984293527699133, + "loss": 2.063, + "step": 480 + }, + { + "epoch": 0.58, + "learning_rate": 0.00019842267969908265, + "loss": 1.9065, + "step": 481 + }, + { + "epoch": 0.58, + "learning_rate": 0.0001984159925953248, + "loss": 1.9511, + "step": 482 + }, + { + "epoch": 0.58, + "learning_rate": 0.00019840929145959317, + "loss": 2.056, + "step": 483 + }, + { + "epoch": 0.58, + "learning_rate": 0.00019840257629284317, + "loss": 2.2353, + "step": 484 + }, + { + "epoch": 0.58, + "learning_rate": 0.00019839584709603226, + "loss": 1.9401, + "step": 485 + }, + { + "epoch": 0.58, + "learning_rate": 0.0001983891038701199, + "loss": 1.9648, + "step": 486 + }, + { + "epoch": 0.58, + "learning_rate": 0.00019838234661606748, + "loss": 1.753, + "step": 487 + }, + { + "epoch": 0.59, + "learning_rate": 0.00019837557533483846, + "loss": 1.7805, + "step": 488 + }, + { + "epoch": 0.59, + "learning_rate": 0.00019836879002739827, + "loss": 2.192, + "step": 489 + }, + { + "epoch": 0.59, + "learning_rate": 0.00019836199069471437, + "loss": 1.9112, + "step": 490 + }, + { + "epoch": 0.59, + "learning_rate": 0.00019835517733775615, + "loss": 2.0119, + "step": 491 + }, + { + "epoch": 0.59, + "learning_rate": 0.0001983483499574951, + "loss": 1.8932, + "step": 492 + }, + { + "epoch": 0.59, + "learning_rate": 0.00019834150855490464, + "loss": 1.5968, + "step": 493 + }, + { + "epoch": 0.59, + "learning_rate": 0.00019833465313096017, + "loss": 2.1493, + "step": 494 + }, + { + "epoch": 0.59, + "learning_rate": 0.00019832778368663917, + "loss": 1.8863, + "step": 495 + }, + { + "epoch": 0.6, + "learning_rate": 0.000198320900222921, + "loss": 2.2134, + "step": 496 + }, + { + "epoch": 0.6, + "learning_rate": 0.00019831400274078717, + "loss": 2.2831, + "step": 497 + }, + { + "epoch": 0.6, + "learning_rate": 0.00019830709124122112, + "loss": 2.0266, + "step": 498 + }, + { + "epoch": 0.6, + "learning_rate": 0.0001983001657252082, + "loss": 2.3392, + "step": 499 + }, + { + "epoch": 0.6, + "learning_rate": 0.00019829322619373588, + "loss": 1.8426, + "step": 500 + }, + { + "epoch": 0.6, + "learning_rate": 0.00019828627264779363, + "loss": 2.0742, + "step": 501 + }, + { + "epoch": 0.6, + "learning_rate": 0.0001982793050883728, + "loss": 1.9578, + "step": 502 + }, + { + "epoch": 0.6, + "learning_rate": 0.00019827232351646686, + "loss": 2.0863, + "step": 503 + }, + { + "epoch": 0.61, + "learning_rate": 0.0001982653279330712, + "loss": 2.2881, + "step": 504 + }, + { + "epoch": 0.61, + "learning_rate": 0.00019825831833918323, + "loss": 1.8869, + "step": 505 + }, + { + "epoch": 0.61, + "learning_rate": 0.0001982512947358024, + "loss": 1.8997, + "step": 506 + }, + { + "epoch": 0.61, + "learning_rate": 0.00019824425712393012, + "loss": 1.8945, + "step": 507 + }, + { + "epoch": 0.61, + "learning_rate": 0.00019823720550456977, + "loss": 1.9496, + "step": 508 + }, + { + "epoch": 0.61, + "learning_rate": 0.0001982301398787268, + "loss": 2.1066, + "step": 509 + }, + { + "epoch": 0.61, + "learning_rate": 0.00019822306024740852, + "loss": 1.958, + "step": 510 + }, + { + "epoch": 0.61, + "learning_rate": 0.00019821596661162447, + "loss": 2.1112, + "step": 511 + }, + { + "epoch": 0.61, + "learning_rate": 0.00019820885897238596, + "loss": 2.1012, + "step": 512 + }, + { + "epoch": 0.62, + "learning_rate": 0.0001982017373307064, + "loss": 2.2623, + "step": 513 + }, + { + "epoch": 0.62, + "learning_rate": 0.00019819460168760117, + "loss": 2.5058, + "step": 514 + }, + { + "epoch": 0.62, + "learning_rate": 0.0001981874520440877, + "loss": 2.1367, + "step": 515 + }, + { + "epoch": 0.62, + "learning_rate": 0.00019818028840118532, + "loss": 2.2743, + "step": 516 + }, + { + "epoch": 0.62, + "learning_rate": 0.00019817311075991543, + "loss": 1.5517, + "step": 517 + }, + { + "epoch": 0.62, + "learning_rate": 0.0001981659191213014, + "loss": 1.9569, + "step": 518 + }, + { + "epoch": 0.62, + "learning_rate": 0.00019815871348636863, + "loss": 2.0566, + "step": 519 + }, + { + "epoch": 0.62, + "learning_rate": 0.00019815149385614444, + "loss": 1.8859, + "step": 520 + }, + { + "epoch": 0.63, + "learning_rate": 0.00019814426023165825, + "loss": 2.0298, + "step": 521 + }, + { + "epoch": 0.63, + "learning_rate": 0.00019813701261394136, + "loss": 2.0614, + "step": 522 + }, + { + "epoch": 0.63, + "learning_rate": 0.00019812975100402715, + "loss": 2.221, + "step": 523 + }, + { + "epoch": 0.63, + "learning_rate": 0.00019812247540295096, + "loss": 2.1255, + "step": 524 + }, + { + "epoch": 0.63, + "learning_rate": 0.00019811518581175014, + "loss": 2.1885, + "step": 525 + }, + { + "epoch": 0.63, + "learning_rate": 0.000198107882231464, + "loss": 2.3918, + "step": 526 + }, + { + "epoch": 0.63, + "learning_rate": 0.00019810056466313392, + "loss": 2.2759, + "step": 527 + }, + { + "epoch": 0.63, + "learning_rate": 0.00019809323310780318, + "loss": 1.9727, + "step": 528 + }, + { + "epoch": 0.64, + "learning_rate": 0.0001980858875665171, + "loss": 2.0417, + "step": 529 + }, + { + "epoch": 0.64, + "learning_rate": 0.00019807852804032305, + "loss": 1.645, + "step": 530 + }, + { + "epoch": 0.64, + "learning_rate": 0.0001980711545302703, + "loss": 1.7943, + "step": 531 + }, + { + "epoch": 0.64, + "learning_rate": 0.00019806376703741015, + "loss": 1.8844, + "step": 532 + }, + { + "epoch": 0.64, + "learning_rate": 0.00019805636556279588, + "loss": 2.1128, + "step": 533 + }, + { + "epoch": 0.64, + "learning_rate": 0.0001980489501074828, + "loss": 2.0272, + "step": 534 + }, + { + "epoch": 0.64, + "learning_rate": 0.00019804152067252816, + "loss": 2.0916, + "step": 535 + }, + { + "epoch": 0.64, + "learning_rate": 0.00019803407725899131, + "loss": 1.7287, + "step": 536 + }, + { + "epoch": 0.64, + "learning_rate": 0.00019802661986793342, + "loss": 2.0667, + "step": 537 + }, + { + "epoch": 0.65, + "learning_rate": 0.00019801914850041784, + "loss": 2.4016, + "step": 538 + }, + { + "epoch": 0.65, + "learning_rate": 0.00019801166315750978, + "loss": 1.8557, + "step": 539 + }, + { + "epoch": 0.65, + "learning_rate": 0.0001980041638402765, + "loss": 1.8072, + "step": 540 + }, + { + "epoch": 0.65, + "learning_rate": 0.00019799665054978722, + "loss": 2.2252, + "step": 541 + }, + { + "epoch": 0.65, + "learning_rate": 0.00019798912328711322, + "loss": 2.1377, + "step": 542 + }, + { + "epoch": 0.65, + "learning_rate": 0.00019798158205332764, + "loss": 2.0306, + "step": 543 + }, + { + "epoch": 0.65, + "learning_rate": 0.00019797402684950576, + "loss": 1.7428, + "step": 544 + }, + { + "epoch": 0.65, + "learning_rate": 0.00019796645767672477, + "loss": 2.0843, + "step": 545 + }, + { + "epoch": 0.66, + "learning_rate": 0.00019795887453606388, + "loss": 1.9175, + "step": 546 + }, + { + "epoch": 0.66, + "learning_rate": 0.00019795127742860423, + "loss": 1.6673, + "step": 547 + }, + { + "epoch": 0.66, + "learning_rate": 0.0001979436663554291, + "loss": 1.5553, + "step": 548 + }, + { + "epoch": 0.66, + "learning_rate": 0.00019793604131762357, + "loss": 1.604, + "step": 549 + }, + { + "epoch": 0.66, + "learning_rate": 0.00019792840231627482, + "loss": 2.023, + "step": 550 + }, + { + "epoch": 0.66, + "learning_rate": 0.00019792074935247206, + "loss": 1.8399, + "step": 551 + }, + { + "epoch": 0.66, + "learning_rate": 0.00019791308242730638, + "loss": 1.8579, + "step": 552 + }, + { + "epoch": 0.66, + "learning_rate": 0.00019790540154187094, + "loss": 2.2135, + "step": 553 + }, + { + "epoch": 0.67, + "learning_rate": 0.00019789770669726087, + "loss": 1.7894, + "step": 554 + }, + { + "epoch": 0.67, + "learning_rate": 0.00019788999789457326, + "loss": 2.1723, + "step": 555 + }, + { + "epoch": 0.67, + "learning_rate": 0.00019788227513490723, + "loss": 2.0881, + "step": 556 + }, + { + "epoch": 0.67, + "learning_rate": 0.00019787453841936393, + "loss": 1.7181, + "step": 557 + }, + { + "epoch": 0.67, + "learning_rate": 0.00019786678774904638, + "loss": 1.8725, + "step": 558 + }, + { + "epoch": 0.67, + "learning_rate": 0.00019785902312505964, + "loss": 2.0544, + "step": 559 + }, + { + "epoch": 0.67, + "learning_rate": 0.00019785124454851084, + "loss": 1.7503, + "step": 560 + }, + { + "epoch": 0.67, + "learning_rate": 0.000197843452020509, + "loss": 2.01, + "step": 561 + }, + { + "epoch": 0.67, + "learning_rate": 0.00019783564554216518, + "loss": 1.748, + "step": 562 + }, + { + "epoch": 0.68, + "learning_rate": 0.0001978278251145924, + "loss": 2.0866, + "step": 563 + }, + { + "epoch": 0.68, + "learning_rate": 0.0001978199907389057, + "loss": 1.6046, + "step": 564 + }, + { + "epoch": 0.68, + "learning_rate": 0.00019781214241622208, + "loss": 1.9222, + "step": 565 + }, + { + "epoch": 0.68, + "learning_rate": 0.00019780428014766051, + "loss": 2.2003, + "step": 566 + }, + { + "epoch": 0.68, + "learning_rate": 0.00019779640393434206, + "loss": 2.0534, + "step": 567 + }, + { + "epoch": 0.68, + "learning_rate": 0.0001977885137773896, + "loss": 1.8609, + "step": 568 + }, + { + "epoch": 0.68, + "learning_rate": 0.00019778060967792817, + "loss": 2.0666, + "step": 569 + }, + { + "epoch": 0.68, + "learning_rate": 0.00019777269163708468, + "loss": 1.9512, + "step": 570 + }, + { + "epoch": 0.69, + "learning_rate": 0.00019776475965598814, + "loss": 1.8349, + "step": 571 + }, + { + "epoch": 0.69, + "learning_rate": 0.0001977568137357694, + "loss": 2.0507, + "step": 572 + }, + { + "epoch": 0.69, + "learning_rate": 0.00019774885387756138, + "loss": 1.7588, + "step": 573 + }, + { + "epoch": 0.69, + "learning_rate": 0.000197740880082499, + "loss": 2.0981, + "step": 574 + }, + { + "epoch": 0.69, + "learning_rate": 0.00019773289235171918, + "loss": 2.0953, + "step": 575 + }, + { + "epoch": 0.69, + "learning_rate": 0.00019772489068636077, + "loss": 2.0678, + "step": 576 + }, + { + "epoch": 0.69, + "learning_rate": 0.00019771687508756466, + "loss": 2.0136, + "step": 577 + }, + { + "epoch": 0.69, + "learning_rate": 0.0001977088455564736, + "loss": 1.9781, + "step": 578 + }, + { + "epoch": 0.7, + "learning_rate": 0.00019770080209423254, + "loss": 2.2185, + "step": 579 + }, + { + "epoch": 0.7, + "learning_rate": 0.00019769274470198827, + "loss": 1.8076, + "step": 580 + }, + { + "epoch": 0.7, + "learning_rate": 0.00019768467338088957, + "loss": 1.6888, + "step": 581 + }, + { + "epoch": 0.7, + "learning_rate": 0.00019767658813208726, + "loss": 2.1273, + "step": 582 + }, + { + "epoch": 0.7, + "learning_rate": 0.0001976684889567341, + "loss": 2.3232, + "step": 583 + }, + { + "epoch": 0.7, + "learning_rate": 0.00019766037585598487, + "loss": 2.366, + "step": 584 + }, + { + "epoch": 0.7, + "learning_rate": 0.00019765224883099635, + "loss": 1.8939, + "step": 585 + }, + { + "epoch": 0.7, + "learning_rate": 0.00019764410788292722, + "loss": 2.0162, + "step": 586 + }, + { + "epoch": 0.7, + "learning_rate": 0.00019763595301293822, + "loss": 2.2752, + "step": 587 + }, + { + "epoch": 0.71, + "learning_rate": 0.0001976277842221921, + "loss": 1.9461, + "step": 588 + }, + { + "epoch": 0.71, + "learning_rate": 0.0001976196015118535, + "loss": 1.9999, + "step": 589 + }, + { + "epoch": 0.71, + "learning_rate": 0.0001976114048830891, + "loss": 2.0169, + "step": 590 + }, + { + "epoch": 0.71, + "learning_rate": 0.00019760319433706757, + "loss": 2.1838, + "step": 591 + }, + { + "epoch": 0.71, + "learning_rate": 0.00019759496987495955, + "loss": 2.3513, + "step": 592 + }, + { + "epoch": 0.71, + "learning_rate": 0.0001975867314979377, + "loss": 1.9915, + "step": 593 + }, + { + "epoch": 0.71, + "learning_rate": 0.0001975784792071766, + "loss": 2.1973, + "step": 594 + }, + { + "epoch": 0.71, + "learning_rate": 0.00019757021300385286, + "loss": 2.3112, + "step": 595 + }, + { + "epoch": 0.72, + "learning_rate": 0.00019756193288914507, + "loss": 2.0992, + "step": 596 + }, + { + "epoch": 0.72, + "learning_rate": 0.00019755363886423376, + "loss": 2.4266, + "step": 597 + }, + { + "epoch": 0.72, + "learning_rate": 0.00019754533093030148, + "loss": 1.7649, + "step": 598 + }, + { + "epoch": 0.72, + "learning_rate": 0.0001975370090885328, + "loss": 1.7573, + "step": 599 + }, + { + "epoch": 0.72, + "learning_rate": 0.00019752867334011423, + "loss": 1.7949, + "step": 600 + }, + { + "epoch": 0.72, + "learning_rate": 0.0001975203236862342, + "loss": 2.0229, + "step": 601 + }, + { + "epoch": 0.72, + "learning_rate": 0.00019751196012808325, + "loss": 2.0519, + "step": 602 + }, + { + "epoch": 0.72, + "learning_rate": 0.00019750358266685383, + "loss": 2.0829, + "step": 603 + }, + { + "epoch": 0.73, + "learning_rate": 0.00019749519130374038, + "loss": 2.0153, + "step": 604 + }, + { + "epoch": 0.73, + "learning_rate": 0.00019748678603993933, + "loss": 1.8594, + "step": 605 + }, + { + "epoch": 0.73, + "learning_rate": 0.00019747836687664908, + "loss": 2.1385, + "step": 606 + }, + { + "epoch": 0.73, + "learning_rate": 0.00019746993381507003, + "loss": 2.1317, + "step": 607 + }, + { + "epoch": 0.73, + "learning_rate": 0.00019746148685640451, + "loss": 1.1676, + "step": 608 + }, + { + "epoch": 0.73, + "learning_rate": 0.0001974530260018569, + "loss": 2.2856, + "step": 609 + }, + { + "epoch": 0.73, + "learning_rate": 0.0001974445512526336, + "loss": 2.1973, + "step": 610 + }, + { + "epoch": 0.73, + "learning_rate": 0.00019743606260994278, + "loss": 1.6912, + "step": 611 + }, + { + "epoch": 0.73, + "learning_rate": 0.00019742756007499486, + "loss": 1.8091, + "step": 612 + }, + { + "epoch": 0.74, + "learning_rate": 0.00019741904364900208, + "loss": 2.0108, + "step": 613 + }, + { + "epoch": 0.74, + "learning_rate": 0.00019741051333317867, + "loss": 2.1061, + "step": 614 + }, + { + "epoch": 0.74, + "learning_rate": 0.00019740196912874087, + "loss": 1.8934, + "step": 615 + }, + { + "epoch": 0.74, + "learning_rate": 0.00019739341103690693, + "loss": 1.8599, + "step": 616 + }, + { + "epoch": 0.74, + "learning_rate": 0.00019738483905889703, + "loss": 2.0025, + "step": 617 + }, + { + "epoch": 0.74, + "learning_rate": 0.00019737625319593335, + "loss": 1.8247, + "step": 618 + }, + { + "epoch": 0.74, + "learning_rate": 0.00019736765344924005, + "loss": 2.222, + "step": 619 + }, + { + "epoch": 0.74, + "learning_rate": 0.00019735903982004324, + "loss": 2.116, + "step": 620 + }, + { + "epoch": 0.75, + "learning_rate": 0.0001973504123095711, + "loss": 1.9183, + "step": 621 + }, + { + "epoch": 0.75, + "learning_rate": 0.0001973417709190536, + "loss": 2.1507, + "step": 622 + }, + { + "epoch": 0.75, + "learning_rate": 0.00019733311564972296, + "loss": 1.7899, + "step": 623 + }, + { + "epoch": 0.75, + "learning_rate": 0.00019732444650281315, + "loss": 2.1005, + "step": 624 + }, + { + "epoch": 0.75, + "learning_rate": 0.0001973157634795602, + "loss": 2.2391, + "step": 625 + }, + { + "epoch": 0.75, + "learning_rate": 0.00019730706658120214, + "loss": 1.9466, + "step": 626 + }, + { + "epoch": 0.75, + "learning_rate": 0.000197298355808979, + "loss": 1.9854, + "step": 627 + }, + { + "epoch": 0.75, + "eval_loss": 1.9957869052886963, + "eval_runtime": 282.5544, + "eval_samples_per_second": 0.729, + "eval_steps_per_second": 0.729, + "step": 627 + }, + { + "epoch": 0.75, + "learning_rate": 0.00019728963116413266, + "loss": 2.1877, + "step": 628 + }, + { + "epoch": 0.76, + "learning_rate": 0.00019728089264790712, + "loss": 2.2194, + "step": 629 + }, + { + "epoch": 0.76, + "learning_rate": 0.00019727214026154827, + "loss": 1.9631, + "step": 630 + }, + { + "epoch": 0.76, + "learning_rate": 0.00019726337400630405, + "loss": 2.3506, + "step": 631 + }, + { + "epoch": 0.76, + "learning_rate": 0.00019725459388342432, + "loss": 2.0543, + "step": 632 + }, + { + "epoch": 0.76, + "learning_rate": 0.0001972457998941609, + "loss": 2.0402, + "step": 633 + }, + { + "epoch": 0.76, + "learning_rate": 0.00019723699203976766, + "loss": 1.9316, + "step": 634 + }, + { + "epoch": 0.76, + "learning_rate": 0.0001972281703215004, + "loss": 2.2024, + "step": 635 + }, + { + "epoch": 0.76, + "learning_rate": 0.00019721933474061692, + "loss": 1.6776, + "step": 636 + }, + { + "epoch": 0.76, + "learning_rate": 0.00019721048529837694, + "loss": 1.9757, + "step": 637 + }, + { + "epoch": 0.77, + "learning_rate": 0.00019720162199604222, + "loss": 1.7631, + "step": 638 + }, + { + "epoch": 0.77, + "learning_rate": 0.00019719274483487648, + "loss": 2.34, + "step": 639 + }, + { + "epoch": 0.77, + "learning_rate": 0.0001971838538161454, + "loss": 1.8469, + "step": 640 + }, + { + "epoch": 0.77, + "learning_rate": 0.00019717494894111662, + "loss": 2.3151, + "step": 641 + }, + { + "epoch": 0.77, + "learning_rate": 0.00019716603021105987, + "loss": 2.0661, + "step": 642 + }, + { + "epoch": 0.77, + "learning_rate": 0.00019715709762724667, + "loss": 2.0408, + "step": 643 + }, + { + "epoch": 0.77, + "learning_rate": 0.00019714815119095062, + "loss": 1.9848, + "step": 644 + }, + { + "epoch": 0.77, + "learning_rate": 0.00019713919090344736, + "loss": 2.3134, + "step": 645 + }, + { + "epoch": 0.78, + "learning_rate": 0.00019713021676601438, + "loss": 2.4947, + "step": 646 + }, + { + "epoch": 0.78, + "learning_rate": 0.0001971212287799312, + "loss": 2.0515, + "step": 647 + }, + { + "epoch": 0.78, + "learning_rate": 0.00019711222694647932, + "loss": 2.6216, + "step": 648 + }, + { + "epoch": 0.78, + "learning_rate": 0.00019710321126694216, + "loss": 1.6517, + "step": 649 + }, + { + "epoch": 0.78, + "learning_rate": 0.0001970941817426052, + "loss": 2.0408, + "step": 650 + }, + { + "epoch": 0.78, + "learning_rate": 0.00019708513837475588, + "loss": 1.8841, + "step": 651 + }, + { + "epoch": 0.78, + "learning_rate": 0.00019707608116468356, + "loss": 2.1966, + "step": 652 + }, + { + "epoch": 0.78, + "learning_rate": 0.00019706701011367955, + "loss": 1.7587, + "step": 653 + }, + { + "epoch": 0.79, + "learning_rate": 0.0001970579252230373, + "loss": 2.2196, + "step": 654 + }, + { + "epoch": 0.79, + "learning_rate": 0.00019704882649405198, + "loss": 1.8146, + "step": 655 + }, + { + "epoch": 0.79, + "learning_rate": 0.00019703971392802098, + "loss": 2.2932, + "step": 656 + }, + { + "epoch": 0.79, + "learning_rate": 0.00019703058752624353, + "loss": 1.923, + "step": 657 + }, + { + "epoch": 0.79, + "learning_rate": 0.0001970214472900208, + "loss": 2.2393, + "step": 658 + }, + { + "epoch": 0.79, + "learning_rate": 0.00019701229322065605, + "loss": 1.7338, + "step": 659 + }, + { + "epoch": 0.79, + "learning_rate": 0.00019700312531945442, + "loss": 1.7859, + "step": 660 + }, + { + "epoch": 0.79, + "learning_rate": 0.00019699394358772306, + "loss": 2.2719, + "step": 661 + }, + { + "epoch": 0.79, + "learning_rate": 0.00019698474802677107, + "loss": 1.576, + "step": 662 + }, + { + "epoch": 0.8, + "learning_rate": 0.00019697553863790956, + "loss": 2.3333, + "step": 663 + }, + { + "epoch": 0.8, + "learning_rate": 0.00019696631542245156, + "loss": 2.3508, + "step": 664 + }, + { + "epoch": 0.8, + "learning_rate": 0.00019695707838171216, + "loss": 2.1876, + "step": 665 + }, + { + "epoch": 0.8, + "learning_rate": 0.00019694782751700828, + "loss": 1.4863, + "step": 666 + }, + { + "epoch": 0.8, + "learning_rate": 0.00019693856282965898, + "loss": 1.8948, + "step": 667 + }, + { + "epoch": 0.8, + "learning_rate": 0.00019692928432098512, + "loss": 1.6867, + "step": 668 + }, + { + "epoch": 0.8, + "learning_rate": 0.00019691999199230963, + "loss": 1.7682, + "step": 669 + }, + { + "epoch": 0.8, + "learning_rate": 0.00019691068584495742, + "loss": 2.0914, + "step": 670 + }, + { + "epoch": 0.81, + "learning_rate": 0.00019690136588025535, + "loss": 2.1413, + "step": 671 + }, + { + "epoch": 0.81, + "learning_rate": 0.00019689203209953223, + "loss": 2.1275, + "step": 672 + }, + { + "epoch": 0.81, + "learning_rate": 0.0001968826845041188, + "loss": 1.9556, + "step": 673 + }, + { + "epoch": 0.81, + "learning_rate": 0.00019687332309534792, + "loss": 2.2209, + "step": 674 + }, + { + "epoch": 0.81, + "learning_rate": 0.00019686394787455424, + "loss": 1.9853, + "step": 675 + }, + { + "epoch": 0.81, + "learning_rate": 0.00019685455884307454, + "loss": 2.0877, + "step": 676 + }, + { + "epoch": 0.81, + "learning_rate": 0.00019684515600224743, + "loss": 2.1607, + "step": 677 + }, + { + "epoch": 0.81, + "learning_rate": 0.00019683573935341358, + "loss": 2.2664, + "step": 678 + }, + { + "epoch": 0.82, + "learning_rate": 0.00019682630889791556, + "loss": 1.8527, + "step": 679 + }, + { + "epoch": 0.82, + "learning_rate": 0.000196816864637098, + "loss": 1.8417, + "step": 680 + }, + { + "epoch": 0.82, + "learning_rate": 0.00019680740657230738, + "loss": 1.9853, + "step": 681 + }, + { + "epoch": 0.82, + "learning_rate": 0.00019679793470489228, + "loss": 1.8419, + "step": 682 + }, + { + "epoch": 0.82, + "learning_rate": 0.00019678844903620317, + "loss": 1.9971, + "step": 683 + }, + { + "epoch": 0.82, + "learning_rate": 0.00019677894956759246, + "loss": 1.9843, + "step": 684 + }, + { + "epoch": 0.82, + "learning_rate": 0.00019676943630041462, + "loss": 2.376, + "step": 685 + }, + { + "epoch": 0.82, + "learning_rate": 0.00019675990923602598, + "loss": 2.1558, + "step": 686 + }, + { + "epoch": 0.82, + "learning_rate": 0.00019675036837578494, + "loss": 1.5752, + "step": 687 + }, + { + "epoch": 0.83, + "learning_rate": 0.0001967408137210518, + "loss": 1.6704, + "step": 688 + }, + { + "epoch": 0.83, + "learning_rate": 0.00019673124527318881, + "loss": 2.1389, + "step": 689 + }, + { + "epoch": 0.83, + "learning_rate": 0.00019672166303356028, + "loss": 2.126, + "step": 690 + }, + { + "epoch": 0.83, + "learning_rate": 0.00019671206700353237, + "loss": 1.9402, + "step": 691 + }, + { + "epoch": 0.83, + "learning_rate": 0.00019670245718447335, + "loss": 1.6701, + "step": 692 + }, + { + "epoch": 0.83, + "learning_rate": 0.00019669283357775328, + "loss": 1.8134, + "step": 693 + }, + { + "epoch": 0.83, + "learning_rate": 0.0001966831961847443, + "loss": 2.1642, + "step": 694 + }, + { + "epoch": 0.83, + "learning_rate": 0.00019667354500682054, + "loss": 1.8455, + "step": 695 + }, + { + "epoch": 0.84, + "learning_rate": 0.000196663880045358, + "loss": 1.9646, + "step": 696 + }, + { + "epoch": 0.84, + "learning_rate": 0.0001966542013017347, + "loss": 1.9855, + "step": 697 + }, + { + "epoch": 0.84, + "learning_rate": 0.00019664450877733062, + "loss": 1.7029, + "step": 698 + }, + { + "epoch": 0.84, + "learning_rate": 0.00019663480247352773, + "loss": 1.9789, + "step": 699 + }, + { + "epoch": 0.84, + "learning_rate": 0.0001966250823917099, + "loss": 1.8751, + "step": 700 + }, + { + "epoch": 0.84, + "learning_rate": 0.00019661534853326301, + "loss": 2.3644, + "step": 701 + }, + { + "epoch": 0.84, + "learning_rate": 0.00019660560089957492, + "loss": 1.8006, + "step": 702 + }, + { + "epoch": 0.84, + "learning_rate": 0.0001965958394920354, + "loss": 2.2799, + "step": 703 + }, + { + "epoch": 0.85, + "learning_rate": 0.00019658606431203622, + "loss": 1.9258, + "step": 704 + }, + { + "epoch": 0.85, + "learning_rate": 0.0001965762753609711, + "loss": 1.9521, + "step": 705 + }, + { + "epoch": 0.85, + "learning_rate": 0.00019656647264023575, + "loss": 1.9675, + "step": 706 + }, + { + "epoch": 0.85, + "learning_rate": 0.00019655665615122783, + "loss": 2.3686, + "step": 707 + }, + { + "epoch": 0.85, + "learning_rate": 0.00019654682589534693, + "loss": 2.1448, + "step": 708 + }, + { + "epoch": 0.85, + "learning_rate": 0.00019653698187399466, + "loss": 2.2475, + "step": 709 + }, + { + "epoch": 0.85, + "learning_rate": 0.0001965271240885745, + "loss": 1.9417, + "step": 710 + }, + { + "epoch": 0.85, + "learning_rate": 0.0001965172525404921, + "loss": 2.154, + "step": 711 + }, + { + "epoch": 0.85, + "learning_rate": 0.00019650736723115475, + "loss": 2.0646, + "step": 712 + }, + { + "epoch": 0.86, + "learning_rate": 0.00019649746816197196, + "loss": 2.235, + "step": 713 + }, + { + "epoch": 0.86, + "learning_rate": 0.00019648755533435518, + "loss": 1.7122, + "step": 714 + }, + { + "epoch": 0.86, + "learning_rate": 0.00019647762874971765, + "loss": 2.0635, + "step": 715 + }, + { + "epoch": 0.86, + "learning_rate": 0.00019646768840947474, + "loss": 1.8904, + "step": 716 + }, + { + "epoch": 0.86, + "learning_rate": 0.00019645773431504373, + "loss": 1.608, + "step": 717 + }, + { + "epoch": 0.86, + "learning_rate": 0.00019644776646784388, + "loss": 2.2307, + "step": 718 + }, + { + "epoch": 0.86, + "learning_rate": 0.0001964377848692963, + "loss": 2.176, + "step": 719 + }, + { + "epoch": 0.86, + "learning_rate": 0.00019642778952082426, + "loss": 2.1984, + "step": 720 + }, + { + "epoch": 0.87, + "learning_rate": 0.0001964177804238528, + "loss": 2.2625, + "step": 721 + }, + { + "epoch": 0.87, + "learning_rate": 0.00019640775757980903, + "loss": 2.3142, + "step": 722 + }, + { + "epoch": 0.87, + "learning_rate": 0.00019639772099012197, + "loss": 2.2366, + "step": 723 + }, + { + "epoch": 0.87, + "learning_rate": 0.00019638767065622266, + "loss": 1.7823, + "step": 724 + }, + { + "epoch": 0.87, + "learning_rate": 0.000196377606579544, + "loss": 2.0677, + "step": 725 + }, + { + "epoch": 0.87, + "learning_rate": 0.00019636752876152095, + "loss": 1.3337, + "step": 726 + }, + { + "epoch": 0.87, + "learning_rate": 0.00019635743720359037, + "loss": 2.055, + "step": 727 + }, + { + "epoch": 0.87, + "learning_rate": 0.0001963473319071911, + "loss": 1.9888, + "step": 728 + }, + { + "epoch": 0.88, + "learning_rate": 0.00019633721287376393, + "loss": 1.9258, + "step": 729 + }, + { + "epoch": 0.88, + "learning_rate": 0.00019632708010475165, + "loss": 2.3768, + "step": 730 + }, + { + "epoch": 0.88, + "learning_rate": 0.0001963169336015989, + "loss": 1.993, + "step": 731 + }, + { + "epoch": 0.88, + "learning_rate": 0.00019630677336575242, + "loss": 2.1989, + "step": 732 + }, + { + "epoch": 0.88, + "learning_rate": 0.0001962965993986608, + "loss": 2.1216, + "step": 733 + }, + { + "epoch": 0.88, + "learning_rate": 0.00019628641170177464, + "loss": 2.2217, + "step": 734 + }, + { + "epoch": 0.88, + "learning_rate": 0.00019627621027654648, + "loss": 1.8809, + "step": 735 + }, + { + "epoch": 0.88, + "learning_rate": 0.00019626599512443077, + "loss": 2.0864, + "step": 736 + }, + { + "epoch": 0.88, + "learning_rate": 0.00019625576624688406, + "loss": 2.0627, + "step": 737 + }, + { + "epoch": 0.89, + "learning_rate": 0.00019624552364536473, + "loss": 2.1347, + "step": 738 + }, + { + "epoch": 0.89, + "learning_rate": 0.00019623526732133315, + "loss": 1.9998, + "step": 739 + }, + { + "epoch": 0.89, + "learning_rate": 0.00019622499727625162, + "loss": 2.1998, + "step": 740 + }, + { + "epoch": 0.89, + "learning_rate": 0.00019621471351158443, + "loss": 1.974, + "step": 741 + }, + { + "epoch": 0.89, + "learning_rate": 0.00019620441602879787, + "loss": 1.9425, + "step": 742 + }, + { + "epoch": 0.89, + "learning_rate": 0.00019619410482936008, + "loss": 2.6227, + "step": 743 + }, + { + "epoch": 0.89, + "learning_rate": 0.00019618377991474124, + "loss": 2.1209, + "step": 744 + }, + { + "epoch": 0.89, + "learning_rate": 0.00019617344128641345, + "loss": 2.0606, + "step": 745 + }, + { + "epoch": 0.9, + "learning_rate": 0.00019616308894585078, + "loss": 2.296, + "step": 746 + }, + { + "epoch": 0.9, + "learning_rate": 0.00019615272289452923, + "loss": 2.0415, + "step": 747 + }, + { + "epoch": 0.9, + "learning_rate": 0.0001961423431339268, + "loss": 1.9516, + "step": 748 + }, + { + "epoch": 0.9, + "learning_rate": 0.0001961319496655234, + "loss": 2.0468, + "step": 749 + }, + { + "epoch": 0.9, + "learning_rate": 0.0001961215424908009, + "loss": 1.877, + "step": 750 + }, + { + "epoch": 0.9, + "learning_rate": 0.0001961111216112432, + "loss": 1.8129, + "step": 751 + }, + { + "epoch": 0.9, + "learning_rate": 0.00019610068702833596, + "loss": 1.9984, + "step": 752 + }, + { + "epoch": 0.9, + "learning_rate": 0.00019609023874356707, + "loss": 1.9013, + "step": 753 + }, + { + "epoch": 0.91, + "learning_rate": 0.00019607977675842615, + "loss": 2.0546, + "step": 754 + }, + { + "epoch": 0.91, + "learning_rate": 0.00019606930107440485, + "loss": 2.2817, + "step": 755 + }, + { + "epoch": 0.91, + "learning_rate": 0.0001960588116929968, + "loss": 2.0578, + "step": 756 + }, + { + "epoch": 0.91, + "learning_rate": 0.00019604830861569755, + "loss": 2.3521, + "step": 757 + }, + { + "epoch": 0.91, + "learning_rate": 0.00019603779184400457, + "loss": 2.0392, + "step": 758 + }, + { + "epoch": 0.91, + "learning_rate": 0.0001960272613794174, + "loss": 1.9863, + "step": 759 + }, + { + "epoch": 0.91, + "learning_rate": 0.00019601671722343738, + "loss": 2.1889, + "step": 760 + }, + { + "epoch": 0.91, + "learning_rate": 0.0001960061593775679, + "loss": 2.0908, + "step": 761 + }, + { + "epoch": 0.91, + "learning_rate": 0.0001959955878433143, + "loss": 1.986, + "step": 762 + }, + { + "epoch": 0.92, + "learning_rate": 0.00019598500262218386, + "loss": 2.0339, + "step": 763 + }, + { + "epoch": 0.92, + "learning_rate": 0.00019597440371568574, + "loss": 2.0958, + "step": 764 + }, + { + "epoch": 0.92, + "learning_rate": 0.0001959637911253312, + "loss": 1.9866, + "step": 765 + }, + { + "epoch": 0.92, + "learning_rate": 0.00019595316485263327, + "loss": 2.2228, + "step": 766 + }, + { + "epoch": 0.92, + "learning_rate": 0.00019594252489910706, + "loss": 1.915, + "step": 767 + }, + { + "epoch": 0.92, + "learning_rate": 0.00019593187126626965, + "loss": 2.0741, + "step": 768 + }, + { + "epoch": 0.92, + "learning_rate": 0.00019592120395563994, + "loss": 2.5346, + "step": 769 + }, + { + "epoch": 0.92, + "learning_rate": 0.00019591052296873888, + "loss": 2.4908, + "step": 770 + }, + { + "epoch": 0.93, + "learning_rate": 0.00019589982830708937, + "loss": 2.1042, + "step": 771 + }, + { + "epoch": 0.93, + "learning_rate": 0.00019588911997221625, + "loss": 1.8676, + "step": 772 + }, + { + "epoch": 0.93, + "learning_rate": 0.0001958783979656462, + "loss": 1.9152, + "step": 773 + }, + { + "epoch": 0.93, + "learning_rate": 0.00019586766228890806, + "loss": 1.7784, + "step": 774 + }, + { + "epoch": 0.93, + "learning_rate": 0.0001958569129435324, + "loss": 2.0784, + "step": 775 + }, + { + "epoch": 0.93, + "learning_rate": 0.0001958461499310519, + "loss": 1.7262, + "step": 776 + }, + { + "epoch": 0.93, + "learning_rate": 0.00019583537325300118, + "loss": 2.4154, + "step": 777 + }, + { + "epoch": 0.93, + "learning_rate": 0.00019582458291091663, + "loss": 2.3185, + "step": 778 + }, + { + "epoch": 0.94, + "learning_rate": 0.00019581377890633684, + "loss": 2.0981, + "step": 779 + }, + { + "epoch": 0.94, + "learning_rate": 0.00019580296124080212, + "loss": 1.8952, + "step": 780 + }, + { + "epoch": 0.94, + "learning_rate": 0.00019579212991585493, + "loss": 1.7208, + "step": 781 + }, + { + "epoch": 0.94, + "learning_rate": 0.00019578128493303955, + "loss": 2.0209, + "step": 782 + }, + { + "epoch": 0.94, + "learning_rate": 0.00019577042629390217, + "loss": 2.1867, + "step": 783 + }, + { + "epoch": 0.94, + "learning_rate": 0.0001957595539999911, + "loss": 2.0805, + "step": 784 + }, + { + "epoch": 0.94, + "learning_rate": 0.00019574866805285645, + "loss": 2.0451, + "step": 785 + }, + { + "epoch": 0.94, + "learning_rate": 0.00019573776845405028, + "loss": 2.2056, + "step": 786 + }, + { + "epoch": 0.94, + "learning_rate": 0.0001957268552051267, + "loss": 2.0773, + "step": 787 + }, + { + "epoch": 0.95, + "learning_rate": 0.00019571592830764165, + "loss": 2.2036, + "step": 788 + }, + { + "epoch": 0.95, + "learning_rate": 0.00019570498776315309, + "loss": 1.7298, + "step": 789 + }, + { + "epoch": 0.95, + "learning_rate": 0.0001956940335732209, + "loss": 1.8931, + "step": 790 + }, + { + "epoch": 0.95, + "learning_rate": 0.0001956830657394069, + "loss": 2.1567, + "step": 791 + }, + { + "epoch": 0.95, + "learning_rate": 0.00019567208426327488, + "loss": 1.9471, + "step": 792 + }, + { + "epoch": 0.95, + "learning_rate": 0.00019566108914639054, + "loss": 1.8916, + "step": 793 + }, + { + "epoch": 0.95, + "learning_rate": 0.00019565008039032158, + "loss": 2.0111, + "step": 794 + }, + { + "epoch": 0.95, + "learning_rate": 0.00019563905799663752, + "loss": 2.1374, + "step": 795 + }, + { + "epoch": 0.96, + "learning_rate": 0.00019562802196691003, + "loss": 2.3083, + "step": 796 + }, + { + "epoch": 0.96, + "learning_rate": 0.00019561697230271254, + "loss": 2.0381, + "step": 797 + }, + { + "epoch": 0.96, + "learning_rate": 0.0001956059090056205, + "loss": 2.1909, + "step": 798 + }, + { + "epoch": 0.96, + "learning_rate": 0.00019559483207721133, + "loss": 1.9893, + "step": 799 + }, + { + "epoch": 0.96, + "learning_rate": 0.0001955837415190643, + "loss": 2.3178, + "step": 800 + }, + { + "epoch": 0.96, + "learning_rate": 0.0001955726373327607, + "loss": 2.0815, + "step": 801 + }, + { + "epoch": 0.96, + "learning_rate": 0.00019556151951988376, + "loss": 1.6012, + "step": 802 + }, + { + "epoch": 0.96, + "learning_rate": 0.00019555038808201865, + "loss": 1.4965, + "step": 803 + }, + { + "epoch": 0.97, + "learning_rate": 0.00019553924302075242, + "loss": 2.3069, + "step": 804 + }, + { + "epoch": 0.97, + "learning_rate": 0.00019552808433767415, + "loss": 2.2388, + "step": 805 + }, + { + "epoch": 0.97, + "learning_rate": 0.00019551691203437482, + "loss": 2.5662, + "step": 806 + }, + { + "epoch": 0.97, + "learning_rate": 0.00019550572611244738, + "loss": 1.9419, + "step": 807 + }, + { + "epoch": 0.97, + "learning_rate": 0.00019549452657348663, + "loss": 2.3638, + "step": 808 + }, + { + "epoch": 0.97, + "learning_rate": 0.00019548331341908947, + "loss": 2.1567, + "step": 809 + }, + { + "epoch": 0.97, + "learning_rate": 0.00019547208665085457, + "loss": 1.9697, + "step": 810 + }, + { + "epoch": 0.97, + "learning_rate": 0.00019546084627038268, + "loss": 1.9006, + "step": 811 + }, + { + "epoch": 0.97, + "learning_rate": 0.0001954495922792764, + "loss": 2.304, + "step": 812 + }, + { + "epoch": 0.98, + "learning_rate": 0.0001954383246791403, + "loss": 2.0494, + "step": 813 + }, + { + "epoch": 0.98, + "learning_rate": 0.00019542704347158093, + "loss": 1.8562, + "step": 814 + }, + { + "epoch": 0.98, + "learning_rate": 0.00019541574865820672, + "loss": 2.1041, + "step": 815 + }, + { + "epoch": 0.98, + "learning_rate": 0.00019540444024062804, + "loss": 2.22, + "step": 816 + }, + { + "epoch": 0.98, + "learning_rate": 0.00019539311822045727, + "loss": 1.9925, + "step": 817 + }, + { + "epoch": 0.98, + "learning_rate": 0.00019538178259930869, + "loss": 2.3213, + "step": 818 + }, + { + "epoch": 0.98, + "learning_rate": 0.00019537043337879845, + "loss": 2.0319, + "step": 819 + }, + { + "epoch": 0.98, + "learning_rate": 0.00019535907056054475, + "loss": 1.8578, + "step": 820 + }, + { + "epoch": 0.99, + "learning_rate": 0.00019534769414616764, + "loss": 1.4115, + "step": 821 + }, + { + "epoch": 0.99, + "learning_rate": 0.0001953363041372892, + "loss": 2.0731, + "step": 822 + }, + { + "epoch": 0.99, + "learning_rate": 0.00019532490053553335, + "loss": 2.0605, + "step": 823 + }, + { + "epoch": 0.99, + "learning_rate": 0.00019531348334252607, + "loss": 1.9044, + "step": 824 + }, + { + "epoch": 0.99, + "learning_rate": 0.0001953020525598951, + "loss": 1.7405, + "step": 825 + }, + { + "epoch": 0.99, + "learning_rate": 0.0001952906081892703, + "loss": 1.898, + "step": 826 + }, + { + "epoch": 0.99, + "learning_rate": 0.00019527915023228332, + "loss": 1.9696, + "step": 827 + }, + { + "epoch": 0.99, + "learning_rate": 0.00019526767869056788, + "loss": 2.0469, + "step": 828 + }, + { + "epoch": 1.0, + "learning_rate": 0.00019525619356575952, + "loss": 2.0307, + "step": 829 + }, + { + "epoch": 1.0, + "learning_rate": 0.00019524469485949583, + "loss": 2.002, + "step": 830 + }, + { + "epoch": 1.0, + "learning_rate": 0.00019523318257341622, + "loss": 1.9438, + "step": 831 + }, + { + "epoch": 1.0, + "learning_rate": 0.00019522165670916207, + "loss": 1.535, + "step": 832 + }, + { + "epoch": 1.0, + "learning_rate": 0.0001952101172683768, + "loss": 1.7505, + "step": 833 + }, + { + "epoch": 1.0, + "learning_rate": 0.00019519856425270562, + "loss": 2.2248, + "step": 834 + }, + { + "epoch": 1.0, + "learning_rate": 0.00019518699766379576, + "loss": 2.0669, + "step": 835 + }, + { + "epoch": 1.0, + "learning_rate": 0.00019517541750329635, + "loss": 2.0268, + "step": 836 + }, + { + "epoch": 1.0, + "eval_loss": 1.9969017505645752, + "eval_runtime": 283.3157, + "eval_samples_per_second": 0.727, + "eval_steps_per_second": 0.727, + "step": 836 + }, + { + "epoch": 1.0, + "learning_rate": 0.00019516382377285848, + "loss": 1.6712, + "step": 837 + }, + { + "epoch": 1.01, + "learning_rate": 0.0001951522164741352, + "loss": 2.1558, + "step": 838 + }, + { + "epoch": 1.01, + "learning_rate": 0.00019514059560878138, + "loss": 2.1599, + "step": 839 + }, + { + "epoch": 1.01, + "learning_rate": 0.00019512896117845392, + "loss": 1.8762, + "step": 840 + }, + { + "epoch": 1.01, + "learning_rate": 0.00019511731318481168, + "loss": 2.0189, + "step": 841 + }, + { + "epoch": 1.01, + "learning_rate": 0.00019510565162951537, + "loss": 1.9364, + "step": 842 + }, + { + "epoch": 1.01, + "learning_rate": 0.00019509397651422769, + "loss": 1.7319, + "step": 843 + }, + { + "epoch": 1.01, + "learning_rate": 0.00019508228784061326, + "loss": 1.9424, + "step": 844 + }, + { + "epoch": 1.01, + "learning_rate": 0.0001950705856103386, + "loss": 2.277, + "step": 845 + }, + { + "epoch": 1.02, + "learning_rate": 0.00019505886982507225, + "loss": 1.6511, + "step": 846 + }, + { + "epoch": 1.0, + "learning_rate": 0.0001950471404864846, + "loss": 1.9056, + "step": 847 + }, + { + "epoch": 1.0, + "learning_rate": 0.00019503539759624798, + "loss": 1.5105, + "step": 848 + }, + { + "epoch": 1.0, + "learning_rate": 0.0001950236411560367, + "loss": 1.9469, + "step": 849 + }, + { + "epoch": 1.0, + "learning_rate": 0.00019501187116752693, + "loss": 1.5012, + "step": 850 + }, + { + "epoch": 1.01, + "learning_rate": 0.00019500008763239683, + "loss": 1.7086, + "step": 851 + }, + { + "epoch": 1.01, + "learning_rate": 0.00019498829055232647, + "loss": 1.5586, + "step": 852 + }, + { + "epoch": 1.01, + "learning_rate": 0.00019497647992899788, + "loss": 1.5573, + "step": 853 + }, + { + "epoch": 1.01, + "learning_rate": 0.000194964655764095, + "loss": 2.0757, + "step": 854 + }, + { + "epoch": 1.01, + "learning_rate": 0.00019495281805930367, + "loss": 1.5478, + "step": 855 + }, + { + "epoch": 1.01, + "learning_rate": 0.00019494096681631172, + "loss": 1.7068, + "step": 856 + }, + { + "epoch": 1.01, + "learning_rate": 0.00019492910203680884, + "loss": 1.6759, + "step": 857 + }, + { + "epoch": 1.01, + "learning_rate": 0.0001949172237224867, + "loss": 1.4621, + "step": 858 + }, + { + "epoch": 1.02, + "learning_rate": 0.00019490533187503892, + "loss": 1.5359, + "step": 859 + }, + { + "epoch": 1.02, + "learning_rate": 0.000194893426496161, + "loss": 1.9365, + "step": 860 + }, + { + "epoch": 1.02, + "learning_rate": 0.00019488150758755035, + "loss": 1.7089, + "step": 861 + }, + { + "epoch": 1.02, + "learning_rate": 0.00019486957515090641, + "loss": 1.4924, + "step": 862 + }, + { + "epoch": 1.02, + "learning_rate": 0.00019485762918793046, + "loss": 1.387, + "step": 863 + }, + { + "epoch": 1.02, + "learning_rate": 0.0001948456697003257, + "loss": 1.631, + "step": 864 + }, + { + "epoch": 1.02, + "learning_rate": 0.00019483369668979732, + "loss": 1.7953, + "step": 865 + }, + { + "epoch": 1.02, + "learning_rate": 0.00019482171015805245, + "loss": 1.7552, + "step": 866 + }, + { + "epoch": 1.03, + "learning_rate": 0.00019480971010680002, + "loss": 1.8313, + "step": 867 + }, + { + "epoch": 1.03, + "learning_rate": 0.00019479769653775106, + "loss": 1.593, + "step": 868 + }, + { + "epoch": 1.03, + "learning_rate": 0.00019478566945261837, + "loss": 1.9506, + "step": 869 + }, + { + "epoch": 1.03, + "learning_rate": 0.00019477362885311682, + "loss": 1.9598, + "step": 870 + }, + { + "epoch": 1.03, + "learning_rate": 0.0001947615747409631, + "loss": 1.7324, + "step": 871 + }, + { + "epoch": 1.03, + "learning_rate": 0.00019474950711787585, + "loss": 2.1208, + "step": 872 + }, + { + "epoch": 1.03, + "learning_rate": 0.0001947374259855757, + "loss": 1.4111, + "step": 873 + }, + { + "epoch": 1.03, + "learning_rate": 0.00019472533134578507, + "loss": 1.6696, + "step": 874 + }, + { + "epoch": 1.03, + "learning_rate": 0.00019471322320022849, + "loss": 1.6999, + "step": 875 + }, + { + "epoch": 1.04, + "learning_rate": 0.00019470110155063225, + "loss": 2.1287, + "step": 876 + }, + { + "epoch": 1.04, + "learning_rate": 0.00019468896639872468, + "loss": 1.874, + "step": 877 + }, + { + "epoch": 1.04, + "learning_rate": 0.00019467681774623592, + "loss": 1.7149, + "step": 878 + }, + { + "epoch": 1.04, + "learning_rate": 0.00019466465559489816, + "loss": 1.9563, + "step": 879 + }, + { + "epoch": 1.04, + "learning_rate": 0.00019465247994644545, + "loss": 1.3504, + "step": 880 + }, + { + "epoch": 1.04, + "learning_rate": 0.00019464029080261378, + "loss": 1.6176, + "step": 881 + }, + { + "epoch": 1.04, + "learning_rate": 0.00019462808816514103, + "loss": 1.7577, + "step": 882 + }, + { + "epoch": 1.04, + "learning_rate": 0.00019461587203576706, + "loss": 1.8054, + "step": 883 + }, + { + "epoch": 1.05, + "learning_rate": 0.00019460364241623358, + "loss": 2.0246, + "step": 884 + }, + { + "epoch": 1.05, + "learning_rate": 0.00019459139930828428, + "loss": 1.7645, + "step": 885 + }, + { + "epoch": 1.05, + "learning_rate": 0.0001945791427136648, + "loss": 1.9225, + "step": 886 + }, + { + "epoch": 1.05, + "learning_rate": 0.00019456687263412262, + "loss": 1.8967, + "step": 887 + }, + { + "epoch": 1.05, + "learning_rate": 0.0001945545890714072, + "loss": 1.5287, + "step": 888 + }, + { + "epoch": 1.05, + "learning_rate": 0.0001945422920272699, + "loss": 1.5033, + "step": 889 + }, + { + "epoch": 1.05, + "learning_rate": 0.00019452998150346401, + "loss": 2.0148, + "step": 890 + }, + { + "epoch": 1.05, + "learning_rate": 0.0001945176575017448, + "loss": 1.3706, + "step": 891 + }, + { + "epoch": 1.06, + "learning_rate": 0.0001945053200238693, + "loss": 1.7603, + "step": 892 + }, + { + "epoch": 1.06, + "learning_rate": 0.00019449296907159667, + "loss": 1.9884, + "step": 893 + }, + { + "epoch": 1.06, + "learning_rate": 0.00019448060464668783, + "loss": 1.6133, + "step": 894 + }, + { + "epoch": 1.06, + "learning_rate": 0.00019446822675090565, + "loss": 1.7885, + "step": 895 + }, + { + "epoch": 1.06, + "learning_rate": 0.00019445583538601498, + "loss": 1.8573, + "step": 896 + }, + { + "epoch": 1.06, + "learning_rate": 0.0001944434305537826, + "loss": 1.7241, + "step": 897 + }, + { + "epoch": 1.06, + "learning_rate": 0.0001944310122559771, + "loss": 1.8942, + "step": 898 + }, + { + "epoch": 1.06, + "learning_rate": 0.0001944185804943691, + "loss": 1.7541, + "step": 899 + }, + { + "epoch": 1.06, + "learning_rate": 0.00019440613527073105, + "loss": 1.9608, + "step": 900 + }, + { + "epoch": 1.07, + "learning_rate": 0.00019439367658683745, + "loss": 2.0969, + "step": 901 + }, + { + "epoch": 1.07, + "learning_rate": 0.00019438120444446457, + "loss": 2.2589, + "step": 902 + }, + { + "epoch": 1.07, + "learning_rate": 0.0001943687188453907, + "loss": 1.7335, + "step": 903 + }, + { + "epoch": 1.07, + "learning_rate": 0.00019435621979139596, + "loss": 1.8663, + "step": 904 + }, + { + "epoch": 1.07, + "learning_rate": 0.00019434370728426252, + "loss": 1.5627, + "step": 905 + }, + { + "epoch": 1.07, + "learning_rate": 0.0001943311813257743, + "loss": 1.6101, + "step": 906 + }, + { + "epoch": 1.07, + "learning_rate": 0.00019431864191771732, + "loss": 1.9661, + "step": 907 + }, + { + "epoch": 1.07, + "learning_rate": 0.0001943060890618794, + "loss": 1.6487, + "step": 908 + }, + { + "epoch": 1.08, + "learning_rate": 0.00019429352276005026, + "loss": 2.1282, + "step": 909 + }, + { + "epoch": 1.08, + "learning_rate": 0.00019428094301402162, + "loss": 1.6944, + "step": 910 + }, + { + "epoch": 1.08, + "learning_rate": 0.00019426834982558705, + "loss": 1.2433, + "step": 911 + }, + { + "epoch": 1.08, + "learning_rate": 0.00019425574319654213, + "loss": 1.5735, + "step": 912 + }, + { + "epoch": 1.08, + "learning_rate": 0.00019424312312868417, + "loss": 1.6499, + "step": 913 + }, + { + "epoch": 1.08, + "learning_rate": 0.00019423048962381265, + "loss": 1.8366, + "step": 914 + }, + { + "epoch": 1.08, + "learning_rate": 0.00019421784268372876, + "loss": 1.906, + "step": 915 + }, + { + "epoch": 1.08, + "learning_rate": 0.00019420518231023568, + "loss": 1.5976, + "step": 916 + }, + { + "epoch": 1.09, + "learning_rate": 0.0001941925085051385, + "loss": 1.6722, + "step": 917 + }, + { + "epoch": 1.09, + "learning_rate": 0.00019417982127024422, + "loss": 1.8832, + "step": 918 + }, + { + "epoch": 1.09, + "learning_rate": 0.00019416712060736183, + "loss": 1.8865, + "step": 919 + }, + { + "epoch": 1.09, + "learning_rate": 0.00019415440651830208, + "loss": 1.6627, + "step": 920 + }, + { + "epoch": 1.09, + "learning_rate": 0.0001941416790048778, + "loss": 1.3598, + "step": 921 + }, + { + "epoch": 1.09, + "learning_rate": 0.00019412893806890357, + "loss": 2.0506, + "step": 922 + }, + { + "epoch": 1.09, + "learning_rate": 0.00019411618371219605, + "loss": 1.9794, + "step": 923 + }, + { + "epoch": 1.09, + "learning_rate": 0.0001941034159365737, + "loss": 1.7851, + "step": 924 + }, + { + "epoch": 1.09, + "learning_rate": 0.0001940906347438569, + "loss": 1.8312, + "step": 925 + }, + { + "epoch": 1.1, + "learning_rate": 0.00019407784013586804, + "loss": 1.5167, + "step": 926 + }, + { + "epoch": 1.1, + "learning_rate": 0.00019406503211443128, + "loss": 1.5725, + "step": 927 + }, + { + "epoch": 1.1, + "learning_rate": 0.00019405221068137277, + "loss": 1.8857, + "step": 928 + }, + { + "epoch": 1.1, + "learning_rate": 0.00019403937583852061, + "loss": 1.741, + "step": 929 + }, + { + "epoch": 1.1, + "learning_rate": 0.00019402652758770475, + "loss": 1.6748, + "step": 930 + }, + { + "epoch": 1.1, + "learning_rate": 0.00019401366593075706, + "loss": 1.7285, + "step": 931 + }, + { + "epoch": 1.1, + "learning_rate": 0.00019400079086951135, + "loss": 1.7545, + "step": 932 + }, + { + "epoch": 1.1, + "learning_rate": 0.00019398790240580333, + "loss": 1.4491, + "step": 933 + }, + { + "epoch": 1.11, + "learning_rate": 0.00019397500054147058, + "loss": 1.3359, + "step": 934 + }, + { + "epoch": 1.11, + "learning_rate": 0.00019396208527835263, + "loss": 1.9567, + "step": 935 + }, + { + "epoch": 1.11, + "learning_rate": 0.0001939491566182909, + "loss": 2.0011, + "step": 936 + }, + { + "epoch": 1.11, + "learning_rate": 0.00019393621456312881, + "loss": 1.9076, + "step": 937 + }, + { + "epoch": 1.11, + "learning_rate": 0.00019392325911471155, + "loss": 1.5388, + "step": 938 + }, + { + "epoch": 1.11, + "learning_rate": 0.00019391029027488629, + "loss": 1.2337, + "step": 939 + }, + { + "epoch": 1.11, + "learning_rate": 0.00019389730804550211, + "loss": 1.5752, + "step": 940 + }, + { + "epoch": 1.11, + "learning_rate": 0.00019388431242840998, + "loss": 1.9131, + "step": 941 + }, + { + "epoch": 1.12, + "learning_rate": 0.00019387130342546284, + "loss": 1.4177, + "step": 942 + }, + { + "epoch": 1.12, + "learning_rate": 0.00019385828103851544, + "loss": 1.5865, + "step": 943 + }, + { + "epoch": 1.12, + "learning_rate": 0.0001938452452694245, + "loss": 1.6335, + "step": 944 + }, + { + "epoch": 1.12, + "learning_rate": 0.00019383219612004865, + "loss": 1.8599, + "step": 945 + }, + { + "epoch": 1.12, + "learning_rate": 0.00019381913359224842, + "loss": 1.3035, + "step": 946 + }, + { + "epoch": 1.12, + "learning_rate": 0.00019380605768788621, + "loss": 1.7586, + "step": 947 + }, + { + "epoch": 1.12, + "learning_rate": 0.0001937929684088264, + "loss": 1.7334, + "step": 948 + }, + { + "epoch": 1.12, + "learning_rate": 0.00019377986575693518, + "loss": 1.5749, + "step": 949 + }, + { + "epoch": 1.12, + "learning_rate": 0.00019376674973408075, + "loss": 1.874, + "step": 950 + }, + { + "epoch": 1.13, + "learning_rate": 0.00019375362034213314, + "loss": 2.3055, + "step": 951 + }, + { + "epoch": 1.13, + "learning_rate": 0.00019374047758296433, + "loss": 1.5801, + "step": 952 + }, + { + "epoch": 1.13, + "learning_rate": 0.0001937273214584482, + "loss": 1.8788, + "step": 953 + }, + { + "epoch": 1.13, + "learning_rate": 0.00019371415197046052, + "loss": 2.431, + "step": 954 + }, + { + "epoch": 1.13, + "learning_rate": 0.00019370096912087897, + "loss": 1.4963, + "step": 955 + }, + { + "epoch": 1.13, + "learning_rate": 0.0001936877729115831, + "loss": 1.514, + "step": 956 + }, + { + "epoch": 1.13, + "learning_rate": 0.00019367456334445446, + "loss": 1.6099, + "step": 957 + }, + { + "epoch": 1.13, + "learning_rate": 0.00019366134042137642, + "loss": 1.9367, + "step": 958 + }, + { + "epoch": 1.14, + "learning_rate": 0.00019364810414423427, + "loss": 1.7384, + "step": 959 + }, + { + "epoch": 1.14, + "learning_rate": 0.00019363485451491524, + "loss": 1.6166, + "step": 960 + }, + { + "epoch": 1.14, + "learning_rate": 0.00019362159153530844, + "loss": 1.955, + "step": 961 + }, + { + "epoch": 1.14, + "learning_rate": 0.00019360831520730482, + "loss": 1.4189, + "step": 962 + }, + { + "epoch": 1.14, + "learning_rate": 0.00019359502553279736, + "loss": 1.4506, + "step": 963 + }, + { + "epoch": 1.14, + "learning_rate": 0.00019358172251368087, + "loss": 1.7108, + "step": 964 + }, + { + "epoch": 1.14, + "learning_rate": 0.00019356840615185203, + "loss": 1.6641, + "step": 965 + }, + { + "epoch": 1.14, + "learning_rate": 0.00019355507644920952, + "loss": 1.7506, + "step": 966 + }, + { + "epoch": 1.15, + "learning_rate": 0.00019354173340765382, + "loss": 2.0598, + "step": 967 + }, + { + "epoch": 1.15, + "learning_rate": 0.0001935283770290874, + "loss": 1.3494, + "step": 968 + }, + { + "epoch": 1.15, + "learning_rate": 0.00019351500731541453, + "loss": 1.6571, + "step": 969 + }, + { + "epoch": 1.15, + "learning_rate": 0.0001935016242685415, + "loss": 1.6403, + "step": 970 + }, + { + "epoch": 1.15, + "learning_rate": 0.00019348822789037637, + "loss": 1.7555, + "step": 971 + }, + { + "epoch": 1.15, + "learning_rate": 0.00019347481818282925, + "loss": 2.1451, + "step": 972 + }, + { + "epoch": 1.15, + "learning_rate": 0.000193461395147812, + "loss": 1.4522, + "step": 973 + }, + { + "epoch": 1.15, + "learning_rate": 0.0001934479587872385, + "loss": 1.7147, + "step": 974 + }, + { + "epoch": 1.15, + "learning_rate": 0.0001934345091030245, + "loss": 1.3909, + "step": 975 + }, + { + "epoch": 1.16, + "learning_rate": 0.00019342104609708756, + "loss": 1.8104, + "step": 976 + }, + { + "epoch": 1.16, + "learning_rate": 0.00019340756977134728, + "loss": 1.5221, + "step": 977 + }, + { + "epoch": 1.16, + "learning_rate": 0.000193394080127725, + "loss": 1.9447, + "step": 978 + }, + { + "epoch": 1.16, + "learning_rate": 0.0001933805771681442, + "loss": 1.5742, + "step": 979 + }, + { + "epoch": 1.16, + "learning_rate": 0.00019336706089452996, + "loss": 1.5312, + "step": 980 + }, + { + "epoch": 1.16, + "learning_rate": 0.00019335353130880948, + "loss": 1.4304, + "step": 981 + }, + { + "epoch": 1.16, + "learning_rate": 0.00019333998841291177, + "loss": 1.8379, + "step": 982 + }, + { + "epoch": 1.16, + "learning_rate": 0.00019332643220876773, + "loss": 1.877, + "step": 983 + }, + { + "epoch": 1.17, + "learning_rate": 0.0001933128626983102, + "loss": 1.9627, + "step": 984 + }, + { + "epoch": 1.17, + "learning_rate": 0.0001932992798834739, + "loss": 1.7857, + "step": 985 + }, + { + "epoch": 1.17, + "learning_rate": 0.00019328568376619543, + "loss": 1.3189, + "step": 986 + }, + { + "epoch": 1.17, + "learning_rate": 0.00019327207434841333, + "loss": 1.9588, + "step": 987 + }, + { + "epoch": 1.17, + "learning_rate": 0.00019325845163206795, + "loss": 1.3132, + "step": 988 + }, + { + "epoch": 1.17, + "learning_rate": 0.00019324481561910163, + "loss": 1.6304, + "step": 989 + }, + { + "epoch": 1.17, + "learning_rate": 0.0001932311663114586, + "loss": 1.8322, + "step": 990 + }, + { + "epoch": 1.17, + "learning_rate": 0.00019321750371108486, + "loss": 1.4192, + "step": 991 + }, + { + "epoch": 1.18, + "learning_rate": 0.0001932038278199285, + "loss": 1.3915, + "step": 992 + }, + { + "epoch": 1.18, + "learning_rate": 0.00019319013863993933, + "loss": 1.8433, + "step": 993 + }, + { + "epoch": 1.18, + "learning_rate": 0.0001931764361730692, + "loss": 2.1459, + "step": 994 + }, + { + "epoch": 1.18, + "learning_rate": 0.0001931627204212717, + "loss": 1.9799, + "step": 995 + }, + { + "epoch": 1.18, + "learning_rate": 0.00019314899138650243, + "loss": 1.855, + "step": 996 + }, + { + "epoch": 1.18, + "learning_rate": 0.00019313524907071887, + "loss": 1.4763, + "step": 997 + }, + { + "epoch": 1.18, + "learning_rate": 0.00019312149347588037, + "loss": 2.0128, + "step": 998 + }, + { + "epoch": 1.18, + "learning_rate": 0.00019310772460394814, + "loss": 1.6964, + "step": 999 + }, + { + "epoch": 1.18, + "learning_rate": 0.0001930939424568854, + "loss": 1.5864, + "step": 1000 + }, + { + "epoch": 1.19, + "learning_rate": 0.00019308014703665712, + "loss": 1.8437, + "step": 1001 + }, + { + "epoch": 1.19, + "learning_rate": 0.00019306633834523024, + "loss": 2.1677, + "step": 1002 + }, + { + "epoch": 1.19, + "learning_rate": 0.00019305251638457356, + "loss": 1.8872, + "step": 1003 + }, + { + "epoch": 1.19, + "learning_rate": 0.0001930386811566578, + "loss": 1.7312, + "step": 1004 + }, + { + "epoch": 1.19, + "learning_rate": 0.0001930248326634556, + "loss": 1.6772, + "step": 1005 + }, + { + "epoch": 1.19, + "learning_rate": 0.00019301097090694143, + "loss": 1.9666, + "step": 1006 + }, + { + "epoch": 1.19, + "learning_rate": 0.00019299709588909165, + "loss": 1.8946, + "step": 1007 + }, + { + "epoch": 1.19, + "learning_rate": 0.00019298320761188453, + "loss": 2.1784, + "step": 1008 + }, + { + "epoch": 1.2, + "learning_rate": 0.0001929693060773003, + "loss": 2.0249, + "step": 1009 + }, + { + "epoch": 1.2, + "learning_rate": 0.00019295539128732093, + "loss": 1.717, + "step": 1010 + }, + { + "epoch": 1.2, + "learning_rate": 0.00019294146324393046, + "loss": 1.8671, + "step": 1011 + }, + { + "epoch": 1.2, + "learning_rate": 0.00019292752194911464, + "loss": 1.8388, + "step": 1012 + }, + { + "epoch": 1.2, + "learning_rate": 0.00019291356740486123, + "loss": 1.9111, + "step": 1013 + }, + { + "epoch": 1.2, + "learning_rate": 0.00019289959961315986, + "loss": 1.5287, + "step": 1014 + }, + { + "epoch": 1.2, + "learning_rate": 0.000192885618576002, + "loss": 1.5669, + "step": 1015 + }, + { + "epoch": 1.2, + "learning_rate": 0.00019287162429538105, + "loss": 1.9095, + "step": 1016 + }, + { + "epoch": 1.21, + "learning_rate": 0.00019285761677329232, + "loss": 1.9133, + "step": 1017 + }, + { + "epoch": 1.21, + "learning_rate": 0.00019284359601173294, + "loss": 2.1099, + "step": 1018 + }, + { + "epoch": 1.21, + "learning_rate": 0.000192829562012702, + "loss": 1.6303, + "step": 1019 + }, + { + "epoch": 1.21, + "learning_rate": 0.00019281551477820036, + "loss": 1.5907, + "step": 1020 + }, + { + "epoch": 1.21, + "learning_rate": 0.00019280145431023097, + "loss": 1.4897, + "step": 1021 + }, + { + "epoch": 1.21, + "learning_rate": 0.00019278738061079845, + "loss": 1.7414, + "step": 1022 + }, + { + "epoch": 1.21, + "learning_rate": 0.00019277329368190942, + "loss": 1.816, + "step": 1023 + }, + { + "epoch": 1.21, + "learning_rate": 0.00019275919352557241, + "loss": 1.5033, + "step": 1024 + }, + { + "epoch": 1.21, + "learning_rate": 0.00019274508014379777, + "loss": 1.7923, + "step": 1025 + }, + { + "epoch": 1.22, + "learning_rate": 0.00019273095353859775, + "loss": 1.3094, + "step": 1026 + }, + { + "epoch": 1.22, + "learning_rate": 0.00019271681371198652, + "loss": 1.7689, + "step": 1027 + }, + { + "epoch": 1.22, + "learning_rate": 0.0001927026606659801, + "loss": 1.8019, + "step": 1028 + }, + { + "epoch": 1.22, + "learning_rate": 0.00019268849440259639, + "loss": 1.8818, + "step": 1029 + }, + { + "epoch": 1.22, + "learning_rate": 0.00019267431492385521, + "loss": 1.7442, + "step": 1030 + }, + { + "epoch": 1.22, + "learning_rate": 0.00019266012223177824, + "loss": 2.045, + "step": 1031 + }, + { + "epoch": 1.22, + "learning_rate": 0.00019264591632838903, + "loss": 1.7842, + "step": 1032 + }, + { + "epoch": 1.22, + "learning_rate": 0.00019263169721571308, + "loss": 1.5289, + "step": 1033 + }, + { + "epoch": 1.23, + "learning_rate": 0.00019261746489577765, + "loss": 1.6013, + "step": 1034 + }, + { + "epoch": 1.23, + "learning_rate": 0.00019260321937061202, + "loss": 1.7912, + "step": 1035 + }, + { + "epoch": 1.23, + "learning_rate": 0.0001925889606422473, + "loss": 1.7573, + "step": 1036 + }, + { + "epoch": 1.23, + "learning_rate": 0.0001925746887127164, + "loss": 1.7368, + "step": 1037 + }, + { + "epoch": 1.23, + "learning_rate": 0.00019256040358405424, + "loss": 1.7497, + "step": 1038 + }, + { + "epoch": 1.23, + "learning_rate": 0.00019254610525829758, + "loss": 2.0042, + "step": 1039 + }, + { + "epoch": 1.23, + "learning_rate": 0.00019253179373748504, + "loss": 2.0732, + "step": 1040 + }, + { + "epoch": 1.23, + "learning_rate": 0.00019251746902365708, + "loss": 1.8878, + "step": 1041 + }, + { + "epoch": 1.24, + "learning_rate": 0.00019250313111885618, + "loss": 1.9404, + "step": 1042 + }, + { + "epoch": 1.24, + "learning_rate": 0.00019248878002512654, + "loss": 1.5535, + "step": 1043 + }, + { + "epoch": 1.24, + "learning_rate": 0.00019247441574451432, + "loss": 1.9344, + "step": 1044 + }, + { + "epoch": 1.24, + "learning_rate": 0.0001924600382790676, + "loss": 1.9696, + "step": 1045 + }, + { + "epoch": 1.24, + "eval_loss": 2.064669609069824, + "eval_runtime": 283.003, + "eval_samples_per_second": 0.728, + "eval_steps_per_second": 0.728, + "step": 1045 + }, + { + "epoch": 1.24, + "learning_rate": 0.00019244564763083624, + "loss": 1.4577, + "step": 1046 + }, + { + "epoch": 1.24, + "learning_rate": 0.00019243124380187204, + "loss": 2.1324, + "step": 1047 + }, + { + "epoch": 1.24, + "learning_rate": 0.00019241682679422873, + "loss": 1.4713, + "step": 1048 + }, + { + "epoch": 1.24, + "learning_rate": 0.00019240239660996177, + "loss": 1.7455, + "step": 1049 + }, + { + "epoch": 1.24, + "learning_rate": 0.0001923879532511287, + "loss": 1.5372, + "step": 1050 + }, + { + "epoch": 1.25, + "learning_rate": 0.00019237349671978872, + "loss": 2.0984, + "step": 1051 + }, + { + "epoch": 1.25, + "learning_rate": 0.0001923590270180031, + "loss": 1.5023, + "step": 1052 + }, + { + "epoch": 1.25, + "learning_rate": 0.0001923445441478348, + "loss": 2.0826, + "step": 1053 + }, + { + "epoch": 1.25, + "learning_rate": 0.00019233004811134886, + "loss": 1.7448, + "step": 1054 + }, + { + "epoch": 1.25, + "learning_rate": 0.00019231553891061208, + "loss": 2.0249, + "step": 1055 + }, + { + "epoch": 1.25, + "learning_rate": 0.00019230101654769312, + "loss": 1.6144, + "step": 1056 + }, + { + "epoch": 1.25, + "learning_rate": 0.0001922864810246626, + "loss": 1.9193, + "step": 1057 + }, + { + "epoch": 1.25, + "learning_rate": 0.00019227193234359292, + "loss": 2.0057, + "step": 1058 + }, + { + "epoch": 1.26, + "learning_rate": 0.00019225737050655842, + "loss": 1.9493, + "step": 1059 + }, + { + "epoch": 1.26, + "learning_rate": 0.00019224279551563532, + "loss": 1.9545, + "step": 1060 + }, + { + "epoch": 1.26, + "learning_rate": 0.0001922282073729017, + "loss": 1.8983, + "step": 1061 + }, + { + "epoch": 1.26, + "learning_rate": 0.00019221360608043746, + "loss": 1.9414, + "step": 1062 + }, + { + "epoch": 1.26, + "learning_rate": 0.00019219899164032447, + "loss": 1.8471, + "step": 1063 + }, + { + "epoch": 1.26, + "learning_rate": 0.0001921843640546464, + "loss": 1.7568, + "step": 1064 + }, + { + "epoch": 1.26, + "learning_rate": 0.00019216972332548887, + "loss": 2.0737, + "step": 1065 + }, + { + "epoch": 1.26, + "learning_rate": 0.0001921550694549393, + "loss": 1.6109, + "step": 1066 + }, + { + "epoch": 1.27, + "learning_rate": 0.000192140402445087, + "loss": 1.6684, + "step": 1067 + }, + { + "epoch": 1.27, + "learning_rate": 0.0001921257222980232, + "loss": 1.5101, + "step": 1068 + }, + { + "epoch": 1.27, + "learning_rate": 0.00019211102901584094, + "loss": 1.5262, + "step": 1069 + }, + { + "epoch": 1.27, + "learning_rate": 0.0001920963226006352, + "loss": 1.9757, + "step": 1070 + }, + { + "epoch": 1.27, + "learning_rate": 0.00019208160305450272, + "loss": 2.038, + "step": 1071 + }, + { + "epoch": 1.27, + "learning_rate": 0.00019206687037954224, + "loss": 1.4755, + "step": 1072 + }, + { + "epoch": 1.27, + "learning_rate": 0.00019205212457785434, + "loss": 1.7406, + "step": 1073 + }, + { + "epoch": 1.27, + "learning_rate": 0.00019203736565154137, + "loss": 1.9564, + "step": 1074 + }, + { + "epoch": 1.27, + "learning_rate": 0.0001920225936027077, + "loss": 1.823, + "step": 1075 + }, + { + "epoch": 1.28, + "learning_rate": 0.0001920078084334595, + "loss": 1.8275, + "step": 1076 + }, + { + "epoch": 1.28, + "learning_rate": 0.0001919930101459048, + "loss": 1.7106, + "step": 1077 + }, + { + "epoch": 1.28, + "learning_rate": 0.00019197819874215347, + "loss": 1.5958, + "step": 1078 + }, + { + "epoch": 1.28, + "learning_rate": 0.00019196337422431735, + "loss": 2.1478, + "step": 1079 + }, + { + "epoch": 1.28, + "learning_rate": 0.0001919485365945101, + "loss": 1.7238, + "step": 1080 + }, + { + "epoch": 1.28, + "learning_rate": 0.00019193368585484718, + "loss": 2.0758, + "step": 1081 + }, + { + "epoch": 1.28, + "learning_rate": 0.000191918822007446, + "loss": 1.8403, + "step": 1082 + }, + { + "epoch": 1.28, + "learning_rate": 0.00019190394505442585, + "loss": 1.8286, + "step": 1083 + }, + { + "epoch": 1.29, + "learning_rate": 0.00019188905499790789, + "loss": 1.6992, + "step": 1084 + }, + { + "epoch": 1.29, + "learning_rate": 0.00019187415184001503, + "loss": 1.8512, + "step": 1085 + }, + { + "epoch": 1.29, + "learning_rate": 0.0001918592355828722, + "loss": 1.8236, + "step": 1086 + }, + { + "epoch": 1.29, + "learning_rate": 0.0001918443062286061, + "loss": 1.6173, + "step": 1087 + }, + { + "epoch": 1.29, + "learning_rate": 0.00019182936377934535, + "loss": 1.8593, + "step": 1088 + }, + { + "epoch": 1.29, + "learning_rate": 0.0001918144082372204, + "loss": 1.8184, + "step": 1089 + }, + { + "epoch": 1.29, + "learning_rate": 0.00019179943960436358, + "loss": 1.9655, + "step": 1090 + }, + { + "epoch": 1.29, + "learning_rate": 0.00019178445788290915, + "loss": 1.5858, + "step": 1091 + }, + { + "epoch": 1.3, + "learning_rate": 0.00019176946307499312, + "loss": 1.8359, + "step": 1092 + }, + { + "epoch": 1.3, + "learning_rate": 0.0001917544551827534, + "loss": 1.4354, + "step": 1093 + }, + { + "epoch": 1.3, + "learning_rate": 0.00019173943420832984, + "loss": 1.4312, + "step": 1094 + }, + { + "epoch": 1.3, + "learning_rate": 0.0001917244001538641, + "loss": 2.0024, + "step": 1095 + }, + { + "epoch": 1.3, + "learning_rate": 0.00019170935302149965, + "loss": 1.5994, + "step": 1096 + }, + { + "epoch": 1.3, + "learning_rate": 0.00019169429281338195, + "loss": 2.05, + "step": 1097 + }, + { + "epoch": 1.3, + "learning_rate": 0.00019167921953165825, + "loss": 1.8746, + "step": 1098 + }, + { + "epoch": 1.3, + "learning_rate": 0.00019166413317847763, + "loss": 2.0071, + "step": 1099 + }, + { + "epoch": 1.3, + "learning_rate": 0.00019164903375599112, + "loss": 2.0331, + "step": 1100 + }, + { + "epoch": 1.31, + "learning_rate": 0.00019163392126635154, + "loss": 1.3587, + "step": 1101 + }, + { + "epoch": 1.31, + "learning_rate": 0.00019161879571171362, + "loss": 1.6144, + "step": 1102 + }, + { + "epoch": 1.31, + "learning_rate": 0.00019160365709423388, + "loss": 1.4845, + "step": 1103 + }, + { + "epoch": 1.31, + "learning_rate": 0.00019158850541607083, + "loss": 1.4511, + "step": 1104 + }, + { + "epoch": 1.31, + "learning_rate": 0.00019157334067938474, + "loss": 1.8015, + "step": 1105 + }, + { + "epoch": 1.31, + "learning_rate": 0.00019155816288633776, + "loss": 1.5029, + "step": 1106 + }, + { + "epoch": 1.31, + "learning_rate": 0.00019154297203909394, + "loss": 1.7102, + "step": 1107 + }, + { + "epoch": 1.31, + "learning_rate": 0.00019152776813981912, + "loss": 1.6661, + "step": 1108 + }, + { + "epoch": 1.32, + "learning_rate": 0.0001915125511906811, + "loss": 1.5872, + "step": 1109 + }, + { + "epoch": 1.32, + "learning_rate": 0.00019149732119384943, + "loss": 1.7868, + "step": 1110 + }, + { + "epoch": 1.32, + "learning_rate": 0.0001914820781514956, + "loss": 1.6365, + "step": 1111 + }, + { + "epoch": 1.32, + "learning_rate": 0.0001914668220657929, + "loss": 2.3434, + "step": 1112 + }, + { + "epoch": 1.32, + "learning_rate": 0.0001914515529389166, + "loss": 1.6458, + "step": 1113 + }, + { + "epoch": 1.32, + "learning_rate": 0.0001914362707730437, + "loss": 1.7061, + "step": 1114 + }, + { + "epoch": 1.32, + "learning_rate": 0.00019142097557035308, + "loss": 1.8606, + "step": 1115 + }, + { + "epoch": 1.32, + "learning_rate": 0.00019140566733302552, + "loss": 1.9415, + "step": 1116 + }, + { + "epoch": 1.33, + "learning_rate": 0.00019139034606324362, + "loss": 1.7411, + "step": 1117 + }, + { + "epoch": 1.33, + "learning_rate": 0.00019137501176319193, + "loss": 1.9404, + "step": 1118 + }, + { + "epoch": 1.33, + "learning_rate": 0.0001913596644350567, + "loss": 1.802, + "step": 1119 + }, + { + "epoch": 1.33, + "learning_rate": 0.00019134430408102615, + "loss": 1.2244, + "step": 1120 + }, + { + "epoch": 1.33, + "learning_rate": 0.00019132893070329036, + "loss": 1.902, + "step": 1121 + }, + { + "epoch": 1.33, + "learning_rate": 0.0001913135443040412, + "loss": 1.4578, + "step": 1122 + }, + { + "epoch": 1.33, + "learning_rate": 0.00019129814488547247, + "loss": 1.6816, + "step": 1123 + }, + { + "epoch": 1.33, + "learning_rate": 0.0001912827324497798, + "loss": 1.7293, + "step": 1124 + }, + { + "epoch": 1.33, + "learning_rate": 0.00019126730699916061, + "loss": 1.6344, + "step": 1125 + }, + { + "epoch": 1.34, + "learning_rate": 0.0001912518685358143, + "loss": 1.6819, + "step": 1126 + }, + { + "epoch": 1.34, + "learning_rate": 0.00019123641706194199, + "loss": 1.6761, + "step": 1127 + }, + { + "epoch": 1.34, + "learning_rate": 0.00019122095257974677, + "loss": 1.9222, + "step": 1128 + }, + { + "epoch": 1.34, + "learning_rate": 0.00019120547509143354, + "loss": 1.6117, + "step": 1129 + }, + { + "epoch": 1.34, + "learning_rate": 0.00019118998459920902, + "loss": 1.688, + "step": 1130 + }, + { + "epoch": 1.34, + "learning_rate": 0.00019117448110528184, + "loss": 1.8383, + "step": 1131 + }, + { + "epoch": 1.34, + "learning_rate": 0.00019115896461186245, + "loss": 1.5225, + "step": 1132 + }, + { + "epoch": 1.34, + "learning_rate": 0.00019114343512116318, + "loss": 2.0376, + "step": 1133 + }, + { + "epoch": 1.35, + "learning_rate": 0.00019112789263539813, + "loss": 1.5632, + "step": 1134 + }, + { + "epoch": 1.35, + "learning_rate": 0.00019111233715678343, + "loss": 1.7049, + "step": 1135 + }, + { + "epoch": 1.35, + "learning_rate": 0.0001910967686875369, + "loss": 1.4992, + "step": 1136 + }, + { + "epoch": 1.35, + "learning_rate": 0.00019108118722987826, + "loss": 1.7949, + "step": 1137 + }, + { + "epoch": 1.35, + "learning_rate": 0.00019106559278602903, + "loss": 1.4688, + "step": 1138 + }, + { + "epoch": 1.35, + "learning_rate": 0.00019104998535821274, + "loss": 1.4031, + "step": 1139 + }, + { + "epoch": 1.35, + "learning_rate": 0.0001910343649486546, + "loss": 2.1757, + "step": 1140 + }, + { + "epoch": 1.35, + "learning_rate": 0.00019101873155958179, + "loss": 1.622, + "step": 1141 + }, + { + "epoch": 1.36, + "learning_rate": 0.00019100308519322322, + "loss": 1.9441, + "step": 1142 + }, + { + "epoch": 1.36, + "learning_rate": 0.0001909874258518098, + "loss": 1.8065, + "step": 1143 + }, + { + "epoch": 1.36, + "learning_rate": 0.00019097175353757417, + "loss": 1.8348, + "step": 1144 + }, + { + "epoch": 1.36, + "learning_rate": 0.00019095606825275083, + "loss": 2.0519, + "step": 1145 + }, + { + "epoch": 1.36, + "learning_rate": 0.00019094036999957624, + "loss": 1.9172, + "step": 1146 + }, + { + "epoch": 1.36, + "learning_rate": 0.00019092465878028854, + "loss": 1.9961, + "step": 1147 + }, + { + "epoch": 1.36, + "learning_rate": 0.00019090893459712787, + "loss": 2.1239, + "step": 1148 + }, + { + "epoch": 1.36, + "learning_rate": 0.00019089319745233611, + "loss": 1.3481, + "step": 1149 + }, + { + "epoch": 1.36, + "learning_rate": 0.00019087744734815708, + "loss": 1.5035, + "step": 1150 + }, + { + "epoch": 1.37, + "learning_rate": 0.00019086168428683638, + "loss": 1.818, + "step": 1151 + }, + { + "epoch": 1.37, + "learning_rate": 0.00019084590827062145, + "loss": 2.0481, + "step": 1152 + }, + { + "epoch": 1.37, + "learning_rate": 0.00019083011930176165, + "loss": 1.4444, + "step": 1153 + }, + { + "epoch": 1.37, + "learning_rate": 0.00019081431738250814, + "loss": 1.6059, + "step": 1154 + }, + { + "epoch": 1.37, + "learning_rate": 0.0001907985025151139, + "loss": 2.0284, + "step": 1155 + }, + { + "epoch": 1.37, + "learning_rate": 0.0001907826747018338, + "loss": 1.8603, + "step": 1156 + }, + { + "epoch": 1.37, + "learning_rate": 0.00019076683394492455, + "loss": 1.7189, + "step": 1157 + }, + { + "epoch": 1.37, + "learning_rate": 0.00019075098024664468, + "loss": 1.7497, + "step": 1158 + }, + { + "epoch": 1.38, + "learning_rate": 0.00019073511360925458, + "loss": 1.7489, + "step": 1159 + }, + { + "epoch": 1.38, + "learning_rate": 0.0001907192340350165, + "loss": 1.6059, + "step": 1160 + }, + { + "epoch": 1.38, + "learning_rate": 0.00019070334152619453, + "loss": 1.4407, + "step": 1161 + }, + { + "epoch": 1.38, + "learning_rate": 0.00019068743608505455, + "loss": 1.7025, + "step": 1162 + }, + { + "epoch": 1.38, + "learning_rate": 0.00019067151771386438, + "loss": 1.7921, + "step": 1163 + }, + { + "epoch": 1.38, + "learning_rate": 0.0001906555864148936, + "loss": 1.6147, + "step": 1164 + }, + { + "epoch": 1.38, + "learning_rate": 0.0001906396421904137, + "loss": 1.6192, + "step": 1165 + }, + { + "epoch": 1.38, + "learning_rate": 0.00019062368504269795, + "loss": 1.4341, + "step": 1166 + }, + { + "epoch": 1.39, + "learning_rate": 0.00019060771497402147, + "loss": 1.3054, + "step": 1167 + }, + { + "epoch": 1.39, + "learning_rate": 0.0001905917319866613, + "loss": 2.041, + "step": 1168 + }, + { + "epoch": 1.39, + "learning_rate": 0.00019057573608289623, + "loss": 2.004, + "step": 1169 + }, + { + "epoch": 1.39, + "learning_rate": 0.00019055972726500695, + "loss": 1.4002, + "step": 1170 + }, + { + "epoch": 1.39, + "learning_rate": 0.00019054370553527595, + "loss": 1.5554, + "step": 1171 + }, + { + "epoch": 1.39, + "learning_rate": 0.00019052767089598754, + "loss": 1.9783, + "step": 1172 + }, + { + "epoch": 1.39, + "learning_rate": 0.000190511623349428, + "loss": 1.7443, + "step": 1173 + }, + { + "epoch": 1.39, + "learning_rate": 0.00019049556289788528, + "loss": 1.6089, + "step": 1174 + }, + { + "epoch": 1.39, + "learning_rate": 0.0001904794895436493, + "loss": 1.8784, + "step": 1175 + }, + { + "epoch": 1.4, + "learning_rate": 0.0001904634032890117, + "loss": 2.0985, + "step": 1176 + }, + { + "epoch": 1.4, + "learning_rate": 0.0001904473041362661, + "loss": 1.811, + "step": 1177 + }, + { + "epoch": 1.4, + "learning_rate": 0.00019043119208770793, + "loss": 1.407, + "step": 1178 + }, + { + "epoch": 1.4, + "learning_rate": 0.0001904150671456343, + "loss": 1.7269, + "step": 1179 + }, + { + "epoch": 1.4, + "learning_rate": 0.00019039892931234435, + "loss": 1.8374, + "step": 1180 + }, + { + "epoch": 1.4, + "learning_rate": 0.00019038277859013896, + "loss": 1.583, + "step": 1181 + }, + { + "epoch": 1.4, + "learning_rate": 0.00019036661498132086, + "loss": 1.6407, + "step": 1182 + }, + { + "epoch": 1.4, + "learning_rate": 0.00019035043848819464, + "loss": 2.0828, + "step": 1183 + }, + { + "epoch": 1.41, + "learning_rate": 0.00019033424911306672, + "loss": 1.7067, + "step": 1184 + }, + { + "epoch": 1.41, + "learning_rate": 0.00019031804685824534, + "loss": 1.55, + "step": 1185 + }, + { + "epoch": 1.41, + "learning_rate": 0.0001903018317260406, + "loss": 1.7573, + "step": 1186 + }, + { + "epoch": 1.41, + "learning_rate": 0.00019028560371876446, + "loss": 1.5666, + "step": 1187 + }, + { + "epoch": 1.41, + "learning_rate": 0.0001902693628387306, + "loss": 1.5192, + "step": 1188 + }, + { + "epoch": 1.41, + "learning_rate": 0.00019025310908825466, + "loss": 2.0093, + "step": 1189 + }, + { + "epoch": 1.41, + "learning_rate": 0.00019023684246965406, + "loss": 1.8414, + "step": 1190 + }, + { + "epoch": 1.41, + "learning_rate": 0.00019022056298524808, + "loss": 1.3696, + "step": 1191 + }, + { + "epoch": 1.42, + "learning_rate": 0.00019020427063735782, + "loss": 1.6336, + "step": 1192 + }, + { + "epoch": 1.42, + "learning_rate": 0.00019018796542830617, + "loss": 1.8528, + "step": 1193 + }, + { + "epoch": 1.42, + "learning_rate": 0.00019017164736041795, + "loss": 2.0523, + "step": 1194 + }, + { + "epoch": 1.42, + "learning_rate": 0.00019015531643601973, + "loss": 1.7526, + "step": 1195 + }, + { + "epoch": 1.42, + "learning_rate": 0.00019013897265743998, + "loss": 1.8391, + "step": 1196 + }, + { + "epoch": 1.42, + "learning_rate": 0.00019012261602700892, + "loss": 1.4257, + "step": 1197 + }, + { + "epoch": 1.42, + "learning_rate": 0.00019010624654705867, + "loss": 2.0911, + "step": 1198 + }, + { + "epoch": 1.42, + "learning_rate": 0.0001900898642199232, + "loss": 1.7578, + "step": 1199 + }, + { + "epoch": 1.42, + "learning_rate": 0.00019007346904793818, + "loss": 1.9003, + "step": 1200 + }, + { + "epoch": 1.43, + "learning_rate": 0.0001900570610334413, + "loss": 1.3918, + "step": 1201 + }, + { + "epoch": 1.43, + "learning_rate": 0.0001900406401787719, + "loss": 2.0365, + "step": 1202 + }, + { + "epoch": 1.43, + "learning_rate": 0.00019002420648627131, + "loss": 1.5184, + "step": 1203 + }, + { + "epoch": 1.43, + "learning_rate": 0.00019000775995828254, + "loss": 1.6412, + "step": 1204 + }, + { + "epoch": 1.43, + "learning_rate": 0.00018999130059715058, + "loss": 1.5031, + "step": 1205 + }, + { + "epoch": 1.43, + "learning_rate": 0.00018997482840522217, + "loss": 1.4421, + "step": 1206 + }, + { + "epoch": 1.43, + "learning_rate": 0.00018995834338484584, + "loss": 1.9431, + "step": 1207 + }, + { + "epoch": 1.43, + "learning_rate": 0.000189941845538372, + "loss": 1.8141, + "step": 1208 + }, + { + "epoch": 1.44, + "learning_rate": 0.0001899253348681529, + "loss": 1.7289, + "step": 1209 + }, + { + "epoch": 1.44, + "learning_rate": 0.00018990881137654258, + "loss": 1.7217, + "step": 1210 + }, + { + "epoch": 1.44, + "learning_rate": 0.000189892275065897, + "loss": 2.3727, + "step": 1211 + }, + { + "epoch": 1.44, + "learning_rate": 0.00018987572593857381, + "loss": 1.4833, + "step": 1212 + }, + { + "epoch": 1.44, + "learning_rate": 0.00018985916399693256, + "loss": 2.13, + "step": 1213 + }, + { + "epoch": 1.44, + "learning_rate": 0.00018984258924333464, + "loss": 1.875, + "step": 1214 + }, + { + "epoch": 1.44, + "learning_rate": 0.00018982600168014323, + "loss": 1.783, + "step": 1215 + }, + { + "epoch": 1.44, + "learning_rate": 0.00018980940130972337, + "loss": 1.6815, + "step": 1216 + }, + { + "epoch": 1.45, + "learning_rate": 0.0001897927881344419, + "loss": 2.049, + "step": 1217 + }, + { + "epoch": 1.45, + "learning_rate": 0.00018977616215666752, + "loss": 1.918, + "step": 1218 + }, + { + "epoch": 1.45, + "learning_rate": 0.0001897595233787707, + "loss": 1.5824, + "step": 1219 + }, + { + "epoch": 1.45, + "learning_rate": 0.00018974287180312377, + "loss": 1.7473, + "step": 1220 + }, + { + "epoch": 1.45, + "learning_rate": 0.00018972620743210093, + "loss": 1.6915, + "step": 1221 + }, + { + "epoch": 1.45, + "learning_rate": 0.0001897095302680781, + "loss": 1.7633, + "step": 1222 + }, + { + "epoch": 1.45, + "learning_rate": 0.00018969284031343308, + "loss": 1.6921, + "step": 1223 + }, + { + "epoch": 1.45, + "learning_rate": 0.00018967613757054554, + "loss": 1.5433, + "step": 1224 + }, + { + "epoch": 1.45, + "learning_rate": 0.00018965942204179686, + "loss": 1.9389, + "step": 1225 + }, + { + "epoch": 1.46, + "learning_rate": 0.00018964269372957038, + "loss": 1.5625, + "step": 1226 + }, + { + "epoch": 1.46, + "learning_rate": 0.00018962595263625115, + "loss": 1.4835, + "step": 1227 + }, + { + "epoch": 1.46, + "learning_rate": 0.00018960919876422611, + "loss": 1.8479, + "step": 1228 + }, + { + "epoch": 1.46, + "learning_rate": 0.00018959243211588397, + "loss": 1.7861, + "step": 1229 + }, + { + "epoch": 1.46, + "learning_rate": 0.00018957565269361531, + "loss": 1.867, + "step": 1230 + }, + { + "epoch": 1.46, + "learning_rate": 0.00018955886049981245, + "loss": 1.9383, + "step": 1231 + }, + { + "epoch": 1.46, + "learning_rate": 0.0001895420555368697, + "loss": 1.755, + "step": 1232 + }, + { + "epoch": 1.46, + "learning_rate": 0.000189525237807183, + "loss": 1.5166, + "step": 1233 + }, + { + "epoch": 1.47, + "learning_rate": 0.00018950840731315024, + "loss": 1.8629, + "step": 1234 + }, + { + "epoch": 1.47, + "learning_rate": 0.000189491564057171, + "loss": 1.6845, + "step": 1235 + }, + { + "epoch": 1.47, + "learning_rate": 0.00018947470804164685, + "loss": 1.4748, + "step": 1236 + }, + { + "epoch": 1.47, + "learning_rate": 0.00018945783926898105, + "loss": 1.8907, + "step": 1237 + }, + { + "epoch": 1.47, + "learning_rate": 0.00018944095774157873, + "loss": 1.5758, + "step": 1238 + }, + { + "epoch": 1.47, + "learning_rate": 0.00018942406346184683, + "loss": 1.6367, + "step": 1239 + }, + { + "epoch": 1.47, + "learning_rate": 0.00018940715643219407, + "loss": 1.7285, + "step": 1240 + }, + { + "epoch": 1.47, + "learning_rate": 0.00018939023665503108, + "loss": 1.5714, + "step": 1241 + }, + { + "epoch": 1.48, + "learning_rate": 0.0001893733041327702, + "loss": 1.9308, + "step": 1242 + }, + { + "epoch": 1.48, + "learning_rate": 0.00018935635886782568, + "loss": 1.9153, + "step": 1243 + }, + { + "epoch": 1.48, + "learning_rate": 0.00018933940086261351, + "loss": 1.8009, + "step": 1244 + }, + { + "epoch": 1.48, + "learning_rate": 0.00018932243011955154, + "loss": 1.7392, + "step": 1245 + }, + { + "epoch": 1.48, + "learning_rate": 0.00018930544664105944, + "loss": 1.821, + "step": 1246 + }, + { + "epoch": 1.48, + "learning_rate": 0.0001892884504295587, + "loss": 1.475, + "step": 1247 + }, + { + "epoch": 1.48, + "learning_rate": 0.00018927144148747255, + "loss": 1.8937, + "step": 1248 + }, + { + "epoch": 1.48, + "learning_rate": 0.00018925441981722618, + "loss": 1.6958, + "step": 1249 + }, + { + "epoch": 1.48, + "learning_rate": 0.00018923738542124644, + "loss": 1.6836, + "step": 1250 + }, + { + "epoch": 1.49, + "learning_rate": 0.00018922033830196208, + "loss": 2.0213, + "step": 1251 + }, + { + "epoch": 1.49, + "learning_rate": 0.00018920327846180365, + "loss": 1.9572, + "step": 1252 + }, + { + "epoch": 1.49, + "learning_rate": 0.00018918620590320352, + "loss": 1.9449, + "step": 1253 + }, + { + "epoch": 1.49, + "learning_rate": 0.00018916912062859583, + "loss": 1.7297, + "step": 1254 + }, + { + "epoch": 1.49, + "eval_loss": 2.0551259517669678, + "eval_runtime": 283.8338, + "eval_samples_per_second": 0.726, + "eval_steps_per_second": 0.726, + "step": 1254 + }, + { + "epoch": 1.49, + "learning_rate": 0.00018915202264041664, + "loss": 1.8158, + "step": 1255 + }, + { + "epoch": 1.49, + "learning_rate": 0.0001891349119411037, + "loss": 1.921, + "step": 1256 + }, + { + "epoch": 1.49, + "learning_rate": 0.00018911778853309658, + "loss": 1.5726, + "step": 1257 + }, + { + "epoch": 1.49, + "learning_rate": 0.0001891006524188368, + "loss": 1.6641, + "step": 1258 + }, + { + "epoch": 1.5, + "learning_rate": 0.00018908350360076752, + "loss": 1.5841, + "step": 1259 + }, + { + "epoch": 1.5, + "learning_rate": 0.00018906634208133385, + "loss": 1.8567, + "step": 1260 + }, + { + "epoch": 1.5, + "learning_rate": 0.00018904916786298257, + "loss": 1.5584, + "step": 1261 + }, + { + "epoch": 1.5, + "learning_rate": 0.00018903198094816242, + "loss": 1.6615, + "step": 1262 + }, + { + "epoch": 1.5, + "learning_rate": 0.00018901478133932385, + "loss": 1.7477, + "step": 1263 + }, + { + "epoch": 1.5, + "learning_rate": 0.00018899756903891914, + "loss": 1.3796, + "step": 1264 + }, + { + "epoch": 1.5, + "learning_rate": 0.00018898034404940238, + "loss": 1.7991, + "step": 1265 + }, + { + "epoch": 1.5, + "learning_rate": 0.00018896310637322953, + "loss": 1.4944, + "step": 1266 + }, + { + "epoch": 1.51, + "learning_rate": 0.00018894585601285827, + "loss": 1.5719, + "step": 1267 + }, + { + "epoch": 1.51, + "learning_rate": 0.00018892859297074812, + "loss": 1.5495, + "step": 1268 + }, + { + "epoch": 1.51, + "learning_rate": 0.00018891131724936043, + "loss": 1.7611, + "step": 1269 + }, + { + "epoch": 1.51, + "learning_rate": 0.00018889402885115833, + "loss": 1.5991, + "step": 1270 + }, + { + "epoch": 1.51, + "learning_rate": 0.00018887672777860676, + "loss": 1.8849, + "step": 1271 + }, + { + "epoch": 1.51, + "learning_rate": 0.0001888594140341725, + "loss": 1.6136, + "step": 1272 + }, + { + "epoch": 1.51, + "learning_rate": 0.0001888420876203241, + "loss": 1.8288, + "step": 1273 + }, + { + "epoch": 1.51, + "learning_rate": 0.0001888247485395319, + "loss": 1.6625, + "step": 1274 + }, + { + "epoch": 1.52, + "learning_rate": 0.00018880739679426816, + "loss": 1.49, + "step": 1275 + }, + { + "epoch": 1.52, + "learning_rate": 0.00018879003238700675, + "loss": 1.874, + "step": 1276 + }, + { + "epoch": 1.52, + "learning_rate": 0.00018877265532022352, + "loss": 1.751, + "step": 1277 + }, + { + "epoch": 1.52, + "learning_rate": 0.00018875526559639604, + "loss": 1.9882, + "step": 1278 + }, + { + "epoch": 1.52, + "learning_rate": 0.00018873786321800374, + "loss": 1.5214, + "step": 1279 + }, + { + "epoch": 1.52, + "learning_rate": 0.0001887204481875278, + "loss": 1.741, + "step": 1280 + }, + { + "epoch": 1.52, + "learning_rate": 0.00018870302050745118, + "loss": 1.7798, + "step": 1281 + }, + { + "epoch": 1.52, + "learning_rate": 0.00018868558018025878, + "loss": 1.9258, + "step": 1282 + }, + { + "epoch": 1.52, + "learning_rate": 0.0001886681272084371, + "loss": 1.9096, + "step": 1283 + }, + { + "epoch": 1.53, + "learning_rate": 0.00018865066159447466, + "loss": 1.6729, + "step": 1284 + }, + { + "epoch": 1.53, + "learning_rate": 0.00018863318334086157, + "loss": 1.6239, + "step": 1285 + }, + { + "epoch": 1.53, + "learning_rate": 0.00018861569245008994, + "loss": 1.9857, + "step": 1286 + }, + { + "epoch": 1.53, + "learning_rate": 0.00018859818892465354, + "loss": 1.9905, + "step": 1287 + }, + { + "epoch": 1.53, + "learning_rate": 0.000188580672767048, + "loss": 2.0073, + "step": 1288 + }, + { + "epoch": 1.53, + "learning_rate": 0.00018856314397977075, + "loss": 1.7109, + "step": 1289 + }, + { + "epoch": 1.53, + "learning_rate": 0.000188545602565321, + "loss": 1.3727, + "step": 1290 + }, + { + "epoch": 1.53, + "learning_rate": 0.00018852804852619975, + "loss": 1.7045, + "step": 1291 + }, + { + "epoch": 1.54, + "learning_rate": 0.00018851048186490992, + "loss": 1.9042, + "step": 1292 + }, + { + "epoch": 1.54, + "learning_rate": 0.00018849290258395602, + "loss": 1.7174, + "step": 1293 + }, + { + "epoch": 1.54, + "learning_rate": 0.00018847531068584452, + "loss": 1.6502, + "step": 1294 + }, + { + "epoch": 1.54, + "learning_rate": 0.00018845770617308366, + "loss": 1.8582, + "step": 1295 + }, + { + "epoch": 1.54, + "learning_rate": 0.0001884400890481834, + "loss": 1.4846, + "step": 1296 + }, + { + "epoch": 1.54, + "learning_rate": 0.00018842245931365562, + "loss": 1.5428, + "step": 1297 + }, + { + "epoch": 1.54, + "learning_rate": 0.00018840481697201392, + "loss": 1.7266, + "step": 1298 + }, + { + "epoch": 1.54, + "learning_rate": 0.0001883871620257737, + "loss": 1.9324, + "step": 1299 + }, + { + "epoch": 1.55, + "learning_rate": 0.00018836949447745215, + "loss": 1.577, + "step": 1300 + }, + { + "epoch": 1.55, + "learning_rate": 0.0001883518143295683, + "loss": 1.6388, + "step": 1301 + }, + { + "epoch": 1.55, + "learning_rate": 0.00018833412158464298, + "loss": 1.9201, + "step": 1302 + }, + { + "epoch": 1.55, + "learning_rate": 0.00018831641624519877, + "loss": 1.6478, + "step": 1303 + }, + { + "epoch": 1.55, + "learning_rate": 0.00018829869831376005, + "loss": 1.6826, + "step": 1304 + }, + { + "epoch": 1.55, + "learning_rate": 0.00018828096779285303, + "loss": 1.8513, + "step": 1305 + }, + { + "epoch": 1.55, + "learning_rate": 0.00018826322468500566, + "loss": 1.571, + "step": 1306 + }, + { + "epoch": 1.55, + "learning_rate": 0.00018824546899274777, + "loss": 1.1602, + "step": 1307 + }, + { + "epoch": 1.55, + "learning_rate": 0.0001882277007186109, + "loss": 1.9998, + "step": 1308 + }, + { + "epoch": 1.56, + "learning_rate": 0.0001882099198651284, + "loss": 1.7034, + "step": 1309 + }, + { + "epoch": 1.56, + "learning_rate": 0.0001881921264348355, + "loss": 1.4031, + "step": 1310 + }, + { + "epoch": 1.56, + "learning_rate": 0.00018817432043026911, + "loss": 1.8413, + "step": 1311 + }, + { + "epoch": 1.56, + "learning_rate": 0.00018815650185396797, + "loss": 1.6606, + "step": 1312 + }, + { + "epoch": 1.56, + "learning_rate": 0.00018813867070847264, + "loss": 1.5792, + "step": 1313 + }, + { + "epoch": 1.56, + "learning_rate": 0.00018812082699632546, + "loss": 1.4525, + "step": 1314 + }, + { + "epoch": 1.56, + "learning_rate": 0.00018810297072007054, + "loss": 1.4906, + "step": 1315 + }, + { + "epoch": 1.56, + "learning_rate": 0.00018808510188225377, + "loss": 1.6284, + "step": 1316 + }, + { + "epoch": 1.57, + "learning_rate": 0.0001880672204854229, + "loss": 1.7281, + "step": 1317 + }, + { + "epoch": 1.57, + "learning_rate": 0.0001880493265321274, + "loss": 1.5345, + "step": 1318 + }, + { + "epoch": 1.57, + "learning_rate": 0.00018803142002491856, + "loss": 2.0933, + "step": 1319 + }, + { + "epoch": 1.57, + "learning_rate": 0.00018801350096634946, + "loss": 1.9372, + "step": 1320 + }, + { + "epoch": 1.57, + "learning_rate": 0.000187995569358975, + "loss": 1.7151, + "step": 1321 + }, + { + "epoch": 1.57, + "learning_rate": 0.00018797762520535177, + "loss": 1.4823, + "step": 1322 + }, + { + "epoch": 1.57, + "learning_rate": 0.0001879596685080383, + "loss": 2.0495, + "step": 1323 + }, + { + "epoch": 1.57, + "learning_rate": 0.00018794169926959474, + "loss": 2.2966, + "step": 1324 + }, + { + "epoch": 1.58, + "learning_rate": 0.00018792371749258314, + "loss": 1.7868, + "step": 1325 + }, + { + "epoch": 1.58, + "learning_rate": 0.00018790572317956735, + "loss": 1.9403, + "step": 1326 + }, + { + "epoch": 1.58, + "learning_rate": 0.00018788771633311292, + "loss": 1.6687, + "step": 1327 + }, + { + "epoch": 1.58, + "learning_rate": 0.00018786969695578723, + "loss": 1.8422, + "step": 1328 + }, + { + "epoch": 1.58, + "learning_rate": 0.00018785166505015948, + "loss": 1.5916, + "step": 1329 + }, + { + "epoch": 1.58, + "learning_rate": 0.00018783362061880062, + "loss": 1.9119, + "step": 1330 + }, + { + "epoch": 1.58, + "learning_rate": 0.00018781556366428336, + "loss": 1.4903, + "step": 1331 + }, + { + "epoch": 1.58, + "learning_rate": 0.00018779749418918227, + "loss": 1.9497, + "step": 1332 + }, + { + "epoch": 1.58, + "learning_rate": 0.00018777941219607364, + "loss": 1.9462, + "step": 1333 + }, + { + "epoch": 1.59, + "learning_rate": 0.00018776131768753556, + "loss": 2.0474, + "step": 1334 + }, + { + "epoch": 1.59, + "learning_rate": 0.00018774321066614795, + "loss": 1.4474, + "step": 1335 + }, + { + "epoch": 1.59, + "learning_rate": 0.00018772509113449245, + "loss": 1.8315, + "step": 1336 + }, + { + "epoch": 1.59, + "learning_rate": 0.00018770695909515247, + "loss": 1.7684, + "step": 1337 + }, + { + "epoch": 1.59, + "learning_rate": 0.00018768881455071332, + "loss": 1.2675, + "step": 1338 + }, + { + "epoch": 1.59, + "learning_rate": 0.000187670657503762, + "loss": 1.8226, + "step": 1339 + }, + { + "epoch": 1.59, + "learning_rate": 0.00018765248795688726, + "loss": 2.2112, + "step": 1340 + }, + { + "epoch": 1.59, + "learning_rate": 0.0001876343059126797, + "loss": 1.3627, + "step": 1341 + }, + { + "epoch": 1.6, + "learning_rate": 0.00018761611137373173, + "loss": 2.1488, + "step": 1342 + }, + { + "epoch": 1.6, + "learning_rate": 0.00018759790434263744, + "loss": 1.9842, + "step": 1343 + }, + { + "epoch": 1.6, + "learning_rate": 0.00018757968482199276, + "loss": 1.9775, + "step": 1344 + }, + { + "epoch": 1.6, + "learning_rate": 0.00018756145281439545, + "loss": 1.6835, + "step": 1345 + }, + { + "epoch": 1.6, + "learning_rate": 0.0001875432083224449, + "loss": 1.5272, + "step": 1346 + }, + { + "epoch": 1.6, + "learning_rate": 0.0001875249513487425, + "loss": 1.7539, + "step": 1347 + }, + { + "epoch": 1.6, + "learning_rate": 0.00018750668189589117, + "loss": 1.874, + "step": 1348 + }, + { + "epoch": 1.6, + "learning_rate": 0.00018748839996649583, + "loss": 1.5858, + "step": 1349 + }, + { + "epoch": 1.61, + "learning_rate": 0.00018747010556316305, + "loss": 1.9298, + "step": 1350 + }, + { + "epoch": 1.61, + "learning_rate": 0.0001874517986885012, + "loss": 1.5079, + "step": 1351 + }, + { + "epoch": 1.61, + "learning_rate": 0.00018743347934512046, + "loss": 1.884, + "step": 1352 + }, + { + "epoch": 1.61, + "learning_rate": 0.00018741514753563277, + "loss": 1.7978, + "step": 1353 + }, + { + "epoch": 1.61, + "learning_rate": 0.0001873968032626518, + "loss": 1.7735, + "step": 1354 + }, + { + "epoch": 1.61, + "learning_rate": 0.00018737844652879312, + "loss": 1.7227, + "step": 1355 + }, + { + "epoch": 1.61, + "learning_rate": 0.00018736007733667393, + "loss": 1.8458, + "step": 1356 + }, + { + "epoch": 1.61, + "learning_rate": 0.00018734169568891334, + "loss": 1.3268, + "step": 1357 + }, + { + "epoch": 1.61, + "learning_rate": 0.0001873233015881321, + "loss": 1.3782, + "step": 1358 + }, + { + "epoch": 1.62, + "learning_rate": 0.00018730489503695287, + "loss": 1.9614, + "step": 1359 + }, + { + "epoch": 1.62, + "learning_rate": 0.00018728647603800003, + "loss": 1.7755, + "step": 1360 + }, + { + "epoch": 1.62, + "learning_rate": 0.00018726804459389963, + "loss": 1.7961, + "step": 1361 + }, + { + "epoch": 1.62, + "learning_rate": 0.00018724960070727972, + "loss": 1.7158, + "step": 1362 + }, + { + "epoch": 1.62, + "learning_rate": 0.0001872311443807699, + "loss": 1.6303, + "step": 1363 + }, + { + "epoch": 1.62, + "learning_rate": 0.0001872126756170017, + "loss": 1.8734, + "step": 1364 + }, + { + "epoch": 1.62, + "learning_rate": 0.00018719419441860834, + "loss": 1.5143, + "step": 1365 + }, + { + "epoch": 1.62, + "learning_rate": 0.0001871757007882248, + "loss": 1.498, + "step": 1366 + }, + { + "epoch": 1.63, + "learning_rate": 0.0001871571947284879, + "loss": 1.0886, + "step": 1367 + }, + { + "epoch": 1.63, + "learning_rate": 0.00018713867624203621, + "loss": 1.6633, + "step": 1368 + }, + { + "epoch": 1.63, + "learning_rate": 0.00018712014533151008, + "loss": 1.8895, + "step": 1369 + }, + { + "epoch": 1.63, + "learning_rate": 0.00018710160199955156, + "loss": 1.4178, + "step": 1370 + }, + { + "epoch": 1.63, + "learning_rate": 0.00018708304624880456, + "loss": 1.6814, + "step": 1371 + }, + { + "epoch": 1.63, + "learning_rate": 0.0001870644780819147, + "loss": 1.8671, + "step": 1372 + }, + { + "epoch": 1.63, + "learning_rate": 0.00018704589750152944, + "loss": 1.4786, + "step": 1373 + }, + { + "epoch": 1.63, + "learning_rate": 0.00018702730451029796, + "loss": 1.8622, + "step": 1374 + }, + { + "epoch": 1.64, + "learning_rate": 0.00018700869911087115, + "loss": 1.8891, + "step": 1375 + }, + { + "epoch": 1.64, + "learning_rate": 0.0001869900813059018, + "loss": 2.0493, + "step": 1376 + }, + { + "epoch": 1.64, + "learning_rate": 0.00018697145109804436, + "loss": 1.7238, + "step": 1377 + }, + { + "epoch": 1.64, + "learning_rate": 0.00018695280848995513, + "loss": 1.7826, + "step": 1378 + }, + { + "epoch": 1.64, + "learning_rate": 0.0001869341534842921, + "loss": 1.8557, + "step": 1379 + }, + { + "epoch": 1.64, + "learning_rate": 0.0001869154860837151, + "loss": 1.7492, + "step": 1380 + }, + { + "epoch": 1.64, + "learning_rate": 0.0001868968062908857, + "loss": 1.7441, + "step": 1381 + }, + { + "epoch": 1.64, + "learning_rate": 0.0001868781141084672, + "loss": 1.8322, + "step": 1382 + }, + { + "epoch": 1.64, + "learning_rate": 0.0001868594095391247, + "loss": 1.8177, + "step": 1383 + }, + { + "epoch": 1.65, + "learning_rate": 0.00018684069258552508, + "loss": 2.0001, + "step": 1384 + }, + { + "epoch": 1.65, + "learning_rate": 0.00018682196325033696, + "loss": 1.5046, + "step": 1385 + }, + { + "epoch": 1.65, + "learning_rate": 0.00018680322153623075, + "loss": 1.6789, + "step": 1386 + }, + { + "epoch": 1.65, + "learning_rate": 0.0001867844674458786, + "loss": 1.6951, + "step": 1387 + }, + { + "epoch": 1.65, + "learning_rate": 0.00018676570098195443, + "loss": 2.0334, + "step": 1388 + }, + { + "epoch": 1.65, + "learning_rate": 0.00018674692214713388, + "loss": 1.7833, + "step": 1389 + }, + { + "epoch": 1.65, + "learning_rate": 0.0001867281309440945, + "loss": 1.82, + "step": 1390 + }, + { + "epoch": 1.65, + "learning_rate": 0.00018670932737551547, + "loss": 1.8155, + "step": 1391 + }, + { + "epoch": 1.66, + "learning_rate": 0.00018669051144407775, + "loss": 1.7912, + "step": 1392 + }, + { + "epoch": 1.66, + "learning_rate": 0.00018667168315246406, + "loss": 1.5816, + "step": 1393 + }, + { + "epoch": 1.66, + "learning_rate": 0.00018665284250335895, + "loss": 1.7521, + "step": 1394 + }, + { + "epoch": 1.66, + "learning_rate": 0.00018663398949944865, + "loss": 1.4287, + "step": 1395 + }, + { + "epoch": 1.66, + "learning_rate": 0.00018661512414342127, + "loss": 1.6026, + "step": 1396 + }, + { + "epoch": 1.66, + "learning_rate": 0.00018659624643796647, + "loss": 1.6953, + "step": 1397 + }, + { + "epoch": 1.66, + "learning_rate": 0.00018657735638577587, + "loss": 1.8515, + "step": 1398 + }, + { + "epoch": 1.66, + "learning_rate": 0.00018655845398954276, + "loss": 2.0384, + "step": 1399 + }, + { + "epoch": 1.67, + "learning_rate": 0.00018653953925196225, + "loss": 1.5458, + "step": 1400 + }, + { + "epoch": 1.67, + "learning_rate": 0.00018652061217573114, + "loss": 1.7166, + "step": 1401 + }, + { + "epoch": 1.67, + "learning_rate": 0.000186501672763548, + "loss": 1.5653, + "step": 1402 + }, + { + "epoch": 1.67, + "learning_rate": 0.00018648272101811318, + "loss": 2.0928, + "step": 1403 + }, + { + "epoch": 1.67, + "learning_rate": 0.00018646375694212884, + "loss": 1.605, + "step": 1404 + }, + { + "epoch": 1.67, + "learning_rate": 0.00018644478053829878, + "loss": 1.4734, + "step": 1405 + }, + { + "epoch": 1.67, + "learning_rate": 0.00018642579180932865, + "loss": 2.0578, + "step": 1406 + }, + { + "epoch": 1.67, + "learning_rate": 0.00018640679075792582, + "loss": 1.9823, + "step": 1407 + }, + { + "epoch": 1.67, + "learning_rate": 0.00018638777738679943, + "loss": 2.0551, + "step": 1408 + }, + { + "epoch": 1.68, + "learning_rate": 0.00018636875169866036, + "loss": 1.6315, + "step": 1409 + }, + { + "epoch": 1.68, + "learning_rate": 0.0001863497136962213, + "loss": 1.8965, + "step": 1410 + }, + { + "epoch": 1.68, + "learning_rate": 0.0001863306633821966, + "loss": 1.3584, + "step": 1411 + }, + { + "epoch": 1.68, + "learning_rate": 0.00018631160075930245, + "loss": 1.9673, + "step": 1412 + }, + { + "epoch": 1.68, + "learning_rate": 0.00018629252583025676, + "loss": 1.5277, + "step": 1413 + }, + { + "epoch": 1.68, + "learning_rate": 0.0001862734385977792, + "loss": 1.6788, + "step": 1414 + }, + { + "epoch": 1.68, + "learning_rate": 0.00018625433906459116, + "loss": 1.432, + "step": 1415 + }, + { + "epoch": 1.68, + "learning_rate": 0.00018623522723341588, + "loss": 1.8102, + "step": 1416 + }, + { + "epoch": 1.69, + "learning_rate": 0.00018621610310697823, + "loss": 1.6713, + "step": 1417 + }, + { + "epoch": 1.69, + "learning_rate": 0.00018619696668800492, + "loss": 1.6989, + "step": 1418 + }, + { + "epoch": 1.69, + "learning_rate": 0.0001861778179792244, + "loss": 1.7645, + "step": 1419 + }, + { + "epoch": 1.69, + "learning_rate": 0.00018615865698336684, + "loss": 1.594, + "step": 1420 + }, + { + "epoch": 1.69, + "learning_rate": 0.00018613948370316415, + "loss": 1.8751, + "step": 1421 + }, + { + "epoch": 1.69, + "learning_rate": 0.00018612029814135014, + "loss": 1.64, + "step": 1422 + }, + { + "epoch": 1.69, + "learning_rate": 0.00018610110030066007, + "loss": 1.5066, + "step": 1423 + }, + { + "epoch": 1.69, + "learning_rate": 0.0001860818901838313, + "loss": 1.9817, + "step": 1424 + }, + { + "epoch": 1.7, + "learning_rate": 0.00018606266779360266, + "loss": 2.056, + "step": 1425 + }, + { + "epoch": 1.7, + "learning_rate": 0.0001860434331327149, + "loss": 1.6997, + "step": 1426 + }, + { + "epoch": 1.7, + "learning_rate": 0.00018602418620391044, + "loss": 1.5573, + "step": 1427 + }, + { + "epoch": 1.7, + "learning_rate": 0.0001860049270099335, + "loss": 1.8427, + "step": 1428 + }, + { + "epoch": 1.7, + "learning_rate": 0.00018598565555353, + "loss": 2.012, + "step": 1429 + }, + { + "epoch": 1.7, + "learning_rate": 0.00018596637183744763, + "loss": 1.7976, + "step": 1430 + }, + { + "epoch": 1.7, + "learning_rate": 0.00018594707586443585, + "loss": 1.4, + "step": 1431 + }, + { + "epoch": 1.7, + "learning_rate": 0.0001859277676372458, + "loss": 1.8717, + "step": 1432 + }, + { + "epoch": 1.7, + "learning_rate": 0.00018590844715863045, + "loss": 1.4311, + "step": 1433 + }, + { + "epoch": 1.71, + "learning_rate": 0.00018588911443134448, + "loss": 1.5903, + "step": 1434 + }, + { + "epoch": 1.71, + "learning_rate": 0.00018586976945814425, + "loss": 2.0898, + "step": 1435 + }, + { + "epoch": 1.71, + "learning_rate": 0.00018585041224178803, + "loss": 1.5302, + "step": 1436 + }, + { + "epoch": 1.71, + "learning_rate": 0.00018583104278503568, + "loss": 1.9582, + "step": 1437 + }, + { + "epoch": 1.71, + "learning_rate": 0.00018581166109064886, + "loss": 1.5264, + "step": 1438 + }, + { + "epoch": 1.71, + "learning_rate": 0.00018579226716139096, + "loss": 1.6551, + "step": 1439 + }, + { + "epoch": 1.71, + "learning_rate": 0.00018577286100002723, + "loss": 1.7774, + "step": 1440 + }, + { + "epoch": 1.71, + "learning_rate": 0.00018575344260932444, + "loss": 1.8316, + "step": 1441 + }, + { + "epoch": 1.72, + "learning_rate": 0.0001857340119920513, + "loss": 1.3916, + "step": 1442 + }, + { + "epoch": 1.72, + "learning_rate": 0.00018571456915097818, + "loss": 1.6728, + "step": 1443 + }, + { + "epoch": 1.72, + "learning_rate": 0.0001856951140888772, + "loss": 1.7247, + "step": 1444 + }, + { + "epoch": 1.72, + "learning_rate": 0.00018567564680852224, + "loss": 1.4539, + "step": 1445 + }, + { + "epoch": 1.72, + "learning_rate": 0.00018565616731268888, + "loss": 1.613, + "step": 1446 + }, + { + "epoch": 1.72, + "learning_rate": 0.0001856366756041545, + "loss": 1.757, + "step": 1447 + }, + { + "epoch": 1.72, + "learning_rate": 0.00018561717168569816, + "loss": 1.6903, + "step": 1448 + }, + { + "epoch": 1.72, + "learning_rate": 0.00018559765556010072, + "loss": 1.7322, + "step": 1449 + }, + { + "epoch": 1.73, + "learning_rate": 0.00018557812723014476, + "loss": 1.5627, + "step": 1450 + }, + { + "epoch": 1.73, + "learning_rate": 0.00018555858669861458, + "loss": 1.8751, + "step": 1451 + }, + { + "epoch": 1.73, + "learning_rate": 0.00018553903396829625, + "loss": 1.2721, + "step": 1452 + }, + { + "epoch": 1.73, + "learning_rate": 0.00018551946904197752, + "loss": 1.8167, + "step": 1453 + }, + { + "epoch": 1.73, + "learning_rate": 0.00018549989192244797, + "loss": 1.6602, + "step": 1454 + }, + { + "epoch": 1.73, + "learning_rate": 0.00018548030261249885, + "loss": 1.9053, + "step": 1455 + }, + { + "epoch": 1.73, + "learning_rate": 0.00018546070111492315, + "loss": 1.7721, + "step": 1456 + }, + { + "epoch": 1.73, + "learning_rate": 0.00018544108743251566, + "loss": 2.1421, + "step": 1457 + }, + { + "epoch": 1.73, + "learning_rate": 0.00018542146156807284, + "loss": 1.5076, + "step": 1458 + }, + { + "epoch": 1.74, + "learning_rate": 0.00018540182352439288, + "loss": 1.9039, + "step": 1459 + }, + { + "epoch": 1.74, + "learning_rate": 0.00018538217330427582, + "loss": 1.9777, + "step": 1460 + }, + { + "epoch": 1.74, + "learning_rate": 0.00018536251091052323, + "loss": 1.5702, + "step": 1461 + }, + { + "epoch": 1.74, + "learning_rate": 0.00018534283634593862, + "loss": 1.851, + "step": 1462 + }, + { + "epoch": 1.74, + "learning_rate": 0.00018532314961332717, + "loss": 1.5337, + "step": 1463 + }, + { + "epoch": 1.74, + "eval_loss": 2.068387508392334, + "eval_runtime": 283.4638, + "eval_samples_per_second": 0.727, + "eval_steps_per_second": 0.727, + "step": 1463 + }, + { + "epoch": 1.74, + "learning_rate": 0.00018530345071549574, + "loss": 1.7553, + "step": 1464 + }, + { + "epoch": 1.74, + "learning_rate": 0.00018528373965525296, + "loss": 1.4175, + "step": 1465 + }, + { + "epoch": 1.74, + "learning_rate": 0.00018526401643540922, + "loss": 1.7216, + "step": 1466 + }, + { + "epoch": 1.75, + "learning_rate": 0.00018524428105877664, + "loss": 1.6415, + "step": 1467 + }, + { + "epoch": 1.75, + "learning_rate": 0.00018522453352816896, + "loss": 1.7284, + "step": 1468 + }, + { + "epoch": 1.75, + "learning_rate": 0.00018520477384640187, + "loss": 1.8314, + "step": 1469 + }, + { + "epoch": 1.75, + "learning_rate": 0.00018518500201629258, + "loss": 1.8341, + "step": 1470 + }, + { + "epoch": 1.75, + "learning_rate": 0.00018516521804066015, + "loss": 1.4129, + "step": 1471 + }, + { + "epoch": 1.75, + "learning_rate": 0.00018514542192232537, + "loss": 1.4671, + "step": 1472 + }, + { + "epoch": 1.75, + "learning_rate": 0.00018512561366411067, + "loss": 1.6665, + "step": 1473 + }, + { + "epoch": 1.75, + "learning_rate": 0.00018510579326884034, + "loss": 1.5722, + "step": 1474 + }, + { + "epoch": 1.76, + "learning_rate": 0.0001850859607393403, + "loss": 1.9348, + "step": 1475 + }, + { + "epoch": 1.76, + "learning_rate": 0.0001850661160784383, + "loss": 1.5404, + "step": 1476 + }, + { + "epoch": 1.76, + "learning_rate": 0.00018504625928896363, + "loss": 1.4769, + "step": 1477 + }, + { + "epoch": 1.76, + "learning_rate": 0.00018502639037374757, + "loss": 1.4149, + "step": 1478 + }, + { + "epoch": 1.76, + "learning_rate": 0.0001850065093356229, + "loss": 1.958, + "step": 1479 + }, + { + "epoch": 1.76, + "learning_rate": 0.00018498661617742426, + "loss": 1.8319, + "step": 1480 + }, + { + "epoch": 1.76, + "learning_rate": 0.00018496671090198797, + "loss": 1.5948, + "step": 1481 + }, + { + "epoch": 1.76, + "learning_rate": 0.0001849467935121521, + "loss": 1.8469, + "step": 1482 + }, + { + "epoch": 1.76, + "learning_rate": 0.00018492686401075644, + "loss": 1.6798, + "step": 1483 + }, + { + "epoch": 1.77, + "learning_rate": 0.0001849069224006425, + "loss": 1.8197, + "step": 1484 + }, + { + "epoch": 1.77, + "learning_rate": 0.0001848869686846535, + "loss": 1.6613, + "step": 1485 + }, + { + "epoch": 1.77, + "learning_rate": 0.0001848670028656344, + "loss": 1.7322, + "step": 1486 + }, + { + "epoch": 1.77, + "learning_rate": 0.00018484702494643188, + "loss": 2.0493, + "step": 1487 + }, + { + "epoch": 1.77, + "learning_rate": 0.00018482703492989444, + "loss": 1.7182, + "step": 1488 + }, + { + "epoch": 1.77, + "learning_rate": 0.00018480703281887215, + "loss": 1.689, + "step": 1489 + }, + { + "epoch": 1.77, + "learning_rate": 0.00018478701861621686, + "loss": 1.9477, + "step": 1490 + }, + { + "epoch": 1.77, + "learning_rate": 0.0001847669923247822, + "loss": 1.8171, + "step": 1491 + }, + { + "epoch": 1.78, + "learning_rate": 0.00018474695394742345, + "loss": 1.7337, + "step": 1492 + }, + { + "epoch": 1.78, + "learning_rate": 0.0001847269034869977, + "loss": 1.6983, + "step": 1493 + }, + { + "epoch": 1.78, + "learning_rate": 0.0001847068409463636, + "loss": 1.6445, + "step": 1494 + }, + { + "epoch": 1.78, + "learning_rate": 0.0001846867663283818, + "loss": 1.9965, + "step": 1495 + }, + { + "epoch": 1.78, + "learning_rate": 0.0001846666796359143, + "loss": 1.6775, + "step": 1496 + }, + { + "epoch": 1.78, + "learning_rate": 0.0001846465808718252, + "loss": 1.8117, + "step": 1497 + }, + { + "epoch": 1.78, + "learning_rate": 0.00018462647003898006, + "loss": 1.8803, + "step": 1498 + }, + { + "epoch": 1.78, + "learning_rate": 0.00018460634714024624, + "loss": 1.3045, + "step": 1499 + }, + { + "epoch": 1.79, + "learning_rate": 0.00018458621217849286, + "loss": 1.7768, + "step": 1500 + }, + { + "epoch": 1.79, + "learning_rate": 0.00018456606515659073, + "loss": 2.0641, + "step": 1501 + }, + { + "epoch": 1.79, + "learning_rate": 0.0001845459060774123, + "loss": 1.3804, + "step": 1502 + }, + { + "epoch": 1.79, + "learning_rate": 0.00018452573494383192, + "loss": 1.6271, + "step": 1503 + }, + { + "epoch": 1.79, + "learning_rate": 0.00018450555175872547, + "loss": 1.8525, + "step": 1504 + }, + { + "epoch": 1.79, + "learning_rate": 0.00018448535652497073, + "loss": 1.5303, + "step": 1505 + }, + { + "epoch": 1.79, + "learning_rate": 0.000184465149245447, + "loss": 2.0368, + "step": 1506 + }, + { + "epoch": 1.79, + "learning_rate": 0.00018444492992303544, + "loss": 1.9951, + "step": 1507 + }, + { + "epoch": 1.79, + "learning_rate": 0.0001844246985606189, + "loss": 1.8715, + "step": 1508 + }, + { + "epoch": 1.8, + "learning_rate": 0.00018440445516108186, + "loss": 1.7373, + "step": 1509 + }, + { + "epoch": 1.8, + "learning_rate": 0.00018438419972731067, + "loss": 1.7667, + "step": 1510 + }, + { + "epoch": 1.8, + "learning_rate": 0.00018436393226219327, + "loss": 1.5134, + "step": 1511 + }, + { + "epoch": 1.8, + "learning_rate": 0.00018434365276861938, + "loss": 1.3891, + "step": 1512 + }, + { + "epoch": 1.8, + "learning_rate": 0.0001843233612494804, + "loss": 1.7066, + "step": 1513 + }, + { + "epoch": 1.8, + "learning_rate": 0.00018430305770766948, + "loss": 1.6366, + "step": 1514 + }, + { + "epoch": 1.8, + "learning_rate": 0.0001842827421460814, + "loss": 1.7838, + "step": 1515 + }, + { + "epoch": 1.8, + "learning_rate": 0.0001842624145676128, + "loss": 1.7884, + "step": 1516 + }, + { + "epoch": 1.81, + "learning_rate": 0.0001842420749751619, + "loss": 1.8428, + "step": 1517 + }, + { + "epoch": 1.81, + "learning_rate": 0.00018422172337162867, + "loss": 1.4987, + "step": 1518 + }, + { + "epoch": 1.81, + "learning_rate": 0.00018420135975991483, + "loss": 1.7576, + "step": 1519 + }, + { + "epoch": 1.81, + "learning_rate": 0.0001841809841429238, + "loss": 1.8522, + "step": 1520 + }, + { + "epoch": 1.81, + "learning_rate": 0.00018416059652356066, + "loss": 1.9308, + "step": 1521 + }, + { + "epoch": 1.81, + "learning_rate": 0.00018414019690473227, + "loss": 1.4658, + "step": 1522 + }, + { + "epoch": 1.81, + "learning_rate": 0.00018411978528934717, + "loss": 1.7072, + "step": 1523 + }, + { + "epoch": 1.81, + "learning_rate": 0.0001840993616803156, + "loss": 1.736, + "step": 1524 + }, + { + "epoch": 1.82, + "learning_rate": 0.0001840789260805495, + "loss": 1.7712, + "step": 1525 + }, + { + "epoch": 1.82, + "learning_rate": 0.0001840584784929626, + "loss": 1.2231, + "step": 1526 + }, + { + "epoch": 1.82, + "learning_rate": 0.00018403801892047023, + "loss": 1.8421, + "step": 1527 + }, + { + "epoch": 1.82, + "learning_rate": 0.00018401754736598947, + "loss": 1.2689, + "step": 1528 + }, + { + "epoch": 1.82, + "learning_rate": 0.00018399706383243918, + "loss": 1.8062, + "step": 1529 + }, + { + "epoch": 1.82, + "learning_rate": 0.0001839765683227398, + "loss": 1.6846, + "step": 1530 + }, + { + "epoch": 1.82, + "learning_rate": 0.0001839560608398136, + "loss": 1.8201, + "step": 1531 + }, + { + "epoch": 1.82, + "learning_rate": 0.00018393554138658441, + "loss": 1.6958, + "step": 1532 + }, + { + "epoch": 1.82, + "learning_rate": 0.00018391500996597796, + "loss": 1.8487, + "step": 1533 + }, + { + "epoch": 1.83, + "learning_rate": 0.0001838944665809215, + "loss": 1.9788, + "step": 1534 + }, + { + "epoch": 1.83, + "learning_rate": 0.00018387391123434412, + "loss": 1.6002, + "step": 1535 + }, + { + "epoch": 1.83, + "learning_rate": 0.00018385334392917658, + "loss": 1.3859, + "step": 1536 + }, + { + "epoch": 1.83, + "learning_rate": 0.00018383276466835127, + "loss": 2.0743, + "step": 1537 + }, + { + "epoch": 1.83, + "learning_rate": 0.00018381217345480235, + "loss": 1.8357, + "step": 1538 + }, + { + "epoch": 1.83, + "learning_rate": 0.00018379157029146573, + "loss": 1.7002, + "step": 1539 + }, + { + "epoch": 1.83, + "learning_rate": 0.00018377095518127897, + "loss": 1.3058, + "step": 1540 + }, + { + "epoch": 1.83, + "learning_rate": 0.00018375032812718124, + "loss": 1.8745, + "step": 1541 + }, + { + "epoch": 1.84, + "learning_rate": 0.00018372968913211364, + "loss": 1.7847, + "step": 1542 + }, + { + "epoch": 1.84, + "learning_rate": 0.00018370903819901874, + "loss": 1.8156, + "step": 1543 + }, + { + "epoch": 1.84, + "learning_rate": 0.00018368837533084095, + "loss": 2.0152, + "step": 1544 + }, + { + "epoch": 1.84, + "learning_rate": 0.00018366770053052634, + "loss": 1.5656, + "step": 1545 + }, + { + "epoch": 1.84, + "learning_rate": 0.00018364701380102266, + "loss": 1.5753, + "step": 1546 + }, + { + "epoch": 1.84, + "learning_rate": 0.00018362631514527947, + "loss": 1.3938, + "step": 1547 + }, + { + "epoch": 1.84, + "learning_rate": 0.00018360560456624788, + "loss": 1.9599, + "step": 1548 + }, + { + "epoch": 1.84, + "learning_rate": 0.00018358488206688075, + "loss": 1.8641, + "step": 1549 + }, + { + "epoch": 1.85, + "learning_rate": 0.00018356414765013267, + "loss": 1.8428, + "step": 1550 + }, + { + "epoch": 1.85, + "learning_rate": 0.00018354340131895998, + "loss": 1.6016, + "step": 1551 + }, + { + "epoch": 1.85, + "learning_rate": 0.00018352264307632056, + "loss": 1.5768, + "step": 1552 + }, + { + "epoch": 1.85, + "learning_rate": 0.00018350187292517415, + "loss": 1.5369, + "step": 1553 + }, + { + "epoch": 1.85, + "learning_rate": 0.0001834810908684821, + "loss": 1.9717, + "step": 1554 + }, + { + "epoch": 1.85, + "learning_rate": 0.00018346029690920746, + "loss": 1.943, + "step": 1555 + }, + { + "epoch": 1.85, + "learning_rate": 0.00018343949105031505, + "loss": 1.8166, + "step": 1556 + }, + { + "epoch": 1.85, + "learning_rate": 0.00018341867329477125, + "loss": 1.7149, + "step": 1557 + }, + { + "epoch": 1.85, + "learning_rate": 0.00018339784364554426, + "loss": 1.4657, + "step": 1558 + }, + { + "epoch": 1.86, + "learning_rate": 0.00018337700210560397, + "loss": 1.8693, + "step": 1559 + }, + { + "epoch": 1.86, + "learning_rate": 0.00018335614867792183, + "loss": 1.7656, + "step": 1560 + }, + { + "epoch": 1.86, + "learning_rate": 0.0001833352833654712, + "loss": 1.5123, + "step": 1561 + }, + { + "epoch": 1.86, + "learning_rate": 0.00018331440617122696, + "loss": 1.7884, + "step": 1562 + }, + { + "epoch": 1.86, + "learning_rate": 0.0001832935170981657, + "loss": 1.7309, + "step": 1563 + }, + { + "epoch": 1.86, + "learning_rate": 0.00018327261614926583, + "loss": 1.9628, + "step": 1564 + }, + { + "epoch": 1.86, + "learning_rate": 0.00018325170332750732, + "loss": 1.6409, + "step": 1565 + }, + { + "epoch": 1.86, + "learning_rate": 0.0001832307786358719, + "loss": 1.6093, + "step": 1566 + }, + { + "epoch": 1.87, + "learning_rate": 0.00018320984207734298, + "loss": 1.6111, + "step": 1567 + }, + { + "epoch": 1.87, + "learning_rate": 0.00018318889365490565, + "loss": 2.0085, + "step": 1568 + }, + { + "epoch": 1.87, + "learning_rate": 0.00018316793337154664, + "loss": 2.079, + "step": 1569 + }, + { + "epoch": 1.87, + "learning_rate": 0.00018314696123025454, + "loss": 1.5466, + "step": 1570 + }, + { + "epoch": 1.87, + "learning_rate": 0.00018312597723401942, + "loss": 2.0825, + "step": 1571 + }, + { + "epoch": 1.87, + "learning_rate": 0.0001831049813858332, + "loss": 1.9748, + "step": 1572 + }, + { + "epoch": 1.87, + "learning_rate": 0.00018308397368868945, + "loss": 1.6529, + "step": 1573 + }, + { + "epoch": 1.87, + "learning_rate": 0.00018306295414558335, + "loss": 1.7119, + "step": 1574 + }, + { + "epoch": 1.88, + "learning_rate": 0.00018304192275951184, + "loss": 1.8812, + "step": 1575 + }, + { + "epoch": 1.88, + "learning_rate": 0.00018302087953347352, + "loss": 1.8676, + "step": 1576 + }, + { + "epoch": 1.88, + "learning_rate": 0.00018299982447046877, + "loss": 1.879, + "step": 1577 + }, + { + "epoch": 1.88, + "learning_rate": 0.00018297875757349952, + "loss": 1.6282, + "step": 1578 + }, + { + "epoch": 1.88, + "learning_rate": 0.00018295767884556947, + "loss": 1.735, + "step": 1579 + }, + { + "epoch": 1.88, + "learning_rate": 0.00018293658828968397, + "loss": 1.5796, + "step": 1580 + }, + { + "epoch": 1.88, + "learning_rate": 0.00018291548590885007, + "loss": 1.8258, + "step": 1581 + }, + { + "epoch": 1.88, + "learning_rate": 0.00018289437170607658, + "loss": 1.7531, + "step": 1582 + }, + { + "epoch": 1.88, + "learning_rate": 0.00018287324568437381, + "loss": 1.6265, + "step": 1583 + }, + { + "epoch": 1.89, + "learning_rate": 0.00018285210784675394, + "loss": 1.7997, + "step": 1584 + }, + { + "epoch": 1.89, + "learning_rate": 0.00018283095819623078, + "loss": 1.955, + "step": 1585 + }, + { + "epoch": 1.89, + "learning_rate": 0.00018280979673581977, + "loss": 1.6542, + "step": 1586 + }, + { + "epoch": 1.89, + "learning_rate": 0.00018278862346853808, + "loss": 1.7634, + "step": 1587 + }, + { + "epoch": 1.89, + "learning_rate": 0.00018276743839740458, + "loss": 2.0077, + "step": 1588 + }, + { + "epoch": 1.89, + "learning_rate": 0.00018274624152543977, + "loss": 2.0254, + "step": 1589 + }, + { + "epoch": 1.89, + "learning_rate": 0.00018272503285566587, + "loss": 1.4464, + "step": 1590 + }, + { + "epoch": 1.89, + "learning_rate": 0.00018270381239110677, + "loss": 1.8643, + "step": 1591 + }, + { + "epoch": 1.9, + "learning_rate": 0.00018268258013478804, + "loss": 1.3278, + "step": 1592 + }, + { + "epoch": 1.9, + "learning_rate": 0.00018266133608973696, + "loss": 1.744, + "step": 1593 + }, + { + "epoch": 1.9, + "learning_rate": 0.00018264008025898248, + "loss": 1.5079, + "step": 1594 + }, + { + "epoch": 1.9, + "learning_rate": 0.00018261881264555516, + "loss": 1.9655, + "step": 1595 + }, + { + "epoch": 1.9, + "learning_rate": 0.0001825975332524873, + "loss": 2.0557, + "step": 1596 + }, + { + "epoch": 1.9, + "learning_rate": 0.000182576242082813, + "loss": 1.7174, + "step": 1597 + }, + { + "epoch": 1.9, + "learning_rate": 0.00018255493913956774, + "loss": 1.449, + "step": 1598 + }, + { + "epoch": 1.9, + "learning_rate": 0.00018253362442578896, + "loss": 1.9058, + "step": 1599 + }, + { + "epoch": 1.91, + "learning_rate": 0.00018251229794451567, + "loss": 1.3482, + "step": 1600 + }, + { + "epoch": 1.91, + "learning_rate": 0.00018249095969878853, + "loss": 1.7906, + "step": 1601 + }, + { + "epoch": 1.91, + "learning_rate": 0.00018246960969164994, + "loss": 1.6177, + "step": 1602 + }, + { + "epoch": 1.91, + "learning_rate": 0.00018244824792614393, + "loss": 1.5786, + "step": 1603 + }, + { + "epoch": 1.91, + "learning_rate": 0.00018242687440531618, + "loss": 1.6451, + "step": 1604 + }, + { + "epoch": 1.91, + "learning_rate": 0.00018240548913221416, + "loss": 1.3695, + "step": 1605 + }, + { + "epoch": 1.91, + "learning_rate": 0.0001823840921098869, + "loss": 1.6648, + "step": 1606 + }, + { + "epoch": 1.91, + "learning_rate": 0.00018236268334138515, + "loss": 2.1548, + "step": 1607 + }, + { + "epoch": 1.91, + "learning_rate": 0.00018234126282976133, + "loss": 1.6153, + "step": 1608 + }, + { + "epoch": 1.92, + "learning_rate": 0.0001823198305780696, + "loss": 1.741, + "step": 1609 + }, + { + "epoch": 1.92, + "learning_rate": 0.00018229838658936564, + "loss": 1.7827, + "step": 1610 + }, + { + "epoch": 1.92, + "learning_rate": 0.00018227693086670697, + "loss": 1.7343, + "step": 1611 + }, + { + "epoch": 1.92, + "learning_rate": 0.00018225546341315261, + "loss": 1.8149, + "step": 1612 + }, + { + "epoch": 1.92, + "learning_rate": 0.0001822339842317635, + "loss": 1.5497, + "step": 1613 + }, + { + "epoch": 1.92, + "learning_rate": 0.00018221249332560198, + "loss": 1.7659, + "step": 1614 + }, + { + "epoch": 1.92, + "learning_rate": 0.0001821909906977322, + "loss": 1.8992, + "step": 1615 + }, + { + "epoch": 1.92, + "learning_rate": 0.00018216947635122, + "loss": 1.8682, + "step": 1616 + }, + { + "epoch": 1.93, + "learning_rate": 0.00018214795028913288, + "loss": 1.9774, + "step": 1617 + }, + { + "epoch": 1.93, + "learning_rate": 0.0001821264125145399, + "loss": 1.9441, + "step": 1618 + }, + { + "epoch": 1.93, + "learning_rate": 0.00018210486303051195, + "loss": 2.0314, + "step": 1619 + }, + { + "epoch": 1.93, + "learning_rate": 0.0001820833018401215, + "loss": 1.8234, + "step": 1620 + }, + { + "epoch": 1.93, + "learning_rate": 0.00018206172894644272, + "loss": 1.9478, + "step": 1621 + }, + { + "epoch": 1.93, + "learning_rate": 0.00018204014435255135, + "loss": 1.7894, + "step": 1622 + }, + { + "epoch": 1.93, + "learning_rate": 0.000182018548061525, + "loss": 1.5469, + "step": 1623 + }, + { + "epoch": 1.93, + "learning_rate": 0.00018199694007644277, + "loss": 1.9419, + "step": 1624 + }, + { + "epoch": 1.94, + "learning_rate": 0.00018197532040038547, + "loss": 1.6686, + "step": 1625 + }, + { + "epoch": 1.94, + "learning_rate": 0.00018195368903643563, + "loss": 2.2525, + "step": 1626 + }, + { + "epoch": 1.94, + "learning_rate": 0.00018193204598767744, + "loss": 1.8076, + "step": 1627 + }, + { + "epoch": 1.94, + "learning_rate": 0.00018191039125719662, + "loss": 1.976, + "step": 1628 + }, + { + "epoch": 1.94, + "learning_rate": 0.00018188872484808076, + "loss": 1.6896, + "step": 1629 + }, + { + "epoch": 1.94, + "learning_rate": 0.00018186704676341898, + "loss": 1.6784, + "step": 1630 + }, + { + "epoch": 1.94, + "learning_rate": 0.00018184535700630213, + "loss": 1.9634, + "step": 1631 + }, + { + "epoch": 1.94, + "learning_rate": 0.00018182365557982264, + "loss": 1.7406, + "step": 1632 + }, + { + "epoch": 1.94, + "learning_rate": 0.00018180194248707473, + "loss": 1.7492, + "step": 1633 + }, + { + "epoch": 1.95, + "learning_rate": 0.00018178021773115414, + "loss": 1.7731, + "step": 1634 + }, + { + "epoch": 1.95, + "learning_rate": 0.00018175848131515837, + "loss": 1.6232, + "step": 1635 + }, + { + "epoch": 1.95, + "learning_rate": 0.0001817367332421866, + "loss": 1.7488, + "step": 1636 + }, + { + "epoch": 1.95, + "learning_rate": 0.0001817149735153396, + "loss": 1.3398, + "step": 1637 + }, + { + "epoch": 1.95, + "learning_rate": 0.00018169320213771983, + "loss": 1.4521, + "step": 1638 + }, + { + "epoch": 1.95, + "learning_rate": 0.00018167141911243145, + "loss": 1.6311, + "step": 1639 + }, + { + "epoch": 1.95, + "learning_rate": 0.00018164962444258014, + "loss": 1.8911, + "step": 1640 + }, + { + "epoch": 1.95, + "learning_rate": 0.00018162781813127346, + "loss": 1.9879, + "step": 1641 + }, + { + "epoch": 1.96, + "learning_rate": 0.0001816060001816205, + "loss": 1.5637, + "step": 1642 + }, + { + "epoch": 1.96, + "learning_rate": 0.00018158417059673196, + "loss": 1.7461, + "step": 1643 + }, + { + "epoch": 1.96, + "learning_rate": 0.0001815623293797203, + "loss": 1.6671, + "step": 1644 + }, + { + "epoch": 1.96, + "learning_rate": 0.0001815404765336996, + "loss": 1.2124, + "step": 1645 + }, + { + "epoch": 1.96, + "learning_rate": 0.0001815186120617856, + "loss": 1.6402, + "step": 1646 + }, + { + "epoch": 1.96, + "learning_rate": 0.0001814967359670957, + "loss": 1.8837, + "step": 1647 + }, + { + "epoch": 1.96, + "learning_rate": 0.00018147484825274893, + "loss": 1.8027, + "step": 1648 + }, + { + "epoch": 1.96, + "learning_rate": 0.00018145294892186605, + "loss": 1.7684, + "step": 1649 + }, + { + "epoch": 1.97, + "learning_rate": 0.0001814310379775694, + "loss": 1.8274, + "step": 1650 + }, + { + "epoch": 1.97, + "learning_rate": 0.000181409115422983, + "loss": 1.8292, + "step": 1651 + }, + { + "epoch": 1.97, + "learning_rate": 0.00018138718126123248, + "loss": 1.3492, + "step": 1652 + }, + { + "epoch": 1.97, + "learning_rate": 0.00018136523549544523, + "loss": 1.509, + "step": 1653 + }, + { + "epoch": 1.97, + "learning_rate": 0.00018134327812875024, + "loss": 1.7415, + "step": 1654 + }, + { + "epoch": 1.97, + "learning_rate": 0.00018132130916427816, + "loss": 1.5223, + "step": 1655 + }, + { + "epoch": 1.97, + "learning_rate": 0.00018129932860516126, + "loss": 1.9294, + "step": 1656 + }, + { + "epoch": 1.97, + "learning_rate": 0.00018127733645453348, + "loss": 2.0716, + "step": 1657 + }, + { + "epoch": 1.97, + "learning_rate": 0.00018125533271553043, + "loss": 1.57, + "step": 1658 + }, + { + "epoch": 1.98, + "learning_rate": 0.00018123331739128938, + "loss": 2.2132, + "step": 1659 + }, + { + "epoch": 1.98, + "learning_rate": 0.00018121129048494922, + "loss": 1.9006, + "step": 1660 + }, + { + "epoch": 1.98, + "learning_rate": 0.00018118925199965048, + "loss": 1.9319, + "step": 1661 + }, + { + "epoch": 1.98, + "learning_rate": 0.00018116720193853543, + "loss": 1.8103, + "step": 1662 + }, + { + "epoch": 1.98, + "learning_rate": 0.00018114514030474787, + "loss": 1.7028, + "step": 1663 + }, + { + "epoch": 1.98, + "learning_rate": 0.00018112306710143334, + "loss": 1.802, + "step": 1664 + }, + { + "epoch": 1.98, + "learning_rate": 0.000181100982331739, + "loss": 1.6835, + "step": 1665 + }, + { + "epoch": 1.98, + "learning_rate": 0.0001810788859988136, + "loss": 1.7223, + "step": 1666 + }, + { + "epoch": 1.99, + "learning_rate": 0.0001810567781058077, + "loss": 1.5829, + "step": 1667 + }, + { + "epoch": 1.99, + "learning_rate": 0.00018103465865587333, + "loss": 1.9863, + "step": 1668 + }, + { + "epoch": 1.99, + "learning_rate": 0.0001810125276521642, + "loss": 1.6398, + "step": 1669 + }, + { + "epoch": 1.99, + "learning_rate": 0.00018099038509783582, + "loss": 1.9261, + "step": 1670 + }, + { + "epoch": 1.99, + "learning_rate": 0.00018096823099604517, + "loss": 1.8882, + "step": 1671 + }, + { + "epoch": 1.99, + "learning_rate": 0.00018094606534995093, + "loss": 1.6716, + "step": 1672 + }, + { + "epoch": 1.99, + "eval_loss": 2.075261354446411, + "eval_runtime": 283.9438, + "eval_samples_per_second": 0.725, + "eval_steps_per_second": 0.725, + "step": 1672 + }, + { + "epoch": 1.99, + "learning_rate": 0.00018092388816271345, + "loss": 1.6688, + "step": 1673 + }, + { + "epoch": 1.99, + "learning_rate": 0.00018090169943749476, + "loss": 1.9127, + "step": 1674 + }, + { + "epoch": 2.0, + "learning_rate": 0.0001808794991774584, + "loss": 1.7214, + "step": 1675 + }, + { + "epoch": 2.0, + "learning_rate": 0.00018085728738576973, + "loss": 1.785, + "step": 1676 + }, + { + "epoch": 2.0, + "learning_rate": 0.00018083506406559561, + "loss": 1.5287, + "step": 1677 + }, + { + "epoch": 2.0, + "learning_rate": 0.00018081282922010464, + "loss": 1.9012, + "step": 1678 + }, + { + "epoch": 2.0, + "learning_rate": 0.00018079058285246698, + "loss": 1.3094, + "step": 1679 + }, + { + "epoch": 2.0, + "learning_rate": 0.0001807683249658545, + "loss": 1.818, + "step": 1680 + }, + { + "epoch": 2.0, + "learning_rate": 0.0001807460555634407, + "loss": 1.9389, + "step": 1681 + }, + { + "epoch": 2.0, + "learning_rate": 0.0001807237746484007, + "loss": 1.4334, + "step": 1682 + }, + { + "epoch": 2.0, + "learning_rate": 0.00018070148222391126, + "loss": 1.5422, + "step": 1683 + }, + { + "epoch": 2.01, + "learning_rate": 0.0001806791782931508, + "loss": 1.7899, + "step": 1684 + }, + { + "epoch": 2.01, + "learning_rate": 0.0001806568628592994, + "loss": 1.6106, + "step": 1685 + }, + { + "epoch": 2.01, + "learning_rate": 0.00018063453592553872, + "loss": 1.9807, + "step": 1686 + }, + { + "epoch": 2.01, + "learning_rate": 0.0001806121974950521, + "loss": 1.1762, + "step": 1687 + }, + { + "epoch": 2.01, + "learning_rate": 0.00018058984757102456, + "loss": 1.8338, + "step": 1688 + }, + { + "epoch": 2.01, + "learning_rate": 0.0001805674861566426, + "loss": 1.5556, + "step": 1689 + }, + { + "epoch": 2.0, + "learning_rate": 0.0001805451132550946, + "loss": 0.87, + "step": 1690 + }, + { + "epoch": 2.0, + "learning_rate": 0.00018052272886957038, + "loss": 1.0386, + "step": 1691 + }, + { + "epoch": 2.0, + "learning_rate": 0.0001805003330032615, + "loss": 0.8153, + "step": 1692 + }, + { + "epoch": 2.0, + "learning_rate": 0.00018047792565936102, + "loss": 1.1745, + "step": 1693 + }, + { + "epoch": 2.01, + "learning_rate": 0.00018045550684106388, + "loss": 1.1584, + "step": 1694 + }, + { + "epoch": 2.01, + "learning_rate": 0.00018043307655156644, + "loss": 1.0742, + "step": 1695 + }, + { + "epoch": 2.01, + "learning_rate": 0.00018041063479406675, + "loss": 1.0537, + "step": 1696 + }, + { + "epoch": 2.01, + "learning_rate": 0.0001803881815717646, + "loss": 1.0239, + "step": 1697 + }, + { + "epoch": 2.01, + "learning_rate": 0.0001803657168878612, + "loss": 0.9182, + "step": 1698 + }, + { + "epoch": 2.01, + "learning_rate": 0.00018034324074555965, + "loss": 1.1856, + "step": 1699 + }, + { + "epoch": 2.01, + "learning_rate": 0.00018032075314806448, + "loss": 1.3285, + "step": 1700 + }, + { + "epoch": 2.01, + "learning_rate": 0.00018029825409858198, + "loss": 1.2912, + "step": 1701 + }, + { + "epoch": 2.02, + "learning_rate": 0.00018027574360032, + "loss": 1.3666, + "step": 1702 + }, + { + "epoch": 2.02, + "learning_rate": 0.00018025322165648807, + "loss": 0.9621, + "step": 1703 + }, + { + "epoch": 2.02, + "learning_rate": 0.00018023068827029723, + "loss": 0.8484, + "step": 1704 + }, + { + "epoch": 2.02, + "learning_rate": 0.00018020814344496037, + "loss": 1.2236, + "step": 1705 + }, + { + "epoch": 2.02, + "learning_rate": 0.00018018558718369186, + "loss": 0.8155, + "step": 1706 + }, + { + "epoch": 2.02, + "learning_rate": 0.0001801630194897077, + "loss": 1.2047, + "step": 1707 + }, + { + "epoch": 2.02, + "learning_rate": 0.00018014044036622555, + "loss": 1.0269, + "step": 1708 + }, + { + "epoch": 2.02, + "learning_rate": 0.00018011784981646474, + "loss": 1.0536, + "step": 1709 + }, + { + "epoch": 2.03, + "learning_rate": 0.00018009524784364615, + "loss": 1.0516, + "step": 1710 + }, + { + "epoch": 2.03, + "learning_rate": 0.00018007263445099235, + "loss": 0.9087, + "step": 1711 + }, + { + "epoch": 2.03, + "learning_rate": 0.0001800500096417275, + "loss": 1.3057, + "step": 1712 + }, + { + "epoch": 2.03, + "learning_rate": 0.00018002737341907743, + "loss": 0.8791, + "step": 1713 + }, + { + "epoch": 2.03, + "learning_rate": 0.00018000472578626956, + "loss": 1.1667, + "step": 1714 + }, + { + "epoch": 2.03, + "learning_rate": 0.00017998206674653294, + "loss": 1.1026, + "step": 1715 + }, + { + "epoch": 2.03, + "learning_rate": 0.00017995939630309826, + "loss": 1.3228, + "step": 1716 + }, + { + "epoch": 2.03, + "learning_rate": 0.0001799367144591978, + "loss": 0.9173, + "step": 1717 + }, + { + "epoch": 2.03, + "learning_rate": 0.00017991402121806557, + "loss": 1.0067, + "step": 1718 + }, + { + "epoch": 2.04, + "learning_rate": 0.0001798913165829371, + "loss": 1.0256, + "step": 1719 + }, + { + "epoch": 2.04, + "learning_rate": 0.00017986860055704953, + "loss": 0.7645, + "step": 1720 + }, + { + "epoch": 2.04, + "learning_rate": 0.0001798458731436417, + "loss": 1.0567, + "step": 1721 + }, + { + "epoch": 2.04, + "learning_rate": 0.00017982313434595406, + "loss": 0.7465, + "step": 1722 + }, + { + "epoch": 2.04, + "learning_rate": 0.00017980038416722863, + "loss": 1.3268, + "step": 1723 + }, + { + "epoch": 2.04, + "learning_rate": 0.00017977762261070916, + "loss": 0.9917, + "step": 1724 + }, + { + "epoch": 2.04, + "learning_rate": 0.00017975484967964087, + "loss": 0.8592, + "step": 1725 + }, + { + "epoch": 2.04, + "learning_rate": 0.00017973206537727073, + "loss": 1.43, + "step": 1726 + }, + { + "epoch": 2.05, + "learning_rate": 0.00017970926970684725, + "loss": 1.3679, + "step": 1727 + }, + { + "epoch": 2.05, + "learning_rate": 0.00017968646267162063, + "loss": 1.2959, + "step": 1728 + }, + { + "epoch": 2.05, + "learning_rate": 0.00017966364427484267, + "loss": 1.0674, + "step": 1729 + }, + { + "epoch": 2.05, + "learning_rate": 0.00017964081451976672, + "loss": 1.1153, + "step": 1730 + }, + { + "epoch": 2.05, + "learning_rate": 0.00017961797340964783, + "loss": 1.0586, + "step": 1731 + }, + { + "epoch": 2.05, + "learning_rate": 0.00017959512094774266, + "loss": 1.2388, + "step": 1732 + }, + { + "epoch": 2.05, + "learning_rate": 0.00017957225713730949, + "loss": 1.257, + "step": 1733 + }, + { + "epoch": 2.05, + "learning_rate": 0.0001795493819816081, + "loss": 1.099, + "step": 1734 + }, + { + "epoch": 2.06, + "learning_rate": 0.0001795264954839001, + "loss": 0.9532, + "step": 1735 + }, + { + "epoch": 2.06, + "learning_rate": 0.00017950359764744859, + "loss": 1.2553, + "step": 1736 + }, + { + "epoch": 2.06, + "learning_rate": 0.00017948068847551825, + "loss": 0.9973, + "step": 1737 + }, + { + "epoch": 2.06, + "learning_rate": 0.00017945776797137543, + "loss": 1.0637, + "step": 1738 + }, + { + "epoch": 2.06, + "learning_rate": 0.00017943483613828815, + "loss": 1.1815, + "step": 1739 + }, + { + "epoch": 2.06, + "learning_rate": 0.00017941189297952597, + "loss": 0.8378, + "step": 1740 + }, + { + "epoch": 2.06, + "learning_rate": 0.00017938893849836002, + "loss": 0.9375, + "step": 1741 + }, + { + "epoch": 2.06, + "learning_rate": 0.00017936597269806322, + "loss": 0.9653, + "step": 1742 + }, + { + "epoch": 2.06, + "learning_rate": 0.0001793429955819099, + "loss": 1.221, + "step": 1743 + }, + { + "epoch": 2.07, + "learning_rate": 0.00017932000715317612, + "loss": 1.041, + "step": 1744 + }, + { + "epoch": 2.07, + "learning_rate": 0.00017929700741513955, + "loss": 1.0724, + "step": 1745 + }, + { + "epoch": 2.07, + "learning_rate": 0.00017927399637107945, + "loss": 1.1102, + "step": 1746 + }, + { + "epoch": 2.07, + "learning_rate": 0.00017925097402427667, + "loss": 0.8542, + "step": 1747 + }, + { + "epoch": 2.07, + "learning_rate": 0.0001792279403780137, + "loss": 1.2339, + "step": 1748 + }, + { + "epoch": 2.07, + "learning_rate": 0.00017920489543557465, + "loss": 0.8671, + "step": 1749 + }, + { + "epoch": 2.07, + "learning_rate": 0.0001791818392002452, + "loss": 0.9779, + "step": 1750 + }, + { + "epoch": 2.07, + "learning_rate": 0.0001791587716753127, + "loss": 1.1242, + "step": 1751 + }, + { + "epoch": 2.08, + "learning_rate": 0.00017913569286406603, + "loss": 0.9043, + "step": 1752 + }, + { + "epoch": 2.08, + "learning_rate": 0.0001791126027697958, + "loss": 0.7996, + "step": 1753 + }, + { + "epoch": 2.08, + "learning_rate": 0.00017908950139579406, + "loss": 0.8602, + "step": 1754 + }, + { + "epoch": 2.08, + "learning_rate": 0.00017906638874535462, + "loss": 1.0161, + "step": 1755 + }, + { + "epoch": 2.08, + "learning_rate": 0.00017904326482177284, + "loss": 0.8226, + "step": 1756 + }, + { + "epoch": 2.08, + "learning_rate": 0.00017902012962834566, + "loss": 1.3885, + "step": 1757 + }, + { + "epoch": 2.08, + "learning_rate": 0.0001789969831683717, + "loss": 1.2158, + "step": 1758 + }, + { + "epoch": 2.08, + "learning_rate": 0.00017897382544515108, + "loss": 1.3261, + "step": 1759 + }, + { + "epoch": 2.09, + "learning_rate": 0.00017895065646198567, + "loss": 1.2144, + "step": 1760 + }, + { + "epoch": 2.09, + "learning_rate": 0.00017892747622217875, + "loss": 0.9881, + "step": 1761 + }, + { + "epoch": 2.09, + "learning_rate": 0.0001789042847290354, + "loss": 1.0342, + "step": 1762 + }, + { + "epoch": 2.09, + "learning_rate": 0.00017888108198586217, + "loss": 0.7883, + "step": 1763 + }, + { + "epoch": 2.09, + "learning_rate": 0.00017885786799596732, + "loss": 0.9006, + "step": 1764 + }, + { + "epoch": 2.09, + "learning_rate": 0.00017883464276266064, + "loss": 1.3695, + "step": 1765 + }, + { + "epoch": 2.09, + "learning_rate": 0.0001788114062892535, + "loss": 1.0303, + "step": 1766 + }, + { + "epoch": 2.09, + "learning_rate": 0.00017878815857905897, + "loss": 1.3816, + "step": 1767 + }, + { + "epoch": 2.09, + "learning_rate": 0.0001787648996353916, + "loss": 0.8684, + "step": 1768 + }, + { + "epoch": 2.1, + "learning_rate": 0.00017874162946156772, + "loss": 1.1157, + "step": 1769 + }, + { + "epoch": 2.1, + "learning_rate": 0.00017871834806090501, + "loss": 1.0087, + "step": 1770 + }, + { + "epoch": 2.1, + "learning_rate": 0.000178695055436723, + "loss": 0.7173, + "step": 1771 + }, + { + "epoch": 2.1, + "learning_rate": 0.00017867175159234265, + "loss": 1.4784, + "step": 1772 + }, + { + "epoch": 2.1, + "learning_rate": 0.00017864843653108662, + "loss": 1.1401, + "step": 1773 + }, + { + "epoch": 2.1, + "learning_rate": 0.0001786251102562791, + "loss": 1.0952, + "step": 1774 + }, + { + "epoch": 2.1, + "learning_rate": 0.0001786017727712459, + "loss": 0.9443, + "step": 1775 + }, + { + "epoch": 2.1, + "learning_rate": 0.00017857842407931445, + "loss": 1.0682, + "step": 1776 + }, + { + "epoch": 2.11, + "learning_rate": 0.0001785550641838138, + "loss": 0.9402, + "step": 1777 + }, + { + "epoch": 2.11, + "learning_rate": 0.00017853169308807448, + "loss": 1.0576, + "step": 1778 + }, + { + "epoch": 2.11, + "learning_rate": 0.0001785083107954288, + "loss": 1.1425, + "step": 1779 + }, + { + "epoch": 2.11, + "learning_rate": 0.00017848491730921046, + "loss": 1.1402, + "step": 1780 + }, + { + "epoch": 2.11, + "learning_rate": 0.00017846151263275494, + "loss": 1.4482, + "step": 1781 + }, + { + "epoch": 2.11, + "learning_rate": 0.00017843809676939922, + "loss": 0.7765, + "step": 1782 + }, + { + "epoch": 2.11, + "learning_rate": 0.00017841466972248188, + "loss": 1.1478, + "step": 1783 + }, + { + "epoch": 2.11, + "learning_rate": 0.0001783912314953431, + "loss": 1.1876, + "step": 1784 + }, + { + "epoch": 2.12, + "learning_rate": 0.00017836778209132464, + "loss": 1.2036, + "step": 1785 + }, + { + "epoch": 2.12, + "learning_rate": 0.0001783443215137699, + "loss": 1.0297, + "step": 1786 + }, + { + "epoch": 2.12, + "learning_rate": 0.0001783208497660239, + "loss": 0.8186, + "step": 1787 + }, + { + "epoch": 2.12, + "learning_rate": 0.00017829736685143308, + "loss": 0.7258, + "step": 1788 + }, + { + "epoch": 2.12, + "learning_rate": 0.00017827387277334568, + "loss": 0.8072, + "step": 1789 + }, + { + "epoch": 2.12, + "learning_rate": 0.00017825036753511144, + "loss": 1.0474, + "step": 1790 + }, + { + "epoch": 2.12, + "learning_rate": 0.00017822685114008167, + "loss": 1.2141, + "step": 1791 + }, + { + "epoch": 2.12, + "learning_rate": 0.00017820332359160928, + "loss": 1.1443, + "step": 1792 + }, + { + "epoch": 2.12, + "learning_rate": 0.0001781797848930488, + "loss": 0.9864, + "step": 1793 + }, + { + "epoch": 2.13, + "learning_rate": 0.00017815623504775636, + "loss": 1.2998, + "step": 1794 + }, + { + "epoch": 2.13, + "learning_rate": 0.0001781326740590896, + "loss": 1.0672, + "step": 1795 + }, + { + "epoch": 2.13, + "learning_rate": 0.00017810910193040785, + "loss": 0.9152, + "step": 1796 + }, + { + "epoch": 2.13, + "learning_rate": 0.000178085518665072, + "loss": 1.2555, + "step": 1797 + }, + { + "epoch": 2.13, + "learning_rate": 0.00017806192426644444, + "loss": 1.2085, + "step": 1798 + }, + { + "epoch": 2.13, + "learning_rate": 0.00017803831873788926, + "loss": 1.6205, + "step": 1799 + }, + { + "epoch": 2.13, + "learning_rate": 0.0001780147020827721, + "loss": 1.3382, + "step": 1800 + }, + { + "epoch": 2.13, + "learning_rate": 0.00017799107430446016, + "loss": 1.3309, + "step": 1801 + }, + { + "epoch": 2.14, + "learning_rate": 0.00017796743540632223, + "loss": 1.2556, + "step": 1802 + }, + { + "epoch": 2.14, + "learning_rate": 0.00017794378539172877, + "loss": 0.829, + "step": 1803 + }, + { + "epoch": 2.14, + "learning_rate": 0.00017792012426405166, + "loss": 1.1711, + "step": 1804 + }, + { + "epoch": 2.14, + "learning_rate": 0.00017789645202666456, + "loss": 1.0128, + "step": 1805 + }, + { + "epoch": 2.14, + "learning_rate": 0.00017787276868294253, + "loss": 1.2074, + "step": 1806 + }, + { + "epoch": 2.14, + "learning_rate": 0.00017784907423626237, + "loss": 1.0996, + "step": 1807 + }, + { + "epoch": 2.14, + "learning_rate": 0.0001778253686900023, + "loss": 0.9608, + "step": 1808 + }, + { + "epoch": 2.14, + "learning_rate": 0.0001778016520475423, + "loss": 0.827, + "step": 1809 + }, + { + "epoch": 2.15, + "learning_rate": 0.00017777792431226383, + "loss": 1.2365, + "step": 1810 + }, + { + "epoch": 2.15, + "learning_rate": 0.00017775418548754993, + "loss": 1.0276, + "step": 1811 + }, + { + "epoch": 2.15, + "learning_rate": 0.0001777304355767852, + "loss": 0.8178, + "step": 1812 + }, + { + "epoch": 2.15, + "learning_rate": 0.0001777066745833559, + "loss": 1.1297, + "step": 1813 + }, + { + "epoch": 2.15, + "learning_rate": 0.00017768290251064987, + "loss": 1.1737, + "step": 1814 + }, + { + "epoch": 2.15, + "learning_rate": 0.00017765911936205644, + "loss": 1.1606, + "step": 1815 + }, + { + "epoch": 2.15, + "learning_rate": 0.00017763532514096658, + "loss": 1.2605, + "step": 1816 + }, + { + "epoch": 2.15, + "learning_rate": 0.0001776115198507728, + "loss": 1.2271, + "step": 1817 + }, + { + "epoch": 2.15, + "learning_rate": 0.00017758770349486923, + "loss": 0.9407, + "step": 1818 + }, + { + "epoch": 2.16, + "learning_rate": 0.0001775638760766516, + "loss": 1.0273, + "step": 1819 + }, + { + "epoch": 2.16, + "learning_rate": 0.00017754003759951715, + "loss": 1.0746, + "step": 1820 + }, + { + "epoch": 2.16, + "learning_rate": 0.00017751618806686472, + "loss": 1.0091, + "step": 1821 + }, + { + "epoch": 2.16, + "learning_rate": 0.00017749232748209473, + "loss": 0.997, + "step": 1822 + }, + { + "epoch": 2.16, + "learning_rate": 0.0001774684558486092, + "loss": 1.4814, + "step": 1823 + }, + { + "epoch": 2.16, + "learning_rate": 0.00017744457316981168, + "loss": 1.1407, + "step": 1824 + }, + { + "epoch": 2.16, + "learning_rate": 0.00017742067944910737, + "loss": 0.9824, + "step": 1825 + }, + { + "epoch": 2.16, + "learning_rate": 0.00017739677468990293, + "loss": 1.2603, + "step": 1826 + }, + { + "epoch": 2.17, + "learning_rate": 0.00017737285889560668, + "loss": 1.3721, + "step": 1827 + }, + { + "epoch": 2.17, + "learning_rate": 0.00017734893206962853, + "loss": 1.1186, + "step": 1828 + }, + { + "epoch": 2.17, + "learning_rate": 0.00017732499421537984, + "loss": 0.7693, + "step": 1829 + }, + { + "epoch": 2.17, + "learning_rate": 0.0001773010453362737, + "loss": 1.0449, + "step": 1830 + }, + { + "epoch": 2.17, + "learning_rate": 0.00017727708543572467, + "loss": 0.9331, + "step": 1831 + }, + { + "epoch": 2.17, + "learning_rate": 0.0001772531145171489, + "loss": 0.739, + "step": 1832 + }, + { + "epoch": 2.17, + "learning_rate": 0.00017722913258396417, + "loss": 0.9076, + "step": 1833 + }, + { + "epoch": 2.17, + "learning_rate": 0.00017720513963958968, + "loss": 1.3464, + "step": 1834 + }, + { + "epoch": 2.18, + "learning_rate": 0.00017718113568744638, + "loss": 0.8858, + "step": 1835 + }, + { + "epoch": 2.18, + "learning_rate": 0.00017715712073095672, + "loss": 1.3204, + "step": 1836 + }, + { + "epoch": 2.18, + "learning_rate": 0.00017713309477354467, + "loss": 1.0538, + "step": 1837 + }, + { + "epoch": 2.18, + "learning_rate": 0.0001771090578186358, + "loss": 1.44, + "step": 1838 + }, + { + "epoch": 2.18, + "learning_rate": 0.0001770850098696573, + "loss": 1.0167, + "step": 1839 + }, + { + "epoch": 2.18, + "learning_rate": 0.00017706095093003785, + "loss": 0.9724, + "step": 1840 + }, + { + "epoch": 2.18, + "learning_rate": 0.00017703688100320774, + "loss": 0.8055, + "step": 1841 + }, + { + "epoch": 2.18, + "learning_rate": 0.0001770128000925988, + "loss": 0.7363, + "step": 1842 + }, + { + "epoch": 2.18, + "learning_rate": 0.00017698870820164446, + "loss": 1.1329, + "step": 1843 + }, + { + "epoch": 2.19, + "learning_rate": 0.00017696460533377968, + "loss": 0.9487, + "step": 1844 + }, + { + "epoch": 2.19, + "learning_rate": 0.00017694049149244104, + "loss": 1.2571, + "step": 1845 + }, + { + "epoch": 2.19, + "learning_rate": 0.0001769163666810666, + "loss": 0.9148, + "step": 1846 + }, + { + "epoch": 2.19, + "learning_rate": 0.00017689223090309607, + "loss": 1.4676, + "step": 1847 + }, + { + "epoch": 2.19, + "learning_rate": 0.00017686808416197072, + "loss": 0.9395, + "step": 1848 + }, + { + "epoch": 2.19, + "learning_rate": 0.00017684392646113325, + "loss": 0.9632, + "step": 1849 + }, + { + "epoch": 2.19, + "learning_rate": 0.00017681975780402807, + "loss": 1.0037, + "step": 1850 + }, + { + "epoch": 2.19, + "learning_rate": 0.0001767955781941011, + "loss": 0.9557, + "step": 1851 + }, + { + "epoch": 2.2, + "learning_rate": 0.00017677138763479985, + "loss": 1.2799, + "step": 1852 + }, + { + "epoch": 2.2, + "learning_rate": 0.00017674718612957336, + "loss": 0.8461, + "step": 1853 + }, + { + "epoch": 2.2, + "learning_rate": 0.0001767229736818722, + "loss": 1.2762, + "step": 1854 + }, + { + "epoch": 2.2, + "learning_rate": 0.00017669875029514856, + "loss": 1.4801, + "step": 1855 + }, + { + "epoch": 2.2, + "learning_rate": 0.00017667451597285617, + "loss": 0.9849, + "step": 1856 + }, + { + "epoch": 2.2, + "learning_rate": 0.0001766502707184503, + "loss": 1.0875, + "step": 1857 + }, + { + "epoch": 2.2, + "learning_rate": 0.00017662601453538783, + "loss": 0.8346, + "step": 1858 + }, + { + "epoch": 2.2, + "learning_rate": 0.0001766017474271271, + "loss": 1.1933, + "step": 1859 + }, + { + "epoch": 2.21, + "learning_rate": 0.00017657746939712815, + "loss": 0.8789, + "step": 1860 + }, + { + "epoch": 2.21, + "learning_rate": 0.00017655318044885245, + "loss": 1.0091, + "step": 1861 + }, + { + "epoch": 2.21, + "learning_rate": 0.0001765288805857631, + "loss": 0.7371, + "step": 1862 + }, + { + "epoch": 2.21, + "learning_rate": 0.00017650456981132466, + "loss": 0.8131, + "step": 1863 + }, + { + "epoch": 2.21, + "learning_rate": 0.00017648024812900342, + "loss": 1.0795, + "step": 1864 + }, + { + "epoch": 2.21, + "learning_rate": 0.000176455915542267, + "loss": 0.9882, + "step": 1865 + }, + { + "epoch": 2.21, + "learning_rate": 0.00017643157205458483, + "loss": 1.212, + "step": 1866 + }, + { + "epoch": 2.21, + "learning_rate": 0.00017640721766942768, + "loss": 1.4755, + "step": 1867 + }, + { + "epoch": 2.21, + "learning_rate": 0.00017638285239026798, + "loss": 1.0391, + "step": 1868 + }, + { + "epoch": 2.22, + "learning_rate": 0.00017635847622057965, + "loss": 1.2568, + "step": 1869 + }, + { + "epoch": 2.22, + "learning_rate": 0.00017633408916383826, + "loss": 1.2138, + "step": 1870 + }, + { + "epoch": 2.22, + "learning_rate": 0.0001763096912235208, + "loss": 1.196, + "step": 1871 + }, + { + "epoch": 2.22, + "learning_rate": 0.00017628528240310596, + "loss": 1.1476, + "step": 1872 + }, + { + "epoch": 2.22, + "learning_rate": 0.00017626086270607384, + "loss": 1.1421, + "step": 1873 + }, + { + "epoch": 2.22, + "learning_rate": 0.00017623643213590619, + "loss": 1.0711, + "step": 1874 + }, + { + "epoch": 2.22, + "learning_rate": 0.0001762119906960863, + "loss": 0.8842, + "step": 1875 + }, + { + "epoch": 2.22, + "learning_rate": 0.00017618753839009893, + "loss": 0.798, + "step": 1876 + }, + { + "epoch": 2.23, + "learning_rate": 0.0001761630752214305, + "loss": 0.8591, + "step": 1877 + }, + { + "epoch": 2.23, + "learning_rate": 0.00017613860119356883, + "loss": 0.7646, + "step": 1878 + }, + { + "epoch": 2.23, + "learning_rate": 0.0001761141163100035, + "loss": 1.4113, + "step": 1879 + }, + { + "epoch": 2.23, + "learning_rate": 0.00017608962057422549, + "loss": 0.8605, + "step": 1880 + }, + { + "epoch": 2.23, + "learning_rate": 0.00017606511398972731, + "loss": 0.6179, + "step": 1881 + }, + { + "epoch": 2.23, + "eval_loss": 2.3971996307373047, + "eval_runtime": 283.7444, + "eval_samples_per_second": 0.726, + "eval_steps_per_second": 0.726, + "step": 1881 + }, + { + "epoch": 2.23, + "learning_rate": 0.0001760405965600031, + "loss": 0.8651, + "step": 1882 + }, + { + "epoch": 2.23, + "learning_rate": 0.0001760160682885485, + "loss": 1.3178, + "step": 1883 + }, + { + "epoch": 2.23, + "learning_rate": 0.00017599152917886071, + "loss": 0.9233, + "step": 1884 + }, + { + "epoch": 2.24, + "learning_rate": 0.00017596697923443847, + "loss": 0.9126, + "step": 1885 + }, + { + "epoch": 2.24, + "learning_rate": 0.0001759424184587821, + "loss": 0.9749, + "step": 1886 + }, + { + "epoch": 2.24, + "learning_rate": 0.00017591784685539334, + "loss": 1.1929, + "step": 1887 + }, + { + "epoch": 2.24, + "learning_rate": 0.00017589326442777565, + "loss": 1.2026, + "step": 1888 + }, + { + "epoch": 2.24, + "learning_rate": 0.00017586867117943392, + "loss": 1.1162, + "step": 1889 + }, + { + "epoch": 2.24, + "learning_rate": 0.00017584406711387463, + "loss": 0.9818, + "step": 1890 + }, + { + "epoch": 2.24, + "learning_rate": 0.0001758194522346057, + "loss": 0.9802, + "step": 1891 + }, + { + "epoch": 2.24, + "learning_rate": 0.0001757948265451368, + "loss": 0.8963, + "step": 1892 + }, + { + "epoch": 2.24, + "learning_rate": 0.00017577019004897897, + "loss": 1.0359, + "step": 1893 + }, + { + "epoch": 2.25, + "learning_rate": 0.00017574554274964478, + "loss": 1.0788, + "step": 1894 + }, + { + "epoch": 2.25, + "learning_rate": 0.00017572088465064848, + "loss": 0.9415, + "step": 1895 + }, + { + "epoch": 2.25, + "learning_rate": 0.0001756962157555057, + "loss": 1.0944, + "step": 1896 + }, + { + "epoch": 2.25, + "learning_rate": 0.00017567153606773373, + "loss": 1.357, + "step": 1897 + }, + { + "epoch": 2.25, + "learning_rate": 0.00017564684559085136, + "loss": 1.0108, + "step": 1898 + }, + { + "epoch": 2.25, + "learning_rate": 0.0001756221443283789, + "loss": 0.5337, + "step": 1899 + }, + { + "epoch": 2.25, + "learning_rate": 0.0001755974322838382, + "loss": 1.4234, + "step": 1900 + }, + { + "epoch": 2.25, + "learning_rate": 0.0001755727094607527, + "loss": 0.9083, + "step": 1901 + }, + { + "epoch": 2.26, + "learning_rate": 0.00017554797586264727, + "loss": 0.9199, + "step": 1902 + }, + { + "epoch": 2.26, + "learning_rate": 0.00017552323149304844, + "loss": 1.1885, + "step": 1903 + }, + { + "epoch": 2.26, + "learning_rate": 0.0001754984763554842, + "loss": 1.276, + "step": 1904 + }, + { + "epoch": 2.26, + "learning_rate": 0.0001754737104534841, + "loss": 0.8882, + "step": 1905 + }, + { + "epoch": 2.26, + "learning_rate": 0.00017544893379057918, + "loss": 0.993, + "step": 1906 + }, + { + "epoch": 2.26, + "learning_rate": 0.0001754241463703021, + "loss": 1.261, + "step": 1907 + }, + { + "epoch": 2.26, + "learning_rate": 0.00017539934819618696, + "loss": 0.9877, + "step": 1908 + }, + { + "epoch": 2.26, + "learning_rate": 0.00017537453927176947, + "loss": 0.9991, + "step": 1909 + }, + { + "epoch": 2.27, + "learning_rate": 0.00017534971960058685, + "loss": 1.2012, + "step": 1910 + }, + { + "epoch": 2.27, + "learning_rate": 0.0001753248891861778, + "loss": 0.864, + "step": 1911 + }, + { + "epoch": 2.27, + "learning_rate": 0.00017530004803208263, + "loss": 1.0382, + "step": 1912 + }, + { + "epoch": 2.27, + "learning_rate": 0.00017527519614184316, + "loss": 1.068, + "step": 1913 + }, + { + "epoch": 2.27, + "learning_rate": 0.00017525033351900268, + "loss": 0.8687, + "step": 1914 + }, + { + "epoch": 2.27, + "learning_rate": 0.0001752254601671061, + "loss": 1.1174, + "step": 1915 + }, + { + "epoch": 2.27, + "learning_rate": 0.0001752005760896998, + "loss": 1.269, + "step": 1916 + }, + { + "epoch": 2.27, + "learning_rate": 0.0001751756812903317, + "loss": 0.7387, + "step": 1917 + }, + { + "epoch": 2.27, + "learning_rate": 0.0001751507757725513, + "loss": 0.8484, + "step": 1918 + }, + { + "epoch": 2.28, + "learning_rate": 0.0001751258595399095, + "loss": 1.0092, + "step": 1919 + }, + { + "epoch": 2.28, + "learning_rate": 0.00017510093259595885, + "loss": 1.0145, + "step": 1920 + }, + { + "epoch": 2.28, + "learning_rate": 0.00017507599494425344, + "loss": 1.2969, + "step": 1921 + }, + { + "epoch": 2.28, + "learning_rate": 0.00017505104658834875, + "loss": 0.7925, + "step": 1922 + }, + { + "epoch": 2.28, + "learning_rate": 0.00017502608753180196, + "loss": 0.8974, + "step": 1923 + }, + { + "epoch": 2.28, + "learning_rate": 0.00017500111777817164, + "loss": 0.764, + "step": 1924 + }, + { + "epoch": 2.28, + "learning_rate": 0.0001749761373310179, + "loss": 1.1057, + "step": 1925 + }, + { + "epoch": 2.28, + "learning_rate": 0.00017495114619390246, + "loss": 0.8092, + "step": 1926 + }, + { + "epoch": 2.29, + "learning_rate": 0.00017492614437038845, + "loss": 0.9553, + "step": 1927 + }, + { + "epoch": 2.29, + "learning_rate": 0.00017490113186404067, + "loss": 1.0278, + "step": 1928 + }, + { + "epoch": 2.29, + "learning_rate": 0.0001748761086784253, + "loss": 1.2152, + "step": 1929 + }, + { + "epoch": 2.29, + "learning_rate": 0.00017485107481711012, + "loss": 1.5154, + "step": 1930 + }, + { + "epoch": 2.29, + "learning_rate": 0.0001748260302836644, + "loss": 1.1973, + "step": 1931 + }, + { + "epoch": 2.29, + "learning_rate": 0.00017480097508165896, + "loss": 0.9429, + "step": 1932 + }, + { + "epoch": 2.29, + "learning_rate": 0.0001747759092146661, + "loss": 1.5453, + "step": 1933 + }, + { + "epoch": 2.29, + "learning_rate": 0.0001747508326862597, + "loss": 1.1691, + "step": 1934 + }, + { + "epoch": 2.3, + "learning_rate": 0.00017472574550001508, + "loss": 1.2094, + "step": 1935 + }, + { + "epoch": 2.3, + "learning_rate": 0.00017470064765950918, + "loss": 1.0777, + "step": 1936 + }, + { + "epoch": 2.3, + "learning_rate": 0.00017467553916832035, + "loss": 1.0883, + "step": 1937 + }, + { + "epoch": 2.3, + "learning_rate": 0.00017465042003002857, + "loss": 0.9297, + "step": 1938 + }, + { + "epoch": 2.3, + "learning_rate": 0.00017462529024821522, + "loss": 0.7814, + "step": 1939 + }, + { + "epoch": 2.3, + "learning_rate": 0.00017460014982646334, + "loss": 1.3645, + "step": 1940 + }, + { + "epoch": 2.3, + "learning_rate": 0.0001745749987683573, + "loss": 1.0604, + "step": 1941 + }, + { + "epoch": 2.3, + "learning_rate": 0.00017454983707748317, + "loss": 0.9416, + "step": 1942 + }, + { + "epoch": 2.3, + "learning_rate": 0.00017452466475742845, + "loss": 1.4187, + "step": 1943 + }, + { + "epoch": 2.31, + "learning_rate": 0.00017449948181178215, + "loss": 1.1619, + "step": 1944 + }, + { + "epoch": 2.31, + "learning_rate": 0.00017447428824413482, + "loss": 1.1381, + "step": 1945 + }, + { + "epoch": 2.31, + "learning_rate": 0.00017444908405807845, + "loss": 1.2304, + "step": 1946 + }, + { + "epoch": 2.31, + "learning_rate": 0.0001744238692572067, + "loss": 1.2149, + "step": 1947 + }, + { + "epoch": 2.31, + "learning_rate": 0.00017439864384511463, + "loss": 0.8172, + "step": 1948 + }, + { + "epoch": 2.31, + "learning_rate": 0.00017437340782539877, + "loss": 1.0783, + "step": 1949 + }, + { + "epoch": 2.31, + "learning_rate": 0.00017434816120165728, + "loss": 1.0661, + "step": 1950 + }, + { + "epoch": 2.31, + "learning_rate": 0.00017432290397748982, + "loss": 1.1959, + "step": 1951 + }, + { + "epoch": 2.32, + "learning_rate": 0.0001742976361564974, + "loss": 1.0581, + "step": 1952 + }, + { + "epoch": 2.32, + "learning_rate": 0.00017427235774228274, + "loss": 0.8948, + "step": 1953 + }, + { + "epoch": 2.32, + "learning_rate": 0.00017424706873845, + "loss": 1.2565, + "step": 1954 + }, + { + "epoch": 2.32, + "learning_rate": 0.00017422176914860476, + "loss": 0.9237, + "step": 1955 + }, + { + "epoch": 2.32, + "learning_rate": 0.00017419645897635432, + "loss": 1.219, + "step": 1956 + }, + { + "epoch": 2.32, + "learning_rate": 0.00017417113822530727, + "loss": 1.4606, + "step": 1957 + }, + { + "epoch": 2.32, + "learning_rate": 0.00017414580689907377, + "loss": 0.714, + "step": 1958 + }, + { + "epoch": 2.32, + "learning_rate": 0.0001741204650012656, + "loss": 1.2223, + "step": 1959 + }, + { + "epoch": 2.33, + "learning_rate": 0.00017409511253549593, + "loss": 0.9828, + "step": 1960 + }, + { + "epoch": 2.33, + "learning_rate": 0.00017406974950537942, + "loss": 0.9954, + "step": 1961 + }, + { + "epoch": 2.33, + "learning_rate": 0.00017404437591453235, + "loss": 1.0307, + "step": 1962 + }, + { + "epoch": 2.33, + "learning_rate": 0.0001740189917665724, + "loss": 0.9331, + "step": 1963 + }, + { + "epoch": 2.33, + "learning_rate": 0.0001739935970651188, + "loss": 1.3517, + "step": 1964 + }, + { + "epoch": 2.33, + "learning_rate": 0.00017396819181379232, + "loss": 1.2024, + "step": 1965 + }, + { + "epoch": 2.33, + "learning_rate": 0.0001739427760162151, + "loss": 0.9696, + "step": 1966 + }, + { + "epoch": 2.33, + "learning_rate": 0.00017391734967601102, + "loss": 1.1559, + "step": 1967 + }, + { + "epoch": 2.33, + "learning_rate": 0.0001738919127968052, + "loss": 1.3104, + "step": 1968 + }, + { + "epoch": 2.34, + "learning_rate": 0.00017386646538222443, + "loss": 0.9073, + "step": 1969 + }, + { + "epoch": 2.34, + "learning_rate": 0.00017384100743589697, + "loss": 1.0539, + "step": 1970 + }, + { + "epoch": 2.34, + "learning_rate": 0.00017381553896145255, + "loss": 0.9873, + "step": 1971 + }, + { + "epoch": 2.34, + "learning_rate": 0.0001737900599625224, + "loss": 0.9466, + "step": 1972 + }, + { + "epoch": 2.34, + "learning_rate": 0.0001737645704427393, + "loss": 1.0639, + "step": 1973 + }, + { + "epoch": 2.34, + "learning_rate": 0.0001737390704057375, + "loss": 0.5843, + "step": 1974 + }, + { + "epoch": 2.34, + "learning_rate": 0.00017371355985515275, + "loss": 1.1318, + "step": 1975 + }, + { + "epoch": 2.34, + "learning_rate": 0.00017368803879462227, + "loss": 1.0116, + "step": 1976 + }, + { + "epoch": 2.35, + "learning_rate": 0.0001736625072277848, + "loss": 0.8845, + "step": 1977 + }, + { + "epoch": 2.35, + "learning_rate": 0.00017363696515828062, + "loss": 0.8081, + "step": 1978 + }, + { + "epoch": 2.35, + "learning_rate": 0.00017361141258975148, + "loss": 0.8795, + "step": 1979 + }, + { + "epoch": 2.35, + "learning_rate": 0.0001735858495258406, + "loss": 0.9725, + "step": 1980 + }, + { + "epoch": 2.35, + "learning_rate": 0.0001735602759701927, + "loss": 1.0164, + "step": 1981 + }, + { + "epoch": 2.35, + "learning_rate": 0.00017353469192645405, + "loss": 1.2937, + "step": 1982 + }, + { + "epoch": 2.35, + "learning_rate": 0.0001735090973982723, + "loss": 1.0842, + "step": 1983 + }, + { + "epoch": 2.35, + "learning_rate": 0.00017348349238929678, + "loss": 1.0043, + "step": 1984 + }, + { + "epoch": 2.36, + "learning_rate": 0.00017345787690317815, + "loss": 1.1302, + "step": 1985 + }, + { + "epoch": 2.36, + "learning_rate": 0.00017343225094356855, + "loss": 1.195, + "step": 1986 + }, + { + "epoch": 2.36, + "learning_rate": 0.00017340661451412183, + "loss": 1.1449, + "step": 1987 + }, + { + "epoch": 2.36, + "learning_rate": 0.00017338096761849309, + "loss": 1.2244, + "step": 1988 + }, + { + "epoch": 2.36, + "learning_rate": 0.00017335531026033897, + "loss": 0.9273, + "step": 1989 + }, + { + "epoch": 2.36, + "learning_rate": 0.00017332964244331776, + "loss": 1.0448, + "step": 1990 + }, + { + "epoch": 2.36, + "learning_rate": 0.00017330396417108908, + "loss": 1.0074, + "step": 1991 + }, + { + "epoch": 2.36, + "learning_rate": 0.00017327827544731412, + "loss": 0.9284, + "step": 1992 + }, + { + "epoch": 2.36, + "learning_rate": 0.0001732525762756555, + "loss": 1.0307, + "step": 1993 + }, + { + "epoch": 2.37, + "learning_rate": 0.00017322686665977737, + "loss": 1.1526, + "step": 1994 + }, + { + "epoch": 2.37, + "learning_rate": 0.00017320114660334535, + "loss": 0.819, + "step": 1995 + }, + { + "epoch": 2.37, + "learning_rate": 0.00017317541611002656, + "loss": 1.1029, + "step": 1996 + }, + { + "epoch": 2.37, + "learning_rate": 0.00017314967518348962, + "loss": 1.2471, + "step": 1997 + }, + { + "epoch": 2.37, + "learning_rate": 0.00017312392382740462, + "loss": 1.0156, + "step": 1998 + }, + { + "epoch": 2.37, + "learning_rate": 0.00017309816204544317, + "loss": 1.1843, + "step": 1999 + }, + { + "epoch": 2.37, + "learning_rate": 0.00017307238984127832, + "loss": 1.1588, + "step": 2000 + }, + { + "epoch": 2.37, + "learning_rate": 0.00017304660721858457, + "loss": 1.0157, + "step": 2001 + }, + { + "epoch": 2.38, + "learning_rate": 0.000173020814181038, + "loss": 1.0563, + "step": 2002 + }, + { + "epoch": 2.38, + "learning_rate": 0.00017299501073231622, + "loss": 1.1883, + "step": 2003 + }, + { + "epoch": 2.38, + "learning_rate": 0.00017296919687609808, + "loss": 0.9404, + "step": 2004 + }, + { + "epoch": 2.38, + "learning_rate": 0.00017294337261606417, + "loss": 1.2495, + "step": 2005 + }, + { + "epoch": 2.38, + "learning_rate": 0.00017291753795589643, + "loss": 1.0074, + "step": 2006 + }, + { + "epoch": 2.38, + "learning_rate": 0.00017289169289927837, + "loss": 1.1411, + "step": 2007 + }, + { + "epoch": 2.38, + "learning_rate": 0.00017286583744989488, + "loss": 0.9942, + "step": 2008 + }, + { + "epoch": 2.38, + "learning_rate": 0.00017283997161143239, + "loss": 0.952, + "step": 2009 + }, + { + "epoch": 2.39, + "learning_rate": 0.00017281409538757883, + "loss": 1.2966, + "step": 2010 + }, + { + "epoch": 2.39, + "learning_rate": 0.00017278820878202357, + "loss": 1.0836, + "step": 2011 + }, + { + "epoch": 2.39, + "learning_rate": 0.0001727623117984575, + "loss": 1.0984, + "step": 2012 + }, + { + "epoch": 2.39, + "learning_rate": 0.0001727364044405729, + "loss": 0.8822, + "step": 2013 + }, + { + "epoch": 2.39, + "learning_rate": 0.00017271048671206366, + "loss": 1.2014, + "step": 2014 + }, + { + "epoch": 2.39, + "learning_rate": 0.00017268455861662503, + "loss": 1.1779, + "step": 2015 + }, + { + "epoch": 2.39, + "learning_rate": 0.00017265862015795384, + "loss": 0.9966, + "step": 2016 + }, + { + "epoch": 2.39, + "learning_rate": 0.00017263267133974832, + "loss": 0.9536, + "step": 2017 + }, + { + "epoch": 2.39, + "learning_rate": 0.00017260671216570822, + "loss": 0.811, + "step": 2018 + }, + { + "epoch": 2.4, + "learning_rate": 0.00017258074263953472, + "loss": 0.8241, + "step": 2019 + }, + { + "epoch": 2.4, + "learning_rate": 0.00017255476276493056, + "loss": 1.1263, + "step": 2020 + }, + { + "epoch": 2.4, + "learning_rate": 0.00017252877254559986, + "loss": 0.995, + "step": 2021 + }, + { + "epoch": 2.4, + "learning_rate": 0.0001725027719852483, + "loss": 1.1481, + "step": 2022 + }, + { + "epoch": 2.4, + "learning_rate": 0.0001724767610875829, + "loss": 1.129, + "step": 2023 + }, + { + "epoch": 2.4, + "learning_rate": 0.00017245073985631238, + "loss": 0.5928, + "step": 2024 + }, + { + "epoch": 2.4, + "learning_rate": 0.00017242470829514672, + "loss": 0.8326, + "step": 2025 + }, + { + "epoch": 2.4, + "learning_rate": 0.00017239866640779745, + "loss": 1.1092, + "step": 2026 + }, + { + "epoch": 2.41, + "learning_rate": 0.00017237261419797756, + "loss": 1.5015, + "step": 2027 + }, + { + "epoch": 2.41, + "learning_rate": 0.0001723465516694016, + "loss": 0.9775, + "step": 2028 + }, + { + "epoch": 2.41, + "learning_rate": 0.00017232047882578548, + "loss": 0.9348, + "step": 2029 + }, + { + "epoch": 2.41, + "learning_rate": 0.0001722943956708466, + "loss": 0.6199, + "step": 2030 + }, + { + "epoch": 2.41, + "learning_rate": 0.00017226830220830384, + "loss": 1.1485, + "step": 2031 + }, + { + "epoch": 2.41, + "learning_rate": 0.00017224219844187764, + "loss": 1.1195, + "step": 2032 + }, + { + "epoch": 2.41, + "learning_rate": 0.00017221608437528973, + "loss": 1.0528, + "step": 2033 + }, + { + "epoch": 2.41, + "learning_rate": 0.00017218996001226345, + "loss": 1.1058, + "step": 2034 + }, + { + "epoch": 2.42, + "learning_rate": 0.00017216382535652355, + "loss": 1.1451, + "step": 2035 + }, + { + "epoch": 2.42, + "learning_rate": 0.0001721376804117963, + "loss": 1.2251, + "step": 2036 + }, + { + "epoch": 2.42, + "learning_rate": 0.00017211152518180936, + "loss": 1.0708, + "step": 2037 + }, + { + "epoch": 2.42, + "learning_rate": 0.00017208535967029188, + "loss": 1.0746, + "step": 2038 + }, + { + "epoch": 2.42, + "learning_rate": 0.00017205918388097456, + "loss": 1.3262, + "step": 2039 + }, + { + "epoch": 2.42, + "learning_rate": 0.00017203299781758943, + "loss": 0.7619, + "step": 2040 + }, + { + "epoch": 2.42, + "learning_rate": 0.00017200680148387007, + "loss": 1.01, + "step": 2041 + }, + { + "epoch": 2.42, + "learning_rate": 0.0001719805948835515, + "loss": 1.1651, + "step": 2042 + }, + { + "epoch": 2.42, + "learning_rate": 0.00017195437802037026, + "loss": 1.4671, + "step": 2043 + }, + { + "epoch": 2.43, + "learning_rate": 0.00017192815089806424, + "loss": 0.9857, + "step": 2044 + }, + { + "epoch": 2.43, + "learning_rate": 0.0001719019135203729, + "loss": 1.2613, + "step": 2045 + }, + { + "epoch": 2.43, + "learning_rate": 0.00017187566589103704, + "loss": 1.4386, + "step": 2046 + }, + { + "epoch": 2.43, + "learning_rate": 0.0001718494080137991, + "loss": 1.0965, + "step": 2047 + }, + { + "epoch": 2.43, + "learning_rate": 0.00017182313989240285, + "loss": 0.752, + "step": 2048 + }, + { + "epoch": 2.43, + "learning_rate": 0.00017179686153059352, + "loss": 0.9126, + "step": 2049 + }, + { + "epoch": 2.43, + "learning_rate": 0.00017177057293211784, + "loss": 1.5075, + "step": 2050 + }, + { + "epoch": 2.43, + "learning_rate": 0.000171744274100724, + "loss": 1.0407, + "step": 2051 + }, + { + "epoch": 2.44, + "learning_rate": 0.00017171796504016166, + "loss": 0.8263, + "step": 2052 + }, + { + "epoch": 2.44, + "learning_rate": 0.0001716916457541819, + "loss": 0.9453, + "step": 2053 + }, + { + "epoch": 2.44, + "learning_rate": 0.00017166531624653722, + "loss": 0.9777, + "step": 2054 + }, + { + "epoch": 2.44, + "learning_rate": 0.00017163897652098172, + "loss": 1.2129, + "step": 2055 + }, + { + "epoch": 2.44, + "learning_rate": 0.00017161262658127086, + "loss": 1.3642, + "step": 2056 + }, + { + "epoch": 2.44, + "learning_rate": 0.00017158626643116152, + "loss": 0.6798, + "step": 2057 + }, + { + "epoch": 2.44, + "learning_rate": 0.00017155989607441213, + "loss": 0.874, + "step": 2058 + }, + { + "epoch": 2.44, + "learning_rate": 0.00017153351551478247, + "loss": 1.0636, + "step": 2059 + }, + { + "epoch": 2.45, + "learning_rate": 0.0001715071247560339, + "loss": 1.0563, + "step": 2060 + }, + { + "epoch": 2.45, + "learning_rate": 0.0001714807238019291, + "loss": 1.1984, + "step": 2061 + }, + { + "epoch": 2.45, + "learning_rate": 0.00017145431265623234, + "loss": 0.9444, + "step": 2062 + }, + { + "epoch": 2.45, + "learning_rate": 0.0001714278913227092, + "loss": 0.7809, + "step": 2063 + }, + { + "epoch": 2.45, + "learning_rate": 0.00017140145980512684, + "loss": 1.649, + "step": 2064 + }, + { + "epoch": 2.45, + "learning_rate": 0.0001713750181072538, + "loss": 1.0956, + "step": 2065 + }, + { + "epoch": 2.45, + "learning_rate": 0.0001713485662328601, + "loss": 1.2845, + "step": 2066 + }, + { + "epoch": 2.45, + "learning_rate": 0.00017132210418571714, + "loss": 1.0484, + "step": 2067 + }, + { + "epoch": 2.45, + "learning_rate": 0.00017129563196959793, + "loss": 1.0291, + "step": 2068 + }, + { + "epoch": 2.46, + "learning_rate": 0.00017126914958827679, + "loss": 1.1226, + "step": 2069 + }, + { + "epoch": 2.46, + "learning_rate": 0.0001712426570455295, + "loss": 1.0119, + "step": 2070 + }, + { + "epoch": 2.46, + "learning_rate": 0.00017121615434513332, + "loss": 1.1663, + "step": 2071 + }, + { + "epoch": 2.46, + "learning_rate": 0.000171189641490867, + "loss": 1.1353, + "step": 2072 + }, + { + "epoch": 2.46, + "learning_rate": 0.00017116311848651064, + "loss": 1.0761, + "step": 2073 + }, + { + "epoch": 2.46, + "learning_rate": 0.00017113658533584594, + "loss": 1.1978, + "step": 2074 + }, + { + "epoch": 2.46, + "learning_rate": 0.00017111004204265582, + "loss": 1.3881, + "step": 2075 + }, + { + "epoch": 2.46, + "learning_rate": 0.00017108348861072484, + "loss": 1.3945, + "step": 2076 + }, + { + "epoch": 2.47, + "learning_rate": 0.00017105692504383897, + "loss": 1.3796, + "step": 2077 + }, + { + "epoch": 2.47, + "learning_rate": 0.00017103035134578555, + "loss": 1.1721, + "step": 2078 + }, + { + "epoch": 2.47, + "learning_rate": 0.0001710037675203534, + "loss": 1.0061, + "step": 2079 + }, + { + "epoch": 2.47, + "learning_rate": 0.00017097717357133284, + "loss": 1.2456, + "step": 2080 + }, + { + "epoch": 2.47, + "learning_rate": 0.00017095056950251555, + "loss": 0.788, + "step": 2081 + }, + { + "epoch": 2.47, + "learning_rate": 0.0001709239553176947, + "loss": 1.16, + "step": 2082 + }, + { + "epoch": 2.47, + "learning_rate": 0.0001708973310206649, + "loss": 1.0498, + "step": 2083 + }, + { + "epoch": 2.47, + "learning_rate": 0.00017087069661522218, + "loss": 0.8993, + "step": 2084 + }, + { + "epoch": 2.48, + "learning_rate": 0.00017084405210516406, + "loss": 1.2088, + "step": 2085 + }, + { + "epoch": 2.48, + "learning_rate": 0.0001708173974942894, + "loss": 1.0897, + "step": 2086 + }, + { + "epoch": 2.48, + "learning_rate": 0.00017079073278639863, + "loss": 1.2718, + "step": 2087 + }, + { + "epoch": 2.48, + "learning_rate": 0.00017076405798529355, + "loss": 1.2325, + "step": 2088 + }, + { + "epoch": 2.48, + "learning_rate": 0.00017073737309477736, + "loss": 1.0555, + "step": 2089 + }, + { + "epoch": 2.48, + "learning_rate": 0.00017071067811865476, + "loss": 1.1428, + "step": 2090 + }, + { + "epoch": 2.48, + "eval_loss": 2.3191208839416504, + "eval_runtime": 284.1375, + "eval_samples_per_second": 0.725, + "eval_steps_per_second": 0.725, + "step": 2090 + }, + { + "epoch": 2.48, + "learning_rate": 0.0001706839730607319, + "loss": 1.0908, + "step": 2091 + }, + { + "epoch": 2.48, + "learning_rate": 0.0001706572579248163, + "loss": 1.2092, + "step": 2092 + }, + { + "epoch": 2.48, + "learning_rate": 0.000170630532714717, + "loss": 1.1735, + "step": 2093 + }, + { + "epoch": 2.49, + "learning_rate": 0.0001706037974342444, + "loss": 1.2716, + "step": 2094 + }, + { + "epoch": 2.49, + "learning_rate": 0.00017057705208721035, + "loss": 1.0095, + "step": 2095 + }, + { + "epoch": 2.49, + "learning_rate": 0.0001705502966774282, + "loss": 1.3059, + "step": 2096 + }, + { + "epoch": 2.49, + "learning_rate": 0.00017052353120871266, + "loss": 0.8269, + "step": 2097 + }, + { + "epoch": 2.49, + "learning_rate": 0.0001704967556848799, + "loss": 1.0615, + "step": 2098 + }, + { + "epoch": 2.49, + "learning_rate": 0.00017046997010974755, + "loss": 1.2709, + "step": 2099 + }, + { + "epoch": 2.49, + "learning_rate": 0.00017044317448713461, + "loss": 1.1633, + "step": 2100 + }, + { + "epoch": 2.49, + "learning_rate": 0.00017041636882086158, + "loss": 0.9273, + "step": 2101 + }, + { + "epoch": 2.5, + "learning_rate": 0.00017038955311475038, + "loss": 1.3117, + "step": 2102 + }, + { + "epoch": 2.5, + "learning_rate": 0.0001703627273726243, + "loss": 0.8883, + "step": 2103 + }, + { + "epoch": 2.5, + "learning_rate": 0.00017033589159830815, + "loss": 1.1371, + "step": 2104 + }, + { + "epoch": 2.5, + "learning_rate": 0.00017030904579562806, + "loss": 1.5402, + "step": 2105 + }, + { + "epoch": 2.5, + "learning_rate": 0.00017028218996841172, + "loss": 0.9156, + "step": 2106 + }, + { + "epoch": 2.5, + "learning_rate": 0.00017025532412048817, + "loss": 1.0962, + "step": 2107 + }, + { + "epoch": 2.5, + "learning_rate": 0.0001702284482556879, + "loss": 0.9402, + "step": 2108 + }, + { + "epoch": 2.5, + "learning_rate": 0.00017020156237784279, + "loss": 0.8146, + "step": 2109 + }, + { + "epoch": 2.51, + "learning_rate": 0.0001701746664907862, + "loss": 1.1718, + "step": 2110 + }, + { + "epoch": 2.51, + "learning_rate": 0.00017014776059835288, + "loss": 1.0618, + "step": 2111 + }, + { + "epoch": 2.51, + "learning_rate": 0.00017012084470437907, + "loss": 1.4796, + "step": 2112 + }, + { + "epoch": 2.51, + "learning_rate": 0.00017009391881270237, + "loss": 0.8402, + "step": 2113 + }, + { + "epoch": 2.51, + "learning_rate": 0.00017006698292716178, + "loss": 1.1641, + "step": 2114 + }, + { + "epoch": 2.51, + "learning_rate": 0.0001700400370515978, + "loss": 1.241, + "step": 2115 + }, + { + "epoch": 2.51, + "learning_rate": 0.00017001308118985237, + "loss": 0.8683, + "step": 2116 + }, + { + "epoch": 2.51, + "learning_rate": 0.00016998611534576873, + "loss": 1.2697, + "step": 2117 + }, + { + "epoch": 2.52, + "learning_rate": 0.00016995913952319168, + "loss": 0.9233, + "step": 2118 + }, + { + "epoch": 2.52, + "learning_rate": 0.00016993215372596737, + "loss": 1.2472, + "step": 2119 + }, + { + "epoch": 2.52, + "learning_rate": 0.00016990515795794334, + "loss": 1.2541, + "step": 2120 + }, + { + "epoch": 2.52, + "learning_rate": 0.00016987815222296865, + "loss": 1.0016, + "step": 2121 + }, + { + "epoch": 2.52, + "learning_rate": 0.00016985113652489374, + "loss": 1.0678, + "step": 2122 + }, + { + "epoch": 2.52, + "learning_rate": 0.00016982411086757037, + "loss": 1.6066, + "step": 2123 + }, + { + "epoch": 2.52, + "learning_rate": 0.00016979707525485192, + "loss": 1.229, + "step": 2124 + }, + { + "epoch": 2.52, + "learning_rate": 0.00016977002969059302, + "loss": 0.752, + "step": 2125 + }, + { + "epoch": 2.52, + "learning_rate": 0.00016974297417864977, + "loss": 0.8752, + "step": 2126 + }, + { + "epoch": 2.53, + "learning_rate": 0.0001697159087228797, + "loss": 0.8896, + "step": 2127 + }, + { + "epoch": 2.53, + "learning_rate": 0.00016968883332714186, + "loss": 0.9657, + "step": 2128 + }, + { + "epoch": 2.53, + "learning_rate": 0.0001696617479952964, + "loss": 1.3657, + "step": 2129 + }, + { + "epoch": 2.53, + "learning_rate": 0.0001696346527312053, + "loss": 0.9876, + "step": 2130 + }, + { + "epoch": 2.53, + "learning_rate": 0.00016960754753873162, + "loss": 1.0165, + "step": 2131 + }, + { + "epoch": 2.53, + "learning_rate": 0.00016958043242174003, + "loss": 1.625, + "step": 2132 + }, + { + "epoch": 2.53, + "learning_rate": 0.00016955330738409655, + "loss": 1.5502, + "step": 2133 + }, + { + "epoch": 2.53, + "learning_rate": 0.00016952617242966864, + "loss": 1.0793, + "step": 2134 + }, + { + "epoch": 2.54, + "learning_rate": 0.00016949902756232507, + "loss": 1.4425, + "step": 2135 + }, + { + "epoch": 2.54, + "learning_rate": 0.00016947187278593622, + "loss": 1.3124, + "step": 2136 + }, + { + "epoch": 2.54, + "learning_rate": 0.00016944470810437365, + "loss": 0.927, + "step": 2137 + }, + { + "epoch": 2.54, + "learning_rate": 0.00016941753352151055, + "loss": 1.1911, + "step": 2138 + }, + { + "epoch": 2.54, + "learning_rate": 0.00016939034904122138, + "loss": 1.0768, + "step": 2139 + }, + { + "epoch": 2.54, + "learning_rate": 0.00016936315466738205, + "loss": 1.1277, + "step": 2140 + }, + { + "epoch": 2.54, + "learning_rate": 0.00016933595040386984, + "loss": 0.812, + "step": 2141 + }, + { + "epoch": 2.54, + "learning_rate": 0.0001693087362545636, + "loss": 0.8299, + "step": 2142 + }, + { + "epoch": 2.55, + "learning_rate": 0.00016928151222334338, + "loss": 1.1125, + "step": 2143 + }, + { + "epoch": 2.55, + "learning_rate": 0.00016925427831409077, + "loss": 1.1835, + "step": 2144 + }, + { + "epoch": 2.55, + "learning_rate": 0.00016922703453068873, + "loss": 1.2007, + "step": 2145 + }, + { + "epoch": 2.55, + "learning_rate": 0.00016919978087702163, + "loss": 0.8524, + "step": 2146 + }, + { + "epoch": 2.55, + "learning_rate": 0.00016917251735697523, + "loss": 0.9497, + "step": 2147 + }, + { + "epoch": 2.55, + "learning_rate": 0.00016914524397443673, + "loss": 1.1004, + "step": 2148 + }, + { + "epoch": 2.55, + "learning_rate": 0.00016911796073329466, + "loss": 0.8347, + "step": 2149 + }, + { + "epoch": 2.55, + "learning_rate": 0.00016909066763743912, + "loss": 0.9492, + "step": 2150 + }, + { + "epoch": 2.55, + "learning_rate": 0.00016906336469076148, + "loss": 1.1406, + "step": 2151 + }, + { + "epoch": 2.56, + "learning_rate": 0.00016903605189715447, + "loss": 1.0137, + "step": 2152 + }, + { + "epoch": 2.56, + "learning_rate": 0.0001690087292605124, + "loss": 1.0624, + "step": 2153 + }, + { + "epoch": 2.56, + "learning_rate": 0.00016898139678473076, + "loss": 1.1767, + "step": 2154 + }, + { + "epoch": 2.56, + "learning_rate": 0.0001689540544737067, + "loss": 1.4184, + "step": 2155 + }, + { + "epoch": 2.56, + "learning_rate": 0.00016892670233133856, + "loss": 0.957, + "step": 2156 + }, + { + "epoch": 2.56, + "learning_rate": 0.00016889934036152618, + "loss": 1.0399, + "step": 2157 + }, + { + "epoch": 2.56, + "learning_rate": 0.00016887196856817073, + "loss": 1.2009, + "step": 2158 + }, + { + "epoch": 2.56, + "learning_rate": 0.00016884458695517495, + "loss": 1.3977, + "step": 2159 + }, + { + "epoch": 2.57, + "learning_rate": 0.00016881719552644273, + "loss": 1.1328, + "step": 2160 + }, + { + "epoch": 2.57, + "learning_rate": 0.00016878979428587955, + "loss": 1.5007, + "step": 2161 + }, + { + "epoch": 2.57, + "learning_rate": 0.00016876238323739221, + "loss": 1.1248, + "step": 2162 + }, + { + "epoch": 2.57, + "learning_rate": 0.00016873496238488899, + "loss": 1.0358, + "step": 2163 + }, + { + "epoch": 2.57, + "learning_rate": 0.00016870753173227945, + "loss": 1.2961, + "step": 2164 + }, + { + "epoch": 2.57, + "learning_rate": 0.00016868009128347459, + "loss": 0.9435, + "step": 2165 + }, + { + "epoch": 2.57, + "learning_rate": 0.00016865264104238683, + "loss": 0.9642, + "step": 2166 + }, + { + "epoch": 2.57, + "learning_rate": 0.00016862518101293, + "loss": 1.0169, + "step": 2167 + }, + { + "epoch": 2.58, + "learning_rate": 0.00016859771119901929, + "loss": 1.0904, + "step": 2168 + }, + { + "epoch": 2.58, + "learning_rate": 0.0001685702316045713, + "loss": 1.3178, + "step": 2169 + }, + { + "epoch": 2.58, + "learning_rate": 0.00016854274223350397, + "loss": 1.1395, + "step": 2170 + }, + { + "epoch": 2.58, + "learning_rate": 0.00016851524308973678, + "loss": 1.1207, + "step": 2171 + }, + { + "epoch": 2.58, + "learning_rate": 0.00016848773417719044, + "loss": 1.3544, + "step": 2172 + }, + { + "epoch": 2.58, + "learning_rate": 0.00016846021549978715, + "loss": 1.3503, + "step": 2173 + }, + { + "epoch": 2.58, + "learning_rate": 0.00016843268706145042, + "loss": 1.4276, + "step": 2174 + }, + { + "epoch": 2.58, + "learning_rate": 0.00016840514886610529, + "loss": 0.9888, + "step": 2175 + }, + { + "epoch": 2.58, + "learning_rate": 0.00016837760091767802, + "loss": 1.0913, + "step": 2176 + }, + { + "epoch": 2.59, + "learning_rate": 0.0001683500432200964, + "loss": 1.4781, + "step": 2177 + }, + { + "epoch": 2.59, + "learning_rate": 0.00016832247577728955, + "loss": 1.2657, + "step": 2178 + }, + { + "epoch": 2.59, + "learning_rate": 0.000168294898593188, + "loss": 0.9206, + "step": 2179 + }, + { + "epoch": 2.59, + "learning_rate": 0.0001682673116717236, + "loss": 0.9218, + "step": 2180 + }, + { + "epoch": 2.59, + "learning_rate": 0.0001682397150168297, + "loss": 1.2719, + "step": 2181 + }, + { + "epoch": 2.59, + "learning_rate": 0.00016821210863244096, + "loss": 0.984, + "step": 2182 + }, + { + "epoch": 2.59, + "learning_rate": 0.00016818449252249345, + "loss": 1.4641, + "step": 2183 + }, + { + "epoch": 2.59, + "learning_rate": 0.0001681568666909246, + "loss": 1.2571, + "step": 2184 + }, + { + "epoch": 2.6, + "learning_rate": 0.00016812923114167328, + "loss": 1.2025, + "step": 2185 + }, + { + "epoch": 2.6, + "learning_rate": 0.00016810158587867973, + "loss": 0.9621, + "step": 2186 + }, + { + "epoch": 2.6, + "learning_rate": 0.00016807393090588553, + "loss": 1.0016, + "step": 2187 + }, + { + "epoch": 2.6, + "learning_rate": 0.00016804626622723368, + "loss": 1.031, + "step": 2188 + }, + { + "epoch": 2.6, + "learning_rate": 0.00016801859184666857, + "loss": 0.7573, + "step": 2189 + }, + { + "epoch": 2.6, + "learning_rate": 0.00016799090776813597, + "loss": 1.2694, + "step": 2190 + }, + { + "epoch": 2.6, + "learning_rate": 0.000167963213995583, + "loss": 1.196, + "step": 2191 + }, + { + "epoch": 2.6, + "learning_rate": 0.00016793551053295822, + "loss": 0.8754, + "step": 2192 + }, + { + "epoch": 2.61, + "learning_rate": 0.00016790779738421152, + "loss": 1.1743, + "step": 2193 + }, + { + "epoch": 2.61, + "learning_rate": 0.0001678800745532942, + "loss": 1.0921, + "step": 2194 + }, + { + "epoch": 2.61, + "learning_rate": 0.00016785234204415888, + "loss": 0.8778, + "step": 2195 + }, + { + "epoch": 2.61, + "learning_rate": 0.0001678245998607597, + "loss": 1.0528, + "step": 2196 + }, + { + "epoch": 2.61, + "learning_rate": 0.00016779684800705203, + "loss": 1.0255, + "step": 2197 + }, + { + "epoch": 2.61, + "learning_rate": 0.0001677690864869927, + "loss": 0.6344, + "step": 2198 + }, + { + "epoch": 2.61, + "learning_rate": 0.00016774131530453992, + "loss": 0.8691, + "step": 2199 + }, + { + "epoch": 2.61, + "learning_rate": 0.00016771353446365318, + "loss": 1.2061, + "step": 2200 + }, + { + "epoch": 2.61, + "learning_rate": 0.0001676857439682935, + "loss": 1.1759, + "step": 2201 + }, + { + "epoch": 2.62, + "learning_rate": 0.00016765794382242314, + "loss": 1.1118, + "step": 2202 + }, + { + "epoch": 2.62, + "learning_rate": 0.00016763013403000584, + "loss": 1.3005, + "step": 2203 + }, + { + "epoch": 2.62, + "learning_rate": 0.00016760231459500666, + "loss": 1.0415, + "step": 2204 + }, + { + "epoch": 2.62, + "learning_rate": 0.000167574485521392, + "loss": 0.824, + "step": 2205 + }, + { + "epoch": 2.62, + "learning_rate": 0.00016754664681312975, + "loss": 0.6682, + "step": 2206 + }, + { + "epoch": 2.62, + "learning_rate": 0.00016751879847418905, + "loss": 1.9204, + "step": 2207 + }, + { + "epoch": 2.62, + "learning_rate": 0.00016749094050854047, + "loss": 0.9931, + "step": 2208 + }, + { + "epoch": 2.62, + "learning_rate": 0.00016746307292015602, + "loss": 0.8898, + "step": 2209 + }, + { + "epoch": 2.63, + "learning_rate": 0.00016743519571300888, + "loss": 1.3337, + "step": 2210 + }, + { + "epoch": 2.63, + "learning_rate": 0.00016740730889107383, + "loss": 1.2947, + "step": 2211 + }, + { + "epoch": 2.63, + "learning_rate": 0.0001673794124583269, + "loss": 1.1882, + "step": 2212 + }, + { + "epoch": 2.63, + "learning_rate": 0.0001673515064187455, + "loss": 1.5408, + "step": 2213 + }, + { + "epoch": 2.63, + "learning_rate": 0.00016732359077630847, + "loss": 1.1273, + "step": 2214 + }, + { + "epoch": 2.63, + "learning_rate": 0.0001672956655349959, + "loss": 0.8954, + "step": 2215 + }, + { + "epoch": 2.63, + "learning_rate": 0.00016726773069878934, + "loss": 1.1747, + "step": 2216 + }, + { + "epoch": 2.63, + "learning_rate": 0.00016723978627167173, + "loss": 0.807, + "step": 2217 + }, + { + "epoch": 2.64, + "learning_rate": 0.00016721183225762727, + "loss": 1.2512, + "step": 2218 + }, + { + "epoch": 2.64, + "learning_rate": 0.00016718386866064166, + "loss": 1.0796, + "step": 2219 + }, + { + "epoch": 2.64, + "learning_rate": 0.00016715589548470185, + "loss": 1.0905, + "step": 2220 + }, + { + "epoch": 2.64, + "learning_rate": 0.00016712791273379622, + "loss": 1.3779, + "step": 2221 + }, + { + "epoch": 2.64, + "learning_rate": 0.00016709992041191452, + "loss": 1.2015, + "step": 2222 + }, + { + "epoch": 2.64, + "learning_rate": 0.00016707191852304782, + "loss": 0.8612, + "step": 2223 + }, + { + "epoch": 2.64, + "learning_rate": 0.0001670439070711886, + "loss": 1.1819, + "step": 2224 + }, + { + "epoch": 2.64, + "learning_rate": 0.00016701588606033064, + "loss": 1.2715, + "step": 2225 + }, + { + "epoch": 2.64, + "learning_rate": 0.0001669878554944692, + "loss": 1.3681, + "step": 2226 + }, + { + "epoch": 2.65, + "learning_rate": 0.00016695981537760072, + "loss": 1.1254, + "step": 2227 + }, + { + "epoch": 2.65, + "learning_rate": 0.0001669317657137232, + "loss": 0.9476, + "step": 2228 + }, + { + "epoch": 2.65, + "learning_rate": 0.0001669037065068359, + "loss": 1.235, + "step": 2229 + }, + { + "epoch": 2.65, + "learning_rate": 0.00016687563776093941, + "loss": 0.7356, + "step": 2230 + }, + { + "epoch": 2.65, + "learning_rate": 0.00016684755948003573, + "loss": 0.7901, + "step": 2231 + }, + { + "epoch": 2.65, + "learning_rate": 0.00016681947166812824, + "loss": 1.317, + "step": 2232 + }, + { + "epoch": 2.65, + "learning_rate": 0.00016679137432922163, + "loss": 0.8832, + "step": 2233 + }, + { + "epoch": 2.65, + "learning_rate": 0.00016676326746732195, + "loss": 1.2776, + "step": 2234 + }, + { + "epoch": 2.66, + "learning_rate": 0.00016673515108643665, + "loss": 1.0435, + "step": 2235 + }, + { + "epoch": 2.66, + "learning_rate": 0.0001667070251905745, + "loss": 1.0957, + "step": 2236 + }, + { + "epoch": 2.66, + "learning_rate": 0.00016667888978374567, + "loss": 1.0862, + "step": 2237 + }, + { + "epoch": 2.66, + "learning_rate": 0.00016665074486996165, + "loss": 1.1112, + "step": 2238 + }, + { + "epoch": 2.66, + "learning_rate": 0.0001666225904532352, + "loss": 1.3633, + "step": 2239 + }, + { + "epoch": 2.66, + "learning_rate": 0.00016659442653758064, + "loss": 1.444, + "step": 2240 + }, + { + "epoch": 2.66, + "learning_rate": 0.00016656625312701348, + "loss": 0.8248, + "step": 2241 + }, + { + "epoch": 2.66, + "learning_rate": 0.00016653807022555067, + "loss": 1.2522, + "step": 2242 + }, + { + "epoch": 2.67, + "learning_rate": 0.0001665098778372104, + "loss": 1.2107, + "step": 2243 + }, + { + "epoch": 2.67, + "learning_rate": 0.0001664816759660124, + "loss": 1.0813, + "step": 2244 + }, + { + "epoch": 2.67, + "learning_rate": 0.00016645346461597753, + "loss": 1.1136, + "step": 2245 + }, + { + "epoch": 2.67, + "learning_rate": 0.00016642524379112817, + "loss": 1.1003, + "step": 2246 + }, + { + "epoch": 2.67, + "learning_rate": 0.000166397013495488, + "loss": 1.0635, + "step": 2247 + }, + { + "epoch": 2.67, + "learning_rate": 0.00016636877373308204, + "loss": 1.0575, + "step": 2248 + }, + { + "epoch": 2.67, + "learning_rate": 0.00016634052450793663, + "loss": 0.7693, + "step": 2249 + }, + { + "epoch": 2.67, + "learning_rate": 0.00016631226582407952, + "loss": 1.5965, + "step": 2250 + }, + { + "epoch": 2.67, + "learning_rate": 0.0001662839976855398, + "loss": 1.0989, + "step": 2251 + }, + { + "epoch": 2.68, + "learning_rate": 0.00016625572009634787, + "loss": 0.9198, + "step": 2252 + }, + { + "epoch": 2.68, + "learning_rate": 0.00016622743306053548, + "loss": 1.0896, + "step": 2253 + }, + { + "epoch": 2.68, + "learning_rate": 0.00016619913658213578, + "loss": 1.015, + "step": 2254 + }, + { + "epoch": 2.68, + "learning_rate": 0.0001661708306651832, + "loss": 0.8572, + "step": 2255 + }, + { + "epoch": 2.68, + "learning_rate": 0.00016614251531371353, + "loss": 1.1508, + "step": 2256 + }, + { + "epoch": 2.68, + "learning_rate": 0.000166114190531764, + "loss": 1.1852, + "step": 2257 + }, + { + "epoch": 2.68, + "learning_rate": 0.00016608585632337306, + "loss": 0.932, + "step": 2258 + }, + { + "epoch": 2.68, + "learning_rate": 0.00016605751269258053, + "loss": 1.2542, + "step": 2259 + }, + { + "epoch": 2.69, + "learning_rate": 0.00016602915964342757, + "loss": 0.943, + "step": 2260 + }, + { + "epoch": 2.69, + "learning_rate": 0.00016600079717995678, + "loss": 1.2438, + "step": 2261 + }, + { + "epoch": 2.69, + "learning_rate": 0.00016597242530621203, + "loss": 0.9928, + "step": 2262 + }, + { + "epoch": 2.69, + "learning_rate": 0.00016594404402623845, + "loss": 0.9516, + "step": 2263 + }, + { + "epoch": 2.69, + "learning_rate": 0.00016591565334408265, + "loss": 1.1689, + "step": 2264 + }, + { + "epoch": 2.69, + "learning_rate": 0.0001658872532637925, + "loss": 1.3155, + "step": 2265 + }, + { + "epoch": 2.69, + "learning_rate": 0.00016585884378941725, + "loss": 1.1596, + "step": 2266 + }, + { + "epoch": 2.69, + "learning_rate": 0.00016583042492500746, + "loss": 0.9956, + "step": 2267 + }, + { + "epoch": 2.7, + "learning_rate": 0.00016580199667461508, + "loss": 0.9289, + "step": 2268 + }, + { + "epoch": 2.7, + "learning_rate": 0.00016577355904229325, + "loss": 1.3225, + "step": 2269 + }, + { + "epoch": 2.7, + "learning_rate": 0.00016574511203209667, + "loss": 1.0384, + "step": 2270 + }, + { + "epoch": 2.7, + "learning_rate": 0.0001657166556480812, + "loss": 0.697, + "step": 2271 + }, + { + "epoch": 2.7, + "learning_rate": 0.00016568818989430416, + "loss": 0.7702, + "step": 2272 + }, + { + "epoch": 2.7, + "learning_rate": 0.00016565971477482404, + "loss": 1.1041, + "step": 2273 + }, + { + "epoch": 2.7, + "learning_rate": 0.00016563123029370093, + "loss": 1.0462, + "step": 2274 + }, + { + "epoch": 2.7, + "learning_rate": 0.0001656027364549959, + "loss": 1.0797, + "step": 2275 + }, + { + "epoch": 2.7, + "learning_rate": 0.0001655742332627717, + "loss": 1.3301, + "step": 2276 + }, + { + "epoch": 2.71, + "learning_rate": 0.0001655457207210922, + "loss": 1.0467, + "step": 2277 + }, + { + "epoch": 2.71, + "learning_rate": 0.00016551719883402271, + "loss": 0.9432, + "step": 2278 + }, + { + "epoch": 2.71, + "learning_rate": 0.00016548866760562978, + "loss": 1.1808, + "step": 2279 + }, + { + "epoch": 2.71, + "learning_rate": 0.00016546012703998138, + "loss": 1.1094, + "step": 2280 + }, + { + "epoch": 2.71, + "learning_rate": 0.00016543157714114673, + "loss": 1.3914, + "step": 2281 + }, + { + "epoch": 2.71, + "learning_rate": 0.00016540301791319645, + "loss": 1.0402, + "step": 2282 + }, + { + "epoch": 2.71, + "learning_rate": 0.00016537444936020246, + "loss": 0.9815, + "step": 2283 + }, + { + "epoch": 2.71, + "learning_rate": 0.000165345871486238, + "loss": 0.9722, + "step": 2284 + }, + { + "epoch": 2.72, + "learning_rate": 0.00016531728429537766, + "loss": 0.919, + "step": 2285 + }, + { + "epoch": 2.72, + "learning_rate": 0.00016528868779169738, + "loss": 1.1242, + "step": 2286 + }, + { + "epoch": 2.72, + "learning_rate": 0.00016526008197927436, + "loss": 1.1794, + "step": 2287 + }, + { + "epoch": 2.72, + "learning_rate": 0.00016523146686218718, + "loss": 1.434, + "step": 2288 + }, + { + "epoch": 2.72, + "learning_rate": 0.00016520284244451574, + "loss": 0.8463, + "step": 2289 + }, + { + "epoch": 2.72, + "learning_rate": 0.00016517420873034123, + "loss": 1.1736, + "step": 2290 + }, + { + "epoch": 2.72, + "learning_rate": 0.0001651455657237462, + "loss": 1.0431, + "step": 2291 + }, + { + "epoch": 2.72, + "learning_rate": 0.00016511691342881453, + "loss": 1.2796, + "step": 2292 + }, + { + "epoch": 2.73, + "learning_rate": 0.0001650882518496314, + "loss": 1.0578, + "step": 2293 + }, + { + "epoch": 2.73, + "learning_rate": 0.00016505958099028334, + "loss": 1.3914, + "step": 2294 + }, + { + "epoch": 2.73, + "learning_rate": 0.0001650309008548582, + "loss": 1.0046, + "step": 2295 + }, + { + "epoch": 2.73, + "learning_rate": 0.0001650022114474451, + "loss": 1.0246, + "step": 2296 + }, + { + "epoch": 2.73, + "learning_rate": 0.00016497351277213458, + "loss": 1.2789, + "step": 2297 + }, + { + "epoch": 2.73, + "learning_rate": 0.00016494480483301836, + "loss": 1.0036, + "step": 2298 + }, + { + "epoch": 2.73, + "learning_rate": 0.00016491608763418968, + "loss": 0.886, + "step": 2299 + }, + { + "epoch": 2.73, + "eval_loss": 2.3017475605010986, + "eval_runtime": 283.8846, + "eval_samples_per_second": 0.726, + "eval_steps_per_second": 0.726, + "step": 2299 + }, + { + "epoch": 2.73, + "learning_rate": 0.0001648873611797429, + "loss": 1.3953, + "step": 2300 + }, + { + "epoch": 2.73, + "learning_rate": 0.0001648586254737738, + "loss": 0.6972, + "step": 2301 + }, + { + "epoch": 2.74, + "learning_rate": 0.00016482988052037947, + "loss": 1.2311, + "step": 2302 + }, + { + "epoch": 2.74, + "learning_rate": 0.00016480112632365833, + "loss": 1.327, + "step": 2303 + }, + { + "epoch": 2.74, + "learning_rate": 0.0001647723628877101, + "loss": 0.9534, + "step": 2304 + }, + { + "epoch": 2.74, + "learning_rate": 0.0001647435902166358, + "loss": 0.9164, + "step": 2305 + }, + { + "epoch": 2.74, + "learning_rate": 0.0001647148083145378, + "loss": 1.1038, + "step": 2306 + }, + { + "epoch": 2.74, + "learning_rate": 0.00016468601718551976, + "loss": 1.0444, + "step": 2307 + }, + { + "epoch": 2.74, + "learning_rate": 0.00016465721683368666, + "loss": 1.2635, + "step": 2308 + }, + { + "epoch": 2.74, + "learning_rate": 0.00016462840726314486, + "loss": 1.1647, + "step": 2309 + }, + { + "epoch": 2.75, + "learning_rate": 0.00016459958847800187, + "loss": 1.3617, + "step": 2310 + }, + { + "epoch": 2.75, + "learning_rate": 0.00016457076048236675, + "loss": 1.2355, + "step": 2311 + }, + { + "epoch": 2.75, + "learning_rate": 0.00016454192328034962, + "loss": 0.9989, + "step": 2312 + }, + { + "epoch": 2.75, + "learning_rate": 0.00016451307687606213, + "loss": 1.1218, + "step": 2313 + }, + { + "epoch": 2.75, + "learning_rate": 0.00016448422127361706, + "loss": 0.8967, + "step": 2314 + }, + { + "epoch": 2.75, + "learning_rate": 0.0001644553564771287, + "loss": 1.159, + "step": 2315 + }, + { + "epoch": 2.75, + "learning_rate": 0.0001644264824907124, + "loss": 1.5901, + "step": 2316 + }, + { + "epoch": 2.75, + "learning_rate": 0.0001643975993184851, + "loss": 0.979, + "step": 2317 + }, + { + "epoch": 2.76, + "learning_rate": 0.00016436870696456482, + "loss": 0.8561, + "step": 2318 + }, + { + "epoch": 2.76, + "learning_rate": 0.00016433980543307107, + "loss": 0.9485, + "step": 2319 + }, + { + "epoch": 2.76, + "learning_rate": 0.00016431089472812444, + "loss": 0.7736, + "step": 2320 + }, + { + "epoch": 2.76, + "learning_rate": 0.00016428197485384707, + "loss": 1.2546, + "step": 2321 + }, + { + "epoch": 2.76, + "learning_rate": 0.00016425304581436226, + "loss": 0.9534, + "step": 2322 + }, + { + "epoch": 2.76, + "learning_rate": 0.0001642241076137947, + "loss": 0.8182, + "step": 2323 + }, + { + "epoch": 2.76, + "learning_rate": 0.0001641951602562703, + "loss": 1.1107, + "step": 2324 + }, + { + "epoch": 2.76, + "learning_rate": 0.0001641662037459164, + "loss": 1.0628, + "step": 2325 + }, + { + "epoch": 2.76, + "learning_rate": 0.00016413723808686147, + "loss": 1.6261, + "step": 2326 + }, + { + "epoch": 2.77, + "learning_rate": 0.0001641082632832354, + "loss": 1.0286, + "step": 2327 + }, + { + "epoch": 2.77, + "learning_rate": 0.0001640792793391694, + "loss": 0.5732, + "step": 2328 + }, + { + "epoch": 2.77, + "learning_rate": 0.00016405028625879594, + "loss": 1.0932, + "step": 2329 + }, + { + "epoch": 2.77, + "learning_rate": 0.00016402128404624882, + "loss": 1.2585, + "step": 2330 + }, + { + "epoch": 2.77, + "learning_rate": 0.00016399227270566308, + "loss": 0.8788, + "step": 2331 + }, + { + "epoch": 2.77, + "learning_rate": 0.0001639632522411751, + "loss": 1.1397, + "step": 2332 + }, + { + "epoch": 2.77, + "learning_rate": 0.00016393422265692262, + "loss": 1.3517, + "step": 2333 + }, + { + "epoch": 2.77, + "learning_rate": 0.0001639051839570446, + "loss": 1.1346, + "step": 2334 + }, + { + "epoch": 2.78, + "learning_rate": 0.00016387613614568126, + "loss": 0.9594, + "step": 2335 + }, + { + "epoch": 2.78, + "learning_rate": 0.0001638470792269743, + "loss": 1.0674, + "step": 2336 + }, + { + "epoch": 2.78, + "learning_rate": 0.00016381801320506653, + "loss": 0.9123, + "step": 2337 + }, + { + "epoch": 2.78, + "learning_rate": 0.00016378893808410215, + "loss": 1.1909, + "step": 2338 + }, + { + "epoch": 2.78, + "learning_rate": 0.00016375985386822664, + "loss": 1.0474, + "step": 2339 + }, + { + "epoch": 2.78, + "learning_rate": 0.00016373076056158675, + "loss": 0.8844, + "step": 2340 + }, + { + "epoch": 2.78, + "learning_rate": 0.0001637016581683306, + "loss": 1.1606, + "step": 2341 + }, + { + "epoch": 2.78, + "learning_rate": 0.00016367254669260749, + "loss": 0.6206, + "step": 2342 + }, + { + "epoch": 2.79, + "learning_rate": 0.00016364342613856816, + "loss": 0.7225, + "step": 2343 + }, + { + "epoch": 2.79, + "learning_rate": 0.00016361429651036446, + "loss": 1.1782, + "step": 2344 + }, + { + "epoch": 2.79, + "learning_rate": 0.00016358515781214977, + "loss": 1.0911, + "step": 2345 + }, + { + "epoch": 2.79, + "learning_rate": 0.00016355601004807856, + "loss": 1.2727, + "step": 2346 + }, + { + "epoch": 2.79, + "learning_rate": 0.00016352685322230663, + "loss": 0.8294, + "step": 2347 + }, + { + "epoch": 2.79, + "learning_rate": 0.00016349768733899117, + "loss": 1.1661, + "step": 2348 + }, + { + "epoch": 2.79, + "learning_rate": 0.00016346851240229057, + "loss": 0.8267, + "step": 2349 + }, + { + "epoch": 2.79, + "learning_rate": 0.00016343932841636456, + "loss": 1.2873, + "step": 2350 + }, + { + "epoch": 2.79, + "learning_rate": 0.00016341013538537412, + "loss": 1.2459, + "step": 2351 + }, + { + "epoch": 2.8, + "learning_rate": 0.00016338093331348156, + "loss": 0.8939, + "step": 2352 + }, + { + "epoch": 2.8, + "learning_rate": 0.00016335172220485042, + "loss": 1.024, + "step": 2353 + }, + { + "epoch": 2.8, + "learning_rate": 0.0001633225020636456, + "loss": 0.9981, + "step": 2354 + }, + { + "epoch": 2.8, + "learning_rate": 0.00016329327289403325, + "loss": 1.331, + "step": 2355 + }, + { + "epoch": 2.8, + "learning_rate": 0.00016326403470018084, + "loss": 0.7446, + "step": 2356 + }, + { + "epoch": 2.8, + "learning_rate": 0.00016323478748625703, + "loss": 1.1931, + "step": 2357 + }, + { + "epoch": 2.8, + "learning_rate": 0.00016320553125643187, + "loss": 1.1287, + "step": 2358 + }, + { + "epoch": 2.8, + "learning_rate": 0.00016317626601487667, + "loss": 1.109, + "step": 2359 + }, + { + "epoch": 2.81, + "learning_rate": 0.00016314699176576402, + "loss": 0.9946, + "step": 2360 + }, + { + "epoch": 2.81, + "learning_rate": 0.00016311770851326778, + "loss": 0.8347, + "step": 2361 + }, + { + "epoch": 2.81, + "learning_rate": 0.00016308841626156307, + "loss": 0.9214, + "step": 2362 + }, + { + "epoch": 2.81, + "learning_rate": 0.0001630591150148264, + "loss": 0.5907, + "step": 2363 + }, + { + "epoch": 2.81, + "learning_rate": 0.00016302980477723539, + "loss": 1.2412, + "step": 2364 + }, + { + "epoch": 2.81, + "learning_rate": 0.00016300048555296915, + "loss": 1.2908, + "step": 2365 + }, + { + "epoch": 2.81, + "learning_rate": 0.00016297115734620788, + "loss": 1.2345, + "step": 2366 + }, + { + "epoch": 2.81, + "learning_rate": 0.00016294182016113315, + "loss": 1.0418, + "step": 2367 + }, + { + "epoch": 2.82, + "learning_rate": 0.00016291247400192785, + "loss": 1.1457, + "step": 2368 + }, + { + "epoch": 2.82, + "learning_rate": 0.00016288311887277608, + "loss": 1.2529, + "step": 2369 + }, + { + "epoch": 2.82, + "learning_rate": 0.00016285375477786322, + "loss": 1.0013, + "step": 2370 + }, + { + "epoch": 2.82, + "learning_rate": 0.00016282438172137597, + "loss": 0.943, + "step": 2371 + }, + { + "epoch": 2.82, + "learning_rate": 0.00016279499970750226, + "loss": 0.7009, + "step": 2372 + }, + { + "epoch": 2.82, + "learning_rate": 0.00016276560874043137, + "loss": 0.9408, + "step": 2373 + }, + { + "epoch": 2.82, + "learning_rate": 0.0001627362088243538, + "loss": 1.1788, + "step": 2374 + }, + { + "epoch": 2.82, + "learning_rate": 0.0001627067999634613, + "loss": 0.8106, + "step": 2375 + }, + { + "epoch": 2.82, + "learning_rate": 0.00016267738216194696, + "loss": 1.1695, + "step": 2376 + }, + { + "epoch": 2.83, + "learning_rate": 0.0001626479554240051, + "loss": 0.9209, + "step": 2377 + }, + { + "epoch": 2.83, + "learning_rate": 0.00016261851975383137, + "loss": 0.9911, + "step": 2378 + }, + { + "epoch": 2.83, + "learning_rate": 0.00016258907515562262, + "loss": 1.3819, + "step": 2379 + }, + { + "epoch": 2.83, + "learning_rate": 0.000162559621633577, + "loss": 0.8926, + "step": 2380 + }, + { + "epoch": 2.83, + "learning_rate": 0.000162530159191894, + "loss": 1.0896, + "step": 2381 + }, + { + "epoch": 2.83, + "learning_rate": 0.00016250068783477424, + "loss": 0.8403, + "step": 2382 + }, + { + "epoch": 2.83, + "learning_rate": 0.00016247120756641972, + "loss": 0.7976, + "step": 2383 + }, + { + "epoch": 2.83, + "learning_rate": 0.0001624417183910337, + "loss": 0.8881, + "step": 2384 + }, + { + "epoch": 2.84, + "learning_rate": 0.0001624122203128207, + "loss": 0.8302, + "step": 2385 + }, + { + "epoch": 2.84, + "learning_rate": 0.0001623827133359865, + "loss": 1.3312, + "step": 2386 + }, + { + "epoch": 2.84, + "learning_rate": 0.0001623531974647381, + "loss": 1.003, + "step": 2387 + }, + { + "epoch": 2.84, + "learning_rate": 0.0001623236727032839, + "loss": 0.9487, + "step": 2388 + }, + { + "epoch": 2.84, + "learning_rate": 0.00016229413905583342, + "loss": 1.2259, + "step": 2389 + }, + { + "epoch": 2.84, + "learning_rate": 0.00016226459652659753, + "loss": 0.9327, + "step": 2390 + }, + { + "epoch": 2.84, + "learning_rate": 0.00016223504511978838, + "loss": 0.7336, + "step": 2391 + }, + { + "epoch": 2.84, + "learning_rate": 0.00016220548483961934, + "loss": 1.0454, + "step": 2392 + }, + { + "epoch": 2.85, + "learning_rate": 0.00016217591569030505, + "loss": 1.3371, + "step": 2393 + }, + { + "epoch": 2.85, + "learning_rate": 0.00016214633767606143, + "loss": 1.0814, + "step": 2394 + }, + { + "epoch": 2.85, + "learning_rate": 0.00016211675080110566, + "loss": 1.2274, + "step": 2395 + }, + { + "epoch": 2.85, + "learning_rate": 0.0001620871550696562, + "loss": 0.9775, + "step": 2396 + }, + { + "epoch": 2.85, + "learning_rate": 0.00016205755048593273, + "loss": 1.0323, + "step": 2397 + }, + { + "epoch": 2.85, + "learning_rate": 0.00016202793705415622, + "loss": 1.5101, + "step": 2398 + }, + { + "epoch": 2.85, + "learning_rate": 0.00016199831477854893, + "loss": 0.8118, + "step": 2399 + }, + { + "epoch": 2.85, + "learning_rate": 0.0001619686836633343, + "loss": 1.0233, + "step": 2400 + }, + { + "epoch": 2.85, + "learning_rate": 0.00016193904371273715, + "loss": 0.9038, + "step": 2401 + }, + { + "epoch": 2.86, + "learning_rate": 0.00016190939493098344, + "loss": 0.875, + "step": 2402 + }, + { + "epoch": 2.86, + "learning_rate": 0.00016187973732230038, + "loss": 1.3274, + "step": 2403 + }, + { + "epoch": 2.86, + "learning_rate": 0.00016185007089091665, + "loss": 1.081, + "step": 2404 + }, + { + "epoch": 2.86, + "learning_rate": 0.00016182039564106192, + "loss": 1.0841, + "step": 2405 + }, + { + "epoch": 2.86, + "learning_rate": 0.00016179071157696728, + "loss": 1.3208, + "step": 2406 + }, + { + "epoch": 2.86, + "learning_rate": 0.000161761018702865, + "loss": 1.1854, + "step": 2407 + }, + { + "epoch": 2.86, + "learning_rate": 0.0001617313170229887, + "loss": 1.0651, + "step": 2408 + }, + { + "epoch": 2.86, + "learning_rate": 0.0001617016065415731, + "loss": 1.1398, + "step": 2409 + }, + { + "epoch": 2.87, + "learning_rate": 0.00016167188726285434, + "loss": 1.2778, + "step": 2410 + }, + { + "epoch": 2.87, + "learning_rate": 0.00016164215919106968, + "loss": 1.6758, + "step": 2411 + }, + { + "epoch": 2.87, + "learning_rate": 0.0001616124223304577, + "loss": 0.8341, + "step": 2412 + }, + { + "epoch": 2.87, + "learning_rate": 0.00016158267668525832, + "loss": 0.9513, + "step": 2413 + }, + { + "epoch": 2.87, + "learning_rate": 0.00016155292225971253, + "loss": 0.9617, + "step": 2414 + }, + { + "epoch": 2.87, + "learning_rate": 0.00016152315905806268, + "loss": 0.8664, + "step": 2415 + }, + { + "epoch": 2.87, + "learning_rate": 0.00016149338708455237, + "loss": 1.331, + "step": 2416 + }, + { + "epoch": 2.87, + "learning_rate": 0.00016146360634342643, + "loss": 1.4212, + "step": 2417 + }, + { + "epoch": 2.88, + "learning_rate": 0.00016143381683893094, + "loss": 1.2126, + "step": 2418 + }, + { + "epoch": 2.88, + "learning_rate": 0.00016140401857531322, + "loss": 0.934, + "step": 2419 + }, + { + "epoch": 2.88, + "learning_rate": 0.00016137421155682183, + "loss": 1.2417, + "step": 2420 + }, + { + "epoch": 2.88, + "learning_rate": 0.0001613443957877067, + "loss": 1.637, + "step": 2421 + }, + { + "epoch": 2.88, + "learning_rate": 0.00016131457127221881, + "loss": 1.1456, + "step": 2422 + }, + { + "epoch": 2.88, + "learning_rate": 0.00016128473801461053, + "loss": 0.9402, + "step": 2423 + }, + { + "epoch": 2.88, + "learning_rate": 0.0001612548960191354, + "loss": 1.3797, + "step": 2424 + }, + { + "epoch": 2.88, + "learning_rate": 0.0001612250452900483, + "loss": 0.8191, + "step": 2425 + }, + { + "epoch": 2.88, + "learning_rate": 0.0001611951858316052, + "loss": 1.1725, + "step": 2426 + }, + { + "epoch": 2.89, + "learning_rate": 0.00016116531764806346, + "loss": 1.5701, + "step": 2427 + }, + { + "epoch": 2.89, + "learning_rate": 0.00016113544074368164, + "loss": 1.0591, + "step": 2428 + }, + { + "epoch": 2.89, + "learning_rate": 0.00016110555512271953, + "loss": 1.03, + "step": 2429 + }, + { + "epoch": 2.89, + "learning_rate": 0.0001610756607894382, + "loss": 1.1829, + "step": 2430 + }, + { + "epoch": 2.89, + "learning_rate": 0.00016104575774809985, + "loss": 1.2222, + "step": 2431 + }, + { + "epoch": 2.89, + "learning_rate": 0.00016101584600296804, + "loss": 1.1537, + "step": 2432 + }, + { + "epoch": 2.89, + "learning_rate": 0.00016098592555830753, + "loss": 1.0973, + "step": 2433 + }, + { + "epoch": 2.89, + "learning_rate": 0.00016095599641838436, + "loss": 1.0793, + "step": 2434 + }, + { + "epoch": 2.9, + "learning_rate": 0.00016092605858746573, + "loss": 1.3484, + "step": 2435 + }, + { + "epoch": 2.9, + "learning_rate": 0.0001608961120698201, + "loss": 1.1689, + "step": 2436 + }, + { + "epoch": 2.9, + "learning_rate": 0.00016086615686971726, + "loss": 1.0864, + "step": 2437 + }, + { + "epoch": 2.9, + "learning_rate": 0.00016083619299142813, + "loss": 1.2451, + "step": 2438 + }, + { + "epoch": 2.9, + "learning_rate": 0.0001608062204392249, + "loss": 0.9593, + "step": 2439 + }, + { + "epoch": 2.9, + "learning_rate": 0.00016077623921738102, + "loss": 0.9816, + "step": 2440 + }, + { + "epoch": 2.9, + "learning_rate": 0.00016074624933017112, + "loss": 1.0845, + "step": 2441 + }, + { + "epoch": 2.9, + "learning_rate": 0.00016071625078187114, + "loss": 0.9875, + "step": 2442 + }, + { + "epoch": 2.91, + "learning_rate": 0.0001606862435767582, + "loss": 0.8758, + "step": 2443 + }, + { + "epoch": 2.91, + "learning_rate": 0.00016065622771911067, + "loss": 0.9499, + "step": 2444 + }, + { + "epoch": 2.91, + "learning_rate": 0.00016062620321320823, + "loss": 1.1133, + "step": 2445 + }, + { + "epoch": 2.91, + "learning_rate": 0.0001605961700633316, + "loss": 0.7228, + "step": 2446 + }, + { + "epoch": 2.91, + "learning_rate": 0.00016056612827376293, + "loss": 1.2297, + "step": 2447 + }, + { + "epoch": 2.91, + "learning_rate": 0.0001605360778487855, + "loss": 1.0251, + "step": 2448 + }, + { + "epoch": 2.91, + "learning_rate": 0.00016050601879268386, + "loss": 0.8097, + "step": 2449 + }, + { + "epoch": 2.91, + "learning_rate": 0.00016047595110974376, + "loss": 0.9872, + "step": 2450 + }, + { + "epoch": 2.91, + "learning_rate": 0.0001604458748042522, + "loss": 1.1119, + "step": 2451 + }, + { + "epoch": 2.92, + "learning_rate": 0.0001604157898804974, + "loss": 0.8256, + "step": 2452 + }, + { + "epoch": 2.92, + "learning_rate": 0.00016038569634276882, + "loss": 0.9036, + "step": 2453 + }, + { + "epoch": 2.92, + "learning_rate": 0.00016035559419535716, + "loss": 1.1173, + "step": 2454 + }, + { + "epoch": 2.92, + "learning_rate": 0.00016032548344255428, + "loss": 1.3173, + "step": 2455 + }, + { + "epoch": 2.92, + "learning_rate": 0.00016029536408865337, + "loss": 0.717, + "step": 2456 + }, + { + "epoch": 2.92, + "learning_rate": 0.00016026523613794878, + "loss": 0.9806, + "step": 2457 + }, + { + "epoch": 2.92, + "learning_rate": 0.00016023509959473605, + "loss": 1.1509, + "step": 2458 + }, + { + "epoch": 2.92, + "learning_rate": 0.00016020495446331207, + "loss": 1.0454, + "step": 2459 + }, + { + "epoch": 2.93, + "learning_rate": 0.0001601748007479748, + "loss": 1.183, + "step": 2460 + }, + { + "epoch": 2.93, + "learning_rate": 0.0001601446384530236, + "loss": 1.2611, + "step": 2461 + }, + { + "epoch": 2.93, + "learning_rate": 0.00016011446758275888, + "loss": 1.0377, + "step": 2462 + }, + { + "epoch": 2.93, + "learning_rate": 0.00016008428814148236, + "loss": 1.2111, + "step": 2463 + }, + { + "epoch": 2.93, + "learning_rate": 0.00016005410013349698, + "loss": 1.0952, + "step": 2464 + }, + { + "epoch": 2.93, + "learning_rate": 0.00016002390356310685, + "loss": 0.7589, + "step": 2465 + }, + { + "epoch": 2.93, + "learning_rate": 0.00015999369843461742, + "loss": 0.8543, + "step": 2466 + }, + { + "epoch": 2.93, + "learning_rate": 0.00015996348475233525, + "loss": 1.1509, + "step": 2467 + }, + { + "epoch": 2.94, + "learning_rate": 0.0001599332625205681, + "loss": 1.287, + "step": 2468 + }, + { + "epoch": 2.94, + "learning_rate": 0.00015990303174362512, + "loss": 1.0401, + "step": 2469 + }, + { + "epoch": 2.94, + "learning_rate": 0.0001598727924258164, + "loss": 1.0247, + "step": 2470 + }, + { + "epoch": 2.94, + "learning_rate": 0.00015984254457145354, + "loss": 1.1537, + "step": 2471 + }, + { + "epoch": 2.94, + "learning_rate": 0.00015981228818484917, + "loss": 0.9606, + "step": 2472 + }, + { + "epoch": 2.94, + "learning_rate": 0.0001597820232703172, + "loss": 0.8709, + "step": 2473 + }, + { + "epoch": 2.94, + "learning_rate": 0.00015975174983217275, + "loss": 1.2827, + "step": 2474 + }, + { + "epoch": 2.94, + "learning_rate": 0.00015972146787473213, + "loss": 0.8057, + "step": 2475 + }, + { + "epoch": 2.94, + "learning_rate": 0.0001596911774023129, + "loss": 1.0857, + "step": 2476 + }, + { + "epoch": 2.95, + "learning_rate": 0.00015966087841923386, + "loss": 1.1731, + "step": 2477 + }, + { + "epoch": 2.95, + "learning_rate": 0.0001596305709298149, + "loss": 0.8871, + "step": 2478 + }, + { + "epoch": 2.95, + "learning_rate": 0.00015960025493837727, + "loss": 1.0671, + "step": 2479 + }, + { + "epoch": 2.95, + "learning_rate": 0.00015956993044924334, + "loss": 1.3735, + "step": 2480 + }, + { + "epoch": 2.95, + "learning_rate": 0.00015953959746673675, + "loss": 1.4655, + "step": 2481 + }, + { + "epoch": 2.95, + "learning_rate": 0.00015950925599518228, + "loss": 1.3975, + "step": 2482 + }, + { + "epoch": 2.95, + "learning_rate": 0.00015947890603890602, + "loss": 0.9468, + "step": 2483 + }, + { + "epoch": 2.95, + "learning_rate": 0.0001594485476022352, + "loss": 0.9976, + "step": 2484 + }, + { + "epoch": 2.96, + "learning_rate": 0.00015941818068949818, + "loss": 0.6732, + "step": 2485 + }, + { + "epoch": 2.96, + "learning_rate": 0.00015938780530502474, + "loss": 0.9848, + "step": 2486 + }, + { + "epoch": 2.96, + "learning_rate": 0.00015935742145314568, + "loss": 1.2441, + "step": 2487 + }, + { + "epoch": 2.96, + "learning_rate": 0.0001593270291381931, + "loss": 0.9631, + "step": 2488 + }, + { + "epoch": 2.96, + "learning_rate": 0.00015929662836450029, + "loss": 0.8868, + "step": 2489 + }, + { + "epoch": 2.96, + "learning_rate": 0.0001592662191364017, + "loss": 0.9063, + "step": 2490 + }, + { + "epoch": 2.96, + "learning_rate": 0.00015923580145823303, + "loss": 0.6886, + "step": 2491 + }, + { + "epoch": 2.96, + "learning_rate": 0.0001592053753343312, + "loss": 1.0702, + "step": 2492 + }, + { + "epoch": 2.97, + "learning_rate": 0.0001591749407690343, + "loss": 1.3879, + "step": 2493 + }, + { + "epoch": 2.97, + "learning_rate": 0.00015914449776668167, + "loss": 1.1048, + "step": 2494 + }, + { + "epoch": 2.97, + "learning_rate": 0.0001591140463316137, + "loss": 0.9921, + "step": 2495 + }, + { + "epoch": 2.97, + "learning_rate": 0.00015908358646817225, + "loss": 1.3042, + "step": 2496 + }, + { + "epoch": 2.97, + "learning_rate": 0.00015905311818070015, + "loss": 0.8413, + "step": 2497 + }, + { + "epoch": 2.97, + "learning_rate": 0.00015902264147354153, + "loss": 1.5201, + "step": 2498 + }, + { + "epoch": 2.97, + "learning_rate": 0.0001589921563510417, + "loss": 1.0727, + "step": 2499 + }, + { + "epoch": 2.97, + "learning_rate": 0.0001589616628175472, + "loss": 1.0439, + "step": 2500 + }, + { + "epoch": 2.97, + "learning_rate": 0.0001589311608774057, + "loss": 1.2308, + "step": 2501 + }, + { + "epoch": 2.98, + "learning_rate": 0.00015890065053496613, + "loss": 1.1155, + "step": 2502 + }, + { + "epoch": 2.98, + "learning_rate": 0.00015887013179457862, + "loss": 1.3345, + "step": 2503 + }, + { + "epoch": 2.98, + "learning_rate": 0.00015883960466059444, + "loss": 0.9551, + "step": 2504 + }, + { + "epoch": 2.98, + "learning_rate": 0.0001588090691373661, + "loss": 1.0713, + "step": 2505 + }, + { + "epoch": 2.98, + "learning_rate": 0.00015877852522924732, + "loss": 1.299, + "step": 2506 + }, + { + "epoch": 2.98, + "learning_rate": 0.000158747972940593, + "loss": 0.8535, + "step": 2507 + }, + { + "epoch": 2.98, + "learning_rate": 0.0001587174122757592, + "loss": 0.9924, + "step": 2508 + }, + { + "epoch": 2.98, + "eval_loss": 2.328662395477295, + "eval_runtime": 283.7765, + "eval_samples_per_second": 0.726, + "eval_steps_per_second": 0.726, + "step": 2508 + }, + { + "epoch": 2.98, + "learning_rate": 0.0001586868432391032, + "loss": 1.0512, + "step": 2509 + }, + { + "epoch": 2.99, + "learning_rate": 0.00015865626583498355, + "loss": 1.2775, + "step": 2510 + }, + { + "epoch": 2.99, + "learning_rate": 0.00015862568006775983, + "loss": 0.7054, + "step": 2511 + }, + { + "epoch": 2.99, + "learning_rate": 0.00015859508594179294, + "loss": 0.8524, + "step": 2512 + }, + { + "epoch": 2.99, + "learning_rate": 0.00015856448346144496, + "loss": 0.9871, + "step": 2513 + }, + { + "epoch": 2.99, + "learning_rate": 0.00015853387263107909, + "loss": 0.8642, + "step": 2514 + }, + { + "epoch": 2.99, + "learning_rate": 0.00015850325345505975, + "loss": 1.1789, + "step": 2515 + }, + { + "epoch": 2.99, + "learning_rate": 0.00015847262593775266, + "loss": 1.2765, + "step": 2516 + }, + { + "epoch": 2.99, + "learning_rate": 0.00015844199008352458, + "loss": 0.6272, + "step": 2517 + }, + { + "epoch": 3.0, + "learning_rate": 0.00015841134589674352, + "loss": 1.3037, + "step": 2518 + }, + { + "epoch": 3.0, + "learning_rate": 0.00015838069338177863, + "loss": 1.054, + "step": 2519 + }, + { + "epoch": 3.0, + "learning_rate": 0.00015835003254300039, + "loss": 1.1942, + "step": 2520 + }, + { + "epoch": 3.0, + "learning_rate": 0.00015831936338478025, + "loss": 0.8866, + "step": 2521 + }, + { + "epoch": 3.0, + "learning_rate": 0.00015828868591149104, + "loss": 1.1444, + "step": 2522 + }, + { + "epoch": 3.0, + "learning_rate": 0.00015825800012750666, + "loss": 0.8597, + "step": 2523 + }, + { + "epoch": 3.0, + "learning_rate": 0.0001582273060372023, + "loss": 0.7731, + "step": 2524 + }, + { + "epoch": 3.0, + "learning_rate": 0.00015819660364495416, + "loss": 1.1953, + "step": 2525 + }, + { + "epoch": 3.0, + "learning_rate": 0.0001581658929551398, + "loss": 1.3946, + "step": 2526 + }, + { + "epoch": 3.01, + "learning_rate": 0.00015813517397213791, + "loss": 1.0173, + "step": 2527 + }, + { + "epoch": 3.01, + "learning_rate": 0.00015810444670032831, + "loss": 1.1762, + "step": 2528 + }, + { + "epoch": 3.01, + "learning_rate": 0.00015807371114409202, + "loss": 0.7283, + "step": 2529 + }, + { + "epoch": 3.01, + "learning_rate": 0.00015804296730781135, + "loss": 1.1515, + "step": 2530 + }, + { + "epoch": 3.01, + "learning_rate": 0.00015801221519586958, + "loss": 0.9389, + "step": 2531 + }, + { + "epoch": 3.01, + "learning_rate": 0.0001579814548126514, + "loss": 1.1869, + "step": 2532 + }, + { + "epoch": 3.01, + "learning_rate": 0.00015795068616254247, + "loss": 1.2957, + "step": 2533 + }, + { + "epoch": 3.01, + "learning_rate": 0.00015791990924992981, + "loss": 1.0514, + "step": 2534 + }, + { + "epoch": 3.02, + "learning_rate": 0.00015788912407920148, + "loss": 0.6762, + "step": 2535 + }, + { + "epoch": 3.0, + "learning_rate": 0.00015785833065474683, + "loss": 0.4121, + "step": 2536 + }, + { + "epoch": 3.0, + "learning_rate": 0.00015782752898095627, + "loss": 0.4532, + "step": 2537 + }, + { + "epoch": 3.0, + "learning_rate": 0.0001577967190622215, + "loss": 0.4847, + "step": 2538 + }, + { + "epoch": 3.0, + "learning_rate": 0.0001577659009029353, + "loss": 0.8313, + "step": 2539 + }, + { + "epoch": 3.01, + "learning_rate": 0.00015773507450749172, + "loss": 0.5304, + "step": 2540 + }, + { + "epoch": 3.01, + "learning_rate": 0.00015770423988028588, + "loss": 0.6003, + "step": 2541 + }, + { + "epoch": 3.01, + "learning_rate": 0.00015767339702571414, + "loss": 0.3988, + "step": 2542 + }, + { + "epoch": 3.01, + "learning_rate": 0.00015764254594817398, + "loss": 0.6133, + "step": 2543 + }, + { + "epoch": 3.01, + "learning_rate": 0.0001576116866520642, + "loss": 0.4858, + "step": 2544 + }, + { + "epoch": 3.01, + "learning_rate": 0.00015758081914178456, + "loss": 0.3691, + "step": 2545 + }, + { + "epoch": 3.01, + "learning_rate": 0.0001575499434217361, + "loss": 0.5441, + "step": 2546 + }, + { + "epoch": 3.01, + "learning_rate": 0.0001575190594963211, + "loss": 0.4605, + "step": 2547 + }, + { + "epoch": 3.02, + "learning_rate": 0.00015748816736994284, + "loss": 0.3681, + "step": 2548 + }, + { + "epoch": 3.02, + "learning_rate": 0.00015745726704700593, + "loss": 0.4113, + "step": 2549 + }, + { + "epoch": 3.02, + "learning_rate": 0.00015742635853191608, + "loss": 0.5233, + "step": 2550 + }, + { + "epoch": 3.02, + "learning_rate": 0.00015739544182908014, + "loss": 0.356, + "step": 2551 + }, + { + "epoch": 3.02, + "learning_rate": 0.00015736451694290616, + "loss": 0.4105, + "step": 2552 + }, + { + "epoch": 3.02, + "learning_rate": 0.00015733358387780337, + "loss": 0.4451, + "step": 2553 + }, + { + "epoch": 3.02, + "learning_rate": 0.00015730264263818212, + "loss": 0.5023, + "step": 2554 + }, + { + "epoch": 3.02, + "learning_rate": 0.000157271693228454, + "loss": 0.3671, + "step": 2555 + }, + { + "epoch": 3.03, + "learning_rate": 0.0001572407356530317, + "loss": 0.7077, + "step": 2556 + }, + { + "epoch": 3.03, + "learning_rate": 0.00015720976991632913, + "loss": 0.4439, + "step": 2557 + }, + { + "epoch": 3.03, + "learning_rate": 0.00015717879602276122, + "loss": 0.5961, + "step": 2558 + }, + { + "epoch": 3.03, + "learning_rate": 0.0001571478139767443, + "loss": 0.4269, + "step": 2559 + }, + { + "epoch": 3.03, + "learning_rate": 0.00015711682378269565, + "loss": 0.3427, + "step": 2560 + }, + { + "epoch": 3.03, + "learning_rate": 0.00015708582544503386, + "loss": 0.5736, + "step": 2561 + }, + { + "epoch": 3.03, + "learning_rate": 0.00015705481896817854, + "loss": 0.3707, + "step": 2562 + }, + { + "epoch": 3.03, + "learning_rate": 0.0001570238043565506, + "loss": 0.4076, + "step": 2563 + }, + { + "epoch": 3.03, + "learning_rate": 0.000156992781614572, + "loss": 0.6514, + "step": 2564 + }, + { + "epoch": 3.04, + "learning_rate": 0.00015696175074666598, + "loss": 0.4012, + "step": 2565 + }, + { + "epoch": 3.04, + "learning_rate": 0.0001569307117572568, + "loss": 0.3492, + "step": 2566 + }, + { + "epoch": 3.04, + "learning_rate": 0.00015689966465076992, + "loss": 0.4121, + "step": 2567 + }, + { + "epoch": 3.04, + "learning_rate": 0.00015686860943163206, + "loss": 0.5769, + "step": 2568 + }, + { + "epoch": 3.04, + "learning_rate": 0.00015683754610427094, + "loss": 0.4872, + "step": 2569 + }, + { + "epoch": 3.04, + "learning_rate": 0.00015680647467311557, + "loss": 0.5518, + "step": 2570 + }, + { + "epoch": 3.04, + "learning_rate": 0.00015677539514259608, + "loss": 0.411, + "step": 2571 + }, + { + "epoch": 3.04, + "learning_rate": 0.00015674430751714361, + "loss": 0.3443, + "step": 2572 + }, + { + "epoch": 3.05, + "learning_rate": 0.00015671321180119074, + "loss": 0.3706, + "step": 2573 + }, + { + "epoch": 3.05, + "learning_rate": 0.0001566821079991709, + "loss": 0.6168, + "step": 2574 + }, + { + "epoch": 3.05, + "learning_rate": 0.0001566509961155189, + "loss": 0.3726, + "step": 2575 + }, + { + "epoch": 3.05, + "learning_rate": 0.00015661987615467058, + "loss": 0.3976, + "step": 2576 + }, + { + "epoch": 3.05, + "learning_rate": 0.00015658874812106297, + "loss": 0.3697, + "step": 2577 + }, + { + "epoch": 3.05, + "learning_rate": 0.00015655761201913425, + "loss": 0.2759, + "step": 2578 + }, + { + "epoch": 3.05, + "learning_rate": 0.00015652646785332378, + "loss": 0.3572, + "step": 2579 + }, + { + "epoch": 3.05, + "learning_rate": 0.000156495315628072, + "loss": 0.5333, + "step": 2580 + }, + { + "epoch": 3.06, + "learning_rate": 0.00015646415534782056, + "loss": 0.4004, + "step": 2581 + }, + { + "epoch": 3.06, + "learning_rate": 0.0001564329870170122, + "loss": 0.4736, + "step": 2582 + }, + { + "epoch": 3.06, + "learning_rate": 0.00015640181064009088, + "loss": 0.4814, + "step": 2583 + }, + { + "epoch": 3.06, + "learning_rate": 0.00015637062622150168, + "loss": 0.3351, + "step": 2584 + }, + { + "epoch": 3.06, + "learning_rate": 0.00015633943376569081, + "loss": 0.4497, + "step": 2585 + }, + { + "epoch": 3.06, + "learning_rate": 0.00015630823327710558, + "loss": 0.4202, + "step": 2586 + }, + { + "epoch": 3.06, + "learning_rate": 0.00015627702476019457, + "loss": 0.5934, + "step": 2587 + }, + { + "epoch": 3.06, + "learning_rate": 0.0001562458082194074, + "loss": 0.4664, + "step": 2588 + }, + { + "epoch": 3.06, + "learning_rate": 0.00015621458365919487, + "loss": 0.4077, + "step": 2589 + }, + { + "epoch": 3.07, + "learning_rate": 0.00015618335108400893, + "loss": 0.5244, + "step": 2590 + }, + { + "epoch": 3.07, + "learning_rate": 0.00015615211049830268, + "loss": 0.5042, + "step": 2591 + }, + { + "epoch": 3.07, + "learning_rate": 0.00015612086190653027, + "loss": 0.3442, + "step": 2592 + }, + { + "epoch": 3.07, + "learning_rate": 0.00015608960531314717, + "loss": 0.6337, + "step": 2593 + }, + { + "epoch": 3.07, + "learning_rate": 0.00015605834072260984, + "loss": 0.3542, + "step": 2594 + }, + { + "epoch": 3.07, + "learning_rate": 0.0001560270681393759, + "loss": 0.5113, + "step": 2595 + }, + { + "epoch": 3.07, + "learning_rate": 0.0001559957875679042, + "loss": 0.4346, + "step": 2596 + }, + { + "epoch": 3.07, + "learning_rate": 0.00015596449901265463, + "loss": 0.5231, + "step": 2597 + }, + { + "epoch": 3.08, + "learning_rate": 0.00015593320247808822, + "loss": 0.5193, + "step": 2598 + }, + { + "epoch": 3.08, + "learning_rate": 0.0001559018979686673, + "loss": 0.3575, + "step": 2599 + }, + { + "epoch": 3.08, + "learning_rate": 0.00015587058548885505, + "loss": 0.6356, + "step": 2600 + }, + { + "epoch": 3.08, + "learning_rate": 0.00015583926504311605, + "loss": 0.3313, + "step": 2601 + }, + { + "epoch": 3.08, + "learning_rate": 0.00015580793663591585, + "loss": 0.356, + "step": 2602 + }, + { + "epoch": 3.08, + "learning_rate": 0.00015577660027172127, + "loss": 0.5498, + "step": 2603 + }, + { + "epoch": 3.08, + "learning_rate": 0.0001557452559550001, + "loss": 0.3973, + "step": 2604 + }, + { + "epoch": 3.08, + "learning_rate": 0.0001557139036902215, + "loss": 0.4751, + "step": 2605 + }, + { + "epoch": 3.09, + "learning_rate": 0.00015568254348185544, + "loss": 0.4297, + "step": 2606 + }, + { + "epoch": 3.09, + "learning_rate": 0.00015565117533437335, + "loss": 0.4299, + "step": 2607 + }, + { + "epoch": 3.09, + "learning_rate": 0.00015561979925224754, + "loss": 0.4651, + "step": 2608 + }, + { + "epoch": 3.09, + "learning_rate": 0.00015558841523995162, + "loss": 0.474, + "step": 2609 + }, + { + "epoch": 3.09, + "learning_rate": 0.00015555702330196023, + "loss": 0.4143, + "step": 2610 + }, + { + "epoch": 3.09, + "learning_rate": 0.0001555256234427492, + "loss": 0.393, + "step": 2611 + }, + { + "epoch": 3.09, + "learning_rate": 0.00015549421566679546, + "loss": 0.3738, + "step": 2612 + }, + { + "epoch": 3.09, + "learning_rate": 0.00015546279997857704, + "loss": 0.4394, + "step": 2613 + }, + { + "epoch": 3.09, + "learning_rate": 0.0001554313763825732, + "loss": 0.3702, + "step": 2614 + }, + { + "epoch": 3.1, + "learning_rate": 0.00015539994488326418, + "loss": 0.4594, + "step": 2615 + }, + { + "epoch": 3.1, + "learning_rate": 0.00015536850548513147, + "loss": 0.3249, + "step": 2616 + }, + { + "epoch": 3.1, + "learning_rate": 0.00015533705819265764, + "loss": 0.3857, + "step": 2617 + }, + { + "epoch": 3.1, + "learning_rate": 0.0001553056030103264, + "loss": 0.3272, + "step": 2618 + }, + { + "epoch": 3.1, + "learning_rate": 0.00015527413994262257, + "loss": 0.5204, + "step": 2619 + }, + { + "epoch": 3.1, + "learning_rate": 0.00015524266899403206, + "loss": 0.3653, + "step": 2620 + }, + { + "epoch": 3.1, + "learning_rate": 0.000155211190169042, + "loss": 0.4698, + "step": 2621 + }, + { + "epoch": 3.1, + "learning_rate": 0.0001551797034721405, + "loss": 0.5949, + "step": 2622 + }, + { + "epoch": 3.11, + "learning_rate": 0.00015514820890781693, + "loss": 0.4074, + "step": 2623 + }, + { + "epoch": 3.11, + "learning_rate": 0.00015511670648056178, + "loss": 0.3586, + "step": 2624 + }, + { + "epoch": 3.11, + "learning_rate": 0.0001550851961948665, + "loss": 0.6494, + "step": 2625 + }, + { + "epoch": 3.11, + "learning_rate": 0.00015505367805522383, + "loss": 0.4914, + "step": 2626 + }, + { + "epoch": 3.11, + "learning_rate": 0.0001550221520661276, + "loss": 0.4594, + "step": 2627 + }, + { + "epoch": 3.11, + "learning_rate": 0.00015499061823207266, + "loss": 0.4102, + "step": 2628 + }, + { + "epoch": 3.11, + "learning_rate": 0.00015495907655755506, + "loss": 0.4229, + "step": 2629 + }, + { + "epoch": 3.11, + "learning_rate": 0.000154927527047072, + "loss": 0.7218, + "step": 2630 + }, + { + "epoch": 3.12, + "learning_rate": 0.0001548959697051217, + "loss": 0.6929, + "step": 2631 + }, + { + "epoch": 3.12, + "learning_rate": 0.00015486440453620358, + "loss": 0.3628, + "step": 2632 + }, + { + "epoch": 3.12, + "learning_rate": 0.00015483283154481815, + "loss": 0.4433, + "step": 2633 + }, + { + "epoch": 3.12, + "learning_rate": 0.00015480125073546704, + "loss": 0.3912, + "step": 2634 + }, + { + "epoch": 3.12, + "learning_rate": 0.0001547696621126529, + "loss": 0.3682, + "step": 2635 + }, + { + "epoch": 3.12, + "learning_rate": 0.00015473806568087968, + "loss": 0.354, + "step": 2636 + }, + { + "epoch": 3.12, + "learning_rate": 0.0001547064614446523, + "loss": 0.4789, + "step": 2637 + }, + { + "epoch": 3.12, + "learning_rate": 0.0001546748494084768, + "loss": 0.382, + "step": 2638 + }, + { + "epoch": 3.12, + "learning_rate": 0.00015464322957686041, + "loss": 0.4954, + "step": 2639 + }, + { + "epoch": 3.13, + "learning_rate": 0.00015461160195431148, + "loss": 0.3273, + "step": 2640 + }, + { + "epoch": 3.13, + "learning_rate": 0.0001545799665453393, + "loss": 0.3414, + "step": 2641 + }, + { + "epoch": 3.13, + "learning_rate": 0.00015454832335445447, + "loss": 0.5479, + "step": 2642 + }, + { + "epoch": 3.13, + "learning_rate": 0.0001545166723861686, + "loss": 0.4963, + "step": 2643 + }, + { + "epoch": 3.13, + "learning_rate": 0.00015448501364499445, + "loss": 0.5547, + "step": 2644 + }, + { + "epoch": 3.13, + "learning_rate": 0.0001544533471354458, + "loss": 0.4637, + "step": 2645 + }, + { + "epoch": 3.13, + "learning_rate": 0.00015442167286203767, + "loss": 0.4248, + "step": 2646 + }, + { + "epoch": 3.13, + "learning_rate": 0.00015438999082928608, + "loss": 0.4213, + "step": 2647 + }, + { + "epoch": 3.14, + "learning_rate": 0.00015435830104170822, + "loss": 0.3734, + "step": 2648 + }, + { + "epoch": 3.14, + "learning_rate": 0.00015432660350382234, + "loss": 0.4627, + "step": 2649 + }, + { + "epoch": 3.14, + "learning_rate": 0.0001542948982201479, + "loss": 0.3422, + "step": 2650 + }, + { + "epoch": 3.14, + "learning_rate": 0.00015426318519520525, + "loss": 0.4409, + "step": 2651 + }, + { + "epoch": 3.14, + "learning_rate": 0.00015423146443351607, + "loss": 0.3717, + "step": 2652 + }, + { + "epoch": 3.14, + "learning_rate": 0.00015419973593960298, + "loss": 0.4349, + "step": 2653 + }, + { + "epoch": 3.14, + "learning_rate": 0.00015416799971798985, + "loss": 0.5349, + "step": 2654 + }, + { + "epoch": 3.14, + "learning_rate": 0.0001541362557732015, + "loss": 0.4511, + "step": 2655 + }, + { + "epoch": 3.15, + "learning_rate": 0.000154104504109764, + "loss": 0.5997, + "step": 2656 + }, + { + "epoch": 3.15, + "learning_rate": 0.00015407274473220434, + "loss": 0.661, + "step": 2657 + }, + { + "epoch": 3.15, + "learning_rate": 0.00015404097764505083, + "loss": 0.3456, + "step": 2658 + }, + { + "epoch": 3.15, + "learning_rate": 0.00015400920285283268, + "loss": 0.3416, + "step": 2659 + }, + { + "epoch": 3.15, + "learning_rate": 0.00015397742036008034, + "loss": 0.4707, + "step": 2660 + }, + { + "epoch": 3.15, + "learning_rate": 0.00015394563017132526, + "loss": 0.3221, + "step": 2661 + }, + { + "epoch": 3.15, + "learning_rate": 0.00015391383229110007, + "loss": 0.6108, + "step": 2662 + }, + { + "epoch": 3.15, + "learning_rate": 0.00015388202672393834, + "loss": 0.5504, + "step": 2663 + }, + { + "epoch": 3.15, + "learning_rate": 0.00015385021347437498, + "loss": 0.3973, + "step": 2664 + }, + { + "epoch": 3.16, + "learning_rate": 0.00015381839254694583, + "loss": 0.5149, + "step": 2665 + }, + { + "epoch": 3.16, + "learning_rate": 0.00015378656394618787, + "loss": 0.5853, + "step": 2666 + }, + { + "epoch": 3.16, + "learning_rate": 0.0001537547276766391, + "loss": 0.517, + "step": 2667 + }, + { + "epoch": 3.16, + "learning_rate": 0.00015372288374283875, + "loss": 0.5485, + "step": 2668 + }, + { + "epoch": 3.16, + "learning_rate": 0.00015369103214932703, + "loss": 0.4907, + "step": 2669 + }, + { + "epoch": 3.16, + "learning_rate": 0.0001536591729006453, + "loss": 0.3169, + "step": 2670 + }, + { + "epoch": 3.16, + "learning_rate": 0.00015362730600133596, + "loss": 0.5431, + "step": 2671 + }, + { + "epoch": 3.16, + "learning_rate": 0.00015359543145594258, + "loss": 0.2586, + "step": 2672 + }, + { + "epoch": 3.17, + "learning_rate": 0.00015356354926900979, + "loss": 0.5251, + "step": 2673 + }, + { + "epoch": 3.17, + "learning_rate": 0.00015353165944508325, + "loss": 0.4104, + "step": 2674 + }, + { + "epoch": 3.17, + "learning_rate": 0.00015349976198870973, + "loss": 0.4825, + "step": 2675 + }, + { + "epoch": 3.17, + "learning_rate": 0.00015346785690443718, + "loss": 0.5274, + "step": 2676 + }, + { + "epoch": 3.17, + "learning_rate": 0.0001534359441968145, + "loss": 0.3878, + "step": 2677 + }, + { + "epoch": 3.17, + "learning_rate": 0.0001534040238703918, + "loss": 0.5132, + "step": 2678 + }, + { + "epoch": 3.17, + "learning_rate": 0.00015337209592972023, + "loss": 0.5145, + "step": 2679 + }, + { + "epoch": 3.17, + "learning_rate": 0.00015334016037935196, + "loss": 0.5548, + "step": 2680 + }, + { + "epoch": 3.18, + "learning_rate": 0.00015330821722384037, + "loss": 0.7494, + "step": 2681 + }, + { + "epoch": 3.18, + "learning_rate": 0.00015327626646773976, + "loss": 0.5569, + "step": 2682 + }, + { + "epoch": 3.18, + "learning_rate": 0.00015324430811560573, + "loss": 0.2622, + "step": 2683 + }, + { + "epoch": 3.18, + "learning_rate": 0.0001532123421719948, + "loss": 0.3749, + "step": 2684 + }, + { + "epoch": 3.18, + "learning_rate": 0.00015318036864146457, + "loss": 0.3959, + "step": 2685 + }, + { + "epoch": 3.18, + "learning_rate": 0.0001531483875285738, + "loss": 0.5243, + "step": 2686 + }, + { + "epoch": 3.18, + "learning_rate": 0.0001531163988378823, + "loss": 0.3115, + "step": 2687 + }, + { + "epoch": 3.18, + "learning_rate": 0.00015308440257395093, + "loss": 0.2385, + "step": 2688 + }, + { + "epoch": 3.18, + "learning_rate": 0.00015305239874134174, + "loss": 0.4431, + "step": 2689 + }, + { + "epoch": 3.19, + "learning_rate": 0.0001530203873446177, + "loss": 0.378, + "step": 2690 + }, + { + "epoch": 3.19, + "learning_rate": 0.00015298836838834298, + "loss": 0.4521, + "step": 2691 + }, + { + "epoch": 3.19, + "learning_rate": 0.00015295634187708279, + "loss": 0.6309, + "step": 2692 + }, + { + "epoch": 3.19, + "learning_rate": 0.00015292430781540335, + "loss": 0.4355, + "step": 2693 + }, + { + "epoch": 3.19, + "learning_rate": 0.00015289226620787208, + "loss": 0.4537, + "step": 2694 + }, + { + "epoch": 3.19, + "learning_rate": 0.0001528602170590574, + "loss": 0.4305, + "step": 2695 + }, + { + "epoch": 3.19, + "learning_rate": 0.00015282816037352878, + "loss": 0.5355, + "step": 2696 + }, + { + "epoch": 3.19, + "learning_rate": 0.00015279609615585687, + "loss": 0.5243, + "step": 2697 + }, + { + "epoch": 3.2, + "learning_rate": 0.0001527640244106133, + "loss": 0.5334, + "step": 2698 + }, + { + "epoch": 3.2, + "learning_rate": 0.00015273194514237078, + "loss": 0.5409, + "step": 2699 + }, + { + "epoch": 3.2, + "learning_rate": 0.0001526998583557031, + "loss": 0.4042, + "step": 2700 + }, + { + "epoch": 3.2, + "learning_rate": 0.00015266776405518524, + "loss": 0.5536, + "step": 2701 + }, + { + "epoch": 3.2, + "learning_rate": 0.000152635662245393, + "loss": 0.2743, + "step": 2702 + }, + { + "epoch": 3.2, + "learning_rate": 0.00015260355293090353, + "loss": 0.4762, + "step": 2703 + }, + { + "epoch": 3.2, + "learning_rate": 0.00015257143611629482, + "loss": 0.4552, + "step": 2704 + }, + { + "epoch": 3.2, + "learning_rate": 0.0001525393118061461, + "loss": 0.5395, + "step": 2705 + }, + { + "epoch": 3.21, + "learning_rate": 0.0001525071800050375, + "loss": 0.4297, + "step": 2706 + }, + { + "epoch": 3.21, + "learning_rate": 0.00015247504071755046, + "loss": 0.364, + "step": 2707 + }, + { + "epoch": 3.21, + "learning_rate": 0.00015244289394826722, + "loss": 0.9499, + "step": 2708 + }, + { + "epoch": 3.21, + "learning_rate": 0.00015241073970177126, + "loss": 0.579, + "step": 2709 + }, + { + "epoch": 3.21, + "learning_rate": 0.000152378577982647, + "loss": 0.3111, + "step": 2710 + }, + { + "epoch": 3.21, + "learning_rate": 0.0001523464087954801, + "loss": 0.3345, + "step": 2711 + }, + { + "epoch": 3.21, + "learning_rate": 0.00015231423214485715, + "loss": 0.4628, + "step": 2712 + }, + { + "epoch": 3.21, + "learning_rate": 0.00015228204803536586, + "loss": 0.4803, + "step": 2713 + }, + { + "epoch": 3.21, + "learning_rate": 0.0001522498564715949, + "loss": 0.4164, + "step": 2714 + }, + { + "epoch": 3.22, + "learning_rate": 0.00015221765745813417, + "loss": 0.6468, + "step": 2715 + }, + { + "epoch": 3.22, + "learning_rate": 0.00015218545099957449, + "loss": 0.4495, + "step": 2716 + }, + { + "epoch": 3.22, + "learning_rate": 0.00015215323710050785, + "loss": 0.4184, + "step": 2717 + }, + { + "epoch": 3.22, + "eval_loss": 2.9206559658050537, + "eval_runtime": 283.9002, + "eval_samples_per_second": 0.726, + "eval_steps_per_second": 0.726, + "step": 2717 + }, + { + "epoch": 3.22, + "learning_rate": 0.00015212101576552722, + "loss": 0.4215, + "step": 2718 + }, + { + "epoch": 3.22, + "learning_rate": 0.00015208878699922668, + "loss": 1.4488, + "step": 2719 + }, + { + "epoch": 3.22, + "learning_rate": 0.0001520565508062013, + "loss": 0.4449, + "step": 2720 + }, + { + "epoch": 3.22, + "learning_rate": 0.0001520243071910473, + "loss": 0.2853, + "step": 2721 + }, + { + "epoch": 3.22, + "learning_rate": 0.00015199205615836191, + "loss": 0.4572, + "step": 2722 + }, + { + "epoch": 3.23, + "learning_rate": 0.00015195979771274342, + "loss": 0.4436, + "step": 2723 + }, + { + "epoch": 3.23, + "learning_rate": 0.0001519275318587912, + "loss": 0.38, + "step": 2724 + }, + { + "epoch": 3.23, + "learning_rate": 0.00015189525860110563, + "loss": 0.4956, + "step": 2725 + }, + { + "epoch": 3.23, + "learning_rate": 0.00015186297794428816, + "loss": 0.8514, + "step": 2726 + }, + { + "epoch": 3.23, + "learning_rate": 0.00015183068989294132, + "loss": 0.4518, + "step": 2727 + }, + { + "epoch": 3.23, + "learning_rate": 0.00015179839445166873, + "loss": 0.5581, + "step": 2728 + }, + { + "epoch": 3.23, + "learning_rate": 0.00015176609162507497, + "loss": 0.5828, + "step": 2729 + }, + { + "epoch": 3.23, + "learning_rate": 0.00015173378141776568, + "loss": 0.4109, + "step": 2730 + }, + { + "epoch": 3.24, + "learning_rate": 0.00015170146383434768, + "loss": 0.5762, + "step": 2731 + }, + { + "epoch": 3.24, + "learning_rate": 0.00015166913887942868, + "loss": 0.4502, + "step": 2732 + }, + { + "epoch": 3.24, + "learning_rate": 0.00015163680655761757, + "loss": 0.3736, + "step": 2733 + }, + { + "epoch": 3.24, + "learning_rate": 0.00015160446687352417, + "loss": 0.3771, + "step": 2734 + }, + { + "epoch": 3.24, + "learning_rate": 0.00015157211983175947, + "loss": 0.469, + "step": 2735 + }, + { + "epoch": 3.24, + "learning_rate": 0.00015153976543693542, + "loss": 0.665, + "step": 2736 + }, + { + "epoch": 3.24, + "learning_rate": 0.00015150740369366508, + "loss": 0.3495, + "step": 2737 + }, + { + "epoch": 3.24, + "learning_rate": 0.0001514750346065625, + "loss": 0.4513, + "step": 2738 + }, + { + "epoch": 3.24, + "learning_rate": 0.0001514426581802428, + "loss": 0.4571, + "step": 2739 + }, + { + "epoch": 3.25, + "learning_rate": 0.00015141027441932216, + "loss": 0.4197, + "step": 2740 + }, + { + "epoch": 3.25, + "learning_rate": 0.00015137788332841785, + "loss": 0.3396, + "step": 2741 + }, + { + "epoch": 3.25, + "learning_rate": 0.00015134548491214806, + "loss": 0.3547, + "step": 2742 + }, + { + "epoch": 3.25, + "learning_rate": 0.00015131307917513214, + "loss": 0.3073, + "step": 2743 + }, + { + "epoch": 3.25, + "learning_rate": 0.00015128066612199044, + "loss": 0.7091, + "step": 2744 + }, + { + "epoch": 3.25, + "learning_rate": 0.00015124824575734428, + "loss": 0.2845, + "step": 2745 + }, + { + "epoch": 3.25, + "learning_rate": 0.00015121581808581622, + "loss": 0.2903, + "step": 2746 + }, + { + "epoch": 3.25, + "learning_rate": 0.00015118338311202964, + "loss": 0.4065, + "step": 2747 + }, + { + "epoch": 3.26, + "learning_rate": 0.00015115094084060916, + "loss": 0.6152, + "step": 2748 + }, + { + "epoch": 3.26, + "learning_rate": 0.00015111849127618022, + "loss": 0.5352, + "step": 2749 + }, + { + "epoch": 3.26, + "learning_rate": 0.0001510860344233695, + "loss": 0.414, + "step": 2750 + }, + { + "epoch": 3.26, + "learning_rate": 0.00015105357028680457, + "loss": 0.4756, + "step": 2751 + }, + { + "epoch": 3.26, + "learning_rate": 0.00015102109887111422, + "loss": 0.4644, + "step": 2752 + }, + { + "epoch": 3.26, + "learning_rate": 0.00015098862018092808, + "loss": 0.4231, + "step": 2753 + }, + { + "epoch": 3.26, + "learning_rate": 0.00015095613422087692, + "loss": 0.4617, + "step": 2754 + }, + { + "epoch": 3.26, + "learning_rate": 0.0001509236409955925, + "loss": 0.5876, + "step": 2755 + }, + { + "epoch": 3.27, + "learning_rate": 0.0001508911405097077, + "loss": 0.5696, + "step": 2756 + }, + { + "epoch": 3.27, + "learning_rate": 0.00015085863276785637, + "loss": 0.3826, + "step": 2757 + }, + { + "epoch": 3.27, + "learning_rate": 0.0001508261177746734, + "loss": 0.4338, + "step": 2758 + }, + { + "epoch": 3.27, + "learning_rate": 0.0001507935955347947, + "loss": 0.3546, + "step": 2759 + }, + { + "epoch": 3.27, + "learning_rate": 0.00015076106605285724, + "loss": 0.413, + "step": 2760 + }, + { + "epoch": 3.27, + "learning_rate": 0.000150728529333499, + "loss": 0.3954, + "step": 2761 + }, + { + "epoch": 3.27, + "learning_rate": 0.00015069598538135906, + "loss": 0.5214, + "step": 2762 + }, + { + "epoch": 3.27, + "learning_rate": 0.0001506634342010774, + "loss": 0.5239, + "step": 2763 + }, + { + "epoch": 3.27, + "learning_rate": 0.00015063087579729519, + "loss": 0.8681, + "step": 2764 + }, + { + "epoch": 3.28, + "learning_rate": 0.00015059831017465449, + "loss": 0.4616, + "step": 2765 + }, + { + "epoch": 3.28, + "learning_rate": 0.00015056573733779848, + "loss": 0.4721, + "step": 2766 + }, + { + "epoch": 3.28, + "learning_rate": 0.00015053315729137128, + "loss": 0.4449, + "step": 2767 + }, + { + "epoch": 3.28, + "learning_rate": 0.0001505005700400182, + "loss": 0.569, + "step": 2768 + }, + { + "epoch": 3.28, + "learning_rate": 0.00015046797558838535, + "loss": 0.4926, + "step": 2769 + }, + { + "epoch": 3.28, + "learning_rate": 0.00015043537394112007, + "loss": 0.462, + "step": 2770 + }, + { + "epoch": 3.28, + "learning_rate": 0.00015040276510287063, + "loss": 0.6983, + "step": 2771 + }, + { + "epoch": 3.28, + "learning_rate": 0.00015037014907828632, + "loss": 0.4644, + "step": 2772 + }, + { + "epoch": 3.29, + "learning_rate": 0.0001503375258720175, + "loss": 0.5924, + "step": 2773 + }, + { + "epoch": 3.29, + "learning_rate": 0.00015030489548871544, + "loss": 0.5282, + "step": 2774 + }, + { + "epoch": 3.29, + "learning_rate": 0.00015027225793303264, + "loss": 0.4757, + "step": 2775 + }, + { + "epoch": 3.29, + "learning_rate": 0.00015023961320962247, + "loss": 0.5014, + "step": 2776 + }, + { + "epoch": 3.29, + "learning_rate": 0.0001502069613231393, + "loss": 0.3455, + "step": 2777 + }, + { + "epoch": 3.29, + "learning_rate": 0.00015017430227823864, + "loss": 0.4525, + "step": 2778 + }, + { + "epoch": 3.29, + "learning_rate": 0.0001501416360795769, + "loss": 0.51, + "step": 2779 + }, + { + "epoch": 3.29, + "learning_rate": 0.00015010896273181165, + "loss": 0.3766, + "step": 2780 + }, + { + "epoch": 3.3, + "learning_rate": 0.0001500762822396013, + "loss": 0.3162, + "step": 2781 + }, + { + "epoch": 3.3, + "learning_rate": 0.00015004359460760546, + "loss": 0.406, + "step": 2782 + }, + { + "epoch": 3.3, + "learning_rate": 0.00015001089984048463, + "loss": 0.4671, + "step": 2783 + }, + { + "epoch": 3.3, + "learning_rate": 0.00014997819794290034, + "loss": 0.4299, + "step": 2784 + }, + { + "epoch": 3.3, + "learning_rate": 0.00014994548891951524, + "loss": 0.5494, + "step": 2785 + }, + { + "epoch": 3.3, + "learning_rate": 0.0001499127727749929, + "loss": 0.351, + "step": 2786 + }, + { + "epoch": 3.3, + "learning_rate": 0.00014988004951399785, + "loss": 0.3807, + "step": 2787 + }, + { + "epoch": 3.3, + "learning_rate": 0.00014984731914119586, + "loss": 0.3999, + "step": 2788 + }, + { + "epoch": 3.3, + "learning_rate": 0.0001498145816612534, + "loss": 0.7609, + "step": 2789 + }, + { + "epoch": 3.31, + "learning_rate": 0.00014978183707883827, + "loss": 0.4466, + "step": 2790 + }, + { + "epoch": 3.31, + "learning_rate": 0.00014974908539861905, + "loss": 0.592, + "step": 2791 + }, + { + "epoch": 3.31, + "learning_rate": 0.00014971632662526545, + "loss": 0.4786, + "step": 2792 + }, + { + "epoch": 3.31, + "learning_rate": 0.00014968356076344814, + "loss": 0.4087, + "step": 2793 + }, + { + "epoch": 3.31, + "learning_rate": 0.0001496507878178388, + "loss": 0.3811, + "step": 2794 + }, + { + "epoch": 3.31, + "learning_rate": 0.00014961800779311014, + "loss": 0.4091, + "step": 2795 + }, + { + "epoch": 3.31, + "learning_rate": 0.00014958522069393593, + "loss": 0.6861, + "step": 2796 + }, + { + "epoch": 3.31, + "learning_rate": 0.00014955242652499084, + "loss": 0.3346, + "step": 2797 + }, + { + "epoch": 3.32, + "learning_rate": 0.00014951962529095064, + "loss": 0.5417, + "step": 2798 + }, + { + "epoch": 3.32, + "learning_rate": 0.000149486816996492, + "loss": 0.7325, + "step": 2799 + }, + { + "epoch": 3.32, + "learning_rate": 0.00014945400164629278, + "loss": 0.5007, + "step": 2800 + }, + { + "epoch": 3.32, + "learning_rate": 0.00014942117924503164, + "loss": 0.4217, + "step": 2801 + }, + { + "epoch": 3.32, + "learning_rate": 0.00014938834979738835, + "loss": 0.5265, + "step": 2802 + }, + { + "epoch": 3.32, + "learning_rate": 0.00014935551330804372, + "loss": 0.4376, + "step": 2803 + }, + { + "epoch": 3.32, + "learning_rate": 0.0001493226697816795, + "loss": 0.5068, + "step": 2804 + }, + { + "epoch": 3.32, + "learning_rate": 0.00014928981922297842, + "loss": 0.6248, + "step": 2805 + }, + { + "epoch": 3.33, + "learning_rate": 0.0001492569616366243, + "loss": 0.593, + "step": 2806 + }, + { + "epoch": 3.33, + "learning_rate": 0.0001492240970273019, + "loss": 0.6713, + "step": 2807 + }, + { + "epoch": 3.33, + "learning_rate": 0.00014919122539969697, + "loss": 0.5736, + "step": 2808 + }, + { + "epoch": 3.33, + "learning_rate": 0.00014915834675849633, + "loss": 0.3006, + "step": 2809 + }, + { + "epoch": 3.33, + "learning_rate": 0.00014912546110838775, + "loss": 0.5175, + "step": 2810 + }, + { + "epoch": 3.33, + "learning_rate": 0.00014909256845405998, + "loss": 0.52, + "step": 2811 + }, + { + "epoch": 3.33, + "learning_rate": 0.00014905966880020282, + "loss": 0.5491, + "step": 2812 + }, + { + "epoch": 3.33, + "learning_rate": 0.00014902676215150702, + "loss": 0.6007, + "step": 2813 + }, + { + "epoch": 3.33, + "learning_rate": 0.0001489938485126644, + "loss": 0.6552, + "step": 2814 + }, + { + "epoch": 3.34, + "learning_rate": 0.00014896092788836763, + "loss": 0.3624, + "step": 2815 + }, + { + "epoch": 3.34, + "learning_rate": 0.0001489280002833106, + "loss": 0.2626, + "step": 2816 + }, + { + "epoch": 3.34, + "learning_rate": 0.00014889506570218796, + "loss": 0.409, + "step": 2817 + }, + { + "epoch": 3.34, + "learning_rate": 0.00014886212414969553, + "loss": 0.473, + "step": 2818 + }, + { + "epoch": 3.34, + "learning_rate": 0.00014882917563052998, + "loss": 0.4205, + "step": 2819 + }, + { + "epoch": 3.34, + "learning_rate": 0.00014879622014938915, + "loss": 0.4603, + "step": 2820 + }, + { + "epoch": 3.34, + "learning_rate": 0.0001487632577109717, + "loss": 0.3522, + "step": 2821 + }, + { + "epoch": 3.34, + "learning_rate": 0.0001487302883199774, + "loss": 0.3787, + "step": 2822 + }, + { + "epoch": 3.35, + "learning_rate": 0.00014869731198110695, + "loss": 0.6, + "step": 2823 + }, + { + "epoch": 3.35, + "learning_rate": 0.000148664328699062, + "loss": 0.4291, + "step": 2824 + }, + { + "epoch": 3.35, + "learning_rate": 0.00014863133847854533, + "loss": 0.4358, + "step": 2825 + }, + { + "epoch": 3.35, + "learning_rate": 0.0001485983413242606, + "loss": 0.4144, + "step": 2826 + }, + { + "epoch": 3.35, + "learning_rate": 0.0001485653372409125, + "loss": 0.842, + "step": 2827 + }, + { + "epoch": 3.35, + "learning_rate": 0.00014853232623320662, + "loss": 0.3398, + "step": 2828 + }, + { + "epoch": 3.35, + "learning_rate": 0.00014849930830584972, + "loss": 0.5005, + "step": 2829 + }, + { + "epoch": 3.35, + "learning_rate": 0.00014846628346354933, + "loss": 0.5777, + "step": 2830 + }, + { + "epoch": 3.36, + "learning_rate": 0.00014843325171101413, + "loss": 0.3953, + "step": 2831 + }, + { + "epoch": 3.36, + "learning_rate": 0.00014840021305295373, + "loss": 0.4056, + "step": 2832 + }, + { + "epoch": 3.36, + "learning_rate": 0.00014836716749407872, + "loss": 0.7682, + "step": 2833 + }, + { + "epoch": 3.36, + "learning_rate": 0.0001483341150391006, + "loss": 0.3208, + "step": 2834 + }, + { + "epoch": 3.36, + "learning_rate": 0.00014830105569273204, + "loss": 0.4317, + "step": 2835 + }, + { + "epoch": 3.36, + "learning_rate": 0.00014826798945968654, + "loss": 0.363, + "step": 2836 + }, + { + "epoch": 3.36, + "learning_rate": 0.00014823491634467862, + "loss": 0.3784, + "step": 2837 + }, + { + "epoch": 3.36, + "learning_rate": 0.00014820183635242374, + "loss": 0.9267, + "step": 2838 + }, + { + "epoch": 3.36, + "learning_rate": 0.0001481687494876385, + "loss": 0.4245, + "step": 2839 + }, + { + "epoch": 3.37, + "learning_rate": 0.00014813565575504022, + "loss": 0.3929, + "step": 2840 + }, + { + "epoch": 3.37, + "learning_rate": 0.00014810255515934747, + "loss": 0.5171, + "step": 2841 + }, + { + "epoch": 3.37, + "learning_rate": 0.00014806944770527958, + "loss": 0.5181, + "step": 2842 + }, + { + "epoch": 3.37, + "learning_rate": 0.00014803633339755703, + "loss": 0.4765, + "step": 2843 + }, + { + "epoch": 3.37, + "learning_rate": 0.00014800321224090114, + "loss": 0.4433, + "step": 2844 + }, + { + "epoch": 3.37, + "learning_rate": 0.00014797008424003428, + "loss": 0.461, + "step": 2845 + }, + { + "epoch": 3.37, + "learning_rate": 0.0001479369493996798, + "loss": 0.5688, + "step": 2846 + }, + { + "epoch": 3.37, + "learning_rate": 0.00014790380772456197, + "loss": 0.4822, + "step": 2847 + }, + { + "epoch": 3.38, + "learning_rate": 0.0001478706592194061, + "loss": 0.4993, + "step": 2848 + }, + { + "epoch": 3.38, + "learning_rate": 0.00014783750388893842, + "loss": 0.3967, + "step": 2849 + }, + { + "epoch": 3.38, + "learning_rate": 0.00014780434173788617, + "loss": 0.4708, + "step": 2850 + }, + { + "epoch": 3.38, + "learning_rate": 0.00014777117277097758, + "loss": 0.5721, + "step": 2851 + }, + { + "epoch": 3.38, + "learning_rate": 0.00014773799699294176, + "loss": 0.5276, + "step": 2852 + }, + { + "epoch": 3.38, + "learning_rate": 0.00014770481440850891, + "loss": 0.4135, + "step": 2853 + }, + { + "epoch": 3.38, + "learning_rate": 0.0001476716250224101, + "loss": 0.716, + "step": 2854 + }, + { + "epoch": 3.38, + "learning_rate": 0.00014763842883937743, + "loss": 0.3663, + "step": 2855 + }, + { + "epoch": 3.39, + "learning_rate": 0.00014760522586414396, + "loss": 0.4105, + "step": 2856 + }, + { + "epoch": 3.39, + "learning_rate": 0.00014757201610144372, + "loss": 0.4554, + "step": 2857 + }, + { + "epoch": 3.39, + "learning_rate": 0.00014753879955601163, + "loss": 0.4366, + "step": 2858 + }, + { + "epoch": 3.39, + "learning_rate": 0.0001475055762325837, + "loss": 0.3752, + "step": 2859 + }, + { + "epoch": 3.39, + "learning_rate": 0.00014747234613589685, + "loss": 0.3747, + "step": 2860 + }, + { + "epoch": 3.39, + "learning_rate": 0.000147439109270689, + "loss": 0.5533, + "step": 2861 + }, + { + "epoch": 3.39, + "learning_rate": 0.00014740586564169892, + "loss": 0.4962, + "step": 2862 + }, + { + "epoch": 3.39, + "learning_rate": 0.00014737261525366648, + "loss": 0.5318, + "step": 2863 + }, + { + "epoch": 3.39, + "learning_rate": 0.00014733935811133244, + "loss": 0.4592, + "step": 2864 + }, + { + "epoch": 3.4, + "learning_rate": 0.00014730609421943855, + "loss": 0.429, + "step": 2865 + }, + { + "epoch": 3.4, + "learning_rate": 0.00014727282358272754, + "loss": 0.4163, + "step": 2866 + }, + { + "epoch": 3.4, + "learning_rate": 0.00014723954620594304, + "loss": 0.4811, + "step": 2867 + }, + { + "epoch": 3.4, + "learning_rate": 0.0001472062620938297, + "loss": 0.4662, + "step": 2868 + }, + { + "epoch": 3.4, + "learning_rate": 0.00014717297125113311, + "loss": 0.531, + "step": 2869 + }, + { + "epoch": 3.4, + "learning_rate": 0.0001471396736825998, + "loss": 0.3233, + "step": 2870 + }, + { + "epoch": 3.4, + "learning_rate": 0.00014710636939297724, + "loss": 0.4171, + "step": 2871 + }, + { + "epoch": 3.4, + "learning_rate": 0.000147073058387014, + "loss": 0.5412, + "step": 2872 + }, + { + "epoch": 3.41, + "learning_rate": 0.00014703974066945943, + "loss": 0.4357, + "step": 2873 + }, + { + "epoch": 3.41, + "learning_rate": 0.00014700641624506392, + "loss": 0.3889, + "step": 2874 + }, + { + "epoch": 3.41, + "learning_rate": 0.0001469730851185788, + "loss": 0.456, + "step": 2875 + }, + { + "epoch": 3.41, + "learning_rate": 0.00014693974729475636, + "loss": 0.4365, + "step": 2876 + }, + { + "epoch": 3.41, + "learning_rate": 0.0001469064027783499, + "loss": 0.3947, + "step": 2877 + }, + { + "epoch": 3.41, + "learning_rate": 0.00014687305157411355, + "loss": 0.5718, + "step": 2878 + }, + { + "epoch": 3.41, + "learning_rate": 0.0001468396936868025, + "loss": 0.4652, + "step": 2879 + }, + { + "epoch": 3.41, + "learning_rate": 0.00014680632912117286, + "loss": 0.4242, + "step": 2880 + }, + { + "epoch": 3.42, + "learning_rate": 0.0001467729578819817, + "loss": 0.5045, + "step": 2881 + }, + { + "epoch": 3.42, + "learning_rate": 0.00014673957997398695, + "loss": 0.4098, + "step": 2882 + }, + { + "epoch": 3.42, + "learning_rate": 0.00014670619540194766, + "loss": 0.597, + "step": 2883 + }, + { + "epoch": 3.42, + "learning_rate": 0.00014667280417062374, + "loss": 0.5208, + "step": 2884 + }, + { + "epoch": 3.42, + "learning_rate": 0.00014663940628477598, + "loss": 0.4881, + "step": 2885 + }, + { + "epoch": 3.42, + "learning_rate": 0.00014660600174916627, + "loss": 0.5234, + "step": 2886 + }, + { + "epoch": 3.42, + "learning_rate": 0.0001465725905685573, + "loss": 0.439, + "step": 2887 + }, + { + "epoch": 3.42, + "learning_rate": 0.00014653917274771284, + "loss": 0.4498, + "step": 2888 + }, + { + "epoch": 3.42, + "learning_rate": 0.00014650574829139747, + "loss": 0.4837, + "step": 2889 + }, + { + "epoch": 3.43, + "learning_rate": 0.00014647231720437686, + "loss": 0.4232, + "step": 2890 + }, + { + "epoch": 3.43, + "learning_rate": 0.00014643887949141753, + "loss": 0.4467, + "step": 2891 + }, + { + "epoch": 3.43, + "learning_rate": 0.00014640543515728695, + "loss": 0.3566, + "step": 2892 + }, + { + "epoch": 3.43, + "learning_rate": 0.00014637198420675354, + "loss": 0.3888, + "step": 2893 + }, + { + "epoch": 3.43, + "learning_rate": 0.00014633852664458673, + "loss": 0.326, + "step": 2894 + }, + { + "epoch": 3.43, + "learning_rate": 0.0001463050624755568, + "loss": 0.3608, + "step": 2895 + }, + { + "epoch": 3.43, + "learning_rate": 0.00014627159170443502, + "loss": 0.5326, + "step": 2896 + }, + { + "epoch": 3.43, + "learning_rate": 0.00014623811433599359, + "loss": 0.3171, + "step": 2897 + }, + { + "epoch": 3.44, + "learning_rate": 0.00014620463037500568, + "loss": 0.4619, + "step": 2898 + }, + { + "epoch": 3.44, + "learning_rate": 0.00014617113982624526, + "loss": 0.7739, + "step": 2899 + }, + { + "epoch": 3.44, + "learning_rate": 0.00014613764269448751, + "loss": 0.4327, + "step": 2900 + }, + { + "epoch": 3.44, + "learning_rate": 0.0001461041389845083, + "loss": 0.6078, + "step": 2901 + }, + { + "epoch": 3.44, + "learning_rate": 0.00014607062870108456, + "loss": 0.3863, + "step": 2902 + }, + { + "epoch": 3.44, + "learning_rate": 0.00014603711184899408, + "loss": 0.4787, + "step": 2903 + }, + { + "epoch": 3.44, + "learning_rate": 0.00014600358843301568, + "loss": 0.2997, + "step": 2904 + }, + { + "epoch": 3.44, + "learning_rate": 0.00014597005845792905, + "loss": 0.3657, + "step": 2905 + }, + { + "epoch": 3.45, + "learning_rate": 0.00014593652192851486, + "loss": 0.334, + "step": 2906 + }, + { + "epoch": 3.45, + "learning_rate": 0.00014590297884955463, + "loss": 0.6809, + "step": 2907 + }, + { + "epoch": 3.45, + "learning_rate": 0.0001458694292258309, + "loss": 0.4739, + "step": 2908 + }, + { + "epoch": 3.45, + "learning_rate": 0.00014583587306212713, + "loss": 0.4139, + "step": 2909 + }, + { + "epoch": 3.45, + "learning_rate": 0.00014580231036322768, + "loss": 0.3307, + "step": 2910 + }, + { + "epoch": 3.45, + "learning_rate": 0.00014576874113391789, + "loss": 0.4155, + "step": 2911 + }, + { + "epoch": 3.45, + "learning_rate": 0.00014573516537898394, + "loss": 0.4461, + "step": 2912 + }, + { + "epoch": 3.45, + "learning_rate": 0.00014570158310321305, + "loss": 0.4775, + "step": 2913 + }, + { + "epoch": 3.45, + "learning_rate": 0.0001456679943113933, + "loss": 0.344, + "step": 2914 + }, + { + "epoch": 3.46, + "learning_rate": 0.00014563439900831373, + "loss": 0.3568, + "step": 2915 + }, + { + "epoch": 3.46, + "learning_rate": 0.00014560079719876424, + "loss": 0.3808, + "step": 2916 + }, + { + "epoch": 3.46, + "learning_rate": 0.0001455671888875358, + "loss": 0.5467, + "step": 2917 + }, + { + "epoch": 3.46, + "learning_rate": 0.00014553357407942022, + "loss": 0.5267, + "step": 2918 + }, + { + "epoch": 3.46, + "learning_rate": 0.00014549995277921015, + "loss": 0.4476, + "step": 2919 + }, + { + "epoch": 3.46, + "learning_rate": 0.00014546632499169937, + "loss": 0.4463, + "step": 2920 + }, + { + "epoch": 3.46, + "learning_rate": 0.00014543269072168235, + "loss": 0.5553, + "step": 2921 + }, + { + "epoch": 3.46, + "learning_rate": 0.00014539904997395468, + "loss": 0.5476, + "step": 2922 + }, + { + "epoch": 3.47, + "learning_rate": 0.0001453654027533128, + "loss": 0.4443, + "step": 2923 + }, + { + "epoch": 3.47, + "learning_rate": 0.00014533174906455404, + "loss": 0.4353, + "step": 2924 + }, + { + "epoch": 3.47, + "learning_rate": 0.00014529808891247667, + "loss": 0.4479, + "step": 2925 + }, + { + "epoch": 3.47, + "learning_rate": 0.00014526442230187995, + "loss": 0.3951, + "step": 2926 + }, + { + "epoch": 3.47, + "eval_loss": 2.882225751876831, + "eval_runtime": 283.9462, + "eval_samples_per_second": 0.725, + "eval_steps_per_second": 0.725, + "step": 2926 + }, + { + "epoch": 3.47, + "learning_rate": 0.00014523074923756394, + "loss": 0.679, + "step": 2927 + }, + { + "epoch": 3.47, + "learning_rate": 0.0001451970697243297, + "loss": 0.4178, + "step": 2928 + }, + { + "epoch": 3.47, + "learning_rate": 0.0001451633837669792, + "loss": 0.4121, + "step": 2929 + }, + { + "epoch": 3.47, + "learning_rate": 0.00014512969137031538, + "loss": 0.3929, + "step": 2930 + }, + { + "epoch": 3.48, + "learning_rate": 0.00014509599253914195, + "loss": 0.366, + "step": 2931 + }, + { + "epoch": 3.48, + "learning_rate": 0.0001450622872782637, + "loss": 0.3528, + "step": 2932 + }, + { + "epoch": 3.48, + "learning_rate": 0.00014502857559248617, + "loss": 0.5003, + "step": 2933 + }, + { + "epoch": 3.48, + "learning_rate": 0.00014499485748661604, + "loss": 0.4901, + "step": 2934 + }, + { + "epoch": 3.48, + "learning_rate": 0.00014496113296546067, + "loss": 0.4538, + "step": 2935 + }, + { + "epoch": 3.48, + "learning_rate": 0.00014492740203382847, + "loss": 0.4549, + "step": 2936 + }, + { + "epoch": 3.48, + "learning_rate": 0.0001448936646965288, + "loss": 0.5464, + "step": 2937 + }, + { + "epoch": 3.48, + "learning_rate": 0.00014485992095837177, + "loss": 0.43, + "step": 2938 + }, + { + "epoch": 3.48, + "learning_rate": 0.00014482617082416858, + "loss": 0.5893, + "step": 2939 + }, + { + "epoch": 3.49, + "learning_rate": 0.0001447924142987312, + "loss": 0.4947, + "step": 2940 + }, + { + "epoch": 3.49, + "learning_rate": 0.00014475865138687262, + "loss": 0.4903, + "step": 2941 + }, + { + "epoch": 3.49, + "learning_rate": 0.0001447248820934067, + "loss": 0.4933, + "step": 2942 + }, + { + "epoch": 3.49, + "learning_rate": 0.00014469110642314817, + "loss": 0.4516, + "step": 2943 + }, + { + "epoch": 3.49, + "learning_rate": 0.0001446573243809127, + "loss": 0.469, + "step": 2944 + }, + { + "epoch": 3.49, + "learning_rate": 0.00014462353597151684, + "loss": 0.6531, + "step": 2945 + }, + { + "epoch": 3.49, + "learning_rate": 0.00014458974119977818, + "loss": 0.2754, + "step": 2946 + }, + { + "epoch": 3.49, + "learning_rate": 0.0001445559400705151, + "loss": 0.5676, + "step": 2947 + }, + { + "epoch": 3.5, + "learning_rate": 0.00014452213258854684, + "loss": 0.5903, + "step": 2948 + }, + { + "epoch": 3.5, + "learning_rate": 0.00014448831875869364, + "loss": 0.5022, + "step": 2949 + }, + { + "epoch": 3.5, + "learning_rate": 0.0001444544985857766, + "loss": 0.3509, + "step": 2950 + }, + { + "epoch": 3.5, + "learning_rate": 0.00014442067207461775, + "loss": 0.3921, + "step": 2951 + }, + { + "epoch": 3.5, + "learning_rate": 0.00014438683923004005, + "loss": 0.4997, + "step": 2952 + }, + { + "epoch": 3.5, + "learning_rate": 0.00014435300005686728, + "loss": 0.6218, + "step": 2953 + }, + { + "epoch": 3.5, + "learning_rate": 0.00014431915455992414, + "loss": 0.4097, + "step": 2954 + }, + { + "epoch": 3.5, + "learning_rate": 0.00014428530274403632, + "loss": 0.3478, + "step": 2955 + }, + { + "epoch": 3.51, + "learning_rate": 0.00014425144461403035, + "loss": 0.4506, + "step": 2956 + }, + { + "epoch": 3.51, + "learning_rate": 0.00014421758017473362, + "loss": 0.4025, + "step": 2957 + }, + { + "epoch": 3.51, + "learning_rate": 0.00014418370943097448, + "loss": 0.3838, + "step": 2958 + }, + { + "epoch": 3.51, + "learning_rate": 0.00014414983238758217, + "loss": 0.6366, + "step": 2959 + }, + { + "epoch": 3.51, + "learning_rate": 0.00014411594904938682, + "loss": 0.4649, + "step": 2960 + }, + { + "epoch": 3.51, + "learning_rate": 0.00014408205942121943, + "loss": 0.3361, + "step": 2961 + }, + { + "epoch": 3.51, + "learning_rate": 0.00014404816350791188, + "loss": 0.3692, + "step": 2962 + }, + { + "epoch": 3.51, + "learning_rate": 0.0001440142613142971, + "loss": 0.6162, + "step": 2963 + }, + { + "epoch": 3.52, + "learning_rate": 0.00014398035284520874, + "loss": 0.5935, + "step": 2964 + }, + { + "epoch": 3.52, + "learning_rate": 0.0001439464381054814, + "loss": 0.545, + "step": 2965 + }, + { + "epoch": 3.52, + "learning_rate": 0.00014391251709995061, + "loss": 0.4178, + "step": 2966 + }, + { + "epoch": 3.52, + "learning_rate": 0.00014387858983345276, + "loss": 0.5552, + "step": 2967 + }, + { + "epoch": 3.52, + "learning_rate": 0.0001438446563108251, + "loss": 0.4506, + "step": 2968 + }, + { + "epoch": 3.52, + "learning_rate": 0.00014381071653690587, + "loss": 0.429, + "step": 2969 + }, + { + "epoch": 3.52, + "learning_rate": 0.00014377677051653404, + "loss": 0.3897, + "step": 2970 + }, + { + "epoch": 3.52, + "learning_rate": 0.0001437428182545497, + "loss": 0.4663, + "step": 2971 + }, + { + "epoch": 3.52, + "learning_rate": 0.00014370885975579364, + "loss": 0.4643, + "step": 2972 + }, + { + "epoch": 3.53, + "learning_rate": 0.0001436748950251076, + "loss": 0.5433, + "step": 2973 + }, + { + "epoch": 3.53, + "learning_rate": 0.0001436409240673342, + "loss": 0.4967, + "step": 2974 + }, + { + "epoch": 3.53, + "learning_rate": 0.000143606946887317, + "loss": 0.3717, + "step": 2975 + }, + { + "epoch": 3.53, + "learning_rate": 0.00014357296348990037, + "loss": 0.4166, + "step": 2976 + }, + { + "epoch": 3.53, + "learning_rate": 0.0001435389738799296, + "loss": 0.455, + "step": 2977 + }, + { + "epoch": 3.53, + "learning_rate": 0.00014350497806225087, + "loss": 0.4603, + "step": 2978 + }, + { + "epoch": 3.53, + "learning_rate": 0.00014347097604171127, + "loss": 0.4325, + "step": 2979 + }, + { + "epoch": 3.53, + "learning_rate": 0.0001434369678231587, + "loss": 0.4375, + "step": 2980 + }, + { + "epoch": 3.54, + "learning_rate": 0.00014340295341144202, + "loss": 0.4932, + "step": 2981 + }, + { + "epoch": 3.54, + "learning_rate": 0.00014336893281141096, + "loss": 0.5264, + "step": 2982 + }, + { + "epoch": 3.54, + "learning_rate": 0.00014333490602791608, + "loss": 0.4677, + "step": 2983 + }, + { + "epoch": 3.54, + "learning_rate": 0.00014330087306580887, + "loss": 0.6505, + "step": 2984 + }, + { + "epoch": 3.54, + "learning_rate": 0.00014326683392994167, + "loss": 0.4451, + "step": 2985 + }, + { + "epoch": 3.54, + "learning_rate": 0.00014323278862516775, + "loss": 0.4025, + "step": 2986 + }, + { + "epoch": 3.54, + "learning_rate": 0.0001431987371563412, + "loss": 0.5084, + "step": 2987 + }, + { + "epoch": 3.54, + "learning_rate": 0.000143164679528317, + "loss": 0.4806, + "step": 2988 + }, + { + "epoch": 3.55, + "learning_rate": 0.00014313061574595115, + "loss": 0.3954, + "step": 2989 + }, + { + "epoch": 3.55, + "learning_rate": 0.00014309654581410024, + "loss": 0.4339, + "step": 2990 + }, + { + "epoch": 3.55, + "learning_rate": 0.000143062469737622, + "loss": 0.6739, + "step": 2991 + }, + { + "epoch": 3.55, + "learning_rate": 0.00014302838752137487, + "loss": 0.6414, + "step": 2992 + }, + { + "epoch": 3.55, + "learning_rate": 0.00014299429917021827, + "loss": 0.5075, + "step": 2993 + }, + { + "epoch": 3.55, + "learning_rate": 0.00014296020468901246, + "loss": 0.4105, + "step": 2994 + }, + { + "epoch": 3.55, + "learning_rate": 0.00014292610408261856, + "loss": 0.7371, + "step": 2995 + }, + { + "epoch": 3.55, + "learning_rate": 0.00014289199735589852, + "loss": 0.7485, + "step": 2996 + }, + { + "epoch": 3.55, + "learning_rate": 0.00014285788451371534, + "loss": 0.7629, + "step": 2997 + }, + { + "epoch": 3.56, + "learning_rate": 0.00014282376556093264, + "loss": 0.3849, + "step": 2998 + }, + { + "epoch": 3.56, + "learning_rate": 0.00014278964050241512, + "loss": 0.5355, + "step": 2999 + }, + { + "epoch": 3.56, + "learning_rate": 0.00014275550934302823, + "loss": 0.4077, + "step": 3000 + }, + { + "epoch": 3.56, + "learning_rate": 0.00014272137208763832, + "loss": 0.5352, + "step": 3001 + }, + { + "epoch": 3.56, + "learning_rate": 0.00014268722874111265, + "loss": 0.5257, + "step": 3002 + }, + { + "epoch": 3.56, + "learning_rate": 0.00014265307930831932, + "loss": 0.4265, + "step": 3003 + }, + { + "epoch": 3.56, + "learning_rate": 0.00014261892379412728, + "loss": 0.5776, + "step": 3004 + }, + { + "epoch": 3.56, + "learning_rate": 0.0001425847622034063, + "loss": 0.3521, + "step": 3005 + }, + { + "epoch": 3.57, + "learning_rate": 0.00014255059454102722, + "loss": 0.6203, + "step": 3006 + }, + { + "epoch": 3.57, + "learning_rate": 0.00014251642081186146, + "loss": 0.5238, + "step": 3007 + }, + { + "epoch": 3.57, + "learning_rate": 0.00014248224102078152, + "loss": 0.3887, + "step": 3008 + }, + { + "epoch": 3.57, + "learning_rate": 0.00014244805517266067, + "loss": 0.5001, + "step": 3009 + }, + { + "epoch": 3.57, + "learning_rate": 0.0001424138632723731, + "loss": 0.555, + "step": 3010 + }, + { + "epoch": 3.57, + "learning_rate": 0.0001423796653247938, + "loss": 0.6137, + "step": 3011 + }, + { + "epoch": 3.57, + "learning_rate": 0.00014234546133479867, + "loss": 0.8052, + "step": 3012 + }, + { + "epoch": 3.57, + "learning_rate": 0.0001423112513072644, + "loss": 0.5392, + "step": 3013 + }, + { + "epoch": 3.58, + "learning_rate": 0.00014227703524706867, + "loss": 0.5067, + "step": 3014 + }, + { + "epoch": 3.58, + "learning_rate": 0.0001422428131590899, + "loss": 0.4016, + "step": 3015 + }, + { + "epoch": 3.58, + "learning_rate": 0.00014220858504820742, + "loss": 0.4165, + "step": 3016 + }, + { + "epoch": 3.58, + "learning_rate": 0.00014217435091930141, + "loss": 0.7395, + "step": 3017 + }, + { + "epoch": 3.58, + "learning_rate": 0.00014214011077725292, + "loss": 0.4985, + "step": 3018 + }, + { + "epoch": 3.58, + "learning_rate": 0.00014210586462694384, + "loss": 0.4821, + "step": 3019 + }, + { + "epoch": 3.58, + "learning_rate": 0.00014207161247325691, + "loss": 0.6046, + "step": 3020 + }, + { + "epoch": 3.58, + "learning_rate": 0.00014203735432107576, + "loss": 0.568, + "step": 3021 + }, + { + "epoch": 3.58, + "learning_rate": 0.00014200309017528486, + "loss": 0.7383, + "step": 3022 + }, + { + "epoch": 3.59, + "learning_rate": 0.0001419688200407695, + "loss": 0.5296, + "step": 3023 + }, + { + "epoch": 3.59, + "learning_rate": 0.00014193454392241592, + "loss": 0.6391, + "step": 3024 + }, + { + "epoch": 3.59, + "learning_rate": 0.00014190026182511102, + "loss": 0.4523, + "step": 3025 + }, + { + "epoch": 3.59, + "learning_rate": 0.0001418659737537428, + "loss": 0.482, + "step": 3026 + }, + { + "epoch": 3.59, + "learning_rate": 0.00014183167971319998, + "loss": 0.4519, + "step": 3027 + }, + { + "epoch": 3.59, + "learning_rate": 0.00014179737970837207, + "loss": 0.4156, + "step": 3028 + }, + { + "epoch": 3.59, + "learning_rate": 0.00014176307374414956, + "loss": 0.5142, + "step": 3029 + }, + { + "epoch": 3.59, + "learning_rate": 0.00014172876182542372, + "loss": 0.4068, + "step": 3030 + }, + { + "epoch": 3.6, + "learning_rate": 0.00014169444395708666, + "loss": 0.5908, + "step": 3031 + }, + { + "epoch": 3.6, + "learning_rate": 0.0001416601201440314, + "loss": 0.511, + "step": 3032 + }, + { + "epoch": 3.6, + "learning_rate": 0.00014162579039115174, + "loss": 0.5165, + "step": 3033 + }, + { + "epoch": 3.6, + "learning_rate": 0.00014159145470334235, + "loss": 0.4449, + "step": 3034 + }, + { + "epoch": 3.6, + "learning_rate": 0.00014155711308549878, + "loss": 0.4808, + "step": 3035 + }, + { + "epoch": 3.6, + "learning_rate": 0.00014152276554251736, + "loss": 0.5365, + "step": 3036 + }, + { + "epoch": 3.6, + "learning_rate": 0.00014148841207929527, + "loss": 0.6016, + "step": 3037 + }, + { + "epoch": 3.6, + "learning_rate": 0.0001414540527007307, + "loss": 0.379, + "step": 3038 + }, + { + "epoch": 3.61, + "learning_rate": 0.00014141968741172238, + "loss": 0.6687, + "step": 3039 + }, + { + "epoch": 3.61, + "learning_rate": 0.00014138531621717018, + "loss": 0.6219, + "step": 3040 + }, + { + "epoch": 3.61, + "learning_rate": 0.0001413509391219746, + "loss": 0.3408, + "step": 3041 + }, + { + "epoch": 3.61, + "learning_rate": 0.00014131655613103708, + "loss": 0.5148, + "step": 3042 + }, + { + "epoch": 3.61, + "learning_rate": 0.0001412821672492599, + "loss": 0.3811, + "step": 3043 + }, + { + "epoch": 3.61, + "learning_rate": 0.0001412477724815462, + "loss": 0.4691, + "step": 3044 + }, + { + "epoch": 3.61, + "learning_rate": 0.00014121337183279988, + "loss": 0.6919, + "step": 3045 + }, + { + "epoch": 3.61, + "learning_rate": 0.0001411789653079257, + "loss": 0.5804, + "step": 3046 + }, + { + "epoch": 3.61, + "learning_rate": 0.00014114455291182933, + "loss": 0.418, + "step": 3047 + }, + { + "epoch": 3.62, + "learning_rate": 0.0001411101346494172, + "loss": 0.4422, + "step": 3048 + }, + { + "epoch": 3.62, + "learning_rate": 0.0001410757105255966, + "loss": 0.389, + "step": 3049 + }, + { + "epoch": 3.62, + "learning_rate": 0.0001410412805452757, + "loss": 0.4083, + "step": 3050 + }, + { + "epoch": 3.62, + "learning_rate": 0.0001410068447133634, + "loss": 0.8703, + "step": 3051 + }, + { + "epoch": 3.62, + "learning_rate": 0.00014097240303476954, + "loss": 0.4724, + "step": 3052 + }, + { + "epoch": 3.62, + "learning_rate": 0.00014093795551440474, + "loss": 0.6257, + "step": 3053 + }, + { + "epoch": 3.62, + "learning_rate": 0.00014090350215718048, + "loss": 0.5212, + "step": 3054 + }, + { + "epoch": 3.62, + "learning_rate": 0.00014086904296800902, + "loss": 0.4429, + "step": 3055 + }, + { + "epoch": 3.63, + "learning_rate": 0.00014083457795180355, + "loss": 0.3496, + "step": 3056 + }, + { + "epoch": 3.63, + "learning_rate": 0.00014080010711347798, + "loss": 0.3402, + "step": 3057 + }, + { + "epoch": 3.63, + "learning_rate": 0.0001407656304579471, + "loss": 0.4783, + "step": 3058 + }, + { + "epoch": 3.63, + "learning_rate": 0.00014073114799012653, + "loss": 0.3987, + "step": 3059 + }, + { + "epoch": 3.63, + "learning_rate": 0.00014069665971493274, + "loss": 0.4755, + "step": 3060 + }, + { + "epoch": 3.63, + "learning_rate": 0.00014066216563728303, + "loss": 0.4792, + "step": 3061 + }, + { + "epoch": 3.63, + "learning_rate": 0.00014062766576209546, + "loss": 0.4275, + "step": 3062 + }, + { + "epoch": 3.63, + "learning_rate": 0.00014059316009428893, + "loss": 0.3598, + "step": 3063 + }, + { + "epoch": 3.64, + "learning_rate": 0.00014055864863878325, + "loss": 0.4887, + "step": 3064 + }, + { + "epoch": 3.64, + "learning_rate": 0.000140524131400499, + "loss": 0.5421, + "step": 3065 + }, + { + "epoch": 3.64, + "learning_rate": 0.00014048960838435753, + "loss": 0.352, + "step": 3066 + }, + { + "epoch": 3.64, + "learning_rate": 0.00014045507959528118, + "loss": 0.3124, + "step": 3067 + }, + { + "epoch": 3.64, + "learning_rate": 0.00014042054503819287, + "loss": 0.3955, + "step": 3068 + }, + { + "epoch": 3.64, + "learning_rate": 0.00014038600471801658, + "loss": 0.455, + "step": 3069 + }, + { + "epoch": 3.64, + "learning_rate": 0.00014035145863967692, + "loss": 0.5177, + "step": 3070 + }, + { + "epoch": 3.64, + "learning_rate": 0.00014031690680809945, + "loss": 0.4205, + "step": 3071 + }, + { + "epoch": 3.64, + "learning_rate": 0.00014028234922821054, + "loss": 0.4832, + "step": 3072 + }, + { + "epoch": 3.65, + "learning_rate": 0.0001402477859049373, + "loss": 0.3496, + "step": 3073 + }, + { + "epoch": 3.65, + "learning_rate": 0.0001402132168432077, + "loss": 0.5404, + "step": 3074 + }, + { + "epoch": 3.65, + "learning_rate": 0.00014017864204795058, + "loss": 0.5106, + "step": 3075 + }, + { + "epoch": 3.65, + "learning_rate": 0.0001401440615240955, + "loss": 0.6611, + "step": 3076 + }, + { + "epoch": 3.65, + "learning_rate": 0.00014010947527657295, + "loss": 0.3879, + "step": 3077 + }, + { + "epoch": 3.65, + "learning_rate": 0.0001400748833103141, + "loss": 0.3054, + "step": 3078 + }, + { + "epoch": 3.65, + "learning_rate": 0.00014004028563025108, + "loss": 0.3461, + "step": 3079 + }, + { + "epoch": 3.65, + "learning_rate": 0.0001400056822413167, + "loss": 0.482, + "step": 3080 + }, + { + "epoch": 3.66, + "learning_rate": 0.0001399710731484447, + "loss": 0.3285, + "step": 3081 + }, + { + "epoch": 3.66, + "learning_rate": 0.00013993645835656953, + "loss": 0.363, + "step": 3082 + }, + { + "epoch": 3.66, + "learning_rate": 0.00013990183787062661, + "loss": 0.5092, + "step": 3083 + }, + { + "epoch": 3.66, + "learning_rate": 0.00013986721169555194, + "loss": 0.3009, + "step": 3084 + }, + { + "epoch": 3.66, + "learning_rate": 0.00013983257983628253, + "loss": 0.3831, + "step": 3085 + }, + { + "epoch": 3.66, + "learning_rate": 0.0001397979422977561, + "loss": 0.3718, + "step": 3086 + }, + { + "epoch": 3.66, + "learning_rate": 0.00013976329908491118, + "loss": 0.3401, + "step": 3087 + }, + { + "epoch": 3.66, + "learning_rate": 0.00013972865020268722, + "loss": 0.5294, + "step": 3088 + }, + { + "epoch": 3.67, + "learning_rate": 0.00013969399565602435, + "loss": 0.5054, + "step": 3089 + }, + { + "epoch": 3.67, + "learning_rate": 0.0001396593354498635, + "loss": 0.4247, + "step": 3090 + }, + { + "epoch": 3.67, + "learning_rate": 0.00013962466958914658, + "loss": 0.431, + "step": 3091 + }, + { + "epoch": 3.67, + "learning_rate": 0.00013958999807881604, + "loss": 0.6341, + "step": 3092 + }, + { + "epoch": 3.67, + "learning_rate": 0.0001395553209238154, + "loss": 0.5126, + "step": 3093 + }, + { + "epoch": 3.67, + "learning_rate": 0.00013952063812908881, + "loss": 0.3775, + "step": 3094 + }, + { + "epoch": 3.67, + "learning_rate": 0.0001394859496995813, + "loss": 0.5149, + "step": 3095 + }, + { + "epoch": 3.67, + "learning_rate": 0.00013945125564023868, + "loss": 0.2879, + "step": 3096 + }, + { + "epoch": 3.67, + "learning_rate": 0.00013941655595600756, + "loss": 0.5621, + "step": 3097 + }, + { + "epoch": 3.68, + "learning_rate": 0.00013938185065183532, + "loss": 0.408, + "step": 3098 + }, + { + "epoch": 3.68, + "learning_rate": 0.00013934713973267024, + "loss": 0.4247, + "step": 3099 + }, + { + "epoch": 3.68, + "learning_rate": 0.0001393124232034613, + "loss": 0.4224, + "step": 3100 + }, + { + "epoch": 3.68, + "learning_rate": 0.0001392777010691584, + "loss": 0.4142, + "step": 3101 + }, + { + "epoch": 3.68, + "learning_rate": 0.00013924297333471204, + "loss": 0.6004, + "step": 3102 + }, + { + "epoch": 3.68, + "learning_rate": 0.00013920824000507374, + "loss": 0.6016, + "step": 3103 + }, + { + "epoch": 3.68, + "learning_rate": 0.0001391735010851956, + "loss": 0.4669, + "step": 3104 + }, + { + "epoch": 3.68, + "learning_rate": 0.00013913875658003074, + "loss": 0.3987, + "step": 3105 + }, + { + "epoch": 3.69, + "learning_rate": 0.0001391040064945329, + "loss": 0.471, + "step": 3106 + }, + { + "epoch": 3.69, + "learning_rate": 0.0001390692508336568, + "loss": 0.6135, + "step": 3107 + }, + { + "epoch": 3.69, + "learning_rate": 0.00013903448960235766, + "loss": 0.5369, + "step": 3108 + }, + { + "epoch": 3.69, + "learning_rate": 0.00013899972280559183, + "loss": 0.3295, + "step": 3109 + }, + { + "epoch": 3.69, + "learning_rate": 0.0001389649504483162, + "loss": 0.309, + "step": 3110 + }, + { + "epoch": 3.69, + "learning_rate": 0.00013893017253548858, + "loss": 0.4026, + "step": 3111 + }, + { + "epoch": 3.69, + "learning_rate": 0.00013889538907206755, + "loss": 0.4724, + "step": 3112 + }, + { + "epoch": 3.69, + "learning_rate": 0.0001388606000630125, + "loss": 0.3606, + "step": 3113 + }, + { + "epoch": 3.7, + "learning_rate": 0.0001388258055132835, + "loss": 0.4894, + "step": 3114 + }, + { + "epoch": 3.7, + "learning_rate": 0.0001387910054278416, + "loss": 0.4832, + "step": 3115 + }, + { + "epoch": 3.7, + "learning_rate": 0.0001387561998116484, + "loss": 0.4604, + "step": 3116 + }, + { + "epoch": 3.7, + "learning_rate": 0.00013872138866966656, + "loss": 0.4377, + "step": 3117 + }, + { + "epoch": 3.7, + "learning_rate": 0.00013868657200685934, + "loss": 0.3965, + "step": 3118 + }, + { + "epoch": 3.7, + "learning_rate": 0.0001386517498281908, + "loss": 0.7653, + "step": 3119 + }, + { + "epoch": 3.7, + "learning_rate": 0.00013861692213862584, + "loss": 0.5213, + "step": 3120 + }, + { + "epoch": 3.7, + "learning_rate": 0.00013858208894313017, + "loss": 0.9296, + "step": 3121 + }, + { + "epoch": 3.7, + "learning_rate": 0.00013854725024667016, + "loss": 0.7738, + "step": 3122 + }, + { + "epoch": 3.71, + "learning_rate": 0.00013851240605421315, + "loss": 0.5826, + "step": 3123 + }, + { + "epoch": 3.71, + "learning_rate": 0.0001384775563707271, + "loss": 0.5502, + "step": 3124 + }, + { + "epoch": 3.71, + "learning_rate": 0.00013844270120118085, + "loss": 0.3535, + "step": 3125 + }, + { + "epoch": 3.71, + "learning_rate": 0.0001384078405505439, + "loss": 0.4853, + "step": 3126 + }, + { + "epoch": 3.71, + "learning_rate": 0.00013837297442378675, + "loss": 0.5819, + "step": 3127 + }, + { + "epoch": 3.71, + "learning_rate": 0.00013833810282588044, + "loss": 0.3728, + "step": 3128 + }, + { + "epoch": 3.71, + "learning_rate": 0.00013830322576179697, + "loss": 0.3327, + "step": 3129 + }, + { + "epoch": 3.71, + "learning_rate": 0.000138268343236509, + "loss": 0.4618, + "step": 3130 + }, + { + "epoch": 3.72, + "learning_rate": 0.00013823345525499004, + "loss": 0.3377, + "step": 3131 + }, + { + "epoch": 3.72, + "learning_rate": 0.00013819856182221434, + "loss": 0.3154, + "step": 3132 + }, + { + "epoch": 3.72, + "learning_rate": 0.00013816366294315695, + "loss": 0.5116, + "step": 3133 + }, + { + "epoch": 3.72, + "learning_rate": 0.0001381287586227937, + "loss": 0.4987, + "step": 3134 + }, + { + "epoch": 3.72, + "learning_rate": 0.00013809384886610118, + "loss": 0.5596, + "step": 3135 + }, + { + "epoch": 3.72, + "eval_loss": 2.939779281616211, + "eval_runtime": 283.9953, + "eval_samples_per_second": 0.725, + "eval_steps_per_second": 0.725, + "step": 3135 + }, + { + "epoch": 3.72, + "learning_rate": 0.00013805893367805678, + "loss": 0.5128, + "step": 3136 + }, + { + "epoch": 3.72, + "learning_rate": 0.0001380240130636386, + "loss": 0.3149, + "step": 3137 + }, + { + "epoch": 3.72, + "learning_rate": 0.00013798908702782558, + "loss": 0.4984, + "step": 3138 + }, + { + "epoch": 3.73, + "learning_rate": 0.0001379541555755974, + "loss": 0.626, + "step": 3139 + }, + { + "epoch": 3.73, + "learning_rate": 0.00013791921871193457, + "loss": 0.4949, + "step": 3140 + }, + { + "epoch": 3.73, + "learning_rate": 0.00013788427644181823, + "loss": 0.5654, + "step": 3141 + }, + { + "epoch": 3.73, + "learning_rate": 0.0001378493287702305, + "loss": 0.4197, + "step": 3142 + }, + { + "epoch": 3.73, + "learning_rate": 0.00013781437570215406, + "loss": 0.4341, + "step": 3143 + }, + { + "epoch": 3.73, + "learning_rate": 0.00013777941724257253, + "loss": 0.3576, + "step": 3144 + }, + { + "epoch": 3.73, + "learning_rate": 0.00013774445339647014, + "loss": 0.3098, + "step": 3145 + }, + { + "epoch": 3.73, + "learning_rate": 0.00013770948416883205, + "loss": 0.6052, + "step": 3146 + }, + { + "epoch": 3.73, + "learning_rate": 0.00013767450956464407, + "loss": 0.4327, + "step": 3147 + }, + { + "epoch": 3.74, + "learning_rate": 0.00013763952958889287, + "loss": 0.4717, + "step": 3148 + }, + { + "epoch": 3.74, + "learning_rate": 0.0001376045442465657, + "loss": 0.5263, + "step": 3149 + }, + { + "epoch": 3.74, + "learning_rate": 0.00013756955354265085, + "loss": 0.5021, + "step": 3150 + }, + { + "epoch": 3.74, + "learning_rate": 0.00013753455748213714, + "loss": 0.4066, + "step": 3151 + }, + { + "epoch": 3.74, + "learning_rate": 0.00013749955607001433, + "loss": 0.3461, + "step": 3152 + }, + { + "epoch": 3.74, + "learning_rate": 0.00013746454931127278, + "loss": 0.4318, + "step": 3153 + }, + { + "epoch": 3.74, + "learning_rate": 0.00013742953721090372, + "loss": 0.4195, + "step": 3154 + }, + { + "epoch": 3.74, + "learning_rate": 0.0001373945197738991, + "loss": 0.3862, + "step": 3155 + }, + { + "epoch": 3.75, + "learning_rate": 0.00013735949700525163, + "loss": 0.5916, + "step": 3156 + }, + { + "epoch": 3.75, + "learning_rate": 0.00013732446890995484, + "loss": 0.5336, + "step": 3157 + }, + { + "epoch": 3.75, + "learning_rate": 0.00013728943549300295, + "loss": 0.4104, + "step": 3158 + }, + { + "epoch": 3.75, + "learning_rate": 0.00013725439675939095, + "loss": 0.541, + "step": 3159 + }, + { + "epoch": 3.75, + "learning_rate": 0.00013721935271411464, + "loss": 0.5173, + "step": 3160 + }, + { + "epoch": 3.75, + "learning_rate": 0.00013718430336217045, + "loss": 0.3866, + "step": 3161 + }, + { + "epoch": 3.75, + "learning_rate": 0.00013714924870855571, + "loss": 0.6113, + "step": 3162 + }, + { + "epoch": 3.75, + "learning_rate": 0.00013711418875826846, + "loss": 0.5817, + "step": 3163 + }, + { + "epoch": 3.76, + "learning_rate": 0.0001370791235163075, + "loss": 0.5331, + "step": 3164 + }, + { + "epoch": 3.76, + "learning_rate": 0.00013704405298767229, + "loss": 0.5744, + "step": 3165 + }, + { + "epoch": 3.76, + "learning_rate": 0.0001370089771773632, + "loss": 0.494, + "step": 3166 + }, + { + "epoch": 3.76, + "learning_rate": 0.00013697389609038124, + "loss": 0.4537, + "step": 3167 + }, + { + "epoch": 3.76, + "learning_rate": 0.00013693880973172822, + "loss": 0.5494, + "step": 3168 + }, + { + "epoch": 3.76, + "learning_rate": 0.00013690371810640665, + "loss": 0.537, + "step": 3169 + }, + { + "epoch": 3.76, + "learning_rate": 0.0001368686212194199, + "loss": 0.4698, + "step": 3170 + }, + { + "epoch": 3.76, + "learning_rate": 0.00013683351907577194, + "loss": 0.5254, + "step": 3171 + }, + { + "epoch": 3.76, + "learning_rate": 0.00013679841168046767, + "loss": 0.3857, + "step": 3172 + }, + { + "epoch": 3.77, + "learning_rate": 0.00013676329903851254, + "loss": 0.4464, + "step": 3173 + }, + { + "epoch": 3.77, + "learning_rate": 0.0001367281811549129, + "loss": 0.5651, + "step": 3174 + }, + { + "epoch": 3.77, + "learning_rate": 0.0001366930580346758, + "loss": 0.4192, + "step": 3175 + }, + { + "epoch": 3.77, + "learning_rate": 0.000136657929682809, + "loss": 0.3364, + "step": 3176 + }, + { + "epoch": 3.77, + "learning_rate": 0.00013662279610432104, + "loss": 0.3539, + "step": 3177 + }, + { + "epoch": 3.77, + "learning_rate": 0.00013658765730422125, + "loss": 0.6074, + "step": 3178 + }, + { + "epoch": 3.77, + "learning_rate": 0.00013655251328751957, + "loss": 0.5322, + "step": 3179 + }, + { + "epoch": 3.77, + "learning_rate": 0.00013651736405922686, + "loss": 0.4176, + "step": 3180 + }, + { + "epoch": 3.78, + "learning_rate": 0.00013648220962435458, + "loss": 0.4878, + "step": 3181 + }, + { + "epoch": 3.78, + "learning_rate": 0.000136447049987915, + "loss": 0.6351, + "step": 3182 + }, + { + "epoch": 3.78, + "learning_rate": 0.00013641188515492109, + "loss": 0.4487, + "step": 3183 + }, + { + "epoch": 3.78, + "learning_rate": 0.0001363767151303866, + "loss": 0.4451, + "step": 3184 + }, + { + "epoch": 3.78, + "learning_rate": 0.00013634153991932607, + "loss": 0.4944, + "step": 3185 + }, + { + "epoch": 3.78, + "learning_rate": 0.0001363063595267547, + "loss": 0.5932, + "step": 3186 + }, + { + "epoch": 3.78, + "learning_rate": 0.00013627117395768833, + "loss": 0.4964, + "step": 3187 + }, + { + "epoch": 3.78, + "learning_rate": 0.0001362359832171438, + "loss": 0.6795, + "step": 3188 + }, + { + "epoch": 3.79, + "learning_rate": 0.00013620078731013845, + "loss": 0.3862, + "step": 3189 + }, + { + "epoch": 3.79, + "learning_rate": 0.0001361655862416905, + "loss": 0.3425, + "step": 3190 + }, + { + "epoch": 3.79, + "learning_rate": 0.0001361303800168188, + "loss": 0.4361, + "step": 3191 + }, + { + "epoch": 3.79, + "learning_rate": 0.0001360951686405431, + "loss": 0.5774, + "step": 3192 + }, + { + "epoch": 3.79, + "learning_rate": 0.00013605995211788365, + "loss": 0.4044, + "step": 3193 + }, + { + "epoch": 3.79, + "learning_rate": 0.00013602473045386165, + "loss": 0.3858, + "step": 3194 + }, + { + "epoch": 3.79, + "learning_rate": 0.00013598950365349883, + "loss": 0.6136, + "step": 3195 + }, + { + "epoch": 3.79, + "learning_rate": 0.00013595427172181785, + "loss": 0.329, + "step": 3196 + }, + { + "epoch": 3.79, + "learning_rate": 0.00013591903466384203, + "loss": 0.3898, + "step": 3197 + }, + { + "epoch": 3.8, + "learning_rate": 0.00013588379248459536, + "loss": 0.4809, + "step": 3198 + }, + { + "epoch": 3.8, + "learning_rate": 0.00013584854518910262, + "loss": 0.4108, + "step": 3199 + }, + { + "epoch": 3.8, + "learning_rate": 0.00013581329278238927, + "loss": 0.4655, + "step": 3200 + }, + { + "epoch": 3.8, + "learning_rate": 0.00013577803526948162, + "loss": 0.4657, + "step": 3201 + }, + { + "epoch": 3.8, + "learning_rate": 0.00013574277265540654, + "loss": 0.4842, + "step": 3202 + }, + { + "epoch": 3.8, + "learning_rate": 0.00013570750494519175, + "loss": 0.4593, + "step": 3203 + }, + { + "epoch": 3.8, + "learning_rate": 0.00013567223214386564, + "loss": 0.435, + "step": 3204 + }, + { + "epoch": 3.8, + "learning_rate": 0.00013563695425645737, + "loss": 0.7146, + "step": 3205 + }, + { + "epoch": 3.81, + "learning_rate": 0.00013560167128799674, + "loss": 0.5027, + "step": 3206 + }, + { + "epoch": 3.81, + "learning_rate": 0.00013556638324351442, + "loss": 0.4844, + "step": 3207 + }, + { + "epoch": 3.81, + "learning_rate": 0.00013553109012804163, + "loss": 0.7605, + "step": 3208 + }, + { + "epoch": 3.81, + "learning_rate": 0.00013549579194661044, + "loss": 0.396, + "step": 3209 + }, + { + "epoch": 3.81, + "learning_rate": 0.00013546048870425356, + "loss": 0.5178, + "step": 3210 + }, + { + "epoch": 3.81, + "learning_rate": 0.00013542518040600453, + "loss": 0.6946, + "step": 3211 + }, + { + "epoch": 3.81, + "learning_rate": 0.0001353898670568975, + "loss": 0.5054, + "step": 3212 + }, + { + "epoch": 3.81, + "learning_rate": 0.00013535454866196739, + "loss": 0.4495, + "step": 3213 + }, + { + "epoch": 3.82, + "learning_rate": 0.00013531922522624982, + "loss": 0.5138, + "step": 3214 + }, + { + "epoch": 3.82, + "learning_rate": 0.0001352838967547812, + "loss": 0.4706, + "step": 3215 + }, + { + "epoch": 3.82, + "learning_rate": 0.00013524856325259848, + "loss": 0.5193, + "step": 3216 + }, + { + "epoch": 3.82, + "learning_rate": 0.0001352132247247396, + "loss": 0.4436, + "step": 3217 + }, + { + "epoch": 3.82, + "learning_rate": 0.00013517788117624292, + "loss": 0.4139, + "step": 3218 + }, + { + "epoch": 3.82, + "learning_rate": 0.0001351425326121478, + "loss": 0.5937, + "step": 3219 + }, + { + "epoch": 3.82, + "learning_rate": 0.000135107179037494, + "loss": 0.3375, + "step": 3220 + }, + { + "epoch": 3.82, + "learning_rate": 0.00013507182045732234, + "loss": 0.3712, + "step": 3221 + }, + { + "epoch": 3.82, + "learning_rate": 0.00013503645687667408, + "loss": 0.3424, + "step": 3222 + }, + { + "epoch": 3.83, + "learning_rate": 0.00013500108830059133, + "loss": 0.3333, + "step": 3223 + }, + { + "epoch": 3.83, + "learning_rate": 0.00013496571473411688, + "loss": 0.4042, + "step": 3224 + }, + { + "epoch": 3.83, + "learning_rate": 0.00013493033618229417, + "loss": 0.4963, + "step": 3225 + }, + { + "epoch": 3.83, + "learning_rate": 0.0001348949526501675, + "loss": 0.3946, + "step": 3226 + }, + { + "epoch": 3.83, + "learning_rate": 0.00013485956414278178, + "loss": 0.5807, + "step": 3227 + }, + { + "epoch": 3.83, + "learning_rate": 0.00013482417066518256, + "loss": 0.4561, + "step": 3228 + }, + { + "epoch": 3.83, + "learning_rate": 0.00013478877222241627, + "loss": 0.4964, + "step": 3229 + }, + { + "epoch": 3.83, + "learning_rate": 0.00013475336881952986, + "loss": 0.6429, + "step": 3230 + }, + { + "epoch": 3.84, + "learning_rate": 0.00013471796046157116, + "loss": 0.5466, + "step": 3231 + }, + { + "epoch": 3.84, + "learning_rate": 0.00013468254715358861, + "loss": 0.3882, + "step": 3232 + }, + { + "epoch": 3.84, + "learning_rate": 0.00013464712890063138, + "loss": 0.5006, + "step": 3233 + }, + { + "epoch": 3.84, + "learning_rate": 0.0001346117057077493, + "loss": 0.494, + "step": 3234 + }, + { + "epoch": 3.84, + "learning_rate": 0.00013457627757999303, + "loss": 0.5444, + "step": 3235 + }, + { + "epoch": 3.84, + "learning_rate": 0.00013454084452241372, + "loss": 0.3714, + "step": 3236 + }, + { + "epoch": 3.84, + "learning_rate": 0.00013450540654006348, + "loss": 0.3335, + "step": 3237 + }, + { + "epoch": 3.84, + "learning_rate": 0.0001344699636379949, + "loss": 0.4771, + "step": 3238 + }, + { + "epoch": 3.85, + "learning_rate": 0.00013443451582126144, + "loss": 0.466, + "step": 3239 + }, + { + "epoch": 3.85, + "learning_rate": 0.00013439906309491712, + "loss": 0.5537, + "step": 3240 + }, + { + "epoch": 3.85, + "learning_rate": 0.00013436360546401676, + "loss": 0.5899, + "step": 3241 + }, + { + "epoch": 3.85, + "learning_rate": 0.00013432814293361584, + "loss": 0.443, + "step": 3242 + }, + { + "epoch": 3.85, + "learning_rate": 0.00013429267550877055, + "loss": 0.4238, + "step": 3243 + }, + { + "epoch": 3.85, + "learning_rate": 0.00013425720319453773, + "loss": 0.6529, + "step": 3244 + }, + { + "epoch": 3.85, + "learning_rate": 0.00013422172599597505, + "loss": 0.6163, + "step": 3245 + }, + { + "epoch": 3.85, + "learning_rate": 0.00013418624391814068, + "loss": 0.5183, + "step": 3246 + }, + { + "epoch": 3.85, + "learning_rate": 0.00013415075696609363, + "loss": 0.7659, + "step": 3247 + }, + { + "epoch": 3.86, + "learning_rate": 0.0001341152651448936, + "loss": 0.3717, + "step": 3248 + }, + { + "epoch": 3.86, + "learning_rate": 0.0001340797684596009, + "loss": 0.6885, + "step": 3249 + }, + { + "epoch": 3.86, + "learning_rate": 0.0001340442669152766, + "loss": 0.4483, + "step": 3250 + }, + { + "epoch": 3.86, + "learning_rate": 0.0001340087605169825, + "loss": 0.3417, + "step": 3251 + }, + { + "epoch": 3.86, + "learning_rate": 0.00013397324926978094, + "loss": 0.4751, + "step": 3252 + }, + { + "epoch": 3.86, + "learning_rate": 0.00013393773317873508, + "loss": 0.4448, + "step": 3253 + }, + { + "epoch": 3.86, + "learning_rate": 0.00013390221224890878, + "loss": 0.6278, + "step": 3254 + }, + { + "epoch": 3.86, + "learning_rate": 0.00013386668648536655, + "loss": 0.2995, + "step": 3255 + }, + { + "epoch": 3.87, + "learning_rate": 0.00013383115589317353, + "loss": 0.535, + "step": 3256 + }, + { + "epoch": 3.87, + "learning_rate": 0.00013379562047739568, + "loss": 0.4972, + "step": 3257 + }, + { + "epoch": 3.87, + "learning_rate": 0.00013376008024309948, + "loss": 0.4821, + "step": 3258 + }, + { + "epoch": 3.87, + "learning_rate": 0.0001337245351953523, + "loss": 0.392, + "step": 3259 + }, + { + "epoch": 3.87, + "learning_rate": 0.000133688985339222, + "loss": 0.413, + "step": 3260 + }, + { + "epoch": 3.87, + "learning_rate": 0.00013365343067977726, + "loss": 0.4689, + "step": 3261 + }, + { + "epoch": 3.87, + "learning_rate": 0.00013361787122208744, + "loss": 0.4737, + "step": 3262 + }, + { + "epoch": 3.87, + "learning_rate": 0.00013358230697122246, + "loss": 0.5033, + "step": 3263 + }, + { + "epoch": 3.88, + "learning_rate": 0.00013354673793225302, + "loss": 0.4901, + "step": 3264 + }, + { + "epoch": 3.88, + "learning_rate": 0.00013351116411025054, + "loss": 0.5776, + "step": 3265 + }, + { + "epoch": 3.88, + "learning_rate": 0.00013347558551028702, + "loss": 0.5005, + "step": 3266 + }, + { + "epoch": 3.88, + "learning_rate": 0.00013344000213743522, + "loss": 0.6475, + "step": 3267 + }, + { + "epoch": 3.88, + "learning_rate": 0.00013340441399676856, + "loss": 0.4394, + "step": 3268 + }, + { + "epoch": 3.88, + "learning_rate": 0.0001333688210933611, + "loss": 0.4351, + "step": 3269 + }, + { + "epoch": 3.88, + "learning_rate": 0.0001333332234322876, + "loss": 0.4526, + "step": 3270 + }, + { + "epoch": 3.88, + "learning_rate": 0.0001332976210186236, + "loss": 0.3006, + "step": 3271 + }, + { + "epoch": 3.88, + "learning_rate": 0.00013326201385744518, + "loss": 0.382, + "step": 3272 + }, + { + "epoch": 3.89, + "learning_rate": 0.00013322640195382907, + "loss": 0.3488, + "step": 3273 + }, + { + "epoch": 3.89, + "learning_rate": 0.00013319078531285285, + "loss": 0.5538, + "step": 3274 + }, + { + "epoch": 3.89, + "learning_rate": 0.00013315516393959463, + "loss": 0.5328, + "step": 3275 + }, + { + "epoch": 3.89, + "learning_rate": 0.00013311953783913324, + "loss": 0.5216, + "step": 3276 + }, + { + "epoch": 3.89, + "learning_rate": 0.0001330839070165482, + "loss": 0.3845, + "step": 3277 + }, + { + "epoch": 3.89, + "learning_rate": 0.0001330482714769197, + "loss": 0.5293, + "step": 3278 + }, + { + "epoch": 3.89, + "learning_rate": 0.00013301263122532855, + "loss": 0.5415, + "step": 3279 + }, + { + "epoch": 3.89, + "learning_rate": 0.0001329769862668563, + "loss": 0.5309, + "step": 3280 + }, + { + "epoch": 3.9, + "learning_rate": 0.00013294133660658516, + "loss": 0.4629, + "step": 3281 + }, + { + "epoch": 3.9, + "learning_rate": 0.00013290568224959794, + "loss": 0.4329, + "step": 3282 + }, + { + "epoch": 3.9, + "learning_rate": 0.00013287002320097821, + "loss": 0.3973, + "step": 3283 + }, + { + "epoch": 3.9, + "learning_rate": 0.0001328343594658102, + "loss": 0.3417, + "step": 3284 + }, + { + "epoch": 3.9, + "learning_rate": 0.00013279869104917873, + "loss": 0.4784, + "step": 3285 + }, + { + "epoch": 3.9, + "learning_rate": 0.00013276301795616936, + "loss": 0.3668, + "step": 3286 + }, + { + "epoch": 3.9, + "learning_rate": 0.0001327273401918683, + "loss": 0.3726, + "step": 3287 + }, + { + "epoch": 3.9, + "learning_rate": 0.00013269165776136238, + "loss": 0.518, + "step": 3288 + }, + { + "epoch": 3.91, + "learning_rate": 0.00013265597066973922, + "loss": 0.3864, + "step": 3289 + }, + { + "epoch": 3.91, + "learning_rate": 0.00013262027892208694, + "loss": 0.4249, + "step": 3290 + }, + { + "epoch": 3.91, + "learning_rate": 0.00013258458252349444, + "loss": 0.395, + "step": 3291 + }, + { + "epoch": 3.91, + "learning_rate": 0.00013254888147905126, + "loss": 0.8359, + "step": 3292 + }, + { + "epoch": 3.91, + "learning_rate": 0.00013251317579384756, + "loss": 0.5028, + "step": 3293 + }, + { + "epoch": 3.91, + "learning_rate": 0.0001324774654729742, + "loss": 0.4216, + "step": 3294 + }, + { + "epoch": 3.91, + "learning_rate": 0.0001324417505215227, + "loss": 0.6145, + "step": 3295 + }, + { + "epoch": 3.91, + "learning_rate": 0.00013240603094458522, + "loss": 0.6158, + "step": 3296 + }, + { + "epoch": 3.91, + "learning_rate": 0.00013237030674725464, + "loss": 0.5101, + "step": 3297 + }, + { + "epoch": 3.92, + "learning_rate": 0.0001323345779346244, + "loss": 0.6933, + "step": 3298 + }, + { + "epoch": 3.92, + "learning_rate": 0.0001322988445117886, + "loss": 0.4192, + "step": 3299 + }, + { + "epoch": 3.92, + "learning_rate": 0.0001322631064838422, + "loss": 0.4549, + "step": 3300 + }, + { + "epoch": 3.92, + "learning_rate": 0.00013222736385588054, + "loss": 0.4947, + "step": 3301 + }, + { + "epoch": 3.92, + "learning_rate": 0.00013219161663299982, + "loss": 0.5383, + "step": 3302 + }, + { + "epoch": 3.92, + "learning_rate": 0.00013215586482029669, + "loss": 0.4919, + "step": 3303 + }, + { + "epoch": 3.92, + "learning_rate": 0.0001321201084228687, + "loss": 0.4603, + "step": 3304 + }, + { + "epoch": 3.92, + "learning_rate": 0.00013208434744581385, + "loss": 0.3127, + "step": 3305 + }, + { + "epoch": 3.93, + "learning_rate": 0.00013204858189423097, + "loss": 0.754, + "step": 3306 + }, + { + "epoch": 3.93, + "learning_rate": 0.00013201281177321935, + "loss": 0.3746, + "step": 3307 + }, + { + "epoch": 3.93, + "learning_rate": 0.00013197703708787913, + "loss": 0.5576, + "step": 3308 + }, + { + "epoch": 3.93, + "learning_rate": 0.0001319412578433109, + "loss": 0.4992, + "step": 3309 + }, + { + "epoch": 3.93, + "learning_rate": 0.00013190547404461598, + "loss": 0.4533, + "step": 3310 + }, + { + "epoch": 3.93, + "learning_rate": 0.0001318696856968965, + "loss": 0.4155, + "step": 3311 + }, + { + "epoch": 3.93, + "learning_rate": 0.00013183389280525497, + "loss": 0.3661, + "step": 3312 + }, + { + "epoch": 3.93, + "learning_rate": 0.00013179809537479476, + "loss": 0.4512, + "step": 3313 + }, + { + "epoch": 3.94, + "learning_rate": 0.00013176229341061975, + "loss": 0.5895, + "step": 3314 + }, + { + "epoch": 3.94, + "learning_rate": 0.00013172648691783454, + "loss": 0.3308, + "step": 3315 + }, + { + "epoch": 3.94, + "learning_rate": 0.00013169067590154432, + "loss": 0.4128, + "step": 3316 + }, + { + "epoch": 3.94, + "learning_rate": 0.00013165486036685503, + "loss": 0.5432, + "step": 3317 + }, + { + "epoch": 3.94, + "learning_rate": 0.0001316190403188731, + "loss": 0.4297, + "step": 3318 + }, + { + "epoch": 3.94, + "learning_rate": 0.00013158321576270575, + "loss": 0.4259, + "step": 3319 + }, + { + "epoch": 3.94, + "learning_rate": 0.0001315473867034608, + "loss": 0.4428, + "step": 3320 + }, + { + "epoch": 3.94, + "learning_rate": 0.0001315115531462466, + "loss": 0.6495, + "step": 3321 + }, + { + "epoch": 3.94, + "learning_rate": 0.00013147571509617228, + "loss": 0.5706, + "step": 3322 + }, + { + "epoch": 3.95, + "learning_rate": 0.0001314398725583476, + "loss": 0.3647, + "step": 3323 + }, + { + "epoch": 3.95, + "learning_rate": 0.0001314040255378829, + "loss": 0.4864, + "step": 3324 + }, + { + "epoch": 3.95, + "learning_rate": 0.00013136817403988917, + "loss": 0.4197, + "step": 3325 + }, + { + "epoch": 3.95, + "learning_rate": 0.00013133231806947805, + "loss": 0.4818, + "step": 3326 + }, + { + "epoch": 3.95, + "learning_rate": 0.00013129645763176184, + "loss": 0.4201, + "step": 3327 + }, + { + "epoch": 3.95, + "learning_rate": 0.0001312605927318534, + "loss": 0.4352, + "step": 3328 + }, + { + "epoch": 3.95, + "learning_rate": 0.0001312247233748664, + "loss": 0.2785, + "step": 3329 + }, + { + "epoch": 3.95, + "learning_rate": 0.0001311888495659149, + "loss": 0.4424, + "step": 3330 + }, + { + "epoch": 3.96, + "learning_rate": 0.00013115297131011382, + "loss": 0.4258, + "step": 3331 + }, + { + "epoch": 3.96, + "learning_rate": 0.00013111708861257855, + "loss": 0.4332, + "step": 3332 + }, + { + "epoch": 3.96, + "learning_rate": 0.00013108120147842519, + "loss": 0.3578, + "step": 3333 + }, + { + "epoch": 3.96, + "learning_rate": 0.0001310453099127705, + "loss": 0.4219, + "step": 3334 + }, + { + "epoch": 3.96, + "learning_rate": 0.0001310094139207318, + "loss": 0.5837, + "step": 3335 + }, + { + "epoch": 3.96, + "learning_rate": 0.0001309735135074271, + "loss": 0.3965, + "step": 3336 + }, + { + "epoch": 3.96, + "learning_rate": 0.00013093760867797502, + "loss": 0.4764, + "step": 3337 + }, + { + "epoch": 3.96, + "learning_rate": 0.00013090169943749476, + "loss": 0.4933, + "step": 3338 + }, + { + "epoch": 3.97, + "learning_rate": 0.00013086578579110623, + "loss": 0.3434, + "step": 3339 + }, + { + "epoch": 3.97, + "learning_rate": 0.0001308298677439299, + "loss": 0.5931, + "step": 3340 + }, + { + "epoch": 3.97, + "learning_rate": 0.00013079394530108695, + "loss": 0.442, + "step": 3341 + }, + { + "epoch": 3.97, + "learning_rate": 0.0001307580184676991, + "loss": 0.3229, + "step": 3342 + }, + { + "epoch": 3.97, + "learning_rate": 0.0001307220872488888, + "loss": 0.4567, + "step": 3343 + }, + { + "epoch": 3.97, + "learning_rate": 0.00013068615164977895, + "loss": 0.6224, + "step": 3344 + }, + { + "epoch": 3.97, + "eval_loss": 2.954587936401367, + "eval_runtime": 283.9817, + "eval_samples_per_second": 0.725, + "eval_steps_per_second": 0.725, + "step": 3344 + }, + { + "epoch": 3.97, + "learning_rate": 0.00013065021167549322, + "loss": 0.6767, + "step": 3345 + }, + { + "epoch": 3.97, + "learning_rate": 0.0001306142673311559, + "loss": 0.4809, + "step": 3346 + }, + { + "epoch": 3.97, + "learning_rate": 0.00013057831862189187, + "loss": 0.4563, + "step": 3347 + }, + { + "epoch": 3.98, + "learning_rate": 0.00013054236555282657, + "loss": 0.4674, + "step": 3348 + }, + { + "epoch": 3.98, + "learning_rate": 0.00013050640812908623, + "loss": 0.6636, + "step": 3349 + }, + { + "epoch": 3.98, + "learning_rate": 0.00013047044635579747, + "loss": 0.4652, + "step": 3350 + }, + { + "epoch": 3.98, + "learning_rate": 0.00013043448023808773, + "loss": 0.3912, + "step": 3351 + }, + { + "epoch": 3.98, + "learning_rate": 0.000130398509781085, + "loss": 0.6064, + "step": 3352 + }, + { + "epoch": 3.98, + "learning_rate": 0.00013036253498991787, + "loss": 0.5975, + "step": 3353 + }, + { + "epoch": 3.98, + "learning_rate": 0.00013032655586971552, + "loss": 0.7249, + "step": 3354 + }, + { + "epoch": 3.98, + "learning_rate": 0.00013029057242560784, + "loss": 0.4604, + "step": 3355 + }, + { + "epoch": 3.99, + "learning_rate": 0.00013025458466272525, + "loss": 0.4895, + "step": 3356 + }, + { + "epoch": 3.99, + "learning_rate": 0.0001302185925861988, + "loss": 0.3628, + "step": 3357 + }, + { + "epoch": 3.99, + "learning_rate": 0.00013018259620116025, + "loss": 0.4798, + "step": 3358 + }, + { + "epoch": 3.99, + "learning_rate": 0.00013014659551274189, + "loss": 0.663, + "step": 3359 + }, + { + "epoch": 3.99, + "learning_rate": 0.00013011059052607656, + "loss": 0.3923, + "step": 3360 + }, + { + "epoch": 3.99, + "learning_rate": 0.00013007458124629785, + "loss": 0.5601, + "step": 3361 + }, + { + "epoch": 3.99, + "learning_rate": 0.00013003856767853983, + "loss": 0.67, + "step": 3362 + }, + { + "epoch": 3.99, + "learning_rate": 0.00013000254982793735, + "loss": 0.5059, + "step": 3363 + }, + { + "epoch": 4.0, + "learning_rate": 0.00012996652769962566, + "loss": 0.4992, + "step": 3364 + }, + { + "epoch": 4.0, + "learning_rate": 0.00012993050129874082, + "loss": 0.6196, + "step": 3365 + }, + { + "epoch": 4.0, + "learning_rate": 0.00012989447063041935, + "loss": 0.4157, + "step": 3366 + }, + { + "epoch": 4.0, + "learning_rate": 0.00012985843569979848, + "loss": 0.5714, + "step": 3367 + }, + { + "epoch": 4.0, + "learning_rate": 0.000129822396512016, + "loss": 0.7484, + "step": 3368 + }, + { + "epoch": 4.0, + "learning_rate": 0.00012978635307221026, + "loss": 0.3928, + "step": 3369 + }, + { + "epoch": 4.0, + "learning_rate": 0.00012975030538552032, + "loss": 0.4129, + "step": 3370 + }, + { + "epoch": 4.0, + "learning_rate": 0.0001297142534570858, + "loss": 0.5407, + "step": 3371 + }, + { + "epoch": 4.0, + "learning_rate": 0.00012967819729204684, + "loss": 0.479, + "step": 3372 + }, + { + "epoch": 4.01, + "learning_rate": 0.00012964213689554437, + "loss": 0.4492, + "step": 3373 + }, + { + "epoch": 4.01, + "learning_rate": 0.00012960607227271973, + "loss": 0.4574, + "step": 3374 + }, + { + "epoch": 4.01, + "learning_rate": 0.00012957000342871502, + "loss": 0.7554, + "step": 3375 + }, + { + "epoch": 4.01, + "learning_rate": 0.00012953393036867282, + "loss": 0.3038, + "step": 3376 + }, + { + "epoch": 4.01, + "learning_rate": 0.0001294978530977364, + "loss": 0.5125, + "step": 3377 + }, + { + "epoch": 4.01, + "learning_rate": 0.0001294617716210495, + "loss": 0.7192, + "step": 3378 + }, + { + "epoch": 4.01, + "learning_rate": 0.00012942568594375667, + "loss": 0.4371, + "step": 3379 + }, + { + "epoch": 4.01, + "learning_rate": 0.00012938959607100288, + "loss": 0.3672, + "step": 3380 + }, + { + "epoch": 4.0, + "learning_rate": 0.00012935350200793378, + "loss": 0.4752, + "step": 3381 + }, + { + "epoch": 4.0, + "learning_rate": 0.0001293174037596956, + "loss": 0.225, + "step": 3382 + }, + { + "epoch": 4.0, + "learning_rate": 0.00012928130133143512, + "loss": 0.2106, + "step": 3383 + }, + { + "epoch": 4.0, + "learning_rate": 0.00012924519472829978, + "loss": 0.213, + "step": 3384 + }, + { + "epoch": 4.01, + "learning_rate": 0.0001292090839554376, + "loss": 0.2775, + "step": 3385 + }, + { + "epoch": 4.01, + "learning_rate": 0.0001291729690179972, + "loss": 0.2417, + "step": 3386 + }, + { + "epoch": 4.01, + "learning_rate": 0.0001291368499211278, + "loss": 0.2212, + "step": 3387 + }, + { + "epoch": 4.01, + "learning_rate": 0.00012910072666997912, + "loss": 0.2644, + "step": 3388 + }, + { + "epoch": 4.01, + "learning_rate": 0.00012906459926970162, + "loss": 0.2206, + "step": 3389 + }, + { + "epoch": 4.01, + "learning_rate": 0.00012902846772544624, + "loss": 0.2238, + "step": 3390 + }, + { + "epoch": 4.01, + "learning_rate": 0.00012899233204236455, + "loss": 0.2212, + "step": 3391 + }, + { + "epoch": 4.01, + "learning_rate": 0.00012895619222560878, + "loss": 0.2082, + "step": 3392 + }, + { + "epoch": 4.02, + "learning_rate": 0.00012892004828033156, + "loss": 0.2896, + "step": 3393 + }, + { + "epoch": 4.02, + "learning_rate": 0.00012888390021168636, + "loss": 0.2351, + "step": 3394 + }, + { + "epoch": 4.02, + "learning_rate": 0.00012884774802482697, + "loss": 0.2263, + "step": 3395 + }, + { + "epoch": 4.02, + "learning_rate": 0.000128811591724908, + "loss": 0.2243, + "step": 3396 + }, + { + "epoch": 4.02, + "learning_rate": 0.0001287754313170845, + "loss": 0.2433, + "step": 3397 + }, + { + "epoch": 4.02, + "learning_rate": 0.00012873926680651222, + "loss": 0.2566, + "step": 3398 + }, + { + "epoch": 4.02, + "learning_rate": 0.00012870309819834735, + "loss": 0.2537, + "step": 3399 + }, + { + "epoch": 4.02, + "learning_rate": 0.00012866692549774682, + "loss": 0.298, + "step": 3400 + }, + { + "epoch": 4.03, + "learning_rate": 0.000128630748709868, + "loss": 0.2246, + "step": 3401 + }, + { + "epoch": 4.03, + "learning_rate": 0.00012859456783986893, + "loss": 0.2179, + "step": 3402 + }, + { + "epoch": 4.03, + "learning_rate": 0.00012855838289290821, + "loss": 0.2394, + "step": 3403 + }, + { + "epoch": 4.03, + "learning_rate": 0.0001285221938741451, + "loss": 0.2068, + "step": 3404 + }, + { + "epoch": 4.03, + "learning_rate": 0.00012848600078873925, + "loss": 0.1961, + "step": 3405 + }, + { + "epoch": 4.03, + "learning_rate": 0.00012844980364185108, + "loss": 0.2719, + "step": 3406 + }, + { + "epoch": 4.03, + "learning_rate": 0.00012841360243864147, + "loss": 0.2009, + "step": 3407 + }, + { + "epoch": 4.03, + "learning_rate": 0.00012837739718427196, + "loss": 0.2343, + "step": 3408 + }, + { + "epoch": 4.03, + "learning_rate": 0.00012834118788390456, + "loss": 0.3161, + "step": 3409 + }, + { + "epoch": 4.04, + "learning_rate": 0.00012830497454270205, + "loss": 0.1992, + "step": 3410 + }, + { + "epoch": 4.04, + "learning_rate": 0.00012826875716582755, + "loss": 0.261, + "step": 3411 + }, + { + "epoch": 4.04, + "learning_rate": 0.00012823253575844495, + "loss": 0.2403, + "step": 3412 + }, + { + "epoch": 4.04, + "learning_rate": 0.00012819631032571854, + "loss": 0.2271, + "step": 3413 + }, + { + "epoch": 4.04, + "learning_rate": 0.00012816008087281332, + "loss": 0.2062, + "step": 3414 + }, + { + "epoch": 4.04, + "learning_rate": 0.00012812384740489485, + "loss": 0.2133, + "step": 3415 + }, + { + "epoch": 4.04, + "learning_rate": 0.00012808760992712924, + "loss": 0.2372, + "step": 3416 + }, + { + "epoch": 4.04, + "learning_rate": 0.00012805136844468309, + "loss": 0.2466, + "step": 3417 + }, + { + "epoch": 4.05, + "learning_rate": 0.00012801512296272368, + "loss": 0.2456, + "step": 3418 + }, + { + "epoch": 4.05, + "learning_rate": 0.00012797887348641883, + "loss": 0.2171, + "step": 3419 + }, + { + "epoch": 4.05, + "learning_rate": 0.00012794262002093697, + "loss": 0.3038, + "step": 3420 + }, + { + "epoch": 4.05, + "learning_rate": 0.000127906362571447, + "loss": 0.1868, + "step": 3421 + }, + { + "epoch": 4.05, + "learning_rate": 0.00012787010114311844, + "loss": 0.2611, + "step": 3422 + }, + { + "epoch": 4.05, + "learning_rate": 0.00012783383574112138, + "loss": 0.2131, + "step": 3423 + }, + { + "epoch": 4.05, + "learning_rate": 0.0001277975663706265, + "loss": 0.2005, + "step": 3424 + }, + { + "epoch": 4.05, + "learning_rate": 0.000127761293036805, + "loss": 0.2455, + "step": 3425 + }, + { + "epoch": 4.06, + "learning_rate": 0.0001277250157448287, + "loss": 0.2837, + "step": 3426 + }, + { + "epoch": 4.06, + "learning_rate": 0.00012768873449986988, + "loss": 0.2252, + "step": 3427 + }, + { + "epoch": 4.06, + "learning_rate": 0.00012765244930710155, + "loss": 0.211, + "step": 3428 + }, + { + "epoch": 4.06, + "learning_rate": 0.00012761616017169708, + "loss": 0.1831, + "step": 3429 + }, + { + "epoch": 4.06, + "learning_rate": 0.0001275798670988306, + "loss": 0.1985, + "step": 3430 + }, + { + "epoch": 4.06, + "learning_rate": 0.00012754357009367665, + "loss": 0.2341, + "step": 3431 + }, + { + "epoch": 4.06, + "learning_rate": 0.00012750726916141046, + "loss": 0.2395, + "step": 3432 + }, + { + "epoch": 4.06, + "learning_rate": 0.00012747096430720765, + "loss": 0.2183, + "step": 3433 + }, + { + "epoch": 4.06, + "learning_rate": 0.0001274346555362446, + "loss": 0.2698, + "step": 3434 + }, + { + "epoch": 4.07, + "learning_rate": 0.00012739834285369807, + "loss": 0.2104, + "step": 3435 + }, + { + "epoch": 4.07, + "learning_rate": 0.0001273620262647455, + "loss": 0.2395, + "step": 3436 + }, + { + "epoch": 4.07, + "learning_rate": 0.00012732570577456484, + "loss": 0.2218, + "step": 3437 + }, + { + "epoch": 4.07, + "learning_rate": 0.00012728938138833462, + "loss": 0.2337, + "step": 3438 + }, + { + "epoch": 4.07, + "learning_rate": 0.00012725305311123386, + "loss": 0.1958, + "step": 3439 + }, + { + "epoch": 4.07, + "learning_rate": 0.0001272167209484422, + "loss": 0.2767, + "step": 3440 + }, + { + "epoch": 4.07, + "learning_rate": 0.00012718038490513984, + "loss": 0.2238, + "step": 3441 + }, + { + "epoch": 4.07, + "learning_rate": 0.00012714404498650743, + "loss": 0.2931, + "step": 3442 + }, + { + "epoch": 4.08, + "learning_rate": 0.00012710770119772632, + "loss": 0.3166, + "step": 3443 + }, + { + "epoch": 4.08, + "learning_rate": 0.00012707135354397836, + "loss": 0.1985, + "step": 3444 + }, + { + "epoch": 4.08, + "learning_rate": 0.00012703500203044586, + "loss": 0.2208, + "step": 3445 + }, + { + "epoch": 4.08, + "learning_rate": 0.0001269986466623118, + "loss": 0.2279, + "step": 3446 + }, + { + "epoch": 4.08, + "learning_rate": 0.00012696228744475964, + "loss": 0.2656, + "step": 3447 + }, + { + "epoch": 4.08, + "learning_rate": 0.00012692592438297341, + "loss": 0.2181, + "step": 3448 + }, + { + "epoch": 4.08, + "learning_rate": 0.00012688955748213772, + "loss": 0.1994, + "step": 3449 + }, + { + "epoch": 4.08, + "learning_rate": 0.0001268531867474377, + "loss": 0.2818, + "step": 3450 + }, + { + "epoch": 4.09, + "learning_rate": 0.00012681681218405897, + "loss": 0.2277, + "step": 3451 + }, + { + "epoch": 4.09, + "learning_rate": 0.00012678043379718782, + "loss": 0.2692, + "step": 3452 + }, + { + "epoch": 4.09, + "learning_rate": 0.00012674405159201091, + "loss": 0.2664, + "step": 3453 + }, + { + "epoch": 4.09, + "learning_rate": 0.00012670766557371565, + "loss": 0.2008, + "step": 3454 + }, + { + "epoch": 4.09, + "learning_rate": 0.00012667127574748986, + "loss": 0.2382, + "step": 3455 + }, + { + "epoch": 4.09, + "learning_rate": 0.0001266348821185219, + "loss": 0.2454, + "step": 3456 + }, + { + "epoch": 4.09, + "learning_rate": 0.0001265984846920008, + "loss": 0.3547, + "step": 3457 + }, + { + "epoch": 4.09, + "learning_rate": 0.00012656208347311594, + "loss": 0.2115, + "step": 3458 + }, + { + "epoch": 4.09, + "learning_rate": 0.00012652567846705736, + "loss": 0.1929, + "step": 3459 + }, + { + "epoch": 4.1, + "learning_rate": 0.00012648926967901567, + "loss": 0.2076, + "step": 3460 + }, + { + "epoch": 4.1, + "learning_rate": 0.00012645285711418194, + "loss": 0.2045, + "step": 3461 + }, + { + "epoch": 4.1, + "learning_rate": 0.00012641644077774776, + "loss": 0.2378, + "step": 3462 + }, + { + "epoch": 4.1, + "learning_rate": 0.0001263800206749054, + "loss": 0.2674, + "step": 3463 + }, + { + "epoch": 4.1, + "learning_rate": 0.00012634359681084752, + "loss": 0.2125, + "step": 3464 + }, + { + "epoch": 4.1, + "learning_rate": 0.00012630716919076736, + "loss": 0.2097, + "step": 3465 + }, + { + "epoch": 4.1, + "learning_rate": 0.0001262707378198587, + "loss": 0.2352, + "step": 3466 + }, + { + "epoch": 4.1, + "learning_rate": 0.0001262343027033159, + "loss": 0.2105, + "step": 3467 + }, + { + "epoch": 4.11, + "learning_rate": 0.00012619786384633375, + "loss": 0.2207, + "step": 3468 + }, + { + "epoch": 4.11, + "learning_rate": 0.0001261614212541077, + "loss": 0.304, + "step": 3469 + }, + { + "epoch": 4.11, + "learning_rate": 0.00012612497493183364, + "loss": 0.2239, + "step": 3470 + }, + { + "epoch": 4.11, + "learning_rate": 0.00012608852488470802, + "loss": 0.2875, + "step": 3471 + }, + { + "epoch": 4.11, + "learning_rate": 0.0001260520711179278, + "loss": 0.3197, + "step": 3472 + }, + { + "epoch": 4.11, + "learning_rate": 0.00012601561363669058, + "loss": 0.1942, + "step": 3473 + }, + { + "epoch": 4.11, + "learning_rate": 0.00012597915244619428, + "loss": 0.2117, + "step": 3474 + }, + { + "epoch": 4.11, + "learning_rate": 0.00012594268755163754, + "loss": 0.2222, + "step": 3475 + }, + { + "epoch": 4.12, + "learning_rate": 0.00012590621895821943, + "loss": 0.1871, + "step": 3476 + }, + { + "epoch": 4.12, + "learning_rate": 0.0001258697466711396, + "loss": 0.2146, + "step": 3477 + }, + { + "epoch": 4.12, + "learning_rate": 0.0001258332706955982, + "loss": 0.3307, + "step": 3478 + }, + { + "epoch": 4.12, + "learning_rate": 0.00012579679103679592, + "loss": 0.2175, + "step": 3479 + }, + { + "epoch": 4.12, + "learning_rate": 0.00012576030769993393, + "loss": 0.2976, + "step": 3480 + }, + { + "epoch": 4.12, + "learning_rate": 0.000125723820690214, + "loss": 0.2031, + "step": 3481 + }, + { + "epoch": 4.12, + "learning_rate": 0.00012568733001283827, + "loss": 0.2046, + "step": 3482 + }, + { + "epoch": 4.12, + "learning_rate": 0.0001256508356730097, + "loss": 0.2642, + "step": 3483 + }, + { + "epoch": 4.12, + "learning_rate": 0.00012561433767593145, + "loss": 0.2088, + "step": 3484 + }, + { + "epoch": 4.13, + "learning_rate": 0.0001255778360268074, + "loss": 0.2458, + "step": 3485 + }, + { + "epoch": 4.13, + "learning_rate": 0.0001255413307308418, + "loss": 0.2237, + "step": 3486 + }, + { + "epoch": 4.13, + "learning_rate": 0.00012550482179323963, + "loss": 0.2696, + "step": 3487 + }, + { + "epoch": 4.13, + "learning_rate": 0.00012546830921920617, + "loss": 0.2078, + "step": 3488 + }, + { + "epoch": 4.13, + "learning_rate": 0.00012543179301394744, + "loss": 0.2199, + "step": 3489 + }, + { + "epoch": 4.13, + "learning_rate": 0.0001253952731826697, + "loss": 0.2258, + "step": 3490 + }, + { + "epoch": 4.13, + "learning_rate": 0.00012535874973057997, + "loss": 0.1981, + "step": 3491 + }, + { + "epoch": 4.13, + "learning_rate": 0.0001253222226628857, + "loss": 0.3252, + "step": 3492 + }, + { + "epoch": 4.14, + "learning_rate": 0.00012528569198479481, + "loss": 0.2717, + "step": 3493 + }, + { + "epoch": 4.14, + "learning_rate": 0.0001252491577015158, + "loss": 0.248, + "step": 3494 + }, + { + "epoch": 4.14, + "learning_rate": 0.00012521261981825768, + "loss": 0.2725, + "step": 3495 + }, + { + "epoch": 4.14, + "learning_rate": 0.00012517607834022993, + "loss": 0.2203, + "step": 3496 + }, + { + "epoch": 4.14, + "learning_rate": 0.0001251395332726426, + "loss": 0.2461, + "step": 3497 + }, + { + "epoch": 4.14, + "learning_rate": 0.00012510298462070619, + "loss": 0.3018, + "step": 3498 + }, + { + "epoch": 4.14, + "learning_rate": 0.0001250664323896317, + "loss": 0.2329, + "step": 3499 + }, + { + "epoch": 4.14, + "learning_rate": 0.00012502987658463075, + "loss": 0.221, + "step": 3500 + }, + { + "epoch": 4.15, + "learning_rate": 0.00012499331721091544, + "loss": 0.2812, + "step": 3501 + }, + { + "epoch": 4.15, + "learning_rate": 0.00012495675427369823, + "loss": 0.2846, + "step": 3502 + }, + { + "epoch": 4.15, + "learning_rate": 0.00012492018777819226, + "loss": 0.2447, + "step": 3503 + }, + { + "epoch": 4.15, + "learning_rate": 0.0001248836177296111, + "loss": 0.1969, + "step": 3504 + }, + { + "epoch": 4.15, + "learning_rate": 0.00012484704413316878, + "loss": 0.2045, + "step": 3505 + }, + { + "epoch": 4.15, + "learning_rate": 0.00012481046699408004, + "loss": 0.1862, + "step": 3506 + }, + { + "epoch": 4.15, + "learning_rate": 0.00012477388631755985, + "loss": 0.23, + "step": 3507 + }, + { + "epoch": 4.15, + "learning_rate": 0.0001247373021088239, + "loss": 0.2972, + "step": 3508 + }, + { + "epoch": 4.15, + "learning_rate": 0.00012470071437308827, + "loss": 0.2222, + "step": 3509 + }, + { + "epoch": 4.16, + "learning_rate": 0.00012466412311556952, + "loss": 0.2262, + "step": 3510 + }, + { + "epoch": 4.16, + "learning_rate": 0.00012462752834148486, + "loss": 0.3642, + "step": 3511 + }, + { + "epoch": 4.16, + "learning_rate": 0.0001245909300560518, + "loss": 0.2221, + "step": 3512 + }, + { + "epoch": 4.16, + "learning_rate": 0.00012455432826448862, + "loss": 0.2607, + "step": 3513 + }, + { + "epoch": 4.16, + "learning_rate": 0.00012451772297201376, + "loss": 0.2396, + "step": 3514 + }, + { + "epoch": 4.16, + "learning_rate": 0.00012448111418384645, + "loss": 0.2034, + "step": 3515 + }, + { + "epoch": 4.16, + "learning_rate": 0.00012444450190520623, + "loss": 0.2404, + "step": 3516 + }, + { + "epoch": 4.16, + "learning_rate": 0.00012440788614131329, + "loss": 0.2029, + "step": 3517 + }, + { + "epoch": 4.17, + "learning_rate": 0.00012437126689738816, + "loss": 0.2128, + "step": 3518 + }, + { + "epoch": 4.17, + "learning_rate": 0.00012433464417865202, + "loss": 0.2857, + "step": 3519 + }, + { + "epoch": 4.17, + "learning_rate": 0.0001242980179903264, + "loss": 0.2931, + "step": 3520 + }, + { + "epoch": 4.17, + "learning_rate": 0.00012426138833763342, + "loss": 0.2319, + "step": 3521 + }, + { + "epoch": 4.17, + "learning_rate": 0.00012422475522579573, + "loss": 0.2272, + "step": 3522 + }, + { + "epoch": 4.17, + "learning_rate": 0.00012418811866003632, + "loss": 0.2498, + "step": 3523 + }, + { + "epoch": 4.17, + "learning_rate": 0.00012415147864557884, + "loss": 0.1993, + "step": 3524 + }, + { + "epoch": 4.17, + "learning_rate": 0.0001241148351876473, + "loss": 0.2329, + "step": 3525 + }, + { + "epoch": 4.18, + "learning_rate": 0.0001240781882914663, + "loss": 0.2228, + "step": 3526 + }, + { + "epoch": 4.18, + "learning_rate": 0.00012404153796226087, + "loss": 0.2228, + "step": 3527 + }, + { + "epoch": 4.18, + "learning_rate": 0.00012400488420525653, + "loss": 0.2277, + "step": 3528 + }, + { + "epoch": 4.18, + "learning_rate": 0.0001239682270256793, + "loss": 0.2344, + "step": 3529 + }, + { + "epoch": 4.18, + "learning_rate": 0.0001239315664287558, + "loss": 0.2043, + "step": 3530 + }, + { + "epoch": 4.18, + "learning_rate": 0.0001238949024197129, + "loss": 0.2143, + "step": 3531 + }, + { + "epoch": 4.18, + "learning_rate": 0.00012385823500377812, + "loss": 0.2054, + "step": 3532 + }, + { + "epoch": 4.18, + "learning_rate": 0.00012382156418617947, + "loss": 0.2191, + "step": 3533 + }, + { + "epoch": 4.18, + "learning_rate": 0.0001237848899721454, + "loss": 0.2199, + "step": 3534 + }, + { + "epoch": 4.19, + "learning_rate": 0.00012374821236690482, + "loss": 0.1899, + "step": 3535 + }, + { + "epoch": 4.19, + "learning_rate": 0.0001237115313756872, + "loss": 0.2206, + "step": 3536 + }, + { + "epoch": 4.19, + "learning_rate": 0.00012367484700372242, + "loss": 0.2107, + "step": 3537 + }, + { + "epoch": 4.19, + "learning_rate": 0.00012363815925624087, + "loss": 0.1904, + "step": 3538 + }, + { + "epoch": 4.19, + "learning_rate": 0.00012360146813847345, + "loss": 0.2259, + "step": 3539 + }, + { + "epoch": 4.19, + "learning_rate": 0.00012356477365565148, + "loss": 0.2488, + "step": 3540 + }, + { + "epoch": 4.19, + "learning_rate": 0.00012352807581300678, + "loss": 0.3026, + "step": 3541 + }, + { + "epoch": 4.19, + "learning_rate": 0.00012349137461577174, + "loss": 0.2141, + "step": 3542 + }, + { + "epoch": 4.2, + "learning_rate": 0.00012345467006917907, + "loss": 0.2183, + "step": 3543 + }, + { + "epoch": 4.2, + "learning_rate": 0.00012341796217846208, + "loss": 0.2978, + "step": 3544 + }, + { + "epoch": 4.2, + "learning_rate": 0.0001233812509488545, + "loss": 0.2255, + "step": 3545 + }, + { + "epoch": 4.2, + "learning_rate": 0.00012334453638559057, + "loss": 0.2209, + "step": 3546 + }, + { + "epoch": 4.2, + "learning_rate": 0.00012330781849390494, + "loss": 0.2464, + "step": 3547 + }, + { + "epoch": 4.2, + "learning_rate": 0.00012327109727903283, + "loss": 0.2259, + "step": 3548 + }, + { + "epoch": 4.2, + "learning_rate": 0.00012323437274620983, + "loss": 0.209, + "step": 3549 + }, + { + "epoch": 4.2, + "learning_rate": 0.0001231976449006721, + "loss": 0.2424, + "step": 3550 + }, + { + "epoch": 4.21, + "learning_rate": 0.00012316091374765624, + "loss": 0.2162, + "step": 3551 + }, + { + "epoch": 4.21, + "learning_rate": 0.0001231241792923993, + "loss": 0.2442, + "step": 3552 + }, + { + "epoch": 4.21, + "learning_rate": 0.00012308744154013878, + "loss": 0.2061, + "step": 3553 + }, + { + "epoch": 4.21, + "eval_loss": 3.390720844268799, + "eval_runtime": 283.8935, + "eval_samples_per_second": 0.726, + "eval_steps_per_second": 0.726, + "step": 3553 + }, + { + "epoch": 4.21, + "learning_rate": 0.00012305070049611273, + "loss": 0.1838, + "step": 3554 + }, + { + "epoch": 4.21, + "learning_rate": 0.00012301395616555957, + "loss": 0.197, + "step": 3555 + }, + { + "epoch": 4.21, + "learning_rate": 0.0001229772085537183, + "loss": 0.2479, + "step": 3556 + }, + { + "epoch": 4.21, + "learning_rate": 0.00012294045766582823, + "loss": 0.3272, + "step": 3557 + }, + { + "epoch": 4.21, + "learning_rate": 0.00012290370350712937, + "loss": 0.2301, + "step": 3558 + }, + { + "epoch": 4.21, + "learning_rate": 0.00012286694608286197, + "loss": 0.2367, + "step": 3559 + }, + { + "epoch": 4.22, + "learning_rate": 0.00012283018539826685, + "loss": 0.2419, + "step": 3560 + }, + { + "epoch": 4.22, + "learning_rate": 0.0001227934214585853, + "loss": 0.2605, + "step": 3561 + }, + { + "epoch": 4.22, + "learning_rate": 0.000122756654269059, + "loss": 0.2084, + "step": 3562 + }, + { + "epoch": 4.22, + "learning_rate": 0.00012271988383493024, + "loss": 0.2414, + "step": 3563 + }, + { + "epoch": 4.22, + "learning_rate": 0.00012268311016144163, + "loss": 0.2206, + "step": 3564 + }, + { + "epoch": 4.22, + "learning_rate": 0.0001226463332538363, + "loss": 0.2012, + "step": 3565 + }, + { + "epoch": 4.22, + "learning_rate": 0.00012260955311735786, + "loss": 0.1884, + "step": 3566 + }, + { + "epoch": 4.22, + "learning_rate": 0.00012257276975725028, + "loss": 0.2155, + "step": 3567 + }, + { + "epoch": 4.23, + "learning_rate": 0.0001225359831787581, + "loss": 0.2375, + "step": 3568 + }, + { + "epoch": 4.23, + "learning_rate": 0.00012249919338712636, + "loss": 0.2713, + "step": 3569 + }, + { + "epoch": 4.23, + "learning_rate": 0.00012246240038760043, + "loss": 0.2414, + "step": 3570 + }, + { + "epoch": 4.23, + "learning_rate": 0.00012242560418542612, + "loss": 0.2209, + "step": 3571 + }, + { + "epoch": 4.23, + "learning_rate": 0.00012238880478584985, + "loss": 0.2318, + "step": 3572 + }, + { + "epoch": 4.23, + "learning_rate": 0.00012235200219411836, + "loss": 0.2858, + "step": 3573 + }, + { + "epoch": 4.23, + "learning_rate": 0.000122315196415479, + "loss": 0.2468, + "step": 3574 + }, + { + "epoch": 4.23, + "learning_rate": 0.00012227838745517932, + "loss": 0.2166, + "step": 3575 + }, + { + "epoch": 4.24, + "learning_rate": 0.0001222415753184676, + "loss": 0.2349, + "step": 3576 + }, + { + "epoch": 4.24, + "learning_rate": 0.00012220476001059238, + "loss": 0.2486, + "step": 3577 + }, + { + "epoch": 4.24, + "learning_rate": 0.00012216794153680274, + "loss": 0.234, + "step": 3578 + }, + { + "epoch": 4.24, + "learning_rate": 0.00012213111990234815, + "loss": 0.2008, + "step": 3579 + }, + { + "epoch": 4.24, + "learning_rate": 0.00012209429511247864, + "loss": 0.2548, + "step": 3580 + }, + { + "epoch": 4.24, + "learning_rate": 0.0001220574671724446, + "loss": 0.2562, + "step": 3581 + }, + { + "epoch": 4.24, + "learning_rate": 0.0001220206360874969, + "loss": 0.2586, + "step": 3582 + }, + { + "epoch": 4.24, + "learning_rate": 0.0001219838018628868, + "loss": 0.2428, + "step": 3583 + }, + { + "epoch": 4.24, + "learning_rate": 0.00012194696450386608, + "loss": 0.2159, + "step": 3584 + }, + { + "epoch": 4.25, + "learning_rate": 0.00012191012401568698, + "loss": 0.2544, + "step": 3585 + }, + { + "epoch": 4.25, + "learning_rate": 0.0001218732804036021, + "loss": 0.2396, + "step": 3586 + }, + { + "epoch": 4.25, + "learning_rate": 0.00012183643367286462, + "loss": 0.2335, + "step": 3587 + }, + { + "epoch": 4.25, + "learning_rate": 0.00012179958382872796, + "loss": 0.2275, + "step": 3588 + }, + { + "epoch": 4.25, + "learning_rate": 0.00012176273087644619, + "loss": 0.2291, + "step": 3589 + }, + { + "epoch": 4.25, + "learning_rate": 0.0001217258748212737, + "loss": 0.2272, + "step": 3590 + }, + { + "epoch": 4.25, + "learning_rate": 0.00012168901566846535, + "loss": 0.2135, + "step": 3591 + }, + { + "epoch": 4.25, + "learning_rate": 0.00012165215342327648, + "loss": 0.22, + "step": 3592 + }, + { + "epoch": 4.26, + "learning_rate": 0.00012161528809096285, + "loss": 0.2577, + "step": 3593 + }, + { + "epoch": 4.26, + "learning_rate": 0.00012157841967678063, + "loss": 0.2006, + "step": 3594 + }, + { + "epoch": 4.26, + "learning_rate": 0.00012154154818598647, + "loss": 0.2322, + "step": 3595 + }, + { + "epoch": 4.26, + "learning_rate": 0.0001215046736238374, + "loss": 0.196, + "step": 3596 + }, + { + "epoch": 4.26, + "learning_rate": 0.00012146779599559095, + "loss": 0.2267, + "step": 3597 + }, + { + "epoch": 4.26, + "learning_rate": 0.00012143091530650508, + "loss": 0.2416, + "step": 3598 + }, + { + "epoch": 4.26, + "learning_rate": 0.00012139403156183817, + "loss": 0.2585, + "step": 3599 + }, + { + "epoch": 4.26, + "learning_rate": 0.00012135714476684903, + "loss": 0.2644, + "step": 3600 + }, + { + "epoch": 4.27, + "learning_rate": 0.00012132025492679693, + "loss": 0.2355, + "step": 3601 + }, + { + "epoch": 4.27, + "learning_rate": 0.00012128336204694148, + "loss": 0.2363, + "step": 3602 + }, + { + "epoch": 4.27, + "learning_rate": 0.00012124646613254291, + "loss": 0.2476, + "step": 3603 + }, + { + "epoch": 4.27, + "learning_rate": 0.0001212095671888617, + "loss": 0.2185, + "step": 3604 + }, + { + "epoch": 4.27, + "learning_rate": 0.00012117266522115889, + "loss": 0.2233, + "step": 3605 + }, + { + "epoch": 4.27, + "learning_rate": 0.00012113576023469582, + "loss": 0.2084, + "step": 3606 + }, + { + "epoch": 4.27, + "learning_rate": 0.00012109885223473439, + "loss": 0.2439, + "step": 3607 + }, + { + "epoch": 4.27, + "learning_rate": 0.00012106194122653684, + "loss": 0.2409, + "step": 3608 + }, + { + "epoch": 4.27, + "learning_rate": 0.00012102502721536595, + "loss": 0.2183, + "step": 3609 + }, + { + "epoch": 4.28, + "learning_rate": 0.00012098811020648475, + "loss": 0.2595, + "step": 3610 + }, + { + "epoch": 4.28, + "learning_rate": 0.00012095119020515691, + "loss": 0.2135, + "step": 3611 + }, + { + "epoch": 4.28, + "learning_rate": 0.0001209142672166463, + "loss": 0.2125, + "step": 3612 + }, + { + "epoch": 4.28, + "learning_rate": 0.00012087734124621742, + "loss": 0.2017, + "step": 3613 + }, + { + "epoch": 4.28, + "learning_rate": 0.00012084041229913505, + "loss": 0.2163, + "step": 3614 + }, + { + "epoch": 4.28, + "learning_rate": 0.00012080348038066452, + "loss": 0.2198, + "step": 3615 + }, + { + "epoch": 4.28, + "learning_rate": 0.00012076654549607145, + "loss": 0.2234, + "step": 3616 + }, + { + "epoch": 4.28, + "learning_rate": 0.00012072960765062197, + "loss": 0.2201, + "step": 3617 + }, + { + "epoch": 4.29, + "learning_rate": 0.00012069266684958265, + "loss": 0.245, + "step": 3618 + }, + { + "epoch": 4.29, + "learning_rate": 0.00012065572309822037, + "loss": 0.2067, + "step": 3619 + }, + { + "epoch": 4.29, + "learning_rate": 0.00012061877640180255, + "loss": 0.2284, + "step": 3620 + }, + { + "epoch": 4.29, + "learning_rate": 0.000120581826765597, + "loss": 0.2323, + "step": 3621 + }, + { + "epoch": 4.29, + "learning_rate": 0.00012054487419487188, + "loss": 0.2162, + "step": 3622 + }, + { + "epoch": 4.29, + "learning_rate": 0.00012050791869489586, + "loss": 0.2131, + "step": 3623 + }, + { + "epoch": 4.29, + "learning_rate": 0.00012047096027093798, + "loss": 0.2168, + "step": 3624 + }, + { + "epoch": 4.29, + "learning_rate": 0.00012043399892826768, + "loss": 0.2293, + "step": 3625 + }, + { + "epoch": 4.3, + "learning_rate": 0.00012039703467215488, + "loss": 0.2202, + "step": 3626 + }, + { + "epoch": 4.3, + "learning_rate": 0.00012036006750786985, + "loss": 0.2288, + "step": 3627 + }, + { + "epoch": 4.3, + "learning_rate": 0.00012032309744068334, + "loss": 0.2606, + "step": 3628 + }, + { + "epoch": 4.3, + "learning_rate": 0.00012028612447586643, + "loss": 0.2754, + "step": 3629 + }, + { + "epoch": 4.3, + "learning_rate": 0.00012024914861869063, + "loss": 0.239, + "step": 3630 + }, + { + "epoch": 4.3, + "learning_rate": 0.00012021216987442798, + "loss": 0.2312, + "step": 3631 + }, + { + "epoch": 4.3, + "learning_rate": 0.00012017518824835077, + "loss": 0.2299, + "step": 3632 + }, + { + "epoch": 4.3, + "learning_rate": 0.00012013820374573184, + "loss": 0.2214, + "step": 3633 + }, + { + "epoch": 4.3, + "learning_rate": 0.00012010121637184428, + "loss": 0.2492, + "step": 3634 + }, + { + "epoch": 4.31, + "learning_rate": 0.00012006422613196178, + "loss": 0.2659, + "step": 3635 + }, + { + "epoch": 4.31, + "learning_rate": 0.00012002723303135826, + "loss": 0.23, + "step": 3636 + }, + { + "epoch": 4.31, + "learning_rate": 0.00011999023707530819, + "loss": 0.287, + "step": 3637 + }, + { + "epoch": 4.31, + "learning_rate": 0.00011995323826908635, + "loss": 0.2204, + "step": 3638 + }, + { + "epoch": 4.31, + "learning_rate": 0.00011991623661796798, + "loss": 0.2277, + "step": 3639 + }, + { + "epoch": 4.31, + "learning_rate": 0.00011987923212722872, + "loss": 0.2436, + "step": 3640 + }, + { + "epoch": 4.31, + "learning_rate": 0.00011984222480214456, + "loss": 0.2074, + "step": 3641 + }, + { + "epoch": 4.31, + "learning_rate": 0.00011980521464799198, + "loss": 0.2212, + "step": 3642 + }, + { + "epoch": 4.32, + "learning_rate": 0.00011976820167004779, + "loss": 0.2147, + "step": 3643 + }, + { + "epoch": 4.32, + "learning_rate": 0.00011973118587358928, + "loss": 0.2271, + "step": 3644 + }, + { + "epoch": 4.32, + "learning_rate": 0.00011969416726389404, + "loss": 0.2498, + "step": 3645 + }, + { + "epoch": 4.32, + "learning_rate": 0.00011965714584624012, + "loss": 0.2171, + "step": 3646 + }, + { + "epoch": 4.32, + "learning_rate": 0.00011962012162590601, + "loss": 0.2276, + "step": 3647 + }, + { + "epoch": 4.32, + "learning_rate": 0.00011958309460817052, + "loss": 0.2089, + "step": 3648 + }, + { + "epoch": 4.32, + "learning_rate": 0.00011954606479831291, + "loss": 0.2691, + "step": 3649 + }, + { + "epoch": 4.32, + "learning_rate": 0.00011950903220161285, + "loss": 0.2229, + "step": 3650 + }, + { + "epoch": 4.33, + "learning_rate": 0.00011947199682335031, + "loss": 0.2315, + "step": 3651 + }, + { + "epoch": 4.33, + "learning_rate": 0.0001194349586688058, + "loss": 0.2208, + "step": 3652 + }, + { + "epoch": 4.33, + "learning_rate": 0.0001193979177432601, + "loss": 0.2159, + "step": 3653 + }, + { + "epoch": 4.33, + "learning_rate": 0.00011936087405199446, + "loss": 0.2781, + "step": 3654 + }, + { + "epoch": 4.33, + "learning_rate": 0.00011932382760029049, + "loss": 0.2142, + "step": 3655 + }, + { + "epoch": 4.33, + "learning_rate": 0.00011928677839343026, + "loss": 0.2275, + "step": 3656 + }, + { + "epoch": 4.33, + "learning_rate": 0.0001192497264366961, + "loss": 0.2718, + "step": 3657 + }, + { + "epoch": 4.33, + "learning_rate": 0.00011921267173537086, + "loss": 0.1947, + "step": 3658 + }, + { + "epoch": 4.33, + "learning_rate": 0.00011917561429473771, + "loss": 0.2361, + "step": 3659 + }, + { + "epoch": 4.34, + "learning_rate": 0.00011913855412008023, + "loss": 0.1999, + "step": 3660 + }, + { + "epoch": 4.34, + "learning_rate": 0.00011910149121668241, + "loss": 0.2199, + "step": 3661 + }, + { + "epoch": 4.34, + "learning_rate": 0.00011906442558982865, + "loss": 0.2217, + "step": 3662 + }, + { + "epoch": 4.34, + "learning_rate": 0.0001190273572448036, + "loss": 0.2263, + "step": 3663 + }, + { + "epoch": 4.34, + "learning_rate": 0.00011899028618689247, + "loss": 0.2216, + "step": 3664 + }, + { + "epoch": 4.34, + "learning_rate": 0.00011895321242138075, + "loss": 0.2298, + "step": 3665 + }, + { + "epoch": 4.34, + "learning_rate": 0.0001189161359535544, + "loss": 0.2332, + "step": 3666 + }, + { + "epoch": 4.34, + "learning_rate": 0.00011887905678869966, + "loss": 0.2955, + "step": 3667 + }, + { + "epoch": 4.35, + "learning_rate": 0.00011884197493210328, + "loss": 0.2352, + "step": 3668 + }, + { + "epoch": 4.35, + "learning_rate": 0.00011880489038905223, + "loss": 0.2104, + "step": 3669 + }, + { + "epoch": 4.35, + "learning_rate": 0.00011876780316483401, + "loss": 0.2897, + "step": 3670 + }, + { + "epoch": 4.35, + "learning_rate": 0.00011873071326473644, + "loss": 0.2041, + "step": 3671 + }, + { + "epoch": 4.35, + "learning_rate": 0.00011869362069404775, + "loss": 0.2242, + "step": 3672 + }, + { + "epoch": 4.35, + "learning_rate": 0.0001186565254580565, + "loss": 0.2015, + "step": 3673 + }, + { + "epoch": 4.35, + "learning_rate": 0.00011861942756205169, + "loss": 0.2716, + "step": 3674 + }, + { + "epoch": 4.35, + "learning_rate": 0.00011858232701132264, + "loss": 0.2504, + "step": 3675 + }, + { + "epoch": 4.36, + "learning_rate": 0.00011854522381115908, + "loss": 0.1846, + "step": 3676 + }, + { + "epoch": 4.36, + "learning_rate": 0.00011850811796685117, + "loss": 0.207, + "step": 3677 + }, + { + "epoch": 4.36, + "learning_rate": 0.00011847100948368937, + "loss": 0.2228, + "step": 3678 + }, + { + "epoch": 4.36, + "learning_rate": 0.00011843389836696447, + "loss": 0.2365, + "step": 3679 + }, + { + "epoch": 4.36, + "learning_rate": 0.00011839678462196784, + "loss": 0.2159, + "step": 3680 + }, + { + "epoch": 4.36, + "learning_rate": 0.00011835966825399096, + "loss": 0.2413, + "step": 3681 + }, + { + "epoch": 4.36, + "learning_rate": 0.00011832254926832586, + "loss": 0.2596, + "step": 3682 + }, + { + "epoch": 4.36, + "learning_rate": 0.00011828542767026493, + "loss": 0.2041, + "step": 3683 + }, + { + "epoch": 4.36, + "learning_rate": 0.00011824830346510089, + "loss": 0.2512, + "step": 3684 + }, + { + "epoch": 4.37, + "learning_rate": 0.00011821117665812682, + "loss": 0.2165, + "step": 3685 + }, + { + "epoch": 4.37, + "learning_rate": 0.00011817404725463618, + "loss": 0.2125, + "step": 3686 + }, + { + "epoch": 4.37, + "learning_rate": 0.00011813691525992286, + "loss": 0.2557, + "step": 3687 + }, + { + "epoch": 4.37, + "learning_rate": 0.00011809978067928102, + "loss": 0.2088, + "step": 3688 + }, + { + "epoch": 4.37, + "learning_rate": 0.00011806264351800526, + "loss": 0.2093, + "step": 3689 + }, + { + "epoch": 4.37, + "learning_rate": 0.0001180255037813906, + "loss": 0.2217, + "step": 3690 + }, + { + "epoch": 4.37, + "learning_rate": 0.00011798836147473225, + "loss": 0.2681, + "step": 3691 + }, + { + "epoch": 4.37, + "learning_rate": 0.00011795121660332593, + "loss": 0.2257, + "step": 3692 + }, + { + "epoch": 4.38, + "learning_rate": 0.0001179140691724677, + "loss": 0.2422, + "step": 3693 + }, + { + "epoch": 4.38, + "learning_rate": 0.00011787691918745396, + "loss": 0.3328, + "step": 3694 + }, + { + "epoch": 4.38, + "learning_rate": 0.0001178397666535815, + "loss": 0.233, + "step": 3695 + }, + { + "epoch": 4.38, + "learning_rate": 0.00011780261157614747, + "loss": 0.243, + "step": 3696 + }, + { + "epoch": 4.38, + "learning_rate": 0.00011776545396044936, + "loss": 0.2089, + "step": 3697 + }, + { + "epoch": 4.38, + "learning_rate": 0.00011772829381178502, + "loss": 0.2143, + "step": 3698 + }, + { + "epoch": 4.38, + "learning_rate": 0.00011769113113545267, + "loss": 0.2135, + "step": 3699 + }, + { + "epoch": 4.38, + "learning_rate": 0.00011765396593675097, + "loss": 0.2403, + "step": 3700 + }, + { + "epoch": 4.39, + "learning_rate": 0.00011761679822097877, + "loss": 0.2182, + "step": 3701 + }, + { + "epoch": 4.39, + "learning_rate": 0.00011757962799343547, + "loss": 0.2159, + "step": 3702 + }, + { + "epoch": 4.39, + "learning_rate": 0.00011754245525942065, + "loss": 0.2098, + "step": 3703 + }, + { + "epoch": 4.39, + "learning_rate": 0.00011750528002423437, + "loss": 0.2264, + "step": 3704 + }, + { + "epoch": 4.39, + "learning_rate": 0.000117468102293177, + "loss": 0.2023, + "step": 3705 + }, + { + "epoch": 4.39, + "learning_rate": 0.00011743092207154929, + "loss": 0.2978, + "step": 3706 + }, + { + "epoch": 4.39, + "learning_rate": 0.0001173937393646523, + "loss": 0.2311, + "step": 3707 + }, + { + "epoch": 4.39, + "learning_rate": 0.0001173565541777875, + "loss": 0.244, + "step": 3708 + }, + { + "epoch": 4.39, + "learning_rate": 0.00011731936651625668, + "loss": 0.2058, + "step": 3709 + }, + { + "epoch": 4.4, + "learning_rate": 0.00011728217638536197, + "loss": 0.3039, + "step": 3710 + }, + { + "epoch": 4.4, + "learning_rate": 0.00011724498379040587, + "loss": 0.2142, + "step": 3711 + }, + { + "epoch": 4.4, + "learning_rate": 0.0001172077887366913, + "loss": 0.2262, + "step": 3712 + }, + { + "epoch": 4.4, + "learning_rate": 0.00011717059122952136, + "loss": 0.2304, + "step": 3713 + }, + { + "epoch": 4.4, + "learning_rate": 0.00011713339127419969, + "loss": 0.2093, + "step": 3714 + }, + { + "epoch": 4.4, + "learning_rate": 0.00011709618887603014, + "loss": 0.2083, + "step": 3715 + }, + { + "epoch": 4.4, + "learning_rate": 0.00011705898404031697, + "loss": 0.3559, + "step": 3716 + }, + { + "epoch": 4.4, + "learning_rate": 0.00011702177677236479, + "loss": 0.2728, + "step": 3717 + }, + { + "epoch": 4.41, + "learning_rate": 0.00011698456707747854, + "loss": 0.246, + "step": 3718 + }, + { + "epoch": 4.41, + "learning_rate": 0.00011694735496096354, + "loss": 0.2031, + "step": 3719 + }, + { + "epoch": 4.41, + "learning_rate": 0.00011691014042812536, + "loss": 0.2049, + "step": 3720 + }, + { + "epoch": 4.41, + "learning_rate": 0.00011687292348427004, + "loss": 0.248, + "step": 3721 + }, + { + "epoch": 4.41, + "learning_rate": 0.00011683570413470383, + "loss": 0.2189, + "step": 3722 + }, + { + "epoch": 4.41, + "learning_rate": 0.00011679848238473352, + "loss": 0.2302, + "step": 3723 + }, + { + "epoch": 4.41, + "learning_rate": 0.00011676125823966602, + "loss": 0.2839, + "step": 3724 + }, + { + "epoch": 4.41, + "learning_rate": 0.00011672403170480872, + "loss": 0.2359, + "step": 3725 + }, + { + "epoch": 4.42, + "learning_rate": 0.00011668680278546929, + "loss": 0.2288, + "step": 3726 + }, + { + "epoch": 4.42, + "learning_rate": 0.0001166495714869558, + "loss": 0.2718, + "step": 3727 + }, + { + "epoch": 4.42, + "learning_rate": 0.00011661233781457654, + "loss": 0.1967, + "step": 3728 + }, + { + "epoch": 4.42, + "learning_rate": 0.00011657510177364032, + "loss": 0.2098, + "step": 3729 + }, + { + "epoch": 4.42, + "learning_rate": 0.00011653786336945614, + "loss": 0.2466, + "step": 3730 + }, + { + "epoch": 4.42, + "learning_rate": 0.00011650062260733339, + "loss": 0.2207, + "step": 3731 + }, + { + "epoch": 4.42, + "learning_rate": 0.00011646337949258175, + "loss": 0.2124, + "step": 3732 + }, + { + "epoch": 4.42, + "learning_rate": 0.00011642613403051133, + "loss": 0.213, + "step": 3733 + }, + { + "epoch": 4.42, + "learning_rate": 0.00011638888622643249, + "loss": 0.2276, + "step": 3734 + }, + { + "epoch": 4.43, + "learning_rate": 0.000116351636085656, + "loss": 0.2206, + "step": 3735 + }, + { + "epoch": 4.43, + "learning_rate": 0.00011631438361349287, + "loss": 0.2382, + "step": 3736 + }, + { + "epoch": 4.43, + "learning_rate": 0.00011627712881525452, + "loss": 0.2264, + "step": 3737 + }, + { + "epoch": 4.43, + "learning_rate": 0.00011623987169625261, + "loss": 0.2392, + "step": 3738 + }, + { + "epoch": 4.43, + "learning_rate": 0.00011620261226179927, + "loss": 0.2139, + "step": 3739 + }, + { + "epoch": 4.43, + "learning_rate": 0.00011616535051720685, + "loss": 0.2103, + "step": 3740 + }, + { + "epoch": 4.43, + "learning_rate": 0.00011612808646778806, + "loss": 0.211, + "step": 3741 + }, + { + "epoch": 4.43, + "learning_rate": 0.00011609082011885592, + "loss": 0.2227, + "step": 3742 + }, + { + "epoch": 4.44, + "learning_rate": 0.00011605355147572387, + "loss": 0.2459, + "step": 3743 + }, + { + "epoch": 4.44, + "learning_rate": 0.00011601628054370553, + "loss": 0.2312, + "step": 3744 + }, + { + "epoch": 4.44, + "learning_rate": 0.00011597900732811496, + "loss": 0.2244, + "step": 3745 + }, + { + "epoch": 4.44, + "learning_rate": 0.00011594173183426647, + "loss": 0.2168, + "step": 3746 + }, + { + "epoch": 4.44, + "learning_rate": 0.00011590445406747479, + "loss": 0.2711, + "step": 3747 + }, + { + "epoch": 4.44, + "learning_rate": 0.00011586717403305487, + "loss": 0.1865, + "step": 3748 + }, + { + "epoch": 4.44, + "learning_rate": 0.00011582989173632206, + "loss": 0.3104, + "step": 3749 + }, + { + "epoch": 4.44, + "learning_rate": 0.00011579260718259197, + "loss": 0.2245, + "step": 3750 + }, + { + "epoch": 4.45, + "learning_rate": 0.00011575532037718057, + "loss": 0.2316, + "step": 3751 + }, + { + "epoch": 4.45, + "learning_rate": 0.00011571803132540418, + "loss": 0.2328, + "step": 3752 + }, + { + "epoch": 4.45, + "learning_rate": 0.00011568074003257938, + "loss": 0.267, + "step": 3753 + }, + { + "epoch": 4.45, + "learning_rate": 0.0001156434465040231, + "loss": 0.2131, + "step": 3754 + }, + { + "epoch": 4.45, + "learning_rate": 0.0001156061507450526, + "loss": 0.1945, + "step": 3755 + }, + { + "epoch": 4.45, + "learning_rate": 0.00011556885276098536, + "loss": 0.2344, + "step": 3756 + }, + { + "epoch": 4.45, + "learning_rate": 0.00011553155255713937, + "loss": 0.2221, + "step": 3757 + }, + { + "epoch": 4.45, + "learning_rate": 0.00011549425013883275, + "loss": 0.2098, + "step": 3758 + }, + { + "epoch": 4.45, + "learning_rate": 0.00011545694551138409, + "loss": 0.2329, + "step": 3759 + }, + { + "epoch": 4.46, + "learning_rate": 0.00011541963868011212, + "loss": 0.2187, + "step": 3760 + }, + { + "epoch": 4.46, + "learning_rate": 0.00011538232965033601, + "loss": 0.1928, + "step": 3761 + }, + { + "epoch": 4.46, + "learning_rate": 0.00011534501842737527, + "loss": 0.2103, + "step": 3762 + }, + { + "epoch": 4.46, + "eval_loss": 3.44382643699646, + "eval_runtime": 283.899, + "eval_samples_per_second": 0.726, + "eval_steps_per_second": 0.726, + "step": 3762 + }, + { + "epoch": 4.46, + "learning_rate": 0.00011530770501654959, + "loss": 0.2563, + "step": 3763 + }, + { + "epoch": 4.46, + "learning_rate": 0.00011527038942317911, + "loss": 0.1922, + "step": 3764 + }, + { + "epoch": 4.46, + "learning_rate": 0.00011523307165258419, + "loss": 0.2246, + "step": 3765 + }, + { + "epoch": 4.46, + "learning_rate": 0.00011519575171008552, + "loss": 0.2243, + "step": 3766 + }, + { + "epoch": 4.46, + "learning_rate": 0.00011515842960100411, + "loss": 0.2481, + "step": 3767 + }, + { + "epoch": 4.47, + "learning_rate": 0.00011512110533066132, + "loss": 0.2135, + "step": 3768 + }, + { + "epoch": 4.47, + "learning_rate": 0.00011508377890437874, + "loss": 0.2019, + "step": 3769 + }, + { + "epoch": 4.47, + "learning_rate": 0.00011504645032747832, + "loss": 0.2537, + "step": 3770 + }, + { + "epoch": 4.47, + "learning_rate": 0.00011500911960528229, + "loss": 0.2131, + "step": 3771 + }, + { + "epoch": 4.47, + "learning_rate": 0.00011497178674311317, + "loss": 0.2421, + "step": 3772 + }, + { + "epoch": 4.47, + "learning_rate": 0.00011493445174629386, + "loss": 0.2012, + "step": 3773 + }, + { + "epoch": 4.47, + "learning_rate": 0.00011489711462014751, + "loss": 0.2144, + "step": 3774 + }, + { + "epoch": 4.47, + "learning_rate": 0.00011485977536999757, + "loss": 0.2411, + "step": 3775 + }, + { + "epoch": 4.48, + "learning_rate": 0.00011482243400116779, + "loss": 0.192, + "step": 3776 + }, + { + "epoch": 4.48, + "learning_rate": 0.00011478509051898225, + "loss": 0.2245, + "step": 3777 + }, + { + "epoch": 4.48, + "learning_rate": 0.00011474774492876532, + "loss": 0.241, + "step": 3778 + }, + { + "epoch": 4.48, + "learning_rate": 0.00011471039723584162, + "loss": 0.2172, + "step": 3779 + }, + { + "epoch": 4.48, + "learning_rate": 0.00011467304744553618, + "loss": 0.2308, + "step": 3780 + }, + { + "epoch": 4.48, + "learning_rate": 0.00011463569556317424, + "loss": 0.2523, + "step": 3781 + }, + { + "epoch": 4.48, + "learning_rate": 0.00011459834159408137, + "loss": 0.216, + "step": 3782 + }, + { + "epoch": 4.48, + "learning_rate": 0.00011456098554358342, + "loss": 0.2098, + "step": 3783 + }, + { + "epoch": 4.48, + "learning_rate": 0.00011452362741700655, + "loss": 0.2101, + "step": 3784 + }, + { + "epoch": 4.49, + "learning_rate": 0.00011448626721967717, + "loss": 0.3598, + "step": 3785 + }, + { + "epoch": 4.49, + "learning_rate": 0.00011444890495692213, + "loss": 0.2131, + "step": 3786 + }, + { + "epoch": 4.49, + "learning_rate": 0.00011441154063406841, + "loss": 0.3067, + "step": 3787 + }, + { + "epoch": 4.49, + "learning_rate": 0.00011437417425644337, + "loss": 0.2866, + "step": 3788 + }, + { + "epoch": 4.49, + "learning_rate": 0.00011433680582937461, + "loss": 0.2688, + "step": 3789 + }, + { + "epoch": 4.49, + "learning_rate": 0.00011429943535819005, + "loss": 0.2286, + "step": 3790 + }, + { + "epoch": 4.49, + "learning_rate": 0.00011426206284821792, + "loss": 0.215, + "step": 3791 + }, + { + "epoch": 4.49, + "learning_rate": 0.00011422468830478679, + "loss": 0.2293, + "step": 3792 + }, + { + "epoch": 4.5, + "learning_rate": 0.00011418731173322532, + "loss": 0.2614, + "step": 3793 + }, + { + "epoch": 4.5, + "learning_rate": 0.00011414993313886272, + "loss": 0.2223, + "step": 3794 + }, + { + "epoch": 4.5, + "learning_rate": 0.00011411255252702829, + "loss": 0.2415, + "step": 3795 + }, + { + "epoch": 4.5, + "learning_rate": 0.00011407516990305169, + "loss": 0.2429, + "step": 3796 + }, + { + "epoch": 4.5, + "learning_rate": 0.0001140377852722629, + "loss": 0.2862, + "step": 3797 + }, + { + "epoch": 4.5, + "learning_rate": 0.00011400039863999214, + "loss": 0.2399, + "step": 3798 + }, + { + "epoch": 4.5, + "learning_rate": 0.00011396301001156992, + "loss": 0.915, + "step": 3799 + }, + { + "epoch": 4.5, + "learning_rate": 0.00011392561939232706, + "loss": 0.2398, + "step": 3800 + }, + { + "epoch": 4.51, + "learning_rate": 0.00011388822678759464, + "loss": 0.2817, + "step": 3801 + }, + { + "epoch": 4.51, + "learning_rate": 0.00011385083220270401, + "loss": 0.2224, + "step": 3802 + }, + { + "epoch": 4.51, + "learning_rate": 0.00011381343564298683, + "loss": 0.2319, + "step": 3803 + }, + { + "epoch": 4.51, + "learning_rate": 0.00011377603711377504, + "loss": 0.2269, + "step": 3804 + }, + { + "epoch": 4.51, + "learning_rate": 0.00011373863662040087, + "loss": 0.2552, + "step": 3805 + }, + { + "epoch": 4.51, + "learning_rate": 0.00011370123416819682, + "loss": 0.2335, + "step": 3806 + }, + { + "epoch": 4.51, + "learning_rate": 0.00011366382976249564, + "loss": 0.2197, + "step": 3807 + }, + { + "epoch": 4.51, + "learning_rate": 0.00011362642340863034, + "loss": 0.2433, + "step": 3808 + }, + { + "epoch": 4.52, + "learning_rate": 0.00011358901511193431, + "loss": 0.2135, + "step": 3809 + }, + { + "epoch": 4.52, + "learning_rate": 0.0001135516048777412, + "loss": 0.2488, + "step": 3810 + }, + { + "epoch": 4.52, + "learning_rate": 0.0001135141927113848, + "loss": 0.2426, + "step": 3811 + }, + { + "epoch": 4.52, + "learning_rate": 0.0001134767786181993, + "loss": 0.247, + "step": 3812 + }, + { + "epoch": 4.52, + "learning_rate": 0.00011343936260351913, + "loss": 0.2235, + "step": 3813 + }, + { + "epoch": 4.52, + "learning_rate": 0.00011340194467267901, + "loss": 0.2109, + "step": 3814 + }, + { + "epoch": 4.52, + "learning_rate": 0.00011336452483101394, + "loss": 0.2545, + "step": 3815 + }, + { + "epoch": 4.52, + "learning_rate": 0.00011332710308385914, + "loss": 0.2104, + "step": 3816 + }, + { + "epoch": 4.52, + "learning_rate": 0.00011328967943655016, + "loss": 0.2089, + "step": 3817 + }, + { + "epoch": 4.53, + "learning_rate": 0.00011325225389442277, + "loss": 0.2658, + "step": 3818 + }, + { + "epoch": 4.53, + "learning_rate": 0.00011321482646281301, + "loss": 0.2736, + "step": 3819 + }, + { + "epoch": 4.53, + "learning_rate": 0.00011317739714705731, + "loss": 0.2562, + "step": 3820 + }, + { + "epoch": 4.53, + "learning_rate": 0.00011313996595249219, + "loss": 0.2223, + "step": 3821 + }, + { + "epoch": 4.53, + "learning_rate": 0.00011310253288445456, + "loss": 0.2212, + "step": 3822 + }, + { + "epoch": 4.53, + "learning_rate": 0.00011306509794828153, + "loss": 0.2217, + "step": 3823 + }, + { + "epoch": 4.53, + "learning_rate": 0.00011302766114931054, + "loss": 0.2321, + "step": 3824 + }, + { + "epoch": 4.53, + "learning_rate": 0.00011299022249287922, + "loss": 0.2423, + "step": 3825 + }, + { + "epoch": 4.54, + "learning_rate": 0.00011295278198432557, + "loss": 0.2651, + "step": 3826 + }, + { + "epoch": 4.54, + "learning_rate": 0.0001129153396289877, + "loss": 0.2256, + "step": 3827 + }, + { + "epoch": 4.54, + "learning_rate": 0.00011287789543220417, + "loss": 0.2656, + "step": 3828 + }, + { + "epoch": 4.54, + "learning_rate": 0.00011284044939931364, + "loss": 0.2332, + "step": 3829 + }, + { + "epoch": 4.54, + "learning_rate": 0.0001128030015356551, + "loss": 0.2121, + "step": 3830 + }, + { + "epoch": 4.54, + "learning_rate": 0.00011276555184656783, + "loss": 0.2148, + "step": 3831 + }, + { + "epoch": 4.54, + "learning_rate": 0.00011272810033739135, + "loss": 0.234, + "step": 3832 + }, + { + "epoch": 4.54, + "learning_rate": 0.00011269064701346534, + "loss": 0.2466, + "step": 3833 + }, + { + "epoch": 4.55, + "learning_rate": 0.00011265319188012994, + "loss": 0.2008, + "step": 3834 + }, + { + "epoch": 4.55, + "learning_rate": 0.00011261573494272538, + "loss": 0.1905, + "step": 3835 + }, + { + "epoch": 4.55, + "learning_rate": 0.00011257827620659216, + "loss": 0.2515, + "step": 3836 + }, + { + "epoch": 4.55, + "learning_rate": 0.00011254081567707115, + "loss": 0.2579, + "step": 3837 + }, + { + "epoch": 4.55, + "learning_rate": 0.00011250335335950342, + "loss": 0.2598, + "step": 3838 + }, + { + "epoch": 4.55, + "learning_rate": 0.00011246588925923018, + "loss": 0.2399, + "step": 3839 + }, + { + "epoch": 4.55, + "learning_rate": 0.00011242842338159309, + "loss": 0.2181, + "step": 3840 + }, + { + "epoch": 4.55, + "learning_rate": 0.0001123909557319339, + "loss": 0.2744, + "step": 3841 + }, + { + "epoch": 4.55, + "learning_rate": 0.00011235348631559473, + "loss": 0.2149, + "step": 3842 + }, + { + "epoch": 4.56, + "learning_rate": 0.00011231601513791786, + "loss": 0.2184, + "step": 3843 + }, + { + "epoch": 4.56, + "learning_rate": 0.0001122785422042459, + "loss": 0.2098, + "step": 3844 + }, + { + "epoch": 4.56, + "learning_rate": 0.00011224106751992163, + "loss": 0.2277, + "step": 3845 + }, + { + "epoch": 4.56, + "learning_rate": 0.00011220359109028815, + "loss": 0.2571, + "step": 3846 + }, + { + "epoch": 4.56, + "learning_rate": 0.00011216611292068881, + "loss": 0.2087, + "step": 3847 + }, + { + "epoch": 4.56, + "learning_rate": 0.0001121286330164671, + "loss": 0.2497, + "step": 3848 + }, + { + "epoch": 4.56, + "learning_rate": 0.00011209115138296693, + "loss": 0.1869, + "step": 3849 + }, + { + "epoch": 4.56, + "learning_rate": 0.0001120536680255323, + "loss": 0.239, + "step": 3850 + }, + { + "epoch": 4.57, + "learning_rate": 0.00011201618294950756, + "loss": 0.2018, + "step": 3851 + }, + { + "epoch": 4.57, + "learning_rate": 0.00011197869616023722, + "loss": 0.2751, + "step": 3852 + }, + { + "epoch": 4.57, + "learning_rate": 0.00011194120766306611, + "loss": 0.2526, + "step": 3853 + }, + { + "epoch": 4.57, + "learning_rate": 0.00011190371746333923, + "loss": 0.2657, + "step": 3854 + }, + { + "epoch": 4.57, + "learning_rate": 0.00011186622556640194, + "loss": 0.2659, + "step": 3855 + }, + { + "epoch": 4.57, + "learning_rate": 0.00011182873197759971, + "loss": 0.2401, + "step": 3856 + }, + { + "epoch": 4.57, + "learning_rate": 0.00011179123670227833, + "loss": 0.2299, + "step": 3857 + }, + { + "epoch": 4.57, + "learning_rate": 0.00011175373974578378, + "loss": 0.2249, + "step": 3858 + }, + { + "epoch": 4.58, + "learning_rate": 0.00011171624111346232, + "loss": 0.2457, + "step": 3859 + }, + { + "epoch": 4.58, + "learning_rate": 0.00011167874081066045, + "loss": 0.192, + "step": 3860 + }, + { + "epoch": 4.58, + "learning_rate": 0.00011164123884272493, + "loss": 0.2591, + "step": 3861 + }, + { + "epoch": 4.58, + "learning_rate": 0.00011160373521500264, + "loss": 0.2632, + "step": 3862 + }, + { + "epoch": 4.58, + "learning_rate": 0.00011156622993284084, + "loss": 0.248, + "step": 3863 + }, + { + "epoch": 4.58, + "learning_rate": 0.00011152872300158694, + "loss": 0.2071, + "step": 3864 + }, + { + "epoch": 4.58, + "learning_rate": 0.00011149121442658861, + "loss": 0.2935, + "step": 3865 + }, + { + "epoch": 4.58, + "learning_rate": 0.00011145370421319377, + "loss": 0.2191, + "step": 3866 + }, + { + "epoch": 4.58, + "learning_rate": 0.00011141619236675056, + "loss": 0.2737, + "step": 3867 + }, + { + "epoch": 4.59, + "learning_rate": 0.00011137867889260734, + "loss": 0.2281, + "step": 3868 + }, + { + "epoch": 4.59, + "learning_rate": 0.00011134116379611273, + "loss": 0.2083, + "step": 3869 + }, + { + "epoch": 4.59, + "learning_rate": 0.00011130364708261552, + "loss": 0.2079, + "step": 3870 + }, + { + "epoch": 4.59, + "learning_rate": 0.00011126612875746479, + "loss": 0.2423, + "step": 3871 + }, + { + "epoch": 4.59, + "learning_rate": 0.00011122860882600986, + "loss": 0.1903, + "step": 3872 + }, + { + "epoch": 4.59, + "learning_rate": 0.00011119108729360026, + "loss": 0.1995, + "step": 3873 + }, + { + "epoch": 4.59, + "learning_rate": 0.0001111535641655857, + "loss": 0.2479, + "step": 3874 + }, + { + "epoch": 4.59, + "learning_rate": 0.00011111603944731623, + "loss": 0.198, + "step": 3875 + }, + { + "epoch": 4.6, + "learning_rate": 0.00011107851314414197, + "loss": 0.2242, + "step": 3876 + }, + { + "epoch": 4.6, + "learning_rate": 0.0001110409852614134, + "loss": 0.29, + "step": 3877 + }, + { + "epoch": 4.6, + "learning_rate": 0.00011100345580448118, + "loss": 0.1931, + "step": 3878 + }, + { + "epoch": 4.6, + "learning_rate": 0.00011096592477869616, + "loss": 0.2195, + "step": 3879 + }, + { + "epoch": 4.6, + "learning_rate": 0.0001109283921894095, + "loss": 0.2383, + "step": 3880 + }, + { + "epoch": 4.6, + "learning_rate": 0.00011089085804197248, + "loss": 0.2729, + "step": 3881 + }, + { + "epoch": 4.6, + "learning_rate": 0.00011085332234173664, + "loss": 0.1836, + "step": 3882 + }, + { + "epoch": 4.6, + "learning_rate": 0.00011081578509405382, + "loss": 0.2724, + "step": 3883 + }, + { + "epoch": 4.61, + "learning_rate": 0.00011077824630427594, + "loss": 0.2027, + "step": 3884 + }, + { + "epoch": 4.61, + "learning_rate": 0.00011074070597775527, + "loss": 0.2681, + "step": 3885 + }, + { + "epoch": 4.61, + "learning_rate": 0.00011070316411984421, + "loss": 0.205, + "step": 3886 + }, + { + "epoch": 4.61, + "learning_rate": 0.0001106656207358954, + "loss": 0.3106, + "step": 3887 + }, + { + "epoch": 4.61, + "learning_rate": 0.00011062807583126172, + "loss": 0.2126, + "step": 3888 + }, + { + "epoch": 4.61, + "learning_rate": 0.00011059052941129628, + "loss": 0.4017, + "step": 3889 + }, + { + "epoch": 4.61, + "learning_rate": 0.00011055298148135236, + "loss": 0.2406, + "step": 3890 + }, + { + "epoch": 4.61, + "learning_rate": 0.00011051543204678348, + "loss": 0.2833, + "step": 3891 + }, + { + "epoch": 4.61, + "learning_rate": 0.00011047788111294333, + "loss": 0.2224, + "step": 3892 + }, + { + "epoch": 4.62, + "learning_rate": 0.0001104403286851859, + "loss": 0.3536, + "step": 3893 + }, + { + "epoch": 4.62, + "learning_rate": 0.00011040277476886533, + "loss": 0.2373, + "step": 3894 + }, + { + "epoch": 4.62, + "learning_rate": 0.00011036521936933604, + "loss": 0.2297, + "step": 3895 + }, + { + "epoch": 4.62, + "learning_rate": 0.00011032766249195252, + "loss": 0.1979, + "step": 3896 + }, + { + "epoch": 4.62, + "learning_rate": 0.00011029010414206965, + "loss": 0.2434, + "step": 3897 + }, + { + "epoch": 4.62, + "learning_rate": 0.00011025254432504233, + "loss": 0.2897, + "step": 3898 + }, + { + "epoch": 4.62, + "learning_rate": 0.00011021498304622586, + "loss": 0.2121, + "step": 3899 + }, + { + "epoch": 4.62, + "learning_rate": 0.00011017742031097563, + "loss": 0.3021, + "step": 3900 + }, + { + "epoch": 4.63, + "learning_rate": 0.00011013985612464726, + "loss": 0.2463, + "step": 3901 + }, + { + "epoch": 4.63, + "learning_rate": 0.0001101022904925966, + "loss": 0.3078, + "step": 3902 + }, + { + "epoch": 4.63, + "learning_rate": 0.00011006472342017966, + "loss": 0.3664, + "step": 3903 + }, + { + "epoch": 4.63, + "learning_rate": 0.0001100271549127527, + "loss": 0.2176, + "step": 3904 + }, + { + "epoch": 4.63, + "learning_rate": 0.0001099895849756722, + "loss": 0.2137, + "step": 3905 + }, + { + "epoch": 4.63, + "learning_rate": 0.00010995201361429474, + "loss": 0.2588, + "step": 3906 + }, + { + "epoch": 4.63, + "learning_rate": 0.00010991444083397728, + "loss": 0.2686, + "step": 3907 + }, + { + "epoch": 4.63, + "learning_rate": 0.00010987686664007679, + "loss": 0.2235, + "step": 3908 + }, + { + "epoch": 4.64, + "learning_rate": 0.00010983929103795059, + "loss": 0.2602, + "step": 3909 + }, + { + "epoch": 4.64, + "learning_rate": 0.0001098017140329561, + "loss": 0.1857, + "step": 3910 + }, + { + "epoch": 4.64, + "learning_rate": 0.00010976413563045094, + "loss": 0.2307, + "step": 3911 + }, + { + "epoch": 4.64, + "learning_rate": 0.00010972655583579308, + "loss": 0.2658, + "step": 3912 + }, + { + "epoch": 4.64, + "learning_rate": 0.00010968897465434051, + "loss": 0.2106, + "step": 3913 + }, + { + "epoch": 4.64, + "learning_rate": 0.00010965139209145152, + "loss": 0.2122, + "step": 3914 + }, + { + "epoch": 4.64, + "learning_rate": 0.00010961380815248454, + "loss": 0.2433, + "step": 3915 + }, + { + "epoch": 4.64, + "learning_rate": 0.0001095762228427982, + "loss": 0.2032, + "step": 3916 + }, + { + "epoch": 4.64, + "learning_rate": 0.00010953863616775138, + "loss": 0.3393, + "step": 3917 + }, + { + "epoch": 4.65, + "learning_rate": 0.00010950104813270314, + "loss": 0.2476, + "step": 3918 + }, + { + "epoch": 4.65, + "learning_rate": 0.00010946345874301264, + "loss": 0.1929, + "step": 3919 + }, + { + "epoch": 4.65, + "learning_rate": 0.0001094258680040394, + "loss": 0.2509, + "step": 3920 + }, + { + "epoch": 4.65, + "learning_rate": 0.00010938827592114294, + "loss": 0.2103, + "step": 3921 + }, + { + "epoch": 4.65, + "learning_rate": 0.00010935068249968314, + "loss": 0.2297, + "step": 3922 + }, + { + "epoch": 4.65, + "learning_rate": 0.00010931308774501998, + "loss": 0.2259, + "step": 3923 + }, + { + "epoch": 4.65, + "learning_rate": 0.00010927549166251368, + "loss": 0.238, + "step": 3924 + }, + { + "epoch": 4.65, + "learning_rate": 0.00010923789425752456, + "loss": 0.3147, + "step": 3925 + }, + { + "epoch": 4.66, + "learning_rate": 0.00010920029553541326, + "loss": 0.2753, + "step": 3926 + }, + { + "epoch": 4.66, + "learning_rate": 0.00010916269550154048, + "loss": 0.2399, + "step": 3927 + }, + { + "epoch": 4.66, + "learning_rate": 0.0001091250941612672, + "loss": 0.2196, + "step": 3928 + }, + { + "epoch": 4.66, + "learning_rate": 0.00010908749151995452, + "loss": 0.2326, + "step": 3929 + }, + { + "epoch": 4.66, + "learning_rate": 0.0001090498875829638, + "loss": 0.2217, + "step": 3930 + }, + { + "epoch": 4.66, + "learning_rate": 0.00010901228235565651, + "loss": 0.2012, + "step": 3931 + }, + { + "epoch": 4.66, + "learning_rate": 0.00010897467584339434, + "loss": 0.2018, + "step": 3932 + }, + { + "epoch": 4.66, + "learning_rate": 0.00010893706805153915, + "loss": 0.2382, + "step": 3933 + }, + { + "epoch": 4.67, + "learning_rate": 0.000108899458985453, + "loss": 0.2202, + "step": 3934 + }, + { + "epoch": 4.67, + "learning_rate": 0.00010886184865049813, + "loss": 0.2038, + "step": 3935 + }, + { + "epoch": 4.67, + "learning_rate": 0.00010882423705203698, + "loss": 0.2406, + "step": 3936 + }, + { + "epoch": 4.67, + "learning_rate": 0.00010878662419543206, + "loss": 0.2393, + "step": 3937 + }, + { + "epoch": 4.67, + "learning_rate": 0.00010874901008604623, + "loss": 0.2626, + "step": 3938 + }, + { + "epoch": 4.67, + "learning_rate": 0.00010871139472924237, + "loss": 0.246, + "step": 3939 + }, + { + "epoch": 4.67, + "learning_rate": 0.00010867377813038366, + "loss": 0.2228, + "step": 3940 + }, + { + "epoch": 4.67, + "learning_rate": 0.00010863616029483339, + "loss": 0.2091, + "step": 3941 + }, + { + "epoch": 4.67, + "learning_rate": 0.00010859854122795508, + "loss": 0.215, + "step": 3942 + }, + { + "epoch": 4.68, + "learning_rate": 0.0001085609209351123, + "loss": 0.2071, + "step": 3943 + }, + { + "epoch": 4.68, + "learning_rate": 0.00010852329942166894, + "loss": 0.2208, + "step": 3944 + }, + { + "epoch": 4.68, + "learning_rate": 0.00010848567669298901, + "loss": 0.1988, + "step": 3945 + }, + { + "epoch": 4.68, + "learning_rate": 0.00010844805275443673, + "loss": 0.2129, + "step": 3946 + }, + { + "epoch": 4.68, + "learning_rate": 0.00010841042761137634, + "loss": 0.3038, + "step": 3947 + }, + { + "epoch": 4.68, + "learning_rate": 0.00010837280126917248, + "loss": 0.206, + "step": 3948 + }, + { + "epoch": 4.68, + "learning_rate": 0.00010833517373318975, + "loss": 0.2648, + "step": 3949 + }, + { + "epoch": 4.68, + "learning_rate": 0.00010829754500879308, + "loss": 0.2136, + "step": 3950 + }, + { + "epoch": 4.69, + "learning_rate": 0.0001082599151013475, + "loss": 0.3746, + "step": 3951 + }, + { + "epoch": 4.69, + "learning_rate": 0.00010822228401621819, + "loss": 0.2403, + "step": 3952 + }, + { + "epoch": 4.69, + "learning_rate": 0.00010818465175877052, + "loss": 0.2288, + "step": 3953 + }, + { + "epoch": 4.69, + "learning_rate": 0.0001081470183343701, + "loss": 0.2099, + "step": 3954 + }, + { + "epoch": 4.69, + "learning_rate": 0.00010810938374838251, + "loss": 0.1992, + "step": 3955 + }, + { + "epoch": 4.69, + "learning_rate": 0.0001080717480061737, + "loss": 0.2337, + "step": 3956 + }, + { + "epoch": 4.69, + "learning_rate": 0.00010803411111310971, + "loss": 0.2127, + "step": 3957 + }, + { + "epoch": 4.69, + "learning_rate": 0.00010799647307455674, + "loss": 0.2936, + "step": 3958 + }, + { + "epoch": 4.7, + "learning_rate": 0.00010795883389588111, + "loss": 0.3019, + "step": 3959 + }, + { + "epoch": 4.7, + "learning_rate": 0.00010792119358244939, + "loss": 0.2262, + "step": 3960 + }, + { + "epoch": 4.7, + "learning_rate": 0.00010788355213962825, + "loss": 0.2561, + "step": 3961 + }, + { + "epoch": 4.7, + "learning_rate": 0.0001078459095727845, + "loss": 0.2992, + "step": 3962 + }, + { + "epoch": 4.7, + "learning_rate": 0.0001078082658872852, + "loss": 0.2081, + "step": 3963 + }, + { + "epoch": 4.7, + "learning_rate": 0.00010777062108849756, + "loss": 0.2089, + "step": 3964 + }, + { + "epoch": 4.7, + "learning_rate": 0.00010773297518178881, + "loss": 0.2112, + "step": 3965 + }, + { + "epoch": 4.7, + "learning_rate": 0.00010769532817252653, + "loss": 0.1898, + "step": 3966 + }, + { + "epoch": 4.7, + "learning_rate": 0.00010765768006607826, + "loss": 0.3229, + "step": 3967 + }, + { + "epoch": 4.71, + "learning_rate": 0.00010762003086781185, + "loss": 0.2241, + "step": 3968 + }, + { + "epoch": 4.71, + "learning_rate": 0.00010758238058309527, + "loss": 0.2814, + "step": 3969 + }, + { + "epoch": 4.71, + "learning_rate": 0.00010754472921729661, + "loss": 0.2403, + "step": 3970 + }, + { + "epoch": 4.71, + "learning_rate": 0.00010750707677578413, + "loss": 0.2715, + "step": 3971 + }, + { + "epoch": 4.71, + "eval_loss": 3.3954412937164307, + "eval_runtime": 283.9122, + "eval_samples_per_second": 0.726, + "eval_steps_per_second": 0.726, + "step": 3971 + }, + { + "epoch": 4.71, + "learning_rate": 0.00010746942326392628, + "loss": 0.2263, + "step": 3972 + }, + { + "epoch": 4.71, + "learning_rate": 0.00010743176868709157, + "loss": 0.2433, + "step": 3973 + }, + { + "epoch": 4.71, + "learning_rate": 0.0001073941130506488, + "loss": 0.2871, + "step": 3974 + }, + { + "epoch": 4.71, + "learning_rate": 0.00010735645635996676, + "loss": 0.2416, + "step": 3975 + }, + { + "epoch": 4.72, + "learning_rate": 0.0001073187986204145, + "loss": 0.2563, + "step": 3976 + }, + { + "epoch": 4.72, + "learning_rate": 0.00010728113983736126, + "loss": 0.2502, + "step": 3977 + }, + { + "epoch": 4.72, + "learning_rate": 0.00010724348001617625, + "loss": 0.2145, + "step": 3978 + }, + { + "epoch": 4.72, + "learning_rate": 0.000107205819162229, + "loss": 0.2639, + "step": 3979 + }, + { + "epoch": 4.72, + "learning_rate": 0.00010716815728088912, + "loss": 0.2279, + "step": 3980 + }, + { + "epoch": 4.72, + "learning_rate": 0.0001071304943775264, + "loss": 0.2086, + "step": 3981 + }, + { + "epoch": 4.72, + "learning_rate": 0.00010709283045751069, + "loss": 0.2142, + "step": 3982 + }, + { + "epoch": 4.72, + "learning_rate": 0.0001070551655262121, + "loss": 0.2381, + "step": 3983 + }, + { + "epoch": 4.73, + "learning_rate": 0.00010701749958900078, + "loss": 0.2313, + "step": 3984 + }, + { + "epoch": 4.73, + "learning_rate": 0.0001069798326512471, + "loss": 0.1954, + "step": 3985 + }, + { + "epoch": 4.73, + "learning_rate": 0.00010694216471832152, + "loss": 0.2253, + "step": 3986 + }, + { + "epoch": 4.73, + "learning_rate": 0.00010690449579559469, + "loss": 0.2104, + "step": 3987 + }, + { + "epoch": 4.73, + "learning_rate": 0.00010686682588843737, + "loss": 0.2172, + "step": 3988 + }, + { + "epoch": 4.73, + "learning_rate": 0.00010682915500222051, + "loss": 0.2094, + "step": 3989 + }, + { + "epoch": 4.73, + "learning_rate": 0.00010679148314231504, + "loss": 0.2885, + "step": 3990 + }, + { + "epoch": 4.73, + "learning_rate": 0.00010675381031409225, + "loss": 0.3085, + "step": 3991 + }, + { + "epoch": 4.73, + "learning_rate": 0.00010671613652292343, + "loss": 0.2515, + "step": 3992 + }, + { + "epoch": 4.74, + "learning_rate": 0.00010667846177418003, + "loss": 0.2314, + "step": 3993 + }, + { + "epoch": 4.74, + "learning_rate": 0.00010664078607323367, + "loss": 0.2473, + "step": 3994 + }, + { + "epoch": 4.74, + "learning_rate": 0.00010660310942545608, + "loss": 0.2283, + "step": 3995 + }, + { + "epoch": 4.74, + "learning_rate": 0.00010656543183621912, + "loss": 0.226, + "step": 3996 + }, + { + "epoch": 4.74, + "learning_rate": 0.00010652775331089477, + "loss": 0.2169, + "step": 3997 + }, + { + "epoch": 4.74, + "learning_rate": 0.00010649007385485519, + "loss": 0.2079, + "step": 3998 + }, + { + "epoch": 4.74, + "learning_rate": 0.00010645239347347269, + "loss": 0.2437, + "step": 3999 + }, + { + "epoch": 4.74, + "learning_rate": 0.00010641471217211958, + "loss": 0.2127, + "step": 4000 + }, + { + "epoch": 4.75, + "learning_rate": 0.00010637702995616847, + "loss": 0.2527, + "step": 4001 + }, + { + "epoch": 4.75, + "learning_rate": 0.00010633934683099196, + "loss": 0.2193, + "step": 4002 + }, + { + "epoch": 4.75, + "learning_rate": 0.0001063016628019629, + "loss": 0.2744, + "step": 4003 + }, + { + "epoch": 4.75, + "learning_rate": 0.00010626397787445416, + "loss": 0.2592, + "step": 4004 + }, + { + "epoch": 4.75, + "learning_rate": 0.00010622629205383885, + "loss": 0.2107, + "step": 4005 + }, + { + "epoch": 4.75, + "learning_rate": 0.00010618860534549006, + "loss": 0.1956, + "step": 4006 + }, + { + "epoch": 4.75, + "learning_rate": 0.00010615091775478117, + "loss": 0.2546, + "step": 4007 + }, + { + "epoch": 4.75, + "learning_rate": 0.00010611322928708555, + "loss": 0.2376, + "step": 4008 + }, + { + "epoch": 4.76, + "learning_rate": 0.00010607553994777684, + "loss": 0.2359, + "step": 4009 + }, + { + "epoch": 4.76, + "learning_rate": 0.00010603784974222861, + "loss": 0.2631, + "step": 4010 + }, + { + "epoch": 4.76, + "learning_rate": 0.00010600015867581474, + "loss": 0.2602, + "step": 4011 + }, + { + "epoch": 4.76, + "learning_rate": 0.00010596246675390911, + "loss": 0.2043, + "step": 4012 + }, + { + "epoch": 4.76, + "learning_rate": 0.00010592477398188575, + "loss": 0.2325, + "step": 4013 + }, + { + "epoch": 4.76, + "learning_rate": 0.0001058870803651189, + "loss": 0.2395, + "step": 4014 + }, + { + "epoch": 4.76, + "learning_rate": 0.00010584938590898281, + "loss": 0.2205, + "step": 4015 + }, + { + "epoch": 4.76, + "learning_rate": 0.00010581169061885185, + "loss": 0.2169, + "step": 4016 + }, + { + "epoch": 4.76, + "learning_rate": 0.00010577399450010062, + "loss": 0.1986, + "step": 4017 + }, + { + "epoch": 4.77, + "learning_rate": 0.0001057362975581037, + "loss": 0.2011, + "step": 4018 + }, + { + "epoch": 4.77, + "learning_rate": 0.00010569859979823586, + "loss": 0.2208, + "step": 4019 + }, + { + "epoch": 4.77, + "learning_rate": 0.000105660901225872, + "loss": 0.2478, + "step": 4020 + }, + { + "epoch": 4.77, + "learning_rate": 0.00010562320184638714, + "loss": 0.1936, + "step": 4021 + }, + { + "epoch": 4.77, + "learning_rate": 0.00010558550166515633, + "loss": 0.2719, + "step": 4022 + }, + { + "epoch": 4.77, + "learning_rate": 0.00010554780068755483, + "loss": 0.2873, + "step": 4023 + }, + { + "epoch": 4.77, + "learning_rate": 0.00010551009891895796, + "loss": 0.1993, + "step": 4024 + }, + { + "epoch": 4.77, + "learning_rate": 0.00010547239636474115, + "loss": 0.2174, + "step": 4025 + }, + { + "epoch": 4.78, + "learning_rate": 0.00010543469303028002, + "loss": 0.2009, + "step": 4026 + }, + { + "epoch": 4.78, + "learning_rate": 0.00010539698892095021, + "loss": 0.2038, + "step": 4027 + }, + { + "epoch": 4.78, + "learning_rate": 0.0001053592840421275, + "loss": 0.2119, + "step": 4028 + }, + { + "epoch": 4.78, + "learning_rate": 0.00010532157839918779, + "loss": 0.242, + "step": 4029 + }, + { + "epoch": 4.78, + "learning_rate": 0.00010528387199750707, + "loss": 0.2026, + "step": 4030 + }, + { + "epoch": 4.78, + "learning_rate": 0.00010524616484246146, + "loss": 0.2445, + "step": 4031 + }, + { + "epoch": 4.78, + "learning_rate": 0.00010520845693942719, + "loss": 0.2793, + "step": 4032 + }, + { + "epoch": 4.78, + "learning_rate": 0.00010517074829378057, + "loss": 0.2658, + "step": 4033 + }, + { + "epoch": 4.79, + "learning_rate": 0.00010513303891089803, + "loss": 0.2069, + "step": 4034 + }, + { + "epoch": 4.79, + "learning_rate": 0.00010509532879615614, + "loss": 0.2211, + "step": 4035 + }, + { + "epoch": 4.79, + "learning_rate": 0.00010505761795493145, + "loss": 0.2078, + "step": 4036 + }, + { + "epoch": 4.79, + "learning_rate": 0.00010501990639260079, + "loss": 0.2796, + "step": 4037 + }, + { + "epoch": 4.79, + "learning_rate": 0.00010498219411454098, + "loss": 0.2201, + "step": 4038 + }, + { + "epoch": 4.79, + "learning_rate": 0.000104944481126129, + "loss": 0.198, + "step": 4039 + }, + { + "epoch": 4.79, + "learning_rate": 0.00010490676743274181, + "loss": 0.2182, + "step": 4040 + }, + { + "epoch": 4.79, + "learning_rate": 0.00010486905303975664, + "loss": 0.216, + "step": 4041 + }, + { + "epoch": 4.79, + "learning_rate": 0.00010483133795255071, + "loss": 0.2365, + "step": 4042 + }, + { + "epoch": 4.8, + "learning_rate": 0.00010479362217650137, + "loss": 0.2472, + "step": 4043 + }, + { + "epoch": 4.8, + "learning_rate": 0.0001047559057169861, + "loss": 0.2259, + "step": 4044 + }, + { + "epoch": 4.8, + "learning_rate": 0.00010471818857938238, + "loss": 0.2306, + "step": 4045 + }, + { + "epoch": 4.8, + "learning_rate": 0.00010468047076906793, + "loss": 0.2689, + "step": 4046 + }, + { + "epoch": 4.8, + "learning_rate": 0.0001046427522914204, + "loss": 0.2361, + "step": 4047 + }, + { + "epoch": 4.8, + "learning_rate": 0.00010460503315181768, + "loss": 0.2919, + "step": 4048 + }, + { + "epoch": 4.8, + "learning_rate": 0.00010456731335563769, + "loss": 0.2397, + "step": 4049 + }, + { + "epoch": 4.8, + "learning_rate": 0.00010452959290825846, + "loss": 0.2144, + "step": 4050 + }, + { + "epoch": 4.81, + "learning_rate": 0.00010449187181505804, + "loss": 0.258, + "step": 4051 + }, + { + "epoch": 4.81, + "learning_rate": 0.00010445415008141473, + "loss": 0.2199, + "step": 4052 + }, + { + "epoch": 4.81, + "learning_rate": 0.00010441642771270675, + "loss": 0.1817, + "step": 4053 + }, + { + "epoch": 4.81, + "learning_rate": 0.00010437870471431251, + "loss": 0.2089, + "step": 4054 + }, + { + "epoch": 4.81, + "learning_rate": 0.00010434098109161051, + "loss": 0.2047, + "step": 4055 + }, + { + "epoch": 4.81, + "learning_rate": 0.00010430325684997928, + "loss": 0.2067, + "step": 4056 + }, + { + "epoch": 4.81, + "learning_rate": 0.00010426553199479749, + "loss": 0.1996, + "step": 4057 + }, + { + "epoch": 4.81, + "learning_rate": 0.0001042278065314439, + "loss": 0.2205, + "step": 4058 + }, + { + "epoch": 4.82, + "learning_rate": 0.0001041900804652973, + "loss": 0.2508, + "step": 4059 + }, + { + "epoch": 4.82, + "learning_rate": 0.00010415235380173662, + "loss": 0.2562, + "step": 4060 + }, + { + "epoch": 4.82, + "learning_rate": 0.00010411462654614088, + "loss": 0.2199, + "step": 4061 + }, + { + "epoch": 4.82, + "learning_rate": 0.00010407689870388916, + "loss": 0.2718, + "step": 4062 + }, + { + "epoch": 4.82, + "learning_rate": 0.00010403917028036058, + "loss": 0.2292, + "step": 4063 + }, + { + "epoch": 4.82, + "learning_rate": 0.00010400144128093448, + "loss": 0.3123, + "step": 4064 + }, + { + "epoch": 4.82, + "learning_rate": 0.00010396371171099006, + "loss": 0.2814, + "step": 4065 + }, + { + "epoch": 4.82, + "learning_rate": 0.00010392598157590688, + "loss": 0.231, + "step": 4066 + }, + { + "epoch": 4.82, + "learning_rate": 0.00010388825088106433, + "loss": 0.2242, + "step": 4067 + }, + { + "epoch": 4.83, + "learning_rate": 0.00010385051963184202, + "loss": 0.1998, + "step": 4068 + }, + { + "epoch": 4.83, + "learning_rate": 0.0001038127878336196, + "loss": 0.1902, + "step": 4069 + }, + { + "epoch": 4.83, + "learning_rate": 0.00010377505549177682, + "loss": 0.2198, + "step": 4070 + }, + { + "epoch": 4.83, + "learning_rate": 0.00010373732261169346, + "loss": 0.2537, + "step": 4071 + }, + { + "epoch": 4.83, + "learning_rate": 0.00010369958919874943, + "loss": 0.2267, + "step": 4072 + }, + { + "epoch": 4.83, + "learning_rate": 0.00010366185525832467, + "loss": 0.2376, + "step": 4073 + }, + { + "epoch": 4.83, + "learning_rate": 0.00010362412079579924, + "loss": 0.2076, + "step": 4074 + }, + { + "epoch": 4.83, + "learning_rate": 0.00010358638581655322, + "loss": 0.2507, + "step": 4075 + }, + { + "epoch": 4.84, + "learning_rate": 0.00010354865032596682, + "loss": 0.2077, + "step": 4076 + }, + { + "epoch": 4.84, + "learning_rate": 0.00010351091432942029, + "loss": 0.2762, + "step": 4077 + }, + { + "epoch": 4.84, + "learning_rate": 0.00010347317783229398, + "loss": 0.2232, + "step": 4078 + }, + { + "epoch": 4.84, + "learning_rate": 0.00010343544083996824, + "loss": 0.2475, + "step": 4079 + }, + { + "epoch": 4.84, + "learning_rate": 0.00010339770335782359, + "loss": 0.2108, + "step": 4080 + }, + { + "epoch": 4.84, + "learning_rate": 0.00010335996539124055, + "loss": 0.2544, + "step": 4081 + }, + { + "epoch": 4.84, + "learning_rate": 0.00010332222694559975, + "loss": 0.2253, + "step": 4082 + }, + { + "epoch": 4.84, + "learning_rate": 0.00010328448802628183, + "loss": 0.2324, + "step": 4083 + }, + { + "epoch": 4.85, + "learning_rate": 0.00010324674863866759, + "loss": 0.287, + "step": 4084 + }, + { + "epoch": 4.85, + "learning_rate": 0.0001032090087881378, + "loss": 0.3515, + "step": 4085 + }, + { + "epoch": 4.85, + "learning_rate": 0.00010317126848007337, + "loss": 0.2242, + "step": 4086 + }, + { + "epoch": 4.85, + "learning_rate": 0.0001031335277198552, + "loss": 0.2242, + "step": 4087 + }, + { + "epoch": 4.85, + "learning_rate": 0.00010309578651286436, + "loss": 0.1879, + "step": 4088 + }, + { + "epoch": 4.85, + "learning_rate": 0.00010305804486448186, + "loss": 0.2261, + "step": 4089 + }, + { + "epoch": 4.85, + "learning_rate": 0.0001030203027800889, + "loss": 0.2415, + "step": 4090 + }, + { + "epoch": 4.85, + "learning_rate": 0.00010298256026506662, + "loss": 0.2141, + "step": 4091 + }, + { + "epoch": 4.85, + "learning_rate": 0.00010294481732479635, + "loss": 0.2015, + "step": 4092 + }, + { + "epoch": 4.86, + "learning_rate": 0.0001029070739646593, + "loss": 0.206, + "step": 4093 + }, + { + "epoch": 4.86, + "learning_rate": 0.00010286933019003697, + "loss": 0.2598, + "step": 4094 + }, + { + "epoch": 4.86, + "learning_rate": 0.00010283158600631072, + "loss": 0.2561, + "step": 4095 + }, + { + "epoch": 4.86, + "learning_rate": 0.00010279384141886208, + "loss": 0.1914, + "step": 4096 + }, + { + "epoch": 4.86, + "learning_rate": 0.00010275609643307258, + "loss": 0.2416, + "step": 4097 + }, + { + "epoch": 4.86, + "learning_rate": 0.00010271835105432388, + "loss": 0.2012, + "step": 4098 + }, + { + "epoch": 4.86, + "learning_rate": 0.00010268060528799754, + "loss": 0.3043, + "step": 4099 + }, + { + "epoch": 4.86, + "learning_rate": 0.00010264285913947545, + "loss": 0.2331, + "step": 4100 + }, + { + "epoch": 4.87, + "learning_rate": 0.00010260511261413923, + "loss": 0.237, + "step": 4101 + }, + { + "epoch": 4.87, + "learning_rate": 0.00010256736571737083, + "loss": 0.2776, + "step": 4102 + }, + { + "epoch": 4.87, + "learning_rate": 0.00010252961845455205, + "loss": 0.1938, + "step": 4103 + }, + { + "epoch": 4.87, + "learning_rate": 0.00010249187083106486, + "loss": 0.2596, + "step": 4104 + }, + { + "epoch": 4.87, + "learning_rate": 0.00010245412285229124, + "loss": 0.196, + "step": 4105 + }, + { + "epoch": 4.87, + "learning_rate": 0.00010241637452361323, + "loss": 0.2369, + "step": 4106 + }, + { + "epoch": 4.87, + "learning_rate": 0.00010237862585041293, + "loss": 0.2091, + "step": 4107 + }, + { + "epoch": 4.87, + "learning_rate": 0.00010234087683807247, + "loss": 0.2273, + "step": 4108 + }, + { + "epoch": 4.88, + "learning_rate": 0.00010230312749197406, + "loss": 0.3996, + "step": 4109 + }, + { + "epoch": 4.88, + "learning_rate": 0.00010226537781749987, + "loss": 0.2382, + "step": 4110 + }, + { + "epoch": 4.88, + "learning_rate": 0.00010222762782003223, + "loss": 0.2174, + "step": 4111 + }, + { + "epoch": 4.88, + "learning_rate": 0.00010218987750495343, + "loss": 0.2569, + "step": 4112 + }, + { + "epoch": 4.88, + "learning_rate": 0.00010215212687764593, + "loss": 0.2239, + "step": 4113 + }, + { + "epoch": 4.88, + "learning_rate": 0.00010211437594349203, + "loss": 0.3192, + "step": 4114 + }, + { + "epoch": 4.88, + "learning_rate": 0.00010207662470787427, + "loss": 0.2347, + "step": 4115 + }, + { + "epoch": 4.88, + "learning_rate": 0.00010203887317617511, + "loss": 0.2461, + "step": 4116 + }, + { + "epoch": 4.88, + "learning_rate": 0.00010200112135377709, + "loss": 0.2826, + "step": 4117 + }, + { + "epoch": 4.89, + "learning_rate": 0.00010196336924606283, + "loss": 0.3531, + "step": 4118 + }, + { + "epoch": 4.89, + "learning_rate": 0.00010192561685841496, + "loss": 0.2104, + "step": 4119 + }, + { + "epoch": 4.89, + "learning_rate": 0.00010188786419621612, + "loss": 0.2257, + "step": 4120 + }, + { + "epoch": 4.89, + "learning_rate": 0.00010185011126484903, + "loss": 0.2096, + "step": 4121 + }, + { + "epoch": 4.89, + "learning_rate": 0.0001018123580696964, + "loss": 0.2009, + "step": 4122 + }, + { + "epoch": 4.89, + "learning_rate": 0.00010177460461614108, + "loss": 0.3198, + "step": 4123 + }, + { + "epoch": 4.89, + "learning_rate": 0.00010173685090956582, + "loss": 0.1979, + "step": 4124 + }, + { + "epoch": 4.89, + "learning_rate": 0.00010169909695535354, + "loss": 0.2507, + "step": 4125 + }, + { + "epoch": 4.9, + "learning_rate": 0.00010166134275888708, + "loss": 0.2295, + "step": 4126 + }, + { + "epoch": 4.9, + "learning_rate": 0.00010162358832554937, + "loss": 0.2355, + "step": 4127 + }, + { + "epoch": 4.9, + "learning_rate": 0.00010158583366072338, + "loss": 0.2253, + "step": 4128 + }, + { + "epoch": 4.9, + "learning_rate": 0.00010154807876979213, + "loss": 0.3306, + "step": 4129 + }, + { + "epoch": 4.9, + "learning_rate": 0.00010151032365813859, + "loss": 0.2265, + "step": 4130 + }, + { + "epoch": 4.9, + "learning_rate": 0.00010147256833114586, + "loss": 0.2176, + "step": 4131 + }, + { + "epoch": 4.9, + "learning_rate": 0.000101434812794197, + "loss": 0.2631, + "step": 4132 + }, + { + "epoch": 4.9, + "learning_rate": 0.00010139705705267513, + "loss": 0.2241, + "step": 4133 + }, + { + "epoch": 4.91, + "learning_rate": 0.00010135930111196338, + "loss": 0.2317, + "step": 4134 + }, + { + "epoch": 4.91, + "learning_rate": 0.000101321544977445, + "loss": 0.2325, + "step": 4135 + }, + { + "epoch": 4.91, + "learning_rate": 0.00010128378865450307, + "loss": 0.2011, + "step": 4136 + }, + { + "epoch": 4.91, + "learning_rate": 0.00010124603214852093, + "loss": 0.232, + "step": 4137 + }, + { + "epoch": 4.91, + "learning_rate": 0.00010120827546488174, + "loss": 0.2624, + "step": 4138 + }, + { + "epoch": 4.91, + "learning_rate": 0.00010117051860896885, + "loss": 0.2452, + "step": 4139 + }, + { + "epoch": 4.91, + "learning_rate": 0.00010113276158616553, + "loss": 0.2261, + "step": 4140 + }, + { + "epoch": 4.91, + "learning_rate": 0.00010109500440185514, + "loss": 0.2378, + "step": 4141 + }, + { + "epoch": 4.91, + "learning_rate": 0.000101057247061421, + "loss": 0.2172, + "step": 4142 + }, + { + "epoch": 4.92, + "learning_rate": 0.00010101948957024647, + "loss": 0.2539, + "step": 4143 + }, + { + "epoch": 4.92, + "learning_rate": 0.00010098173193371499, + "loss": 0.2178, + "step": 4144 + }, + { + "epoch": 4.92, + "learning_rate": 0.00010094397415720991, + "loss": 0.2545, + "step": 4145 + }, + { + "epoch": 4.92, + "learning_rate": 0.00010090621624611474, + "loss": 0.2233, + "step": 4146 + }, + { + "epoch": 4.92, + "learning_rate": 0.0001008684582058129, + "loss": 0.2547, + "step": 4147 + }, + { + "epoch": 4.92, + "learning_rate": 0.00010083070004168786, + "loss": 0.232, + "step": 4148 + }, + { + "epoch": 4.92, + "learning_rate": 0.00010079294175912313, + "loss": 0.313, + "step": 4149 + }, + { + "epoch": 4.92, + "learning_rate": 0.00010075518336350218, + "loss": 0.2234, + "step": 4150 + }, + { + "epoch": 4.93, + "learning_rate": 0.00010071742486020854, + "loss": 0.2447, + "step": 4151 + }, + { + "epoch": 4.93, + "learning_rate": 0.00010067966625462577, + "loss": 0.246, + "step": 4152 + }, + { + "epoch": 4.93, + "learning_rate": 0.00010064190755213745, + "loss": 0.1836, + "step": 4153 + }, + { + "epoch": 4.93, + "learning_rate": 0.00010060414875812709, + "loss": 0.2655, + "step": 4154 + }, + { + "epoch": 4.93, + "learning_rate": 0.00010056638987797833, + "loss": 0.2338, + "step": 4155 + }, + { + "epoch": 4.93, + "learning_rate": 0.00010052863091707467, + "loss": 0.2014, + "step": 4156 + }, + { + "epoch": 4.93, + "learning_rate": 0.00010049087188079983, + "loss": 0.2492, + "step": 4157 + }, + { + "epoch": 4.93, + "learning_rate": 0.0001004531127745373, + "loss": 0.2547, + "step": 4158 + }, + { + "epoch": 4.94, + "learning_rate": 0.00010041535360367085, + "loss": 0.2837, + "step": 4159 + }, + { + "epoch": 4.94, + "learning_rate": 0.00010037759437358398, + "loss": 0.2598, + "step": 4160 + }, + { + "epoch": 4.94, + "learning_rate": 0.0001003398350896604, + "loss": 0.2047, + "step": 4161 + }, + { + "epoch": 4.94, + "learning_rate": 0.00010030207575728374, + "loss": 0.2006, + "step": 4162 + }, + { + "epoch": 4.94, + "learning_rate": 0.00010026431638183771, + "loss": 0.2399, + "step": 4163 + }, + { + "epoch": 4.94, + "learning_rate": 0.00010022655696870588, + "loss": 0.2508, + "step": 4164 + }, + { + "epoch": 4.94, + "learning_rate": 0.00010018879752327202, + "loss": 0.2217, + "step": 4165 + }, + { + "epoch": 4.94, + "learning_rate": 0.00010015103805091973, + "loss": 0.2649, + "step": 4166 + }, + { + "epoch": 4.94, + "learning_rate": 0.00010011327855703267, + "loss": 0.2819, + "step": 4167 + }, + { + "epoch": 4.95, + "learning_rate": 0.00010007551904699459, + "loss": 0.2435, + "step": 4168 + }, + { + "epoch": 4.95, + "learning_rate": 0.00010003775952618914, + "loss": 0.2641, + "step": 4169 + }, + { + "epoch": 4.95, + "learning_rate": 0.0001, + "loss": 0.2157, + "step": 4170 + }, + { + "epoch": 4.95, + "learning_rate": 9.996224047381087e-05, + "loss": 0.2414, + "step": 4171 + }, + { + "epoch": 4.95, + "learning_rate": 9.992448095300542e-05, + "loss": 0.1886, + "step": 4172 + }, + { + "epoch": 4.95, + "learning_rate": 9.988672144296735e-05, + "loss": 0.2392, + "step": 4173 + }, + { + "epoch": 4.95, + "learning_rate": 9.984896194908031e-05, + "loss": 0.2231, + "step": 4174 + }, + { + "epoch": 4.95, + "learning_rate": 9.981120247672801e-05, + "loss": 0.2024, + "step": 4175 + }, + { + "epoch": 4.96, + "learning_rate": 9.97734430312941e-05, + "loss": 0.2104, + "step": 4176 + }, + { + "epoch": 4.96, + "learning_rate": 9.973568361816233e-05, + "loss": 0.2793, + "step": 4177 + }, + { + "epoch": 4.96, + "learning_rate": 9.969792424271627e-05, + "loss": 0.2292, + "step": 4178 + }, + { + "epoch": 4.96, + "learning_rate": 9.966016491033962e-05, + "loss": 0.2046, + "step": 4179 + }, + { + "epoch": 4.96, + "learning_rate": 9.962240562641602e-05, + "loss": 0.2099, + "step": 4180 + }, + { + "epoch": 4.96, + "eval_loss": 3.49141788482666, + "eval_runtime": 284.0307, + "eval_samples_per_second": 0.725, + "eval_steps_per_second": 0.725, + "step": 4180 + }, + { + "epoch": 4.96, + "learning_rate": 9.95846463963292e-05, + "loss": 0.2376, + "step": 4181 + }, + { + "epoch": 4.96, + "learning_rate": 9.95468872254627e-05, + "loss": 0.21, + "step": 4182 + }, + { + "epoch": 4.96, + "learning_rate": 9.950912811920021e-05, + "loss": 0.2079, + "step": 4183 + }, + { + "epoch": 4.97, + "learning_rate": 9.947136908292534e-05, + "loss": 0.2151, + "step": 4184 + }, + { + "epoch": 4.97, + "learning_rate": 9.943361012202172e-05, + "loss": 0.2247, + "step": 4185 + }, + { + "epoch": 4.97, + "learning_rate": 9.939585124187292e-05, + "loss": 0.2318, + "step": 4186 + }, + { + "epoch": 4.97, + "learning_rate": 9.935809244786256e-05, + "loss": 0.2414, + "step": 4187 + }, + { + "epoch": 4.97, + "learning_rate": 9.932033374537422e-05, + "loss": 0.1861, + "step": 4188 + }, + { + "epoch": 4.97, + "learning_rate": 9.92825751397915e-05, + "loss": 0.2475, + "step": 4189 + }, + { + "epoch": 4.97, + "learning_rate": 9.924481663649785e-05, + "loss": 0.2163, + "step": 4190 + }, + { + "epoch": 4.97, + "learning_rate": 9.92070582408769e-05, + "loss": 0.2044, + "step": 4191 + }, + { + "epoch": 4.97, + "learning_rate": 9.916929995831215e-05, + "loss": 0.2369, + "step": 4192 + }, + { + "epoch": 4.98, + "learning_rate": 9.913154179418713e-05, + "loss": 0.2553, + "step": 4193 + }, + { + "epoch": 4.98, + "learning_rate": 9.909378375388529e-05, + "loss": 0.3004, + "step": 4194 + }, + { + "epoch": 4.98, + "learning_rate": 9.90560258427901e-05, + "loss": 0.2313, + "step": 4195 + }, + { + "epoch": 4.98, + "learning_rate": 9.901826806628505e-05, + "loss": 0.2134, + "step": 4196 + }, + { + "epoch": 4.98, + "learning_rate": 9.898051042975358e-05, + "loss": 0.2175, + "step": 4197 + }, + { + "epoch": 4.98, + "learning_rate": 9.894275293857904e-05, + "loss": 0.1946, + "step": 4198 + }, + { + "epoch": 4.98, + "learning_rate": 9.890499559814487e-05, + "loss": 0.2395, + "step": 4199 + }, + { + "epoch": 4.98, + "learning_rate": 9.886723841383448e-05, + "loss": 0.215, + "step": 4200 + }, + { + "epoch": 4.99, + "learning_rate": 9.882948139103118e-05, + "loss": 0.2206, + "step": 4201 + }, + { + "epoch": 4.99, + "learning_rate": 9.879172453511827e-05, + "loss": 0.3013, + "step": 4202 + }, + { + "epoch": 4.99, + "learning_rate": 9.875396785147909e-05, + "loss": 0.2874, + "step": 4203 + }, + { + "epoch": 4.99, + "learning_rate": 9.871621134549692e-05, + "loss": 0.2445, + "step": 4204 + }, + { + "epoch": 4.99, + "learning_rate": 9.867845502255506e-05, + "loss": 0.2294, + "step": 4205 + }, + { + "epoch": 4.99, + "learning_rate": 9.864069888803663e-05, + "loss": 0.2251, + "step": 4206 + }, + { + "epoch": 4.99, + "learning_rate": 9.860294294732489e-05, + "loss": 0.2164, + "step": 4207 + }, + { + "epoch": 4.99, + "learning_rate": 9.856518720580303e-05, + "loss": 0.2441, + "step": 4208 + }, + { + "epoch": 5.0, + "learning_rate": 9.852743166885417e-05, + "loss": 0.1956, + "step": 4209 + }, + { + "epoch": 5.0, + "learning_rate": 9.848967634186142e-05, + "loss": 0.2312, + "step": 4210 + }, + { + "epoch": 5.0, + "learning_rate": 9.845192123020789e-05, + "loss": 0.2617, + "step": 4211 + }, + { + "epoch": 5.0, + "learning_rate": 9.841416633927662e-05, + "loss": 0.2207, + "step": 4212 + }, + { + "epoch": 5.0, + "learning_rate": 9.837641167445065e-05, + "loss": 0.1902, + "step": 4213 + }, + { + "epoch": 5.0, + "learning_rate": 9.833865724111295e-05, + "loss": 0.2028, + "step": 4214 + }, + { + "epoch": 5.0, + "learning_rate": 9.830090304464647e-05, + "loss": 0.2526, + "step": 4215 + }, + { + "epoch": 5.0, + "learning_rate": 9.826314909043418e-05, + "loss": 0.2181, + "step": 4216 + }, + { + "epoch": 5.0, + "learning_rate": 9.822539538385897e-05, + "loss": 0.2086, + "step": 4217 + }, + { + "epoch": 5.01, + "learning_rate": 9.818764193030363e-05, + "loss": 0.252, + "step": 4218 + }, + { + "epoch": 5.01, + "learning_rate": 9.8149888735151e-05, + "loss": 0.248, + "step": 4219 + }, + { + "epoch": 5.01, + "learning_rate": 9.81121358037839e-05, + "loss": 0.1881, + "step": 4220 + }, + { + "epoch": 5.01, + "learning_rate": 9.807438314158508e-05, + "loss": 0.244, + "step": 4221 + }, + { + "epoch": 5.01, + "learning_rate": 9.803663075393718e-05, + "loss": 0.2777, + "step": 4222 + }, + { + "epoch": 5.01, + "learning_rate": 9.799887864622292e-05, + "loss": 0.2263, + "step": 4223 + }, + { + "epoch": 5.01, + "learning_rate": 9.796112682382493e-05, + "loss": 0.2016, + "step": 4224 + }, + { + "epoch": 5.01, + "learning_rate": 9.792337529212578e-05, + "loss": 0.212, + "step": 4225 + }, + { + "epoch": 5.02, + "learning_rate": 9.7885624056508e-05, + "loss": 0.2941, + "step": 4226 + }, + { + "epoch": 5.02, + "learning_rate": 9.784787312235411e-05, + "loss": 0.2384, + "step": 4227 + }, + { + "epoch": 5.0, + "learning_rate": 9.781012249504655e-05, + "loss": 0.1963, + "step": 4228 + }, + { + "epoch": 5.0, + "learning_rate": 9.777237217996779e-05, + "loss": 0.1817, + "step": 4229 + }, + { + "epoch": 5.0, + "learning_rate": 9.773462218250015e-05, + "loss": 0.1794, + "step": 4230 + }, + { + "epoch": 5.0, + "learning_rate": 9.769687250802597e-05, + "loss": 0.1783, + "step": 4231 + }, + { + "epoch": 5.01, + "learning_rate": 9.765912316192752e-05, + "loss": 0.2024, + "step": 4232 + }, + { + "epoch": 5.01, + "learning_rate": 9.76213741495871e-05, + "loss": 0.1771, + "step": 4233 + }, + { + "epoch": 5.01, + "learning_rate": 9.75836254763868e-05, + "loss": 0.1834, + "step": 4234 + }, + { + "epoch": 5.01, + "learning_rate": 9.754587714770878e-05, + "loss": 0.1689, + "step": 4235 + }, + { + "epoch": 5.01, + "learning_rate": 9.750812916893517e-05, + "loss": 0.1893, + "step": 4236 + }, + { + "epoch": 5.01, + "learning_rate": 9.747038154544795e-05, + "loss": 0.1801, + "step": 4237 + }, + { + "epoch": 5.01, + "learning_rate": 9.743263428262921e-05, + "loss": 0.1973, + "step": 4238 + }, + { + "epoch": 5.01, + "learning_rate": 9.739488738586078e-05, + "loss": 0.1716, + "step": 4239 + }, + { + "epoch": 5.02, + "learning_rate": 9.735714086052458e-05, + "loss": 0.1803, + "step": 4240 + }, + { + "epoch": 5.02, + "learning_rate": 9.731939471200244e-05, + "loss": 0.199, + "step": 4241 + }, + { + "epoch": 5.02, + "learning_rate": 9.728164894567617e-05, + "loss": 0.1682, + "step": 4242 + }, + { + "epoch": 5.02, + "learning_rate": 9.724390356692745e-05, + "loss": 0.2256, + "step": 4243 + }, + { + "epoch": 5.02, + "learning_rate": 9.720615858113794e-05, + "loss": 0.1715, + "step": 4244 + }, + { + "epoch": 5.02, + "learning_rate": 9.71684139936893e-05, + "loss": 0.1864, + "step": 4245 + }, + { + "epoch": 5.02, + "learning_rate": 9.713066980996308e-05, + "loss": 0.1682, + "step": 4246 + }, + { + "epoch": 5.02, + "learning_rate": 9.709292603534072e-05, + "loss": 0.1824, + "step": 4247 + }, + { + "epoch": 5.03, + "learning_rate": 9.705518267520368e-05, + "loss": 0.1741, + "step": 4248 + }, + { + "epoch": 5.03, + "learning_rate": 9.701743973493337e-05, + "loss": 0.1716, + "step": 4249 + }, + { + "epoch": 5.03, + "learning_rate": 9.697969721991114e-05, + "loss": 0.1889, + "step": 4250 + }, + { + "epoch": 5.03, + "learning_rate": 9.694195513551815e-05, + "loss": 0.1823, + "step": 4251 + }, + { + "epoch": 5.03, + "learning_rate": 9.690421348713568e-05, + "loss": 0.186, + "step": 4252 + }, + { + "epoch": 5.03, + "learning_rate": 9.686647228014482e-05, + "loss": 0.1867, + "step": 4253 + }, + { + "epoch": 5.03, + "learning_rate": 9.682873151992668e-05, + "loss": 0.173, + "step": 4254 + }, + { + "epoch": 5.03, + "learning_rate": 9.679099121186222e-05, + "loss": 0.1878, + "step": 4255 + }, + { + "epoch": 5.03, + "learning_rate": 9.675325136133244e-05, + "loss": 0.1812, + "step": 4256 + }, + { + "epoch": 5.04, + "learning_rate": 9.671551197371818e-05, + "loss": 0.1883, + "step": 4257 + }, + { + "epoch": 5.04, + "learning_rate": 9.667777305440029e-05, + "loss": 0.1687, + "step": 4258 + }, + { + "epoch": 5.04, + "learning_rate": 9.664003460875947e-05, + "loss": 0.1788, + "step": 4259 + }, + { + "epoch": 5.04, + "learning_rate": 9.660229664217642e-05, + "loss": 0.1695, + "step": 4260 + }, + { + "epoch": 5.04, + "learning_rate": 9.656455916003176e-05, + "loss": 0.1779, + "step": 4261 + }, + { + "epoch": 5.04, + "learning_rate": 9.652682216770607e-05, + "loss": 0.1718, + "step": 4262 + }, + { + "epoch": 5.04, + "learning_rate": 9.648908567057974e-05, + "loss": 0.1728, + "step": 4263 + }, + { + "epoch": 5.04, + "learning_rate": 9.645134967403319e-05, + "loss": 0.2087, + "step": 4264 + }, + { + "epoch": 5.05, + "learning_rate": 9.64136141834468e-05, + "loss": 0.1742, + "step": 4265 + }, + { + "epoch": 5.05, + "learning_rate": 9.63758792042008e-05, + "loss": 0.1867, + "step": 4266 + }, + { + "epoch": 5.05, + "learning_rate": 9.633814474167535e-05, + "loss": 0.1854, + "step": 4267 + }, + { + "epoch": 5.05, + "learning_rate": 9.630041080125058e-05, + "loss": 0.184, + "step": 4268 + }, + { + "epoch": 5.05, + "learning_rate": 9.626267738830655e-05, + "loss": 0.1796, + "step": 4269 + }, + { + "epoch": 5.05, + "learning_rate": 9.62249445082232e-05, + "loss": 0.1899, + "step": 4270 + }, + { + "epoch": 5.05, + "learning_rate": 9.61872121663804e-05, + "loss": 0.1717, + "step": 4271 + }, + { + "epoch": 5.05, + "learning_rate": 9.614948036815799e-05, + "loss": 0.1754, + "step": 4272 + }, + { + "epoch": 5.06, + "learning_rate": 9.611174911893568e-05, + "loss": 0.1796, + "step": 4273 + }, + { + "epoch": 5.06, + "learning_rate": 9.607401842409317e-05, + "loss": 0.1737, + "step": 4274 + }, + { + "epoch": 5.06, + "learning_rate": 9.603628828900996e-05, + "loss": 0.1813, + "step": 4275 + }, + { + "epoch": 5.06, + "learning_rate": 9.599855871906555e-05, + "loss": 0.1819, + "step": 4276 + }, + { + "epoch": 5.06, + "learning_rate": 9.59608297196394e-05, + "loss": 0.1775, + "step": 4277 + }, + { + "epoch": 5.06, + "learning_rate": 9.592310129611089e-05, + "loss": 0.1786, + "step": 4278 + }, + { + "epoch": 5.06, + "learning_rate": 9.588537345385914e-05, + "loss": 0.1757, + "step": 4279 + }, + { + "epoch": 5.06, + "learning_rate": 9.584764619826339e-05, + "loss": 0.1768, + "step": 4280 + }, + { + "epoch": 5.06, + "learning_rate": 9.580991953470271e-05, + "loss": 0.1705, + "step": 4281 + }, + { + "epoch": 5.07, + "learning_rate": 9.577219346855613e-05, + "loss": 0.1822, + "step": 4282 + }, + { + "epoch": 5.07, + "learning_rate": 9.573446800520253e-05, + "loss": 0.1817, + "step": 4283 + }, + { + "epoch": 5.07, + "learning_rate": 9.569674315002074e-05, + "loss": 0.1814, + "step": 4284 + }, + { + "epoch": 5.07, + "learning_rate": 9.56590189083895e-05, + "loss": 0.1789, + "step": 4285 + }, + { + "epoch": 5.07, + "learning_rate": 9.562129528568753e-05, + "loss": 0.174, + "step": 4286 + }, + { + "epoch": 5.07, + "learning_rate": 9.558357228729327e-05, + "loss": 0.1769, + "step": 4287 + }, + { + "epoch": 5.07, + "learning_rate": 9.554584991858528e-05, + "loss": 0.1748, + "step": 4288 + }, + { + "epoch": 5.07, + "learning_rate": 9.550812818494194e-05, + "loss": 0.1752, + "step": 4289 + }, + { + "epoch": 5.08, + "learning_rate": 9.547040709174159e-05, + "loss": 0.171, + "step": 4290 + }, + { + "epoch": 5.08, + "learning_rate": 9.543268664436233e-05, + "loss": 0.1822, + "step": 4291 + }, + { + "epoch": 5.08, + "learning_rate": 9.539496684818233e-05, + "loss": 0.1746, + "step": 4292 + }, + { + "epoch": 5.08, + "learning_rate": 9.535724770857962e-05, + "loss": 0.1729, + "step": 4293 + }, + { + "epoch": 5.08, + "learning_rate": 9.531952923093211e-05, + "loss": 0.1888, + "step": 4294 + }, + { + "epoch": 5.08, + "learning_rate": 9.528181142061763e-05, + "loss": 0.1786, + "step": 4295 + }, + { + "epoch": 5.08, + "learning_rate": 9.524409428301392e-05, + "loss": 0.202, + "step": 4296 + }, + { + "epoch": 5.08, + "learning_rate": 9.520637782349863e-05, + "loss": 0.1846, + "step": 4297 + }, + { + "epoch": 5.09, + "learning_rate": 9.516866204744931e-05, + "loss": 0.1754, + "step": 4298 + }, + { + "epoch": 5.09, + "learning_rate": 9.513094696024338e-05, + "loss": 0.178, + "step": 4299 + }, + { + "epoch": 5.09, + "learning_rate": 9.509323256725821e-05, + "loss": 0.1744, + "step": 4300 + }, + { + "epoch": 5.09, + "learning_rate": 9.505551887387102e-05, + "loss": 0.1791, + "step": 4301 + }, + { + "epoch": 5.09, + "learning_rate": 9.501780588545901e-05, + "loss": 0.1831, + "step": 4302 + }, + { + "epoch": 5.09, + "learning_rate": 9.498009360739925e-05, + "loss": 0.1786, + "step": 4303 + }, + { + "epoch": 5.09, + "learning_rate": 9.494238204506858e-05, + "loss": 0.1861, + "step": 4304 + }, + { + "epoch": 5.09, + "learning_rate": 9.490467120384389e-05, + "loss": 0.1823, + "step": 4305 + }, + { + "epoch": 5.09, + "learning_rate": 9.486696108910198e-05, + "loss": 0.1811, + "step": 4306 + }, + { + "epoch": 5.1, + "learning_rate": 9.482925170621946e-05, + "loss": 0.183, + "step": 4307 + }, + { + "epoch": 5.1, + "learning_rate": 9.479154306057284e-05, + "loss": 0.1857, + "step": 4308 + }, + { + "epoch": 5.1, + "learning_rate": 9.475383515753856e-05, + "loss": 0.1701, + "step": 4309 + }, + { + "epoch": 5.1, + "learning_rate": 9.471612800249296e-05, + "loss": 0.181, + "step": 4310 + }, + { + "epoch": 5.1, + "learning_rate": 9.467842160081225e-05, + "loss": 0.1735, + "step": 4311 + }, + { + "epoch": 5.1, + "learning_rate": 9.464071595787253e-05, + "loss": 0.1959, + "step": 4312 + }, + { + "epoch": 5.1, + "learning_rate": 9.46030110790498e-05, + "loss": 0.1854, + "step": 4313 + }, + { + "epoch": 5.1, + "learning_rate": 9.456530696971999e-05, + "loss": 0.178, + "step": 4314 + }, + { + "epoch": 5.11, + "learning_rate": 9.452760363525887e-05, + "loss": 0.1821, + "step": 4315 + }, + { + "epoch": 5.11, + "learning_rate": 9.448990108104208e-05, + "loss": 0.1774, + "step": 4316 + }, + { + "epoch": 5.11, + "learning_rate": 9.44521993124452e-05, + "loss": 0.1773, + "step": 4317 + }, + { + "epoch": 5.11, + "learning_rate": 9.441449833484368e-05, + "loss": 0.1697, + "step": 4318 + }, + { + "epoch": 5.11, + "learning_rate": 9.437679815361291e-05, + "loss": 0.1658, + "step": 4319 + }, + { + "epoch": 5.11, + "learning_rate": 9.433909877412802e-05, + "loss": 0.1817, + "step": 4320 + }, + { + "epoch": 5.11, + "learning_rate": 9.430140020176416e-05, + "loss": 0.1695, + "step": 4321 + }, + { + "epoch": 5.11, + "learning_rate": 9.426370244189632e-05, + "loss": 0.1691, + "step": 4322 + }, + { + "epoch": 5.12, + "learning_rate": 9.422600549989942e-05, + "loss": 0.1717, + "step": 4323 + }, + { + "epoch": 5.12, + "learning_rate": 9.418830938114816e-05, + "loss": 0.1788, + "step": 4324 + }, + { + "epoch": 5.12, + "learning_rate": 9.415061409101721e-05, + "loss": 0.1773, + "step": 4325 + }, + { + "epoch": 5.12, + "learning_rate": 9.411291963488109e-05, + "loss": 0.1791, + "step": 4326 + }, + { + "epoch": 5.12, + "learning_rate": 9.407522601811425e-05, + "loss": 0.172, + "step": 4327 + }, + { + "epoch": 5.12, + "learning_rate": 9.403753324609091e-05, + "loss": 0.1713, + "step": 4328 + }, + { + "epoch": 5.12, + "learning_rate": 9.399984132418528e-05, + "loss": 0.1958, + "step": 4329 + }, + { + "epoch": 5.12, + "learning_rate": 9.396215025777139e-05, + "loss": 0.1701, + "step": 4330 + }, + { + "epoch": 5.12, + "learning_rate": 9.392446005222321e-05, + "loss": 0.1865, + "step": 4331 + }, + { + "epoch": 5.13, + "learning_rate": 9.388677071291446e-05, + "loss": 0.1773, + "step": 4332 + }, + { + "epoch": 5.13, + "learning_rate": 9.384908224521886e-05, + "loss": 0.168, + "step": 4333 + }, + { + "epoch": 5.13, + "learning_rate": 9.381139465450993e-05, + "loss": 0.1963, + "step": 4334 + }, + { + "epoch": 5.13, + "learning_rate": 9.37737079461612e-05, + "loss": 0.1697, + "step": 4335 + }, + { + "epoch": 5.13, + "learning_rate": 9.373602212554586e-05, + "loss": 0.1748, + "step": 4336 + }, + { + "epoch": 5.13, + "learning_rate": 9.369833719803712e-05, + "loss": 0.1669, + "step": 4337 + }, + { + "epoch": 5.13, + "learning_rate": 9.366065316900805e-05, + "loss": 0.1726, + "step": 4338 + }, + { + "epoch": 5.13, + "learning_rate": 9.362297004383158e-05, + "loss": 0.1778, + "step": 4339 + }, + { + "epoch": 5.14, + "learning_rate": 9.358528782788045e-05, + "loss": 0.1782, + "step": 4340 + }, + { + "epoch": 5.14, + "learning_rate": 9.354760652652734e-05, + "loss": 0.1864, + "step": 4341 + }, + { + "epoch": 5.14, + "learning_rate": 9.35099261451448e-05, + "loss": 0.1761, + "step": 4342 + }, + { + "epoch": 5.14, + "learning_rate": 9.347224668910528e-05, + "loss": 0.1882, + "step": 4343 + }, + { + "epoch": 5.14, + "learning_rate": 9.343456816378092e-05, + "loss": 0.1746, + "step": 4344 + }, + { + "epoch": 5.14, + "learning_rate": 9.339689057454393e-05, + "loss": 0.2131, + "step": 4345 + }, + { + "epoch": 5.14, + "learning_rate": 9.335921392676631e-05, + "loss": 0.1772, + "step": 4346 + }, + { + "epoch": 5.14, + "learning_rate": 9.332153822582e-05, + "loss": 0.1787, + "step": 4347 + }, + { + "epoch": 5.15, + "learning_rate": 9.32838634770766e-05, + "loss": 0.1713, + "step": 4348 + }, + { + "epoch": 5.15, + "learning_rate": 9.324618968590776e-05, + "loss": 0.1885, + "step": 4349 + }, + { + "epoch": 5.15, + "learning_rate": 9.320851685768497e-05, + "loss": 0.1783, + "step": 4350 + }, + { + "epoch": 5.15, + "learning_rate": 9.317084499777956e-05, + "loss": 0.1833, + "step": 4351 + }, + { + "epoch": 5.15, + "learning_rate": 9.313317411156264e-05, + "loss": 0.1849, + "step": 4352 + }, + { + "epoch": 5.15, + "learning_rate": 9.309550420440532e-05, + "loss": 0.1753, + "step": 4353 + }, + { + "epoch": 5.15, + "learning_rate": 9.305783528167849e-05, + "loss": 0.1873, + "step": 4354 + }, + { + "epoch": 5.15, + "learning_rate": 9.302016734875292e-05, + "loss": 0.1798, + "step": 4355 + }, + { + "epoch": 5.15, + "learning_rate": 9.298250041099924e-05, + "loss": 0.1735, + "step": 4356 + }, + { + "epoch": 5.16, + "learning_rate": 9.294483447378792e-05, + "loss": 0.1766, + "step": 4357 + }, + { + "epoch": 5.16, + "learning_rate": 9.29071695424893e-05, + "loss": 0.1683, + "step": 4358 + }, + { + "epoch": 5.16, + "learning_rate": 9.286950562247365e-05, + "loss": 0.1815, + "step": 4359 + }, + { + "epoch": 5.16, + "learning_rate": 9.283184271911089e-05, + "loss": 0.1736, + "step": 4360 + }, + { + "epoch": 5.16, + "learning_rate": 9.279418083777103e-05, + "loss": 0.198, + "step": 4361 + }, + { + "epoch": 5.16, + "learning_rate": 9.275651998382377e-05, + "loss": 0.1734, + "step": 4362 + }, + { + "epoch": 5.16, + "learning_rate": 9.27188601626388e-05, + "loss": 0.1974, + "step": 4363 + }, + { + "epoch": 5.16, + "learning_rate": 9.268120137958551e-05, + "loss": 0.1813, + "step": 4364 + }, + { + "epoch": 5.17, + "learning_rate": 9.264354364003327e-05, + "loss": 0.1823, + "step": 4365 + }, + { + "epoch": 5.17, + "learning_rate": 9.260588694935124e-05, + "loss": 0.1723, + "step": 4366 + }, + { + "epoch": 5.17, + "learning_rate": 9.256823131290844e-05, + "loss": 0.2007, + "step": 4367 + }, + { + "epoch": 5.17, + "learning_rate": 9.253057673607376e-05, + "loss": 0.1844, + "step": 4368 + }, + { + "epoch": 5.17, + "learning_rate": 9.249292322421589e-05, + "loss": 0.1773, + "step": 4369 + }, + { + "epoch": 5.17, + "learning_rate": 9.245527078270341e-05, + "loss": 0.1707, + "step": 4370 + }, + { + "epoch": 5.17, + "learning_rate": 9.241761941690474e-05, + "loss": 0.1769, + "step": 4371 + }, + { + "epoch": 5.17, + "learning_rate": 9.237996913218819e-05, + "loss": 0.1741, + "step": 4372 + }, + { + "epoch": 5.18, + "learning_rate": 9.234231993392177e-05, + "loss": 0.1807, + "step": 4373 + }, + { + "epoch": 5.18, + "learning_rate": 9.230467182747351e-05, + "loss": 0.1746, + "step": 4374 + }, + { + "epoch": 5.18, + "learning_rate": 9.226702481821118e-05, + "loss": 0.1794, + "step": 4375 + }, + { + "epoch": 5.18, + "learning_rate": 9.222937891150249e-05, + "loss": 0.1798, + "step": 4376 + }, + { + "epoch": 5.18, + "learning_rate": 9.21917341127148e-05, + "loss": 0.1682, + "step": 4377 + }, + { + "epoch": 5.18, + "learning_rate": 9.215409042721552e-05, + "loss": 0.196, + "step": 4378 + }, + { + "epoch": 5.18, + "learning_rate": 9.211644786037179e-05, + "loss": 0.1723, + "step": 4379 + }, + { + "epoch": 5.18, + "learning_rate": 9.207880641755065e-05, + "loss": 0.1698, + "step": 4380 + }, + { + "epoch": 5.18, + "learning_rate": 9.204116610411893e-05, + "loss": 0.1717, + "step": 4381 + }, + { + "epoch": 5.19, + "learning_rate": 9.20035269254433e-05, + "loss": 0.1823, + "step": 4382 + }, + { + "epoch": 5.19, + "learning_rate": 9.19658888868903e-05, + "loss": 0.1758, + "step": 4383 + }, + { + "epoch": 5.19, + "learning_rate": 9.192825199382632e-05, + "loss": 0.1773, + "step": 4384 + }, + { + "epoch": 5.19, + "learning_rate": 9.189061625161751e-05, + "loss": 0.1777, + "step": 4385 + }, + { + "epoch": 5.19, + "learning_rate": 9.185298166562994e-05, + "loss": 0.1751, + "step": 4386 + }, + { + "epoch": 5.19, + "learning_rate": 9.181534824122947e-05, + "loss": 0.1813, + "step": 4387 + }, + { + "epoch": 5.19, + "learning_rate": 9.177771598378185e-05, + "loss": 0.1863, + "step": 4388 + }, + { + "epoch": 5.19, + "learning_rate": 9.174008489865253e-05, + "loss": 0.1933, + "step": 4389 + }, + { + "epoch": 5.19, + "eval_loss": 3.8528122901916504, + "eval_runtime": 284.0506, + "eval_samples_per_second": 0.725, + "eval_steps_per_second": 0.725, + "step": 4389 + }, + { + "epoch": 5.2, + "learning_rate": 9.170245499120693e-05, + "loss": 0.1716, + "step": 4390 + }, + { + "epoch": 5.2, + "learning_rate": 9.166482626681024e-05, + "loss": 0.1875, + "step": 4391 + }, + { + "epoch": 5.2, + "learning_rate": 9.162719873082757e-05, + "loss": 0.1881, + "step": 4392 + }, + { + "epoch": 5.2, + "learning_rate": 9.158957238862367e-05, + "loss": 0.1838, + "step": 4393 + }, + { + "epoch": 5.2, + "learning_rate": 9.155194724556331e-05, + "loss": 0.1724, + "step": 4394 + }, + { + "epoch": 5.2, + "learning_rate": 9.151432330701097e-05, + "loss": 0.1859, + "step": 4395 + }, + { + "epoch": 5.2, + "learning_rate": 9.147670057833107e-05, + "loss": 0.1739, + "step": 4396 + }, + { + "epoch": 5.2, + "learning_rate": 9.143907906488772e-05, + "loss": 0.1885, + "step": 4397 + }, + { + "epoch": 5.21, + "learning_rate": 9.140145877204496e-05, + "loss": 0.1828, + "step": 4398 + }, + { + "epoch": 5.21, + "learning_rate": 9.13638397051666e-05, + "loss": 0.1937, + "step": 4399 + }, + { + "epoch": 5.21, + "learning_rate": 9.132622186961637e-05, + "loss": 0.1731, + "step": 4400 + }, + { + "epoch": 5.21, + "learning_rate": 9.128860527075767e-05, + "loss": 0.1721, + "step": 4401 + }, + { + "epoch": 5.21, + "learning_rate": 9.125098991395378e-05, + "loss": 0.1724, + "step": 4402 + }, + { + "epoch": 5.21, + "learning_rate": 9.121337580456793e-05, + "loss": 0.18, + "step": 4403 + }, + { + "epoch": 5.21, + "learning_rate": 9.117576294796307e-05, + "loss": 0.1731, + "step": 4404 + }, + { + "epoch": 5.21, + "learning_rate": 9.11381513495019e-05, + "loss": 0.1728, + "step": 4405 + }, + { + "epoch": 5.21, + "learning_rate": 9.110054101454701e-05, + "loss": 0.1787, + "step": 4406 + }, + { + "epoch": 5.22, + "learning_rate": 9.106293194846087e-05, + "loss": 0.1748, + "step": 4407 + }, + { + "epoch": 5.22, + "learning_rate": 9.102532415660571e-05, + "loss": 0.177, + "step": 4408 + }, + { + "epoch": 5.22, + "learning_rate": 9.098771764434353e-05, + "loss": 0.1896, + "step": 4409 + }, + { + "epoch": 5.22, + "learning_rate": 9.095011241703623e-05, + "loss": 0.1702, + "step": 4410 + }, + { + "epoch": 5.22, + "learning_rate": 9.091250848004549e-05, + "loss": 0.18, + "step": 4411 + }, + { + "epoch": 5.22, + "learning_rate": 9.087490583873284e-05, + "loss": 0.1788, + "step": 4412 + }, + { + "epoch": 5.22, + "learning_rate": 9.083730449845954e-05, + "loss": 0.1782, + "step": 4413 + }, + { + "epoch": 5.22, + "learning_rate": 9.079970446458677e-05, + "loss": 0.1799, + "step": 4414 + }, + { + "epoch": 5.23, + "learning_rate": 9.076210574247543e-05, + "loss": 0.1827, + "step": 4415 + }, + { + "epoch": 5.23, + "learning_rate": 9.072450833748637e-05, + "loss": 0.1981, + "step": 4416 + }, + { + "epoch": 5.23, + "learning_rate": 9.068691225498004e-05, + "loss": 0.1687, + "step": 4417 + }, + { + "epoch": 5.23, + "learning_rate": 9.064931750031688e-05, + "loss": 0.1759, + "step": 4418 + }, + { + "epoch": 5.23, + "learning_rate": 9.061172407885707e-05, + "loss": 0.1934, + "step": 4419 + }, + { + "epoch": 5.23, + "learning_rate": 9.057413199596065e-05, + "loss": 0.1791, + "step": 4420 + }, + { + "epoch": 5.23, + "learning_rate": 9.053654125698738e-05, + "loss": 0.1718, + "step": 4421 + }, + { + "epoch": 5.23, + "learning_rate": 9.049895186729688e-05, + "loss": 0.178, + "step": 4422 + }, + { + "epoch": 5.24, + "learning_rate": 9.046136383224862e-05, + "loss": 0.1848, + "step": 4423 + }, + { + "epoch": 5.24, + "learning_rate": 9.042377715720182e-05, + "loss": 0.1726, + "step": 4424 + }, + { + "epoch": 5.24, + "learning_rate": 9.038619184751549e-05, + "loss": 0.1924, + "step": 4425 + }, + { + "epoch": 5.24, + "learning_rate": 9.034860790854849e-05, + "loss": 0.1786, + "step": 4426 + }, + { + "epoch": 5.24, + "learning_rate": 9.031102534565949e-05, + "loss": 0.1899, + "step": 4427 + }, + { + "epoch": 5.24, + "learning_rate": 9.027344416420695e-05, + "loss": 0.1987, + "step": 4428 + }, + { + "epoch": 5.24, + "learning_rate": 9.023586436954909e-05, + "loss": 0.2179, + "step": 4429 + }, + { + "epoch": 5.24, + "learning_rate": 9.019828596704394e-05, + "loss": 0.1737, + "step": 4430 + }, + { + "epoch": 5.24, + "learning_rate": 9.016070896204943e-05, + "loss": 0.1978, + "step": 4431 + }, + { + "epoch": 5.25, + "learning_rate": 9.01231333599232e-05, + "loss": 0.1794, + "step": 4432 + }, + { + "epoch": 5.25, + "learning_rate": 9.008555916602276e-05, + "loss": 0.1758, + "step": 4433 + }, + { + "epoch": 5.25, + "learning_rate": 9.004798638570527e-05, + "loss": 0.1792, + "step": 4434 + }, + { + "epoch": 5.25, + "learning_rate": 9.001041502432783e-05, + "loss": 0.1782, + "step": 4435 + }, + { + "epoch": 5.25, + "learning_rate": 8.99728450872473e-05, + "loss": 0.186, + "step": 4436 + }, + { + "epoch": 5.25, + "learning_rate": 8.993527657982036e-05, + "loss": 0.1911, + "step": 4437 + }, + { + "epoch": 5.25, + "learning_rate": 8.989770950740344e-05, + "loss": 0.1758, + "step": 4438 + }, + { + "epoch": 5.25, + "learning_rate": 8.986014387535275e-05, + "loss": 0.1783, + "step": 4439 + }, + { + "epoch": 5.26, + "learning_rate": 8.982257968902438e-05, + "loss": 0.1789, + "step": 4440 + }, + { + "epoch": 5.26, + "learning_rate": 8.978501695377415e-05, + "loss": 0.1784, + "step": 4441 + }, + { + "epoch": 5.26, + "learning_rate": 8.974745567495768e-05, + "loss": 0.184, + "step": 4442 + }, + { + "epoch": 5.26, + "learning_rate": 8.970989585793039e-05, + "loss": 0.1774, + "step": 4443 + }, + { + "epoch": 5.26, + "learning_rate": 8.967233750804747e-05, + "loss": 0.1653, + "step": 4444 + }, + { + "epoch": 5.26, + "learning_rate": 8.963478063066402e-05, + "loss": 0.1782, + "step": 4445 + }, + { + "epoch": 5.26, + "learning_rate": 8.959722523113469e-05, + "loss": 0.1687, + "step": 4446 + }, + { + "epoch": 5.26, + "learning_rate": 8.955967131481412e-05, + "loss": 0.1822, + "step": 4447 + }, + { + "epoch": 5.27, + "learning_rate": 8.952211888705668e-05, + "loss": 0.1862, + "step": 4448 + }, + { + "epoch": 5.27, + "learning_rate": 8.948456795321657e-05, + "loss": 0.1959, + "step": 4449 + }, + { + "epoch": 5.27, + "learning_rate": 8.944701851864767e-05, + "loss": 0.1898, + "step": 4450 + }, + { + "epoch": 5.27, + "learning_rate": 8.940947058870373e-05, + "loss": 0.1803, + "step": 4451 + }, + { + "epoch": 5.27, + "learning_rate": 8.937192416873828e-05, + "loss": 0.1812, + "step": 4452 + }, + { + "epoch": 5.27, + "learning_rate": 8.933437926410463e-05, + "loss": 0.1868, + "step": 4453 + }, + { + "epoch": 5.27, + "learning_rate": 8.929683588015582e-05, + "loss": 0.1829, + "step": 4454 + }, + { + "epoch": 5.27, + "learning_rate": 8.925929402224475e-05, + "loss": 0.1803, + "step": 4455 + }, + { + "epoch": 5.27, + "learning_rate": 8.922175369572407e-05, + "loss": 0.1763, + "step": 4456 + }, + { + "epoch": 5.28, + "learning_rate": 8.918421490594623e-05, + "loss": 0.1831, + "step": 4457 + }, + { + "epoch": 5.28, + "learning_rate": 8.914667765826338e-05, + "loss": 0.1721, + "step": 4458 + }, + { + "epoch": 5.28, + "learning_rate": 8.910914195802754e-05, + "loss": 0.1716, + "step": 4459 + }, + { + "epoch": 5.28, + "learning_rate": 8.907160781059052e-05, + "loss": 0.1785, + "step": 4460 + }, + { + "epoch": 5.28, + "learning_rate": 8.903407522130386e-05, + "loss": 0.1745, + "step": 4461 + }, + { + "epoch": 5.28, + "learning_rate": 8.899654419551886e-05, + "loss": 0.1902, + "step": 4462 + }, + { + "epoch": 5.28, + "learning_rate": 8.895901473858663e-05, + "loss": 0.176, + "step": 4463 + }, + { + "epoch": 5.28, + "learning_rate": 8.892148685585805e-05, + "loss": 0.18, + "step": 4464 + }, + { + "epoch": 5.29, + "learning_rate": 8.88839605526838e-05, + "loss": 0.1802, + "step": 4465 + }, + { + "epoch": 5.29, + "learning_rate": 8.88464358344143e-05, + "loss": 0.1953, + "step": 4466 + }, + { + "epoch": 5.29, + "learning_rate": 8.880891270639975e-05, + "loss": 0.1848, + "step": 4467 + }, + { + "epoch": 5.29, + "learning_rate": 8.877139117399014e-05, + "loss": 0.2009, + "step": 4468 + }, + { + "epoch": 5.29, + "learning_rate": 8.873387124253525e-05, + "loss": 0.1784, + "step": 4469 + }, + { + "epoch": 5.29, + "learning_rate": 8.869635291738452e-05, + "loss": 0.1758, + "step": 4470 + }, + { + "epoch": 5.29, + "learning_rate": 8.86588362038873e-05, + "loss": 0.1724, + "step": 4471 + }, + { + "epoch": 5.29, + "learning_rate": 8.862132110739266e-05, + "loss": 0.1668, + "step": 4472 + }, + { + "epoch": 5.3, + "learning_rate": 8.858380763324948e-05, + "loss": 0.1696, + "step": 4473 + }, + { + "epoch": 5.3, + "learning_rate": 8.854629578680624e-05, + "loss": 0.1691, + "step": 4474 + }, + { + "epoch": 5.3, + "learning_rate": 8.85087855734114e-05, + "loss": 0.1742, + "step": 4475 + }, + { + "epoch": 5.3, + "learning_rate": 8.847127699841307e-05, + "loss": 0.1727, + "step": 4476 + }, + { + "epoch": 5.3, + "learning_rate": 8.84337700671592e-05, + "loss": 0.1749, + "step": 4477 + }, + { + "epoch": 5.3, + "learning_rate": 8.839626478499738e-05, + "loss": 0.1822, + "step": 4478 + }, + { + "epoch": 5.3, + "learning_rate": 8.835876115727509e-05, + "loss": 0.1784, + "step": 4479 + }, + { + "epoch": 5.3, + "learning_rate": 8.832125918933954e-05, + "loss": 0.1907, + "step": 4480 + }, + { + "epoch": 5.3, + "learning_rate": 8.82837588865377e-05, + "loss": 0.1825, + "step": 4481 + }, + { + "epoch": 5.31, + "learning_rate": 8.824626025421626e-05, + "loss": 0.1856, + "step": 4482 + }, + { + "epoch": 5.31, + "learning_rate": 8.82087632977217e-05, + "loss": 0.1809, + "step": 4483 + }, + { + "epoch": 5.31, + "learning_rate": 8.81712680224003e-05, + "loss": 0.1737, + "step": 4484 + }, + { + "epoch": 5.31, + "learning_rate": 8.81337744335981e-05, + "loss": 0.186, + "step": 4485 + }, + { + "epoch": 5.31, + "learning_rate": 8.809628253666079e-05, + "loss": 0.1741, + "step": 4486 + }, + { + "epoch": 5.31, + "learning_rate": 8.805879233693393e-05, + "loss": 0.1857, + "step": 4487 + }, + { + "epoch": 5.31, + "learning_rate": 8.802130383976279e-05, + "loss": 0.1741, + "step": 4488 + }, + { + "epoch": 5.31, + "learning_rate": 8.798381705049248e-05, + "loss": 0.1881, + "step": 4489 + }, + { + "epoch": 5.32, + "learning_rate": 8.79463319744677e-05, + "loss": 0.1785, + "step": 4490 + }, + { + "epoch": 5.32, + "learning_rate": 8.790884861703308e-05, + "loss": 0.1839, + "step": 4491 + }, + { + "epoch": 5.32, + "learning_rate": 8.787136698353289e-05, + "loss": 0.1728, + "step": 4492 + }, + { + "epoch": 5.32, + "learning_rate": 8.783388707931122e-05, + "loss": 0.1799, + "step": 4493 + }, + { + "epoch": 5.32, + "learning_rate": 8.779640890971186e-05, + "loss": 0.1759, + "step": 4494 + }, + { + "epoch": 5.32, + "learning_rate": 8.775893248007839e-05, + "loss": 0.1855, + "step": 4495 + }, + { + "epoch": 5.32, + "learning_rate": 8.772145779575413e-05, + "loss": 0.1856, + "step": 4496 + }, + { + "epoch": 5.32, + "learning_rate": 8.768398486208215e-05, + "loss": 0.1792, + "step": 4497 + }, + { + "epoch": 5.33, + "learning_rate": 8.764651368440531e-05, + "loss": 0.1816, + "step": 4498 + }, + { + "epoch": 5.33, + "learning_rate": 8.760904426806612e-05, + "loss": 0.183, + "step": 4499 + }, + { + "epoch": 5.33, + "learning_rate": 8.757157661840693e-05, + "loss": 0.1811, + "step": 4500 + }, + { + "epoch": 5.33, + "learning_rate": 8.753411074076982e-05, + "loss": 0.1745, + "step": 4501 + }, + { + "epoch": 5.33, + "learning_rate": 8.749664664049663e-05, + "loss": 0.1768, + "step": 4502 + }, + { + "epoch": 5.33, + "learning_rate": 8.745918432292887e-05, + "loss": 0.1762, + "step": 4503 + }, + { + "epoch": 5.33, + "learning_rate": 8.742172379340785e-05, + "loss": 0.1864, + "step": 4504 + }, + { + "epoch": 5.33, + "learning_rate": 8.738426505727466e-05, + "loss": 0.1759, + "step": 4505 + }, + { + "epoch": 5.33, + "learning_rate": 8.73468081198701e-05, + "loss": 0.2045, + "step": 4506 + }, + { + "epoch": 5.34, + "learning_rate": 8.730935298653467e-05, + "loss": 0.1714, + "step": 4507 + }, + { + "epoch": 5.34, + "learning_rate": 8.727189966260869e-05, + "loss": 0.1943, + "step": 4508 + }, + { + "epoch": 5.34, + "learning_rate": 8.723444815343217e-05, + "loss": 0.1724, + "step": 4509 + }, + { + "epoch": 5.34, + "learning_rate": 8.719699846434492e-05, + "loss": 0.1679, + "step": 4510 + }, + { + "epoch": 5.34, + "learning_rate": 8.715955060068638e-05, + "loss": 0.1962, + "step": 4511 + }, + { + "epoch": 5.34, + "learning_rate": 8.712210456779584e-05, + "loss": 0.1766, + "step": 4512 + }, + { + "epoch": 5.34, + "learning_rate": 8.708466037101229e-05, + "loss": 0.1864, + "step": 4513 + }, + { + "epoch": 5.34, + "learning_rate": 8.704721801567448e-05, + "loss": 0.1746, + "step": 4514 + }, + { + "epoch": 5.35, + "learning_rate": 8.70097775071208e-05, + "loss": 0.1707, + "step": 4515 + }, + { + "epoch": 5.35, + "learning_rate": 8.69723388506895e-05, + "loss": 0.1717, + "step": 4516 + }, + { + "epoch": 5.35, + "learning_rate": 8.693490205171846e-05, + "loss": 0.183, + "step": 4517 + }, + { + "epoch": 5.35, + "learning_rate": 8.689746711554548e-05, + "loss": 0.1812, + "step": 4518 + }, + { + "epoch": 5.35, + "learning_rate": 8.686003404750785e-05, + "loss": 0.1894, + "step": 4519 + }, + { + "epoch": 5.35, + "learning_rate": 8.682260285294271e-05, + "loss": 0.1726, + "step": 4520 + }, + { + "epoch": 5.35, + "learning_rate": 8.678517353718698e-05, + "loss": 0.1823, + "step": 4521 + }, + { + "epoch": 5.35, + "learning_rate": 8.674774610557728e-05, + "loss": 0.1754, + "step": 4522 + }, + { + "epoch": 5.36, + "learning_rate": 8.671032056344988e-05, + "loss": 0.185, + "step": 4523 + }, + { + "epoch": 5.36, + "learning_rate": 8.667289691614087e-05, + "loss": 0.1828, + "step": 4524 + }, + { + "epoch": 5.36, + "learning_rate": 8.663547516898607e-05, + "loss": 0.2032, + "step": 4525 + }, + { + "epoch": 5.36, + "learning_rate": 8.659805532732103e-05, + "loss": 0.1707, + "step": 4526 + }, + { + "epoch": 5.36, + "learning_rate": 8.656063739648088e-05, + "loss": 0.1816, + "step": 4527 + }, + { + "epoch": 5.36, + "learning_rate": 8.652322138180072e-05, + "loss": 0.1761, + "step": 4528 + }, + { + "epoch": 5.36, + "learning_rate": 8.648580728861521e-05, + "loss": 0.1816, + "step": 4529 + }, + { + "epoch": 5.36, + "learning_rate": 8.644839512225886e-05, + "loss": 0.1782, + "step": 4530 + }, + { + "epoch": 5.36, + "learning_rate": 8.64109848880657e-05, + "loss": 0.1844, + "step": 4531 + }, + { + "epoch": 5.37, + "learning_rate": 8.637357659136967e-05, + "loss": 0.1915, + "step": 4532 + }, + { + "epoch": 5.37, + "learning_rate": 8.63361702375044e-05, + "loss": 0.1759, + "step": 4533 + }, + { + "epoch": 5.37, + "learning_rate": 8.629876583180321e-05, + "loss": 0.1769, + "step": 4534 + }, + { + "epoch": 5.37, + "learning_rate": 8.626136337959914e-05, + "loss": 0.1888, + "step": 4535 + }, + { + "epoch": 5.37, + "learning_rate": 8.622396288622497e-05, + "loss": 0.1784, + "step": 4536 + }, + { + "epoch": 5.37, + "learning_rate": 8.618656435701318e-05, + "loss": 0.1936, + "step": 4537 + }, + { + "epoch": 5.37, + "learning_rate": 8.614916779729603e-05, + "loss": 0.1701, + "step": 4538 + }, + { + "epoch": 5.37, + "learning_rate": 8.611177321240539e-05, + "loss": 0.1861, + "step": 4539 + }, + { + "epoch": 5.38, + "learning_rate": 8.607438060767296e-05, + "loss": 0.1742, + "step": 4540 + }, + { + "epoch": 5.38, + "learning_rate": 8.603698998843009e-05, + "loss": 0.1939, + "step": 4541 + }, + { + "epoch": 5.38, + "learning_rate": 8.59996013600079e-05, + "loss": 0.1835, + "step": 4542 + }, + { + "epoch": 5.38, + "learning_rate": 8.596221472773714e-05, + "loss": 0.1904, + "step": 4543 + }, + { + "epoch": 5.38, + "learning_rate": 8.592483009694834e-05, + "loss": 0.1766, + "step": 4544 + }, + { + "epoch": 5.38, + "learning_rate": 8.588744747297173e-05, + "loss": 0.1765, + "step": 4545 + }, + { + "epoch": 5.38, + "learning_rate": 8.585006686113733e-05, + "loss": 0.1674, + "step": 4546 + }, + { + "epoch": 5.38, + "learning_rate": 8.58126882667747e-05, + "loss": 0.1759, + "step": 4547 + }, + { + "epoch": 5.39, + "learning_rate": 8.577531169521324e-05, + "loss": 0.1673, + "step": 4548 + }, + { + "epoch": 5.39, + "learning_rate": 8.573793715178206e-05, + "loss": 0.1788, + "step": 4549 + }, + { + "epoch": 5.39, + "learning_rate": 8.570056464180998e-05, + "loss": 0.1751, + "step": 4550 + }, + { + "epoch": 5.39, + "learning_rate": 8.566319417062543e-05, + "loss": 0.1804, + "step": 4551 + }, + { + "epoch": 5.39, + "learning_rate": 8.562582574355666e-05, + "loss": 0.1896, + "step": 4552 + }, + { + "epoch": 5.39, + "learning_rate": 8.55884593659316e-05, + "loss": 0.1899, + "step": 4553 + }, + { + "epoch": 5.39, + "learning_rate": 8.55510950430779e-05, + "loss": 0.1898, + "step": 4554 + }, + { + "epoch": 5.39, + "learning_rate": 8.551373278032284e-05, + "loss": 0.2023, + "step": 4555 + }, + { + "epoch": 5.39, + "learning_rate": 8.547637258299348e-05, + "loss": 0.186, + "step": 4556 + }, + { + "epoch": 5.4, + "learning_rate": 8.54390144564166e-05, + "loss": 0.1719, + "step": 4557 + }, + { + "epoch": 5.4, + "learning_rate": 8.540165840591867e-05, + "loss": 0.1735, + "step": 4558 + }, + { + "epoch": 5.4, + "learning_rate": 8.53643044368258e-05, + "loss": 0.1762, + "step": 4559 + }, + { + "epoch": 5.4, + "learning_rate": 8.532695255446383e-05, + "loss": 0.167, + "step": 4560 + }, + { + "epoch": 5.4, + "learning_rate": 8.52896027641584e-05, + "loss": 0.1851, + "step": 4561 + }, + { + "epoch": 5.4, + "learning_rate": 8.525225507123471e-05, + "loss": 0.1975, + "step": 4562 + }, + { + "epoch": 5.4, + "learning_rate": 8.521490948101777e-05, + "loss": 0.1665, + "step": 4563 + }, + { + "epoch": 5.4, + "learning_rate": 8.517756599883224e-05, + "loss": 0.1691, + "step": 4564 + }, + { + "epoch": 5.41, + "learning_rate": 8.514022463000244e-05, + "loss": 0.2015, + "step": 4565 + }, + { + "epoch": 5.41, + "learning_rate": 8.51028853798525e-05, + "loss": 0.1806, + "step": 4566 + }, + { + "epoch": 5.41, + "learning_rate": 8.506554825370615e-05, + "loss": 0.1743, + "step": 4567 + }, + { + "epoch": 5.41, + "learning_rate": 8.502821325688684e-05, + "loss": 0.1819, + "step": 4568 + }, + { + "epoch": 5.41, + "learning_rate": 8.499088039471774e-05, + "loss": 0.1787, + "step": 4569 + }, + { + "epoch": 5.41, + "learning_rate": 8.495354967252169e-05, + "loss": 0.1879, + "step": 4570 + }, + { + "epoch": 5.41, + "learning_rate": 8.49162210956213e-05, + "loss": 0.177, + "step": 4571 + }, + { + "epoch": 5.41, + "learning_rate": 8.48788946693387e-05, + "loss": 0.1779, + "step": 4572 + }, + { + "epoch": 5.42, + "learning_rate": 8.48415703989959e-05, + "loss": 0.1748, + "step": 4573 + }, + { + "epoch": 5.42, + "learning_rate": 8.480424828991448e-05, + "loss": 0.1807, + "step": 4574 + }, + { + "epoch": 5.42, + "learning_rate": 8.476692834741585e-05, + "loss": 0.1697, + "step": 4575 + }, + { + "epoch": 5.42, + "learning_rate": 8.472961057682092e-05, + "loss": 0.1974, + "step": 4576 + }, + { + "epoch": 5.42, + "learning_rate": 8.469229498345042e-05, + "loss": 0.1773, + "step": 4577 + }, + { + "epoch": 5.42, + "learning_rate": 8.465498157262474e-05, + "loss": 0.1737, + "step": 4578 + }, + { + "epoch": 5.42, + "learning_rate": 8.4617670349664e-05, + "loss": 0.1799, + "step": 4579 + }, + { + "epoch": 5.42, + "learning_rate": 8.458036131988792e-05, + "loss": 0.1753, + "step": 4580 + }, + { + "epoch": 5.42, + "learning_rate": 8.454305448861595e-05, + "loss": 0.1718, + "step": 4581 + }, + { + "epoch": 5.43, + "learning_rate": 8.450574986116724e-05, + "loss": 0.1736, + "step": 4582 + }, + { + "epoch": 5.43, + "learning_rate": 8.446844744286068e-05, + "loss": 0.1685, + "step": 4583 + }, + { + "epoch": 5.43, + "learning_rate": 8.443114723901466e-05, + "loss": 0.2072, + "step": 4584 + }, + { + "epoch": 5.43, + "learning_rate": 8.439384925494743e-05, + "loss": 0.5173, + "step": 4585 + }, + { + "epoch": 5.43, + "learning_rate": 8.435655349597689e-05, + "loss": 0.1836, + "step": 4586 + }, + { + "epoch": 5.43, + "learning_rate": 8.431925996742065e-05, + "loss": 0.2294, + "step": 4587 + }, + { + "epoch": 5.43, + "learning_rate": 8.428196867459585e-05, + "loss": 0.1875, + "step": 4588 + }, + { + "epoch": 5.43, + "learning_rate": 8.424467962281945e-05, + "loss": 0.1836, + "step": 4589 + }, + { + "epoch": 5.44, + "learning_rate": 8.420739281740805e-05, + "loss": 0.1751, + "step": 4590 + }, + { + "epoch": 5.44, + "learning_rate": 8.417010826367799e-05, + "loss": 0.1763, + "step": 4591 + }, + { + "epoch": 5.44, + "learning_rate": 8.413282596694516e-05, + "loss": 0.179, + "step": 4592 + }, + { + "epoch": 5.44, + "learning_rate": 8.409554593252523e-05, + "loss": 0.1869, + "step": 4593 + }, + { + "epoch": 5.44, + "learning_rate": 8.405826816573353e-05, + "loss": 0.2023, + "step": 4594 + }, + { + "epoch": 5.44, + "learning_rate": 8.402099267188508e-05, + "loss": 0.1727, + "step": 4595 + }, + { + "epoch": 5.44, + "learning_rate": 8.398371945629448e-05, + "loss": 0.1708, + "step": 4596 + }, + { + "epoch": 5.44, + "learning_rate": 8.394644852427615e-05, + "loss": 0.1766, + "step": 4597 + }, + { + "epoch": 5.45, + "learning_rate": 8.390917988114406e-05, + "loss": 0.1744, + "step": 4598 + }, + { + "epoch": 5.45, + "eval_loss": 3.7694623470306396, + "eval_runtime": 283.9246, + "eval_samples_per_second": 0.726, + "eval_steps_per_second": 0.726, + "step": 4598 + }, + { + "epoch": 5.45, + "learning_rate": 8.387191353221198e-05, + "loss": 0.186, + "step": 4599 + }, + { + "epoch": 5.45, + "learning_rate": 8.383464948279319e-05, + "loss": 0.2095, + "step": 4600 + }, + { + "epoch": 5.45, + "learning_rate": 8.379738773820076e-05, + "loss": 0.1839, + "step": 4601 + }, + { + "epoch": 5.45, + "learning_rate": 8.37601283037474e-05, + "loss": 0.1666, + "step": 4602 + }, + { + "epoch": 5.45, + "learning_rate": 8.372287118474553e-05, + "loss": 0.1732, + "step": 4603 + }, + { + "epoch": 5.45, + "learning_rate": 8.368561638650717e-05, + "loss": 0.176, + "step": 4604 + }, + { + "epoch": 5.45, + "learning_rate": 8.364836391434402e-05, + "loss": 0.1774, + "step": 4605 + }, + { + "epoch": 5.45, + "learning_rate": 8.361111377356751e-05, + "loss": 0.1795, + "step": 4606 + }, + { + "epoch": 5.46, + "learning_rate": 8.35738659694887e-05, + "loss": 0.1814, + "step": 4607 + }, + { + "epoch": 5.46, + "learning_rate": 8.353662050741827e-05, + "loss": 0.176, + "step": 4608 + }, + { + "epoch": 5.46, + "learning_rate": 8.349937739266665e-05, + "loss": 0.1716, + "step": 4609 + }, + { + "epoch": 5.46, + "learning_rate": 8.346213663054387e-05, + "loss": 0.1784, + "step": 4610 + }, + { + "epoch": 5.46, + "learning_rate": 8.342489822635971e-05, + "loss": 0.1961, + "step": 4611 + }, + { + "epoch": 5.46, + "learning_rate": 8.338766218542347e-05, + "loss": 0.1948, + "step": 4612 + }, + { + "epoch": 5.46, + "learning_rate": 8.335042851304421e-05, + "loss": 0.1819, + "step": 4613 + }, + { + "epoch": 5.46, + "learning_rate": 8.33131972145307e-05, + "loss": 0.172, + "step": 4614 + }, + { + "epoch": 5.47, + "learning_rate": 8.327596829519132e-05, + "loss": 0.1786, + "step": 4615 + }, + { + "epoch": 5.47, + "learning_rate": 8.3238741760334e-05, + "loss": 0.1888, + "step": 4616 + }, + { + "epoch": 5.47, + "learning_rate": 8.32015176152665e-05, + "loss": 0.1794, + "step": 4617 + }, + { + "epoch": 5.47, + "learning_rate": 8.316429586529615e-05, + "loss": 0.1766, + "step": 4618 + }, + { + "epoch": 5.47, + "learning_rate": 8.312707651573e-05, + "loss": 0.1726, + "step": 4619 + }, + { + "epoch": 5.47, + "learning_rate": 8.308985957187466e-05, + "loss": 0.1708, + "step": 4620 + }, + { + "epoch": 5.47, + "learning_rate": 8.30526450390365e-05, + "loss": 0.204, + "step": 4621 + }, + { + "epoch": 5.47, + "learning_rate": 8.301543292252146e-05, + "loss": 0.18, + "step": 4622 + }, + { + "epoch": 5.48, + "learning_rate": 8.297822322763526e-05, + "loss": 0.1852, + "step": 4623 + }, + { + "epoch": 5.48, + "learning_rate": 8.294101595968304e-05, + "loss": 0.1956, + "step": 4624 + }, + { + "epoch": 5.48, + "learning_rate": 8.290381112396987e-05, + "loss": 0.1782, + "step": 4625 + }, + { + "epoch": 5.48, + "learning_rate": 8.286660872580032e-05, + "loss": 0.1778, + "step": 4626 + }, + { + "epoch": 5.48, + "learning_rate": 8.282940877047864e-05, + "loss": 0.1855, + "step": 4627 + }, + { + "epoch": 5.48, + "learning_rate": 8.279221126330874e-05, + "loss": 0.176, + "step": 4628 + }, + { + "epoch": 5.48, + "learning_rate": 8.275501620959414e-05, + "loss": 0.1777, + "step": 4629 + }, + { + "epoch": 5.48, + "learning_rate": 8.271782361463805e-05, + "loss": 0.1753, + "step": 4630 + }, + { + "epoch": 5.48, + "learning_rate": 8.268063348374334e-05, + "loss": 0.1918, + "step": 4631 + }, + { + "epoch": 5.49, + "learning_rate": 8.264344582221252e-05, + "loss": 0.2044, + "step": 4632 + }, + { + "epoch": 5.49, + "learning_rate": 8.260626063534772e-05, + "loss": 0.1755, + "step": 4633 + }, + { + "epoch": 5.49, + "learning_rate": 8.256907792845072e-05, + "loss": 0.1666, + "step": 4634 + }, + { + "epoch": 5.49, + "learning_rate": 8.253189770682301e-05, + "loss": 0.1709, + "step": 4635 + }, + { + "epoch": 5.49, + "learning_rate": 8.249471997576565e-05, + "loss": 0.1813, + "step": 4636 + }, + { + "epoch": 5.49, + "learning_rate": 8.245754474057937e-05, + "loss": 0.1806, + "step": 4637 + }, + { + "epoch": 5.49, + "learning_rate": 8.242037200656455e-05, + "loss": 0.1723, + "step": 4638 + }, + { + "epoch": 5.49, + "learning_rate": 8.238320177902121e-05, + "loss": 0.1792, + "step": 4639 + }, + { + "epoch": 5.5, + "learning_rate": 8.234603406324908e-05, + "loss": 0.1744, + "step": 4640 + }, + { + "epoch": 5.5, + "learning_rate": 8.230886886454734e-05, + "loss": 0.1718, + "step": 4641 + }, + { + "epoch": 5.5, + "learning_rate": 8.227170618821499e-05, + "loss": 0.1783, + "step": 4642 + }, + { + "epoch": 5.5, + "learning_rate": 8.223454603955065e-05, + "loss": 0.1656, + "step": 4643 + }, + { + "epoch": 5.5, + "learning_rate": 8.219738842385256e-05, + "loss": 0.1761, + "step": 4644 + }, + { + "epoch": 5.5, + "learning_rate": 8.216023334641851e-05, + "loss": 0.1736, + "step": 4645 + }, + { + "epoch": 5.5, + "learning_rate": 8.212308081254605e-05, + "loss": 0.1811, + "step": 4646 + }, + { + "epoch": 5.5, + "learning_rate": 8.208593082753232e-05, + "loss": 0.1806, + "step": 4647 + }, + { + "epoch": 5.51, + "learning_rate": 8.20487833966741e-05, + "loss": 0.1751, + "step": 4648 + }, + { + "epoch": 5.51, + "learning_rate": 8.201163852526779e-05, + "loss": 0.1731, + "step": 4649 + }, + { + "epoch": 5.51, + "learning_rate": 8.197449621860943e-05, + "loss": 0.1744, + "step": 4650 + }, + { + "epoch": 5.51, + "learning_rate": 8.193735648199472e-05, + "loss": 0.1788, + "step": 4651 + }, + { + "epoch": 5.51, + "learning_rate": 8.1900219320719e-05, + "loss": 0.1692, + "step": 4652 + }, + { + "epoch": 5.51, + "learning_rate": 8.186308474007716e-05, + "loss": 0.1795, + "step": 4653 + }, + { + "epoch": 5.51, + "learning_rate": 8.182595274536383e-05, + "loss": 0.1855, + "step": 4654 + }, + { + "epoch": 5.51, + "learning_rate": 8.178882334187319e-05, + "loss": 0.177, + "step": 4655 + }, + { + "epoch": 5.52, + "learning_rate": 8.175169653489915e-05, + "loss": 0.1849, + "step": 4656 + }, + { + "epoch": 5.52, + "learning_rate": 8.171457232973509e-05, + "loss": 0.1645, + "step": 4657 + }, + { + "epoch": 5.52, + "learning_rate": 8.167745073167415e-05, + "loss": 0.1802, + "step": 4658 + }, + { + "epoch": 5.52, + "learning_rate": 8.164033174600905e-05, + "loss": 0.2054, + "step": 4659 + }, + { + "epoch": 5.52, + "learning_rate": 8.16032153780322e-05, + "loss": 0.1804, + "step": 4660 + }, + { + "epoch": 5.52, + "learning_rate": 8.156610163303554e-05, + "loss": 0.1704, + "step": 4661 + }, + { + "epoch": 5.52, + "learning_rate": 8.152899051631065e-05, + "loss": 0.1784, + "step": 4662 + }, + { + "epoch": 5.52, + "learning_rate": 8.149188203314883e-05, + "loss": 0.1733, + "step": 4663 + }, + { + "epoch": 5.52, + "learning_rate": 8.145477618884093e-05, + "loss": 0.1738, + "step": 4664 + }, + { + "epoch": 5.53, + "learning_rate": 8.141767298867738e-05, + "loss": 0.183, + "step": 4665 + }, + { + "epoch": 5.53, + "learning_rate": 8.138057243794833e-05, + "loss": 0.1715, + "step": 4666 + }, + { + "epoch": 5.53, + "learning_rate": 8.134347454194351e-05, + "loss": 0.1915, + "step": 4667 + }, + { + "epoch": 5.53, + "learning_rate": 8.130637930595231e-05, + "loss": 0.1755, + "step": 4668 + }, + { + "epoch": 5.53, + "learning_rate": 8.12692867352636e-05, + "loss": 0.1781, + "step": 4669 + }, + { + "epoch": 5.53, + "learning_rate": 8.123219683516603e-05, + "loss": 0.1676, + "step": 4670 + }, + { + "epoch": 5.53, + "learning_rate": 8.119510961094778e-05, + "loss": 0.1843, + "step": 4671 + }, + { + "epoch": 5.53, + "learning_rate": 8.115802506789679e-05, + "loss": 0.1848, + "step": 4672 + }, + { + "epoch": 5.54, + "learning_rate": 8.112094321130035e-05, + "loss": 0.1824, + "step": 4673 + }, + { + "epoch": 5.54, + "learning_rate": 8.108386404644561e-05, + "loss": 0.1844, + "step": 4674 + }, + { + "epoch": 5.54, + "learning_rate": 8.104678757861925e-05, + "loss": 0.1742, + "step": 4675 + }, + { + "epoch": 5.54, + "learning_rate": 8.100971381310756e-05, + "loss": 0.1749, + "step": 4676 + }, + { + "epoch": 5.54, + "learning_rate": 8.097264275519642e-05, + "loss": 0.1754, + "step": 4677 + }, + { + "epoch": 5.54, + "learning_rate": 8.093557441017139e-05, + "loss": 0.1897, + "step": 4678 + }, + { + "epoch": 5.54, + "learning_rate": 8.089850878331758e-05, + "loss": 0.2023, + "step": 4679 + }, + { + "epoch": 5.54, + "learning_rate": 8.08614458799198e-05, + "loss": 0.1795, + "step": 4680 + }, + { + "epoch": 5.55, + "learning_rate": 8.082438570526232e-05, + "loss": 0.1939, + "step": 4681 + }, + { + "epoch": 5.55, + "learning_rate": 8.078732826462915e-05, + "loss": 0.1919, + "step": 4682 + }, + { + "epoch": 5.55, + "learning_rate": 8.075027356330391e-05, + "loss": 0.1789, + "step": 4683 + }, + { + "epoch": 5.55, + "learning_rate": 8.07132216065698e-05, + "loss": 0.1798, + "step": 4684 + }, + { + "epoch": 5.55, + "learning_rate": 8.067617239970952e-05, + "loss": 0.1861, + "step": 4685 + }, + { + "epoch": 5.55, + "learning_rate": 8.063912594800556e-05, + "loss": 0.1874, + "step": 4686 + }, + { + "epoch": 5.55, + "learning_rate": 8.060208225673992e-05, + "loss": 0.2252, + "step": 4687 + }, + { + "epoch": 5.55, + "learning_rate": 8.056504133119424e-05, + "loss": 0.2101, + "step": 4688 + }, + { + "epoch": 5.55, + "learning_rate": 8.052800317664971e-05, + "loss": 0.1923, + "step": 4689 + }, + { + "epoch": 5.56, + "learning_rate": 8.049096779838719e-05, + "loss": 0.1833, + "step": 4690 + }, + { + "epoch": 5.56, + "learning_rate": 8.04539352016871e-05, + "loss": 0.1755, + "step": 4691 + }, + { + "epoch": 5.56, + "learning_rate": 8.041690539182947e-05, + "loss": 0.1772, + "step": 4692 + }, + { + "epoch": 5.56, + "learning_rate": 8.037987837409402e-05, + "loss": 0.1745, + "step": 4693 + }, + { + "epoch": 5.56, + "learning_rate": 8.03428541537599e-05, + "loss": 0.1801, + "step": 4694 + }, + { + "epoch": 5.56, + "learning_rate": 8.0305832736106e-05, + "loss": 0.1726, + "step": 4695 + }, + { + "epoch": 5.56, + "learning_rate": 8.026881412641073e-05, + "loss": 0.179, + "step": 4696 + }, + { + "epoch": 5.56, + "learning_rate": 8.023179832995225e-05, + "loss": 0.1757, + "step": 4697 + }, + { + "epoch": 5.57, + "learning_rate": 8.019478535200806e-05, + "loss": 0.1813, + "step": 4698 + }, + { + "epoch": 5.57, + "learning_rate": 8.015777519785546e-05, + "loss": 0.1899, + "step": 4699 + }, + { + "epoch": 5.57, + "learning_rate": 8.01207678727713e-05, + "loss": 0.1797, + "step": 4700 + }, + { + "epoch": 5.57, + "learning_rate": 8.008376338203205e-05, + "loss": 0.1836, + "step": 4701 + }, + { + "epoch": 5.57, + "learning_rate": 8.004676173091368e-05, + "loss": 0.179, + "step": 4702 + }, + { + "epoch": 5.57, + "learning_rate": 8.000976292469183e-05, + "loss": 0.1698, + "step": 4703 + }, + { + "epoch": 5.57, + "learning_rate": 7.997276696864175e-05, + "loss": 0.1911, + "step": 4704 + }, + { + "epoch": 5.57, + "learning_rate": 7.993577386803827e-05, + "loss": 0.1713, + "step": 4705 + }, + { + "epoch": 5.58, + "learning_rate": 7.989878362815573e-05, + "loss": 0.1673, + "step": 4706 + }, + { + "epoch": 5.58, + "learning_rate": 7.98617962542682e-05, + "loss": 0.191, + "step": 4707 + }, + { + "epoch": 5.58, + "learning_rate": 7.982481175164923e-05, + "loss": 0.1736, + "step": 4708 + }, + { + "epoch": 5.58, + "learning_rate": 7.978783012557207e-05, + "loss": 0.1799, + "step": 4709 + }, + { + "epoch": 5.58, + "learning_rate": 7.975085138130938e-05, + "loss": 0.1757, + "step": 4710 + }, + { + "epoch": 5.58, + "learning_rate": 7.971387552413361e-05, + "loss": 0.1794, + "step": 4711 + }, + { + "epoch": 5.58, + "learning_rate": 7.967690255931667e-05, + "loss": 0.1823, + "step": 4712 + }, + { + "epoch": 5.58, + "learning_rate": 7.963993249213017e-05, + "loss": 0.1753, + "step": 4713 + }, + { + "epoch": 5.58, + "learning_rate": 7.960296532784515e-05, + "loss": 0.1684, + "step": 4714 + }, + { + "epoch": 5.59, + "learning_rate": 7.956600107173233e-05, + "loss": 0.1693, + "step": 4715 + }, + { + "epoch": 5.59, + "learning_rate": 7.952903972906204e-05, + "loss": 0.1835, + "step": 4716 + }, + { + "epoch": 5.59, + "learning_rate": 7.949208130510417e-05, + "loss": 0.1808, + "step": 4717 + }, + { + "epoch": 5.59, + "learning_rate": 7.945512580512813e-05, + "loss": 0.183, + "step": 4718 + }, + { + "epoch": 5.59, + "learning_rate": 7.941817323440302e-05, + "loss": 0.1765, + "step": 4719 + }, + { + "epoch": 5.59, + "learning_rate": 7.938122359819746e-05, + "loss": 0.1776, + "step": 4720 + }, + { + "epoch": 5.59, + "learning_rate": 7.934427690177965e-05, + "loss": 0.1898, + "step": 4721 + }, + { + "epoch": 5.59, + "learning_rate": 7.930733315041739e-05, + "loss": 0.187, + "step": 4722 + }, + { + "epoch": 5.6, + "learning_rate": 7.927039234937804e-05, + "loss": 0.1785, + "step": 4723 + }, + { + "epoch": 5.6, + "learning_rate": 7.923345450392856e-05, + "loss": 0.1768, + "step": 4724 + }, + { + "epoch": 5.6, + "learning_rate": 7.919651961933553e-05, + "loss": 0.1766, + "step": 4725 + }, + { + "epoch": 5.6, + "learning_rate": 7.915958770086498e-05, + "loss": 0.1743, + "step": 4726 + }, + { + "epoch": 5.6, + "learning_rate": 7.912265875378262e-05, + "loss": 0.1763, + "step": 4727 + }, + { + "epoch": 5.6, + "learning_rate": 7.908573278335371e-05, + "loss": 0.1819, + "step": 4728 + }, + { + "epoch": 5.6, + "learning_rate": 7.904880979484315e-05, + "loss": 0.1785, + "step": 4729 + }, + { + "epoch": 5.6, + "learning_rate": 7.901188979351526e-05, + "loss": 0.1717, + "step": 4730 + }, + { + "epoch": 5.61, + "learning_rate": 7.897497278463409e-05, + "loss": 0.1737, + "step": 4731 + }, + { + "epoch": 5.61, + "learning_rate": 7.893805877346316e-05, + "loss": 0.1755, + "step": 4732 + }, + { + "epoch": 5.61, + "learning_rate": 7.890114776526564e-05, + "loss": 0.1739, + "step": 4733 + }, + { + "epoch": 5.61, + "learning_rate": 7.88642397653042e-05, + "loss": 0.1754, + "step": 4734 + }, + { + "epoch": 5.61, + "learning_rate": 7.882733477884115e-05, + "loss": 0.1772, + "step": 4735 + }, + { + "epoch": 5.61, + "learning_rate": 7.87904328111383e-05, + "loss": 0.1818, + "step": 4736 + }, + { + "epoch": 5.61, + "learning_rate": 7.875353386745713e-05, + "loss": 0.1781, + "step": 4737 + }, + { + "epoch": 5.61, + "learning_rate": 7.871663795305855e-05, + "loss": 0.1714, + "step": 4738 + }, + { + "epoch": 5.61, + "learning_rate": 7.867974507320311e-05, + "loss": 0.1952, + "step": 4739 + }, + { + "epoch": 5.62, + "learning_rate": 7.864285523315096e-05, + "loss": 0.1803, + "step": 4740 + }, + { + "epoch": 5.62, + "learning_rate": 7.860596843816187e-05, + "loss": 0.1843, + "step": 4741 + }, + { + "epoch": 5.62, + "learning_rate": 7.856908469349495e-05, + "loss": 0.1907, + "step": 4742 + }, + { + "epoch": 5.62, + "learning_rate": 7.853220400440907e-05, + "loss": 0.1848, + "step": 4743 + }, + { + "epoch": 5.62, + "learning_rate": 7.849532637616264e-05, + "loss": 0.1849, + "step": 4744 + }, + { + "epoch": 5.62, + "learning_rate": 7.845845181401358e-05, + "loss": 0.1878, + "step": 4745 + }, + { + "epoch": 5.62, + "learning_rate": 7.84215803232194e-05, + "loss": 0.1849, + "step": 4746 + }, + { + "epoch": 5.62, + "learning_rate": 7.838471190903717e-05, + "loss": 0.1778, + "step": 4747 + }, + { + "epoch": 5.63, + "learning_rate": 7.834784657672353e-05, + "loss": 0.1925, + "step": 4748 + }, + { + "epoch": 5.63, + "learning_rate": 7.831098433153467e-05, + "loss": 0.18, + "step": 4749 + }, + { + "epoch": 5.63, + "learning_rate": 7.827412517872634e-05, + "loss": 0.1823, + "step": 4750 + }, + { + "epoch": 5.63, + "learning_rate": 7.823726912355384e-05, + "loss": 0.1821, + "step": 4751 + }, + { + "epoch": 5.63, + "learning_rate": 7.820041617127205e-05, + "loss": 0.1732, + "step": 4752 + }, + { + "epoch": 5.63, + "learning_rate": 7.816356632713545e-05, + "loss": 0.1758, + "step": 4753 + }, + { + "epoch": 5.63, + "learning_rate": 7.812671959639791e-05, + "loss": 0.1701, + "step": 4754 + }, + { + "epoch": 5.63, + "learning_rate": 7.808987598431303e-05, + "loss": 0.1795, + "step": 4755 + }, + { + "epoch": 5.64, + "learning_rate": 7.805303549613392e-05, + "loss": 0.1732, + "step": 4756 + }, + { + "epoch": 5.64, + "learning_rate": 7.80161981371132e-05, + "loss": 0.1791, + "step": 4757 + }, + { + "epoch": 5.64, + "learning_rate": 7.797936391250314e-05, + "loss": 0.2083, + "step": 4758 + }, + { + "epoch": 5.64, + "learning_rate": 7.794253282755541e-05, + "loss": 0.1894, + "step": 4759 + }, + { + "epoch": 5.64, + "learning_rate": 7.790570488752135e-05, + "loss": 0.1774, + "step": 4760 + }, + { + "epoch": 5.64, + "learning_rate": 7.786888009765185e-05, + "loss": 0.1696, + "step": 4761 + }, + { + "epoch": 5.64, + "learning_rate": 7.78320584631973e-05, + "loss": 0.1693, + "step": 4762 + }, + { + "epoch": 5.64, + "learning_rate": 7.779523998940766e-05, + "loss": 0.1744, + "step": 4763 + }, + { + "epoch": 5.64, + "learning_rate": 7.775842468153242e-05, + "loss": 0.176, + "step": 4764 + }, + { + "epoch": 5.65, + "learning_rate": 7.772161254482068e-05, + "loss": 0.1683, + "step": 4765 + }, + { + "epoch": 5.65, + "learning_rate": 7.768480358452107e-05, + "loss": 0.1765, + "step": 4766 + }, + { + "epoch": 5.65, + "learning_rate": 7.764799780588164e-05, + "loss": 0.1836, + "step": 4767 + }, + { + "epoch": 5.65, + "learning_rate": 7.761119521415016e-05, + "loss": 0.1803, + "step": 4768 + }, + { + "epoch": 5.65, + "learning_rate": 7.757439581457388e-05, + "loss": 0.1745, + "step": 4769 + }, + { + "epoch": 5.65, + "learning_rate": 7.753759961239964e-05, + "loss": 0.1705, + "step": 4770 + }, + { + "epoch": 5.65, + "learning_rate": 7.750080661287366e-05, + "loss": 0.1818, + "step": 4771 + }, + { + "epoch": 5.65, + "learning_rate": 7.74640168212419e-05, + "loss": 0.1837, + "step": 4772 + }, + { + "epoch": 5.66, + "learning_rate": 7.742723024274974e-05, + "loss": 0.1956, + "step": 4773 + }, + { + "epoch": 5.66, + "learning_rate": 7.73904468826422e-05, + "loss": 0.1773, + "step": 4774 + }, + { + "epoch": 5.66, + "learning_rate": 7.735366674616372e-05, + "loss": 0.1798, + "step": 4775 + }, + { + "epoch": 5.66, + "learning_rate": 7.731688983855838e-05, + "loss": 0.1771, + "step": 4776 + }, + { + "epoch": 5.66, + "learning_rate": 7.728011616506976e-05, + "loss": 0.1868, + "step": 4777 + }, + { + "epoch": 5.66, + "learning_rate": 7.7243345730941e-05, + "loss": 0.1735, + "step": 4778 + }, + { + "epoch": 5.66, + "learning_rate": 7.720657854141475e-05, + "loss": 0.1727, + "step": 4779 + }, + { + "epoch": 5.66, + "learning_rate": 7.716981460173319e-05, + "loss": 0.1735, + "step": 4780 + }, + { + "epoch": 5.67, + "learning_rate": 7.713305391713806e-05, + "loss": 0.1809, + "step": 4781 + }, + { + "epoch": 5.67, + "learning_rate": 7.709629649287068e-05, + "loss": 0.2148, + "step": 4782 + }, + { + "epoch": 5.67, + "learning_rate": 7.705954233417179e-05, + "loss": 0.1791, + "step": 4783 + }, + { + "epoch": 5.67, + "learning_rate": 7.702279144628174e-05, + "loss": 0.1848, + "step": 4784 + }, + { + "epoch": 5.67, + "learning_rate": 7.698604383444044e-05, + "loss": 0.1745, + "step": 4785 + }, + { + "epoch": 5.67, + "learning_rate": 7.694929950388732e-05, + "loss": 0.1702, + "step": 4786 + }, + { + "epoch": 5.67, + "learning_rate": 7.691255845986124e-05, + "loss": 0.1764, + "step": 4787 + }, + { + "epoch": 5.67, + "learning_rate": 7.687582070760073e-05, + "loss": 0.171, + "step": 4788 + }, + { + "epoch": 5.67, + "learning_rate": 7.683908625234376e-05, + "loss": 0.1764, + "step": 4789 + }, + { + "epoch": 5.68, + "learning_rate": 7.68023550993279e-05, + "loss": 0.1831, + "step": 4790 + }, + { + "epoch": 5.68, + "learning_rate": 7.676562725379018e-05, + "loss": 0.1914, + "step": 4791 + }, + { + "epoch": 5.68, + "learning_rate": 7.67289027209672e-05, + "loss": 0.1752, + "step": 4792 + }, + { + "epoch": 5.68, + "learning_rate": 7.669218150609507e-05, + "loss": 0.1706, + "step": 4793 + }, + { + "epoch": 5.68, + "learning_rate": 7.66554636144095e-05, + "loss": 0.176, + "step": 4794 + }, + { + "epoch": 5.68, + "learning_rate": 7.661874905114554e-05, + "loss": 0.1786, + "step": 4795 + }, + { + "epoch": 5.68, + "learning_rate": 7.658203782153793e-05, + "loss": 0.1759, + "step": 4796 + }, + { + "epoch": 5.68, + "learning_rate": 7.654532993082092e-05, + "loss": 0.1718, + "step": 4797 + }, + { + "epoch": 5.69, + "learning_rate": 7.650862538422831e-05, + "loss": 0.1704, + "step": 4798 + }, + { + "epoch": 5.69, + "learning_rate": 7.647192418699324e-05, + "loss": 0.1814, + "step": 4799 + }, + { + "epoch": 5.69, + "learning_rate": 7.643522634434856e-05, + "loss": 0.1762, + "step": 4800 + }, + { + "epoch": 5.69, + "learning_rate": 7.639853186152659e-05, + "loss": 0.1777, + "step": 4801 + }, + { + "epoch": 5.69, + "learning_rate": 7.636184074375917e-05, + "loss": 0.1763, + "step": 4802 + }, + { + "epoch": 5.69, + "learning_rate": 7.632515299627763e-05, + "loss": 0.1798, + "step": 4803 + }, + { + "epoch": 5.69, + "learning_rate": 7.628846862431283e-05, + "loss": 0.1874, + "step": 4804 + }, + { + "epoch": 5.69, + "learning_rate": 7.625178763309519e-05, + "loss": 0.1853, + "step": 4805 + }, + { + "epoch": 5.7, + "learning_rate": 7.621511002785467e-05, + "loss": 0.1894, + "step": 4806 + }, + { + "epoch": 5.7, + "learning_rate": 7.617843581382055e-05, + "loss": 0.1831, + "step": 4807 + }, + { + "epoch": 5.7, + "eval_loss": 3.838498115539551, + "eval_runtime": 283.7734, + "eval_samples_per_second": 0.726, + "eval_steps_per_second": 0.726, + "step": 4807 + }, + { + "epoch": 5.7, + "learning_rate": 7.614176499622189e-05, + "loss": 0.1784, + "step": 4808 + }, + { + "epoch": 5.7, + "learning_rate": 7.610509758028712e-05, + "loss": 0.1802, + "step": 4809 + }, + { + "epoch": 5.7, + "learning_rate": 7.606843357124426e-05, + "loss": 0.173, + "step": 4810 + }, + { + "epoch": 5.7, + "learning_rate": 7.603177297432069e-05, + "loss": 0.187, + "step": 4811 + }, + { + "epoch": 5.7, + "learning_rate": 7.59951157947435e-05, + "loss": 0.1684, + "step": 4812 + }, + { + "epoch": 5.7, + "learning_rate": 7.595846203773916e-05, + "loss": 0.1786, + "step": 4813 + }, + { + "epoch": 5.7, + "learning_rate": 7.592181170853373e-05, + "loss": 0.171, + "step": 4814 + }, + { + "epoch": 5.71, + "learning_rate": 7.588516481235271e-05, + "loss": 0.1713, + "step": 4815 + }, + { + "epoch": 5.71, + "learning_rate": 7.584852135442118e-05, + "loss": 0.1772, + "step": 4816 + }, + { + "epoch": 5.71, + "learning_rate": 7.581188133996368e-05, + "loss": 0.1921, + "step": 4817 + }, + { + "epoch": 5.71, + "learning_rate": 7.57752447742043e-05, + "loss": 0.172, + "step": 4818 + }, + { + "epoch": 5.71, + "learning_rate": 7.573861166236658e-05, + "loss": 0.1688, + "step": 4819 + }, + { + "epoch": 5.71, + "learning_rate": 7.570198200967362e-05, + "loss": 0.1703, + "step": 4820 + }, + { + "epoch": 5.71, + "learning_rate": 7.5665355821348e-05, + "loss": 0.1765, + "step": 4821 + }, + { + "epoch": 5.71, + "learning_rate": 7.562873310261183e-05, + "loss": 0.1764, + "step": 4822 + }, + { + "epoch": 5.72, + "learning_rate": 7.559211385868677e-05, + "loss": 0.1923, + "step": 4823 + }, + { + "epoch": 5.72, + "learning_rate": 7.55554980947938e-05, + "loss": 0.1832, + "step": 4824 + }, + { + "epoch": 5.72, + "learning_rate": 7.551888581615357e-05, + "loss": 0.176, + "step": 4825 + }, + { + "epoch": 5.72, + "learning_rate": 7.548227702798624e-05, + "loss": 0.1836, + "step": 4826 + }, + { + "epoch": 5.72, + "learning_rate": 7.544567173551143e-05, + "loss": 0.1821, + "step": 4827 + }, + { + "epoch": 5.72, + "learning_rate": 7.54090699439482e-05, + "loss": 0.1685, + "step": 4828 + }, + { + "epoch": 5.72, + "learning_rate": 7.537247165851518e-05, + "loss": 0.1979, + "step": 4829 + }, + { + "epoch": 5.72, + "learning_rate": 7.533587688443049e-05, + "loss": 0.1765, + "step": 4830 + }, + { + "epoch": 5.73, + "learning_rate": 7.529928562691178e-05, + "loss": 0.1691, + "step": 4831 + }, + { + "epoch": 5.73, + "learning_rate": 7.526269789117612e-05, + "loss": 0.192, + "step": 4832 + }, + { + "epoch": 5.73, + "learning_rate": 7.522611368244016e-05, + "loss": 0.178, + "step": 4833 + }, + { + "epoch": 5.73, + "learning_rate": 7.518953300591997e-05, + "loss": 0.1803, + "step": 4834 + }, + { + "epoch": 5.73, + "learning_rate": 7.515295586683122e-05, + "loss": 0.1909, + "step": 4835 + }, + { + "epoch": 5.73, + "learning_rate": 7.511638227038894e-05, + "loss": 0.1912, + "step": 4836 + }, + { + "epoch": 5.73, + "learning_rate": 7.507981222180776e-05, + "loss": 0.172, + "step": 4837 + }, + { + "epoch": 5.73, + "learning_rate": 7.504324572630177e-05, + "loss": 0.1784, + "step": 4838 + }, + { + "epoch": 5.73, + "learning_rate": 7.500668278908461e-05, + "loss": 0.1769, + "step": 4839 + }, + { + "epoch": 5.74, + "learning_rate": 7.497012341536924e-05, + "loss": 0.171, + "step": 4840 + }, + { + "epoch": 5.74, + "learning_rate": 7.493356761036829e-05, + "loss": 0.1712, + "step": 4841 + }, + { + "epoch": 5.74, + "learning_rate": 7.489701537929384e-05, + "loss": 0.1866, + "step": 4842 + }, + { + "epoch": 5.74, + "learning_rate": 7.486046672735743e-05, + "loss": 0.1782, + "step": 4843 + }, + { + "epoch": 5.74, + "learning_rate": 7.482392165977008e-05, + "loss": 0.1797, + "step": 4844 + }, + { + "epoch": 5.74, + "learning_rate": 7.478738018174234e-05, + "loss": 0.1795, + "step": 4845 + }, + { + "epoch": 5.74, + "learning_rate": 7.47508422984842e-05, + "loss": 0.1756, + "step": 4846 + }, + { + "epoch": 5.74, + "learning_rate": 7.471430801520522e-05, + "loss": 0.1957, + "step": 4847 + }, + { + "epoch": 5.75, + "learning_rate": 7.467777733711434e-05, + "loss": 0.1782, + "step": 4848 + }, + { + "epoch": 5.75, + "learning_rate": 7.464125026942003e-05, + "loss": 0.1986, + "step": 4849 + }, + { + "epoch": 5.75, + "learning_rate": 7.460472681733031e-05, + "loss": 0.1792, + "step": 4850 + }, + { + "epoch": 5.75, + "learning_rate": 7.456820698605263e-05, + "loss": 0.1784, + "step": 4851 + }, + { + "epoch": 5.75, + "learning_rate": 7.453169078079382e-05, + "loss": 0.1795, + "step": 4852 + }, + { + "epoch": 5.75, + "learning_rate": 7.44951782067604e-05, + "loss": 0.1843, + "step": 4853 + }, + { + "epoch": 5.75, + "learning_rate": 7.445866926915818e-05, + "loss": 0.1772, + "step": 4854 + }, + { + "epoch": 5.75, + "learning_rate": 7.442216397319266e-05, + "loss": 0.1709, + "step": 4855 + }, + { + "epoch": 5.76, + "learning_rate": 7.438566232406858e-05, + "loss": 0.1707, + "step": 4856 + }, + { + "epoch": 5.76, + "learning_rate": 7.434916432699033e-05, + "loss": 0.1753, + "step": 4857 + }, + { + "epoch": 5.76, + "learning_rate": 7.431266998716171e-05, + "loss": 0.1781, + "step": 4858 + }, + { + "epoch": 5.76, + "learning_rate": 7.427617930978606e-05, + "loss": 0.1829, + "step": 4859 + }, + { + "epoch": 5.76, + "learning_rate": 7.423969230006609e-05, + "loss": 0.1949, + "step": 4860 + }, + { + "epoch": 5.76, + "learning_rate": 7.42032089632041e-05, + "loss": 0.1678, + "step": 4861 + }, + { + "epoch": 5.76, + "learning_rate": 7.41667293044018e-05, + "loss": 0.1919, + "step": 4862 + }, + { + "epoch": 5.76, + "learning_rate": 7.413025332886044e-05, + "loss": 0.1812, + "step": 4863 + }, + { + "epoch": 5.76, + "learning_rate": 7.409378104178059e-05, + "loss": 0.1851, + "step": 4864 + }, + { + "epoch": 5.77, + "learning_rate": 7.40573124483625e-05, + "loss": 0.1688, + "step": 4865 + }, + { + "epoch": 5.77, + "learning_rate": 7.402084755380574e-05, + "loss": 0.2043, + "step": 4866 + }, + { + "epoch": 5.77, + "learning_rate": 7.398438636330948e-05, + "loss": 0.1748, + "step": 4867 + }, + { + "epoch": 5.77, + "learning_rate": 7.394792888207221e-05, + "loss": 0.1867, + "step": 4868 + }, + { + "epoch": 5.77, + "learning_rate": 7.391147511529202e-05, + "loss": 0.1745, + "step": 4869 + }, + { + "epoch": 5.77, + "learning_rate": 7.387502506816638e-05, + "loss": 0.1715, + "step": 4870 + }, + { + "epoch": 5.77, + "learning_rate": 7.383857874589232e-05, + "loss": 0.1789, + "step": 4871 + }, + { + "epoch": 5.77, + "learning_rate": 7.380213615366627e-05, + "loss": 0.1724, + "step": 4872 + }, + { + "epoch": 5.78, + "learning_rate": 7.376569729668413e-05, + "loss": 0.181, + "step": 4873 + }, + { + "epoch": 5.78, + "learning_rate": 7.372926218014131e-05, + "loss": 0.1782, + "step": 4874 + }, + { + "epoch": 5.78, + "learning_rate": 7.369283080923269e-05, + "loss": 0.1729, + "step": 4875 + }, + { + "epoch": 5.78, + "learning_rate": 7.36564031891525e-05, + "loss": 0.1832, + "step": 4876 + }, + { + "epoch": 5.78, + "learning_rate": 7.361997932509461e-05, + "loss": 0.1864, + "step": 4877 + }, + { + "epoch": 5.78, + "learning_rate": 7.358355922225222e-05, + "loss": 0.1905, + "step": 4878 + }, + { + "epoch": 5.78, + "learning_rate": 7.35471428858181e-05, + "loss": 0.1911, + "step": 4879 + }, + { + "epoch": 5.78, + "learning_rate": 7.351073032098437e-05, + "loss": 0.1809, + "step": 4880 + }, + { + "epoch": 5.79, + "learning_rate": 7.347432153294265e-05, + "loss": 0.1757, + "step": 4881 + }, + { + "epoch": 5.79, + "learning_rate": 7.34379165268841e-05, + "loss": 0.1751, + "step": 4882 + }, + { + "epoch": 5.79, + "learning_rate": 7.340151530799926e-05, + "loss": 0.1772, + "step": 4883 + }, + { + "epoch": 5.79, + "learning_rate": 7.336511788147811e-05, + "loss": 0.1763, + "step": 4884 + }, + { + "epoch": 5.79, + "learning_rate": 7.332872425251018e-05, + "loss": 0.1721, + "step": 4885 + }, + { + "epoch": 5.79, + "learning_rate": 7.329233442628437e-05, + "loss": 0.1848, + "step": 4886 + }, + { + "epoch": 5.79, + "learning_rate": 7.325594840798911e-05, + "loss": 0.1752, + "step": 4887 + }, + { + "epoch": 5.79, + "learning_rate": 7.321956620281223e-05, + "loss": 0.1905, + "step": 4888 + }, + { + "epoch": 5.79, + "learning_rate": 7.318318781594106e-05, + "loss": 0.1787, + "step": 4889 + }, + { + "epoch": 5.8, + "learning_rate": 7.314681325256232e-05, + "loss": 0.1739, + "step": 4890 + }, + { + "epoch": 5.8, + "learning_rate": 7.311044251786227e-05, + "loss": 0.171, + "step": 4891 + }, + { + "epoch": 5.8, + "learning_rate": 7.307407561702662e-05, + "loss": 0.1822, + "step": 4892 + }, + { + "epoch": 5.8, + "learning_rate": 7.303771255524038e-05, + "loss": 0.1766, + "step": 4893 + }, + { + "epoch": 5.8, + "learning_rate": 7.300135333768821e-05, + "loss": 0.1742, + "step": 4894 + }, + { + "epoch": 5.8, + "learning_rate": 7.296499796955414e-05, + "loss": 0.1805, + "step": 4895 + }, + { + "epoch": 5.8, + "learning_rate": 7.292864645602169e-05, + "loss": 0.1762, + "step": 4896 + }, + { + "epoch": 5.8, + "learning_rate": 7.28922988022737e-05, + "loss": 0.1718, + "step": 4897 + }, + { + "epoch": 5.81, + "learning_rate": 7.285595501349258e-05, + "loss": 0.175, + "step": 4898 + }, + { + "epoch": 5.81, + "learning_rate": 7.28196150948602e-05, + "loss": 0.1846, + "step": 4899 + }, + { + "epoch": 5.81, + "learning_rate": 7.278327905155783e-05, + "loss": 0.1742, + "step": 4900 + }, + { + "epoch": 5.81, + "learning_rate": 7.274694688876616e-05, + "loss": 0.179, + "step": 4901 + }, + { + "epoch": 5.81, + "learning_rate": 7.271061861166539e-05, + "loss": 0.1791, + "step": 4902 + }, + { + "epoch": 5.81, + "learning_rate": 7.267429422543514e-05, + "loss": 0.1721, + "step": 4903 + }, + { + "epoch": 5.81, + "learning_rate": 7.263797373525451e-05, + "loss": 0.1768, + "step": 4904 + }, + { + "epoch": 5.81, + "learning_rate": 7.260165714630195e-05, + "loss": 0.1778, + "step": 4905 + }, + { + "epoch": 5.82, + "learning_rate": 7.256534446375542e-05, + "loss": 0.1717, + "step": 4906 + }, + { + "epoch": 5.82, + "learning_rate": 7.252903569279235e-05, + "loss": 0.1813, + "step": 4907 + }, + { + "epoch": 5.82, + "learning_rate": 7.249273083858961e-05, + "loss": 0.1715, + "step": 4908 + }, + { + "epoch": 5.82, + "learning_rate": 7.245642990632338e-05, + "loss": 0.1691, + "step": 4909 + }, + { + "epoch": 5.82, + "learning_rate": 7.242013290116944e-05, + "loss": 0.1732, + "step": 4910 + }, + { + "epoch": 5.82, + "learning_rate": 7.238383982830292e-05, + "loss": 0.1861, + "step": 4911 + }, + { + "epoch": 5.82, + "learning_rate": 7.23475506928985e-05, + "loss": 0.2166, + "step": 4912 + }, + { + "epoch": 5.82, + "learning_rate": 7.231126550013015e-05, + "loss": 0.1835, + "step": 4913 + }, + { + "epoch": 5.82, + "learning_rate": 7.227498425517134e-05, + "loss": 0.1663, + "step": 4914 + }, + { + "epoch": 5.83, + "learning_rate": 7.2238706963195e-05, + "loss": 0.1761, + "step": 4915 + }, + { + "epoch": 5.83, + "learning_rate": 7.220243362937352e-05, + "loss": 0.1767, + "step": 4916 + }, + { + "epoch": 5.83, + "learning_rate": 7.216616425887863e-05, + "loss": 0.1745, + "step": 4917 + }, + { + "epoch": 5.83, + "learning_rate": 7.212989885688157e-05, + "loss": 0.1798, + "step": 4918 + }, + { + "epoch": 5.83, + "learning_rate": 7.209363742855302e-05, + "loss": 0.1746, + "step": 4919 + }, + { + "epoch": 5.83, + "learning_rate": 7.205737997906307e-05, + "loss": 0.1923, + "step": 4920 + }, + { + "epoch": 5.83, + "learning_rate": 7.202112651358117e-05, + "loss": 0.189, + "step": 4921 + }, + { + "epoch": 5.83, + "learning_rate": 7.198487703727632e-05, + "loss": 0.1773, + "step": 4922 + }, + { + "epoch": 5.84, + "learning_rate": 7.194863155531692e-05, + "loss": 0.1739, + "step": 4923 + }, + { + "epoch": 5.84, + "learning_rate": 7.191239007287081e-05, + "loss": 0.1778, + "step": 4924 + }, + { + "epoch": 5.84, + "learning_rate": 7.187615259510516e-05, + "loss": 0.1753, + "step": 4925 + }, + { + "epoch": 5.84, + "learning_rate": 7.183991912718669e-05, + "loss": 0.1735, + "step": 4926 + }, + { + "epoch": 5.84, + "learning_rate": 7.180368967428149e-05, + "loss": 0.1783, + "step": 4927 + }, + { + "epoch": 5.84, + "learning_rate": 7.176746424155512e-05, + "loss": 0.1808, + "step": 4928 + }, + { + "epoch": 5.84, + "learning_rate": 7.173124283417247e-05, + "loss": 0.1768, + "step": 4929 + }, + { + "epoch": 5.84, + "learning_rate": 7.169502545729797e-05, + "loss": 0.1766, + "step": 4930 + }, + { + "epoch": 5.85, + "learning_rate": 7.165881211609543e-05, + "loss": 0.2007, + "step": 4931 + }, + { + "epoch": 5.85, + "learning_rate": 7.162260281572808e-05, + "loss": 0.1848, + "step": 4932 + }, + { + "epoch": 5.85, + "learning_rate": 7.158639756135854e-05, + "loss": 0.1824, + "step": 4933 + }, + { + "epoch": 5.85, + "learning_rate": 7.155019635814894e-05, + "loss": 0.1754, + "step": 4934 + }, + { + "epoch": 5.85, + "learning_rate": 7.151399921126075e-05, + "loss": 0.1852, + "step": 4935 + }, + { + "epoch": 5.85, + "learning_rate": 7.147780612585495e-05, + "loss": 0.1667, + "step": 4936 + }, + { + "epoch": 5.85, + "learning_rate": 7.144161710709178e-05, + "loss": 0.1794, + "step": 4937 + }, + { + "epoch": 5.85, + "learning_rate": 7.14054321601311e-05, + "loss": 0.1644, + "step": 4938 + }, + { + "epoch": 5.85, + "learning_rate": 7.136925129013203e-05, + "loss": 0.1897, + "step": 4939 + }, + { + "epoch": 5.86, + "learning_rate": 7.133307450225322e-05, + "loss": 0.1778, + "step": 4940 + }, + { + "epoch": 5.86, + "learning_rate": 7.129690180165266e-05, + "loss": 0.1793, + "step": 4941 + }, + { + "epoch": 5.86, + "learning_rate": 7.12607331934878e-05, + "loss": 0.1799, + "step": 4942 + }, + { + "epoch": 5.86, + "learning_rate": 7.122456868291548e-05, + "loss": 0.1688, + "step": 4943 + }, + { + "epoch": 5.86, + "learning_rate": 7.118840827509201e-05, + "loss": 0.1713, + "step": 4944 + }, + { + "epoch": 5.86, + "learning_rate": 7.115225197517304e-05, + "loss": 0.1786, + "step": 4945 + }, + { + "epoch": 5.86, + "learning_rate": 7.111609978831367e-05, + "loss": 0.1736, + "step": 4946 + }, + { + "epoch": 5.86, + "learning_rate": 7.107995171966842e-05, + "loss": 0.1861, + "step": 4947 + }, + { + "epoch": 5.87, + "learning_rate": 7.104380777439127e-05, + "loss": 0.1745, + "step": 4948 + }, + { + "epoch": 5.87, + "learning_rate": 7.100766795763546e-05, + "loss": 0.1896, + "step": 4949 + }, + { + "epoch": 5.87, + "learning_rate": 7.097153227455379e-05, + "loss": 0.1737, + "step": 4950 + }, + { + "epoch": 5.87, + "learning_rate": 7.093540073029839e-05, + "loss": 0.2093, + "step": 4951 + }, + { + "epoch": 5.87, + "learning_rate": 7.089927333002086e-05, + "loss": 0.1994, + "step": 4952 + }, + { + "epoch": 5.87, + "learning_rate": 7.086315007887225e-05, + "loss": 0.1794, + "step": 4953 + }, + { + "epoch": 5.87, + "learning_rate": 7.082703098200282e-05, + "loss": 0.1797, + "step": 4954 + }, + { + "epoch": 5.87, + "learning_rate": 7.079091604456241e-05, + "loss": 0.166, + "step": 4955 + }, + { + "epoch": 5.88, + "learning_rate": 7.075480527170024e-05, + "loss": 0.1787, + "step": 4956 + }, + { + "epoch": 5.88, + "learning_rate": 7.071869866856493e-05, + "loss": 0.1801, + "step": 4957 + }, + { + "epoch": 5.88, + "learning_rate": 7.068259624030444e-05, + "loss": 0.1689, + "step": 4958 + }, + { + "epoch": 5.88, + "learning_rate": 7.064649799206625e-05, + "loss": 0.1789, + "step": 4959 + }, + { + "epoch": 5.88, + "learning_rate": 7.061040392899712e-05, + "loss": 0.174, + "step": 4960 + }, + { + "epoch": 5.88, + "learning_rate": 7.057431405624335e-05, + "loss": 0.1745, + "step": 4961 + }, + { + "epoch": 5.88, + "learning_rate": 7.053822837895051e-05, + "loss": 0.1763, + "step": 4962 + }, + { + "epoch": 5.88, + "learning_rate": 7.050214690226364e-05, + "loss": 0.1777, + "step": 4963 + }, + { + "epoch": 5.88, + "learning_rate": 7.04660696313272e-05, + "loss": 0.1846, + "step": 4964 + }, + { + "epoch": 5.89, + "learning_rate": 7.042999657128503e-05, + "loss": 0.1842, + "step": 4965 + }, + { + "epoch": 5.89, + "learning_rate": 7.03939277272803e-05, + "loss": 0.1839, + "step": 4966 + }, + { + "epoch": 5.89, + "learning_rate": 7.035786310445567e-05, + "loss": 0.1716, + "step": 4967 + }, + { + "epoch": 5.89, + "learning_rate": 7.032180270795317e-05, + "loss": 0.2148, + "step": 4968 + }, + { + "epoch": 5.89, + "learning_rate": 7.028574654291426e-05, + "loss": 0.1774, + "step": 4969 + }, + { + "epoch": 5.89, + "learning_rate": 7.024969461447972e-05, + "loss": 0.1759, + "step": 4970 + }, + { + "epoch": 5.89, + "learning_rate": 7.021364692778977e-05, + "loss": 0.1776, + "step": 4971 + }, + { + "epoch": 5.89, + "learning_rate": 7.017760348798403e-05, + "loss": 0.1946, + "step": 4972 + }, + { + "epoch": 5.9, + "learning_rate": 7.014156430020153e-05, + "loss": 0.1739, + "step": 4973 + }, + { + "epoch": 5.9, + "learning_rate": 7.010552936958066e-05, + "loss": 0.1983, + "step": 4974 + }, + { + "epoch": 5.9, + "learning_rate": 7.006949870125919e-05, + "loss": 0.1863, + "step": 4975 + }, + { + "epoch": 5.9, + "learning_rate": 7.003347230037433e-05, + "loss": 0.1755, + "step": 4976 + }, + { + "epoch": 5.9, + "learning_rate": 6.99974501720627e-05, + "loss": 0.1776, + "step": 4977 + }, + { + "epoch": 5.9, + "learning_rate": 6.99614323214602e-05, + "loss": 0.1922, + "step": 4978 + }, + { + "epoch": 5.9, + "learning_rate": 6.992541875370217e-05, + "loss": 0.1898, + "step": 4979 + }, + { + "epoch": 5.9, + "learning_rate": 6.988940947392344e-05, + "loss": 0.1818, + "step": 4980 + }, + { + "epoch": 5.91, + "learning_rate": 6.985340448725815e-05, + "loss": 0.1911, + "step": 4981 + }, + { + "epoch": 5.91, + "learning_rate": 6.981740379883974e-05, + "loss": 0.1688, + "step": 4982 + }, + { + "epoch": 5.91, + "learning_rate": 6.97814074138012e-05, + "loss": 0.1917, + "step": 4983 + }, + { + "epoch": 5.91, + "learning_rate": 6.974541533727476e-05, + "loss": 0.1944, + "step": 4984 + }, + { + "epoch": 5.91, + "learning_rate": 6.97094275743922e-05, + "loss": 0.187, + "step": 4985 + }, + { + "epoch": 5.91, + "learning_rate": 6.967344413028452e-05, + "loss": 0.1714, + "step": 4986 + }, + { + "epoch": 5.91, + "learning_rate": 6.963746501008217e-05, + "loss": 0.1714, + "step": 4987 + }, + { + "epoch": 5.91, + "learning_rate": 6.9601490218915e-05, + "loss": 0.1814, + "step": 4988 + }, + { + "epoch": 5.91, + "learning_rate": 6.95655197619123e-05, + "loss": 0.1749, + "step": 4989 + }, + { + "epoch": 5.92, + "learning_rate": 6.952955364420255e-05, + "loss": 0.1793, + "step": 4990 + }, + { + "epoch": 5.92, + "learning_rate": 6.94935918709138e-05, + "loss": 0.17, + "step": 4991 + }, + { + "epoch": 5.92, + "learning_rate": 6.945763444717341e-05, + "loss": 0.1717, + "step": 4992 + }, + { + "epoch": 5.92, + "learning_rate": 6.942168137810818e-05, + "loss": 0.1876, + "step": 4993 + }, + { + "epoch": 5.92, + "learning_rate": 6.938573266884413e-05, + "loss": 0.1788, + "step": 4994 + }, + { + "epoch": 5.92, + "learning_rate": 6.93497883245068e-05, + "loss": 0.1773, + "step": 4995 + }, + { + "epoch": 5.92, + "learning_rate": 6.931384835022109e-05, + "loss": 0.1843, + "step": 4996 + }, + { + "epoch": 5.92, + "learning_rate": 6.927791275111126e-05, + "loss": 0.1758, + "step": 4997 + }, + { + "epoch": 5.93, + "learning_rate": 6.924198153230091e-05, + "loss": 0.1831, + "step": 4998 + } + ], + "logging_steps": 1, + "max_steps": 8330, + "num_input_tokens_seen": 0, + "num_train_epochs": 10, + "save_steps": 833, + "total_flos": 1.7518364490599498e+19, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-4998/trainer_state.json:com.dropbox.attrs b/checkpoint-4998/trainer_state.json:com.dropbox.attrs new file mode 100644 index 0000000000000000000000000000000000000000..b506010066e65e7ae68ae7b46ab0441f9d3f17fa Binary files /dev/null and b/checkpoint-4998/trainer_state.json:com.dropbox.attrs differ diff --git a/checkpoint-4998/training_args.bin b/checkpoint-4998/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..b74ebd11d7429fe3b4fc4524a3b2d80be486b207 --- /dev/null +++ b/checkpoint-4998/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:008c2f6eb84a5df4b149629ed295f775de2745857ece42b151bce88afb911869 +size 4859 diff --git a/checkpoint-4998/training_args.bin:com.dropbox.attrs b/checkpoint-4998/training_args.bin:com.dropbox.attrs new file mode 100644 index 0000000000000000000000000000000000000000..2d1f4ba5a9acc961b9eae1552fe318689b93101f Binary files /dev/null and b/checkpoint-4998/training_args.bin:com.dropbox.attrs differ diff --git a/config.json b/config.json new file mode 100644 index 0000000000000000000000000000000000000000..de37db9e3fa375d521fc198b3dbfa62a616402c5 --- /dev/null +++ b/config.json @@ -0,0 +1,42 @@ +{ + "_name_or_path": "mistralai/Mixtral-8x7B-v0.1", + "architectures": [ + "MixtralForCausalLM" + ], + "attention_dropout": 0.0, + "bos_token_id": 1, + "eos_token_id": 2, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 14336, + "max_position_embeddings": 32768, + "model_type": "mixtral", + "num_attention_heads": 32, + "num_experts_per_tok": 2, + "num_hidden_layers": 32, + "num_key_value_heads": 8, + "num_local_experts": 8, + "output_router_logits": true, + "quantization_config": { + "bnb_4bit_compute_dtype": "bfloat16", + "bnb_4bit_quant_type": "nf4", + "bnb_4bit_use_double_quant": true, + "llm_int8_enable_fp32_cpu_offload": false, + "llm_int8_has_fp16_weight": false, + "llm_int8_skip_modules": null, + "llm_int8_threshold": 6.0, + "load_in_4bit": true, + "load_in_8bit": false, + "quant_method": "bitsandbytes" + }, + "rms_norm_eps": 1e-05, + "rope_theta": 1000000.0, + "router_aux_loss_coef": 0.02, + "sliding_window": null, + "tie_word_embeddings": false, + "torch_dtype": "bfloat16", + "transformers_version": "4.37.0.dev0", + "use_cache": false, + "vocab_size": 32000 +} diff --git a/special_tokens_map.json b/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..72ecfeeb7e14d244c936169d2ed139eeae235ef1 --- /dev/null +++ b/special_tokens_map.json @@ -0,0 +1,24 @@ +{ + "bos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": "", + "unk_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/tokenizer.model b/tokenizer.model new file mode 100644 index 0000000000000000000000000000000000000000..8b443ef19c2a19acc3ac64fb9c3db4a72921dff6 --- /dev/null +++ b/tokenizer.model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dadfd56d766715c61d2ef780a525ab43b8e6da4de6865bda3d95fdef5e134055 +size 493443 diff --git a/tokenizer_config.json b/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..dbe629867f4501804df6190873dea2329db091b6 --- /dev/null +++ b/tokenizer_config.json @@ -0,0 +1,45 @@ +{ + "add_bos_token": true, + "add_eos_token": false, + "added_tokens_decoder": { + "0": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "additional_special_tokens": [], + "bos_token": "", + "chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% for message in messages %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}", + "clean_up_tokenization_spaces": false, + "eos_token": "", + "legacy": true, + "model_max_length": 1000000000000000019884624838656, + "pad_token": "", + "sp_model_kwargs": {}, + "spaces_between_special_tokens": false, + "tokenizer_class": "LlamaTokenizer", + "trust_remote_code": true, + "unk_token": "", + "use_default_system_prompt": false, + "use_fast": true +}