diff --git a/README.md b/README.md new file mode 100644 index 0000000000000000000000000000000000000000..6a1e13161d06d12aa9e59e4ce1e56d4b230f128e --- /dev/null +++ b/README.md @@ -0,0 +1,158 @@ +--- +license: apache-2.0 +library_name: peft +tags: +- generated_from_trainer +base_model: openlm-research/open_llama_3b_v2 +model-index: +- name: lora-out + results: [] +--- + + + +[Built with Axolotl](https://github.com/OpenAccess-AI-Collective/axolotl) +
See axolotl config + +axolotl version: `0.4.0` +```yaml +base_model: openlm-research/open_llama_3b_v2 +model_type: LlamaForCausalLM +tokenizer_type: LlamaTokenizer +load_in_8bit: true +load_in_4bit: false +strict: false +push_dataset_to_hub: +datasets: + - path: teknium/GPT4-LLM-Cleaned + type: alpaca +dataset_prepared_path: +val_set_size: 0.02 +adapter: lora +lora_model_dir: +sequence_len: 1024 +sample_packing: true +lora_r: 8 +lora_alpha: 16 +lora_dropout: 0.0 +lora_target_modules: + - gate_proj + - down_proj + - up_proj + - q_proj + - v_proj + - k_proj + - o_proj +lora_fan_in_fan_out: +wandb_project: +wandb_entity: +wandb_watch: +wandb_name: +wandb_log_model: +output_dir: ./lora-out +gradient_accumulation_steps: 1 +micro_batch_size: 2 +num_epochs: 4 +optimizer: adamw_bnb_8bit +torchdistx_path: +lr_scheduler: cosine +learning_rate: 0.0002 +train_on_inputs: false +group_by_length: false +bf16: false +fp16: true +tf32: false +gradient_checkpointing: true +early_stopping_patience: +resume_from_checkpoint: +local_rank: +logging_steps: 1 +xformers_attention: +flash_attention: true +gptq_groupsize: +s2_attention: +gptq_model_v1: +warmup_steps: 20 +evals_per_epoch: 4 +saves_per_epoch: 1 +debug: +deepspeed: +weight_decay: 0.1 +fsdp: +fsdp_config: +special_tokens: + bos_token: "" + eos_token: "" + unk_token: "" + +``` + +

+ +# lora-out + +This model is a fine-tuned version of [openlm-research/open_llama_3b_v2](https://huggingface.co/openlm-research/open_llama_3b_v2) on the teknium/GPT4-LLM-Cleaned dataset. +It achieves the following results on the evaluation set: +- Loss: 1.0041 + +## Model description + +More information needed + +## Intended uses & limitations + +More information needed + +## Training and evaluation data + +More information needed + +## Training procedure + +### Training hyperparameters + +The following hyperparameters were used during training: +- learning_rate: 0.0002 +- train_batch_size: 2 +- eval_batch_size: 2 +- seed: 42 +- distributed_type: multi-GPU +- num_devices: 8 +- total_train_batch_size: 16 +- total_eval_batch_size: 16 +- optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08 +- lr_scheduler_type: cosine +- lr_scheduler_warmup_steps: 20 +- num_epochs: 4 +- mixed_precision_training: Native AMP + +### Training results + +| Training Loss | Epoch | Step | Validation Loss | +|:-------------:|:-----:|:----:|:---------------:| +| 1.3745 | 0.0 | 1 | 1.6297 | +| 1.1387 | 0.25 | 168 | 1.0849 | +| 1.0619 | 0.5 | 336 | 1.0484 | +| 0.9686 | 0.75 | 504 | 1.0277 | +| 1.0816 | 1.0 | 672 | 1.0170 | +| 1.0513 | 1.23 | 840 | 1.0088 | +| 1.0814 | 1.48 | 1008 | 1.0041 | +| 1.0275 | 1.73 | 1176 | 0.9929 | +| 0.8872 | 1.98 | 1344 | 0.9883 | +| 0.9351 | 2.21 | 1512 | 0.9985 | +| 0.9077 | 2.46 | 1680 | 0.9968 | +| 0.9494 | 2.71 | 1848 | 0.9907 | +| 0.9596 | 2.96 | 2016 | 0.9916 | +| 0.8771 | 3.19 | 2184 | 1.0012 | +| 0.8912 | 3.44 | 2352 | 1.0041 | +| 0.7828 | 3.69 | 2520 | 1.0041 | + + +### Framework versions + +- PEFT 0.8.2 +- Transformers 4.38.0.dev0 +- Pytorch 2.0.1+cu118 +- Datasets 2.17.0 +- Tokenizers 0.15.0 \ No newline at end of file diff --git a/adapter_config.json b/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..a75a5db14cc030f9130cc346972670dcccc55fe2 --- /dev/null +++ b/adapter_config.json @@ -0,0 +1,32 @@ +{ + "alpha_pattern": {}, + 
"auto_mapping": null, + "base_model_name_or_path": "openlm-research/open_llama_3b_v2", + "bias": "none", + "fan_in_fan_out": null, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 16, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "gate_proj", + "q_proj", + "down_proj", + "up_proj", + "k_proj", + "o_proj", + "v_proj" + ], + "task_type": "CAUSAL_LM", + "use_rslora": false +} \ No newline at end of file diff --git a/adapter_model.bin b/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..89916f76201a404b3d29414c76c93dc76300ca72 --- /dev/null +++ b/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:03ecef4cb60baefa0bb44d74a88505dba46d4386e0cf0660bd52f6ffe8c24e0a +size 50982397 diff --git a/checkpoint-1342/README.md b/checkpoint-1342/README.md new file mode 100644 index 0000000000000000000000000000000000000000..c40158a9bf29b5b6a4b1c7d97250d59a2f05ed92 --- /dev/null +++ b/checkpoint-1342/README.md @@ -0,0 +1,204 @@ +--- +library_name: peft +base_model: openlm-research/open_llama_3b_v2 +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### 
Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). 
+ +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] + + +### Framework versions + +- PEFT 0.8.2 \ No newline at end of file diff --git a/checkpoint-1342/adapter_config.json b/checkpoint-1342/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..a75a5db14cc030f9130cc346972670dcccc55fe2 --- /dev/null +++ b/checkpoint-1342/adapter_config.json @@ -0,0 +1,32 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "openlm-research/open_llama_3b_v2", + "bias": "none", + "fan_in_fan_out": null, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 16, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "gate_proj", + "q_proj", + "down_proj", + "up_proj", + "k_proj", + "o_proj", + "v_proj" + ], + "task_type": "CAUSAL_LM", + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoint-1342/adapter_model.safetensors b/checkpoint-1342/adapter_model.safetensors 
new file mode 100644 index 0000000000000000000000000000000000000000..11c663cf1cb1b32d5a605978a45bda20ea169f55 --- /dev/null +++ b/checkpoint-1342/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0c65d169607b4203aa341325b6fe9292006df22a513ad4877e712d6c193f9774 +size 50899792 diff --git a/checkpoint-1342/optimizer.pt b/checkpoint-1342/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..8e8216b928abd7f7c49d13f7bccb042da5d082cd --- /dev/null +++ b/checkpoint-1342/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1553751d6b745ece48ec8a5612421a47b735758a11c9692b1c5252cf9f44ca8c +size 25871439 diff --git a/checkpoint-1342/rng_state_0.pth b/checkpoint-1342/rng_state_0.pth new file mode 100644 index 0000000000000000000000000000000000000000..8454d83c901223038140f990181b258c510e82d6 --- /dev/null +++ b/checkpoint-1342/rng_state_0.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4aa30b901a76717ad724ad2cdc59b1a9954aae73abfbe679dab0feec6b24e70a +size 21687 diff --git a/checkpoint-1342/rng_state_1.pth b/checkpoint-1342/rng_state_1.pth new file mode 100644 index 0000000000000000000000000000000000000000..778f61f6b3499ac347c14fe33d0be61c13f6f086 --- /dev/null +++ b/checkpoint-1342/rng_state_1.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:837265efabb97deee70e7c5bd9612eb60bfe277c2c13f0d5c7074faa43fc4f00 +size 21687 diff --git a/checkpoint-1342/rng_state_2.pth b/checkpoint-1342/rng_state_2.pth new file mode 100644 index 0000000000000000000000000000000000000000..68b653f75ecc20c88b55b57c0c7db23015e560a4 --- /dev/null +++ b/checkpoint-1342/rng_state_2.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:501389bb9fcd278d3c3a2fd08e5beea60698228c4fcd794eae2f6e0820bfaea9 +size 21687 diff --git a/checkpoint-1342/rng_state_3.pth b/checkpoint-1342/rng_state_3.pth new file mode 100644 index 
0000000000000000000000000000000000000000..9444425e3a7e18b007dcc153a1ddcc54e8540a95 --- /dev/null +++ b/checkpoint-1342/rng_state_3.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:99d1563cea741617aad72d62e023a352073715e810ca5cfb1ff7a0c2b16c41a9 +size 21687 diff --git a/checkpoint-1342/rng_state_4.pth b/checkpoint-1342/rng_state_4.pth new file mode 100644 index 0000000000000000000000000000000000000000..cbe9bf352d6eef3236033687beac21067fe7db32 --- /dev/null +++ b/checkpoint-1342/rng_state_4.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cd700607aad5df0f237828338e57527a104be4665e877e73ae19837b366bc298 +size 21687 diff --git a/checkpoint-1342/rng_state_5.pth b/checkpoint-1342/rng_state_5.pth new file mode 100644 index 0000000000000000000000000000000000000000..16e613035c1857632894012691a773a356b72f82 --- /dev/null +++ b/checkpoint-1342/rng_state_5.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1eb45d48b5228206bfbe250df52624670318c63e699e4b04cfd8fa42c7f64fce +size 21687 diff --git a/checkpoint-1342/rng_state_6.pth b/checkpoint-1342/rng_state_6.pth new file mode 100644 index 0000000000000000000000000000000000000000..ea918985c15674f2dafbf647999a37e63f59e0b6 --- /dev/null +++ b/checkpoint-1342/rng_state_6.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:27412e3de31b0e30e26adbf40c704ecec9b8e455f827e305ca160b278a4365aa +size 21687 diff --git a/checkpoint-1342/rng_state_7.pth b/checkpoint-1342/rng_state_7.pth new file mode 100644 index 0000000000000000000000000000000000000000..28562df7d80486c65a61ed2c9223982247664629 --- /dev/null +++ b/checkpoint-1342/rng_state_7.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b2b3094e64b5629535e97823f748d5b982e768ec35871adfe4c4805a36fa8404 +size 21687 diff --git a/checkpoint-1342/scheduler.pt b/checkpoint-1342/scheduler.pt new file mode 100644 index 
0000000000000000000000000000000000000000..b11f32962471b3407b4fe4a28b2f50655f18bb81 --- /dev/null +++ b/checkpoint-1342/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c32c73ec98bf942958f548634a4f273cc399bb3f47a2f1432bdbd87bb217b529 +size 627 diff --git a/checkpoint-1342/trainer_state.json b/checkpoint-1342/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..a4865f43cbdcd93c1a5260450133bb924d909514 --- /dev/null +++ b/checkpoint-1342/trainer_state.json @@ -0,0 +1,8137 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.9806259314456036, + "eval_steps": 168, + "global_step": 1342, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 1e-05, + "loss": 1.3745, + "step": 1 + }, + { + "epoch": 0.0, + "eval_loss": 1.6296857595443726, + "eval_runtime": 2.6662, + "eval_samples_per_second": 409.572, + "eval_steps_per_second": 25.88, + "step": 1 + }, + { + "epoch": 0.0, + "learning_rate": 2e-05, + "loss": 1.42, + "step": 2 + }, + { + "epoch": 0.0, + "learning_rate": 3e-05, + "loss": 1.3057, + "step": 3 + }, + { + "epoch": 0.01, + "learning_rate": 4e-05, + "loss": 1.2307, + "step": 4 + }, + { + "epoch": 0.01, + "learning_rate": 5e-05, + "loss": 1.289, + "step": 5 + }, + { + "epoch": 0.01, + "learning_rate": 6e-05, + "loss": 1.4111, + "step": 6 + }, + { + "epoch": 0.01, + "learning_rate": 7e-05, + "loss": 1.3089, + "step": 7 + }, + { + "epoch": 0.01, + "learning_rate": 8e-05, + "loss": 1.3204, + "step": 8 + }, + { + "epoch": 0.01, + "learning_rate": 9e-05, + "loss": 1.3575, + "step": 9 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001, + "loss": 1.3279, + "step": 10 + }, + { + "epoch": 0.02, + "learning_rate": 0.00011000000000000002, + "loss": 1.3149, + "step": 11 + }, + { + "epoch": 0.02, + "learning_rate": 0.00012, + "loss": 1.2578, + "step": 12 + }, + { + "epoch": 0.02, + 
"learning_rate": 0.00013000000000000002, + "loss": 1.2849, + "step": 13 + }, + { + "epoch": 0.02, + "learning_rate": 0.00014, + "loss": 1.2971, + "step": 14 + }, + { + "epoch": 0.02, + "learning_rate": 0.00015000000000000001, + "loss": 1.1473, + "step": 15 + }, + { + "epoch": 0.02, + "learning_rate": 0.00016, + "loss": 1.1943, + "step": 16 + }, + { + "epoch": 0.03, + "learning_rate": 0.00017, + "loss": 1.1877, + "step": 17 + }, + { + "epoch": 0.03, + "learning_rate": 0.00018, + "loss": 1.1984, + "step": 18 + }, + { + "epoch": 0.03, + "learning_rate": 0.00019, + "loss": 1.2647, + "step": 19 + }, + { + "epoch": 0.03, + "learning_rate": 0.0002, + "loss": 1.217, + "step": 20 + }, + { + "epoch": 0.03, + "learning_rate": 0.00019999993046535236, + "loss": 1.0274, + "step": 21 + }, + { + "epoch": 0.03, + "learning_rate": 0.00019999972186150606, + "loss": 1.2122, + "step": 22 + }, + { + "epoch": 0.03, + "learning_rate": 0.00019999937418875124, + "loss": 1.1868, + "step": 23 + }, + { + "epoch": 0.04, + "learning_rate": 0.00019999888744757143, + "loss": 1.2345, + "step": 24 + }, + { + "epoch": 0.04, + "learning_rate": 0.00019999826163864348, + "loss": 1.2127, + "step": 25 + }, + { + "epoch": 0.04, + "learning_rate": 0.00019999749676283775, + "loss": 1.2114, + "step": 26 + }, + { + "epoch": 0.04, + "learning_rate": 0.00019999659282121792, + "loss": 1.2224, + "step": 27 + }, + { + "epoch": 0.04, + "learning_rate": 0.0001999955498150411, + "loss": 1.1517, + "step": 28 + }, + { + "epoch": 0.04, + "learning_rate": 0.0001999943677457578, + "loss": 1.1631, + "step": 29 + }, + { + "epoch": 0.04, + "learning_rate": 0.0001999930466150119, + "loss": 1.0465, + "step": 30 + }, + { + "epoch": 0.05, + "learning_rate": 0.0001999915864246407, + "loss": 1.1847, + "step": 31 + }, + { + "epoch": 0.05, + "learning_rate": 0.0001999899871766749, + "loss": 1.1238, + "step": 32 + }, + { + "epoch": 0.05, + "learning_rate": 0.0001999882488733385, + "loss": 1.1491, + "step": 33 + }, + { + "epoch": 0.05, 
+ "learning_rate": 0.000199986371517049, + "loss": 1.276, + "step": 34 + }, + { + "epoch": 0.05, + "learning_rate": 0.0001999843551104172, + "loss": 1.0911, + "step": 35 + }, + { + "epoch": 0.05, + "learning_rate": 0.00019998219965624734, + "loss": 1.1276, + "step": 36 + }, + { + "epoch": 0.06, + "learning_rate": 0.00019997990515753693, + "loss": 1.0981, + "step": 37 + }, + { + "epoch": 0.06, + "learning_rate": 0.00019997747161747695, + "loss": 1.0901, + "step": 38 + }, + { + "epoch": 0.06, + "learning_rate": 0.0001999748990394517, + "loss": 1.096, + "step": 39 + }, + { + "epoch": 0.06, + "learning_rate": 0.00019997218742703887, + "loss": 1.122, + "step": 40 + }, + { + "epoch": 0.06, + "learning_rate": 0.00019996933678400946, + "loss": 1.1132, + "step": 41 + }, + { + "epoch": 0.06, + "learning_rate": 0.00019996634711432786, + "loss": 1.1498, + "step": 42 + }, + { + "epoch": 0.06, + "learning_rate": 0.00019996321842215173, + "loss": 1.0708, + "step": 43 + }, + { + "epoch": 0.07, + "learning_rate": 0.0001999599507118322, + "loss": 1.1154, + "step": 44 + }, + { + "epoch": 0.07, + "learning_rate": 0.00019995654398791355, + "loss": 1.2118, + "step": 45 + }, + { + "epoch": 0.07, + "learning_rate": 0.00019995299825513357, + "loss": 1.0919, + "step": 46 + }, + { + "epoch": 0.07, + "learning_rate": 0.00019994931351842327, + "loss": 1.1364, + "step": 47 + }, + { + "epoch": 0.07, + "learning_rate": 0.00019994548978290695, + "loss": 1.1442, + "step": 48 + }, + { + "epoch": 0.07, + "learning_rate": 0.0001999415270539023, + "loss": 1.1248, + "step": 49 + }, + { + "epoch": 0.07, + "learning_rate": 0.00019993742533692022, + "loss": 1.1366, + "step": 50 + }, + { + "epoch": 0.08, + "learning_rate": 0.00019993318463766495, + "loss": 1.1437, + "step": 51 + }, + { + "epoch": 0.08, + "learning_rate": 0.000199928804962034, + "loss": 1.1191, + "step": 52 + }, + { + "epoch": 0.08, + "learning_rate": 0.0001999242863161182, + "loss": 1.0786, + "step": 53 + }, + { + "epoch": 0.08, + 
"learning_rate": 0.00019991962870620153, + "loss": 1.1951, + "step": 54 + }, + { + "epoch": 0.08, + "learning_rate": 0.00019991483213876134, + "loss": 1.1321, + "step": 55 + }, + { + "epoch": 0.08, + "learning_rate": 0.00019990989662046818, + "loss": 1.0876, + "step": 56 + }, + { + "epoch": 0.08, + "learning_rate": 0.0001999048221581858, + "loss": 1.1794, + "step": 57 + }, + { + "epoch": 0.09, + "learning_rate": 0.00019989960875897126, + "loss": 1.1796, + "step": 58 + }, + { + "epoch": 0.09, + "learning_rate": 0.00019989425643007476, + "loss": 1.1165, + "step": 59 + }, + { + "epoch": 0.09, + "learning_rate": 0.0001998887651789398, + "loss": 1.1978, + "step": 60 + }, + { + "epoch": 0.09, + "learning_rate": 0.00019988313501320297, + "loss": 1.1693, + "step": 61 + }, + { + "epoch": 0.09, + "learning_rate": 0.00019987736594069414, + "loss": 1.1553, + "step": 62 + }, + { + "epoch": 0.09, + "learning_rate": 0.0001998714579694363, + "loss": 1.1959, + "step": 63 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019986541110764565, + "loss": 1.1945, + "step": 64 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019985922536373146, + "loss": 1.121, + "step": 65 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019985290074629627, + "loss": 1.122, + "step": 66 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019984643726413565, + "loss": 1.1435, + "step": 67 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019983983492623833, + "loss": 1.0413, + "step": 68 + }, + { + "epoch": 0.1, + "learning_rate": 0.0001998330937417861, + "loss": 1.078, + "step": 69 + }, + { + "epoch": 0.1, + "learning_rate": 0.0001998262137201539, + "loss": 1.0811, + "step": 70 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019981919487090972, + "loss": 1.1639, + "step": 71 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019981203720381463, + "loss": 1.164, + "step": 72 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019980474072882277, + "loss": 1.1006, + "step": 73 + }, + { + "epoch": 0.11, + 
"learning_rate": 0.00019979730545608126, + "loss": 1.1926, + "step": 74 + }, + { + "epoch": 0.11, + "learning_rate": 0.0001997897313959303, + "loss": 1.1129, + "step": 75 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019978201855890308, + "loss": 1.1367, + "step": 76 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019977416695572578, + "loss": 1.1495, + "step": 77 + }, + { + "epoch": 0.12, + "learning_rate": 0.0001997661765973176, + "loss": 1.1567, + "step": 78 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019975804749479062, + "loss": 1.2102, + "step": 79 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019974977965945, + "loss": 1.1175, + "step": 80 + }, + { + "epoch": 0.12, + "learning_rate": 0.0001997413731027937, + "loss": 1.1243, + "step": 81 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019973282783651263, + "loss": 1.1406, + "step": 82 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019972414387249072, + "loss": 1.09, + "step": 83 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019971532122280464, + "loss": 1.0115, + "step": 84 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019970635989972402, + "loss": 1.0328, + "step": 85 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019969725991571128, + "loss": 1.1226, + "step": 86 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019968802128342172, + "loss": 1.0747, + "step": 87 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019967864401570343, + "loss": 1.119, + "step": 88 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019966912812559732, + "loss": 1.1125, + "step": 89 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019965947362633708, + "loss": 1.0734, + "step": 90 + }, + { + "epoch": 0.14, + "learning_rate": 0.0001996496805313491, + "loss": 1.1798, + "step": 91 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019963974885425266, + "loss": 1.1461, + "step": 92 + }, + { + "epoch": 0.14, + "learning_rate": 0.0001996296786088596, + "loss": 1.0397, + "step": 93 + }, + { + "epoch": 0.14, + 
"learning_rate": 0.00019961946980917456, + "loss": 1.17, + "step": 94 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019960912246939485, + "loss": 1.0679, + "step": 95 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019959863660391045, + "loss": 1.0839, + "step": 96 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019958801222730394, + "loss": 1.0937, + "step": 97 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019957724935435063, + "loss": 1.1668, + "step": 98 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019956634800001832, + "loss": 1.0858, + "step": 99 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019955530817946748, + "loss": 1.0935, + "step": 100 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019954412990805107, + "loss": 1.1046, + "step": 101 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019953281320131468, + "loss": 1.1319, + "step": 102 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019952135807499633, + "loss": 1.1108, + "step": 103 + }, + { + "epoch": 0.15, + "learning_rate": 0.0001995097645450266, + "loss": 1.0485, + "step": 104 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019949803262752855, + "loss": 1.0862, + "step": 105 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019948616233881768, + "loss": 1.268, + "step": 106 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019947415369540189, + "loss": 1.0926, + "step": 107 + }, + { + "epoch": 0.16, + "learning_rate": 0.0001994620067139815, + "loss": 1.1427, + "step": 108 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019944972141144928, + "loss": 1.0754, + "step": 109 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019943729780489027, + "loss": 1.0044, + "step": 110 + }, + { + "epoch": 0.17, + "learning_rate": 0.0001994247359115819, + "loss": 1.1304, + "step": 111 + }, + { + "epoch": 0.17, + "learning_rate": 0.00019941203574899393, + "loss": 1.1683, + "step": 112 + }, + { + "epoch": 0.17, + "learning_rate": 0.00019939919733478838, + "loss": 1.1559, + "step": 113 + }, + { + "epoch": 
0.17, + "learning_rate": 0.00019938622068681953, + "loss": 1.1879, + "step": 114 + }, + { + "epoch": 0.17, + "learning_rate": 0.00019937310582313392, + "loss": 1.0613, + "step": 115 + }, + { + "epoch": 0.17, + "learning_rate": 0.0001993598527619703, + "loss": 1.1196, + "step": 116 + }, + { + "epoch": 0.17, + "learning_rate": 0.0001993464615217596, + "loss": 1.0762, + "step": 117 + }, + { + "epoch": 0.18, + "learning_rate": 0.00019933293212112495, + "loss": 1.1059, + "step": 118 + }, + { + "epoch": 0.18, + "learning_rate": 0.00019931926457888156, + "loss": 1.0831, + "step": 119 + }, + { + "epoch": 0.18, + "learning_rate": 0.00019930545891403678, + "loss": 1.0552, + "step": 120 + }, + { + "epoch": 0.18, + "learning_rate": 0.00019929151514579008, + "loss": 1.15, + "step": 121 + }, + { + "epoch": 0.18, + "learning_rate": 0.00019927743329353295, + "loss": 1.1038, + "step": 122 + }, + { + "epoch": 0.18, + "learning_rate": 0.0001992632133768489, + "loss": 1.067, + "step": 123 + }, + { + "epoch": 0.18, + "learning_rate": 0.0001992488554155135, + "loss": 1.1311, + "step": 124 + }, + { + "epoch": 0.19, + "learning_rate": 0.00019923435942949426, + "loss": 1.1402, + "step": 125 + }, + { + "epoch": 0.19, + "learning_rate": 0.00019921972543895066, + "loss": 1.0453, + "step": 126 + }, + { + "epoch": 0.19, + "learning_rate": 0.00019920495346423402, + "loss": 1.1567, + "step": 127 + }, + { + "epoch": 0.19, + "learning_rate": 0.00019919004352588767, + "loss": 1.137, + "step": 128 + }, + { + "epoch": 0.19, + "learning_rate": 0.0001991749956446468, + "loss": 0.9986, + "step": 129 + }, + { + "epoch": 0.19, + "learning_rate": 0.00019915980984143832, + "loss": 1.083, + "step": 130 + }, + { + "epoch": 0.2, + "learning_rate": 0.00019914448613738106, + "loss": 1.0619, + "step": 131 + }, + { + "epoch": 0.2, + "learning_rate": 0.00019912902455378556, + "loss": 1.1294, + "step": 132 + }, + { + "epoch": 0.2, + "learning_rate": 0.00019911342511215414, + "loss": 1.0965, + "step": 133 + }, + { + 
"epoch": 0.2, + "learning_rate": 0.00019909768783418086, + "loss": 1.0216, + "step": 134 + }, + { + "epoch": 0.2, + "learning_rate": 0.00019908181274175138, + "loss": 1.0081, + "step": 135 + }, + { + "epoch": 0.2, + "learning_rate": 0.0001990657998569432, + "loss": 1.0246, + "step": 136 + }, + { + "epoch": 0.2, + "learning_rate": 0.0001990496492020252, + "loss": 1.1249, + "step": 137 + }, + { + "epoch": 0.21, + "learning_rate": 0.00019903336079945804, + "loss": 1.0518, + "step": 138 + }, + { + "epoch": 0.21, + "learning_rate": 0.00019901693467189386, + "loss": 1.189, + "step": 139 + }, + { + "epoch": 0.21, + "learning_rate": 0.00019900037084217637, + "loss": 1.1475, + "step": 140 + }, + { + "epoch": 0.21, + "learning_rate": 0.0001989836693333408, + "loss": 1.2259, + "step": 141 + }, + { + "epoch": 0.21, + "learning_rate": 0.0001989668301686138, + "loss": 1.0399, + "step": 142 + }, + { + "epoch": 0.21, + "learning_rate": 0.0001989498533714135, + "loss": 1.128, + "step": 143 + }, + { + "epoch": 0.21, + "learning_rate": 0.00019893273896534936, + "loss": 1.014, + "step": 144 + }, + { + "epoch": 0.22, + "learning_rate": 0.0001989154869742223, + "loss": 1.1552, + "step": 145 + }, + { + "epoch": 0.22, + "learning_rate": 0.00019889809742202455, + "loss": 1.1159, + "step": 146 + }, + { + "epoch": 0.22, + "learning_rate": 0.0001988805703329396, + "loss": 1.0218, + "step": 147 + }, + { + "epoch": 0.22, + "learning_rate": 0.00019886290573134228, + "loss": 1.1723, + "step": 148 + }, + { + "epoch": 0.22, + "learning_rate": 0.0001988451036417986, + "loss": 1.2132, + "step": 149 + }, + { + "epoch": 0.22, + "learning_rate": 0.00019882716408906585, + "loss": 1.112, + "step": 150 + }, + { + "epoch": 0.23, + "learning_rate": 0.0001988090870980924, + "loss": 1.0856, + "step": 151 + }, + { + "epoch": 0.23, + "learning_rate": 0.0001987908726940178, + "loss": 1.0951, + "step": 152 + }, + { + "epoch": 0.23, + "learning_rate": 0.00019877252090217271, + "loss": 1.0218, + "step": 153 + }, + { 
+ "epoch": 0.23, + "learning_rate": 0.00019875403174807882, + "loss": 1.0552, + "step": 154 + }, + { + "epoch": 0.23, + "learning_rate": 0.00019873540525744887, + "loss": 1.1481, + "step": 155 + }, + { + "epoch": 0.23, + "learning_rate": 0.00019871664145618657, + "loss": 1.169, + "step": 156 + }, + { + "epoch": 0.23, + "learning_rate": 0.00019869774037038665, + "loss": 1.0802, + "step": 157 + }, + { + "epoch": 0.24, + "learning_rate": 0.0001986787020263347, + "loss": 1.0871, + "step": 158 + }, + { + "epoch": 0.24, + "learning_rate": 0.0001986595264505072, + "loss": 1.1022, + "step": 159 + }, + { + "epoch": 0.24, + "learning_rate": 0.00019864021366957147, + "loss": 1.0257, + "step": 160 + }, + { + "epoch": 0.24, + "learning_rate": 0.0001986207637103857, + "loss": 1.0986, + "step": 161 + }, + { + "epoch": 0.24, + "learning_rate": 0.00019860117659999878, + "loss": 1.0837, + "step": 162 + }, + { + "epoch": 0.24, + "learning_rate": 0.00019858145236565037, + "loss": 1.1895, + "step": 163 + }, + { + "epoch": 0.24, + "learning_rate": 0.00019856159103477086, + "loss": 1.052, + "step": 164 + }, + { + "epoch": 0.25, + "learning_rate": 0.00019854159263498123, + "loss": 1.1184, + "step": 165 + }, + { + "epoch": 0.25, + "learning_rate": 0.0001985214571940931, + "loss": 1.0895, + "step": 166 + }, + { + "epoch": 0.25, + "learning_rate": 0.00019850118474010872, + "loss": 1.0764, + "step": 167 + }, + { + "epoch": 0.25, + "learning_rate": 0.00019848077530122083, + "loss": 1.1387, + "step": 168 + }, + { + "epoch": 0.25, + "eval_loss": 1.084919810295105, + "eval_runtime": 2.6029, + "eval_samples_per_second": 419.538, + "eval_steps_per_second": 26.509, + "step": 168 + }, + { + "epoch": 0.25, + "learning_rate": 0.00019846022890581267, + "loss": 1.0826, + "step": 169 + }, + { + "epoch": 0.25, + "learning_rate": 0.000198439545582458, + "loss": 1.1366, + "step": 170 + }, + { + "epoch": 0.25, + "learning_rate": 0.000198418725359921, + "loss": 1.1349, + "step": 171 + }, + { + "epoch": 0.26, + 
"learning_rate": 0.00019839776826715614, + "loss": 1.0636, + "step": 172 + }, + { + "epoch": 0.26, + "learning_rate": 0.00019837667433330838, + "loss": 1.1216, + "step": 173 + }, + { + "epoch": 0.26, + "learning_rate": 0.0001983554435877128, + "loss": 1.1051, + "step": 174 + }, + { + "epoch": 0.26, + "learning_rate": 0.00019833407605989494, + "loss": 1.1558, + "step": 175 + }, + { + "epoch": 0.26, + "learning_rate": 0.00019831257177957044, + "loss": 1.0364, + "step": 176 + }, + { + "epoch": 0.26, + "learning_rate": 0.00019829093077664513, + "loss": 1.0665, + "step": 177 + }, + { + "epoch": 0.27, + "learning_rate": 0.00019826915308121504, + "loss": 1.1994, + "step": 178 + }, + { + "epoch": 0.27, + "learning_rate": 0.0001982472387235662, + "loss": 1.1434, + "step": 179 + }, + { + "epoch": 0.27, + "learning_rate": 0.0001982251877341748, + "loss": 1.081, + "step": 180 + }, + { + "epoch": 0.27, + "learning_rate": 0.000198203000143707, + "loss": 1.0653, + "step": 181 + }, + { + "epoch": 0.27, + "learning_rate": 0.0001981806759830189, + "loss": 1.0269, + "step": 182 + }, + { + "epoch": 0.27, + "learning_rate": 0.0001981582152831566, + "loss": 1.1167, + "step": 183 + }, + { + "epoch": 0.27, + "learning_rate": 0.00019813561807535598, + "loss": 1.0608, + "step": 184 + }, + { + "epoch": 0.28, + "learning_rate": 0.0001981128843910428, + "loss": 1.0989, + "step": 185 + }, + { + "epoch": 0.28, + "learning_rate": 0.0001980900142618327, + "loss": 1.1405, + "step": 186 + }, + { + "epoch": 0.28, + "learning_rate": 0.00019806700771953097, + "loss": 1.0359, + "step": 187 + }, + { + "epoch": 0.28, + "learning_rate": 0.0001980438647961327, + "loss": 1.1073, + "step": 188 + }, + { + "epoch": 0.28, + "learning_rate": 0.0001980205855238225, + "loss": 1.0338, + "step": 189 + }, + { + "epoch": 0.28, + "learning_rate": 0.00019799716993497475, + "loss": 1.1285, + "step": 190 + }, + { + "epoch": 0.28, + "learning_rate": 0.00019797361806215332, + "loss": 1.1277, + "step": 191 + }, + { + "epoch": 
0.29, + "learning_rate": 0.00019794992993811165, + "loss": 1.119, + "step": 192 + }, + { + "epoch": 0.29, + "learning_rate": 0.00019792610559579265, + "loss": 1.1224, + "step": 193 + }, + { + "epoch": 0.29, + "learning_rate": 0.00019790214506832868, + "loss": 1.1438, + "step": 194 + }, + { + "epoch": 0.29, + "learning_rate": 0.0001978780483890414, + "loss": 1.1462, + "step": 195 + }, + { + "epoch": 0.29, + "learning_rate": 0.00019785381559144196, + "loss": 1.042, + "step": 196 + }, + { + "epoch": 0.29, + "learning_rate": 0.00019782944670923076, + "loss": 1.1022, + "step": 197 + }, + { + "epoch": 0.3, + "learning_rate": 0.00019780494177629735, + "loss": 1.0564, + "step": 198 + }, + { + "epoch": 0.3, + "learning_rate": 0.00019778030082672068, + "loss": 1.0471, + "step": 199 + }, + { + "epoch": 0.3, + "learning_rate": 0.00019775552389476864, + "loss": 1.0636, + "step": 200 + }, + { + "epoch": 0.3, + "learning_rate": 0.0001977306110148984, + "loss": 1.0917, + "step": 201 + }, + { + "epoch": 0.3, + "learning_rate": 0.00019770556222175608, + "loss": 1.1965, + "step": 202 + }, + { + "epoch": 0.3, + "learning_rate": 0.00019768037755017685, + "loss": 1.073, + "step": 203 + }, + { + "epoch": 0.3, + "learning_rate": 0.00019765505703518496, + "loss": 1.0636, + "step": 204 + }, + { + "epoch": 0.31, + "learning_rate": 0.00019762960071199333, + "loss": 1.087, + "step": 205 + }, + { + "epoch": 0.31, + "learning_rate": 0.000197604008616004, + "loss": 1.0569, + "step": 206 + }, + { + "epoch": 0.31, + "learning_rate": 0.00019757828078280766, + "loss": 1.08, + "step": 207 + }, + { + "epoch": 0.31, + "learning_rate": 0.00019755241724818387, + "loss": 1.1536, + "step": 208 + }, + { + "epoch": 0.31, + "learning_rate": 0.00019752641804810084, + "loss": 1.1514, + "step": 209 + }, + { + "epoch": 0.31, + "learning_rate": 0.00019750028321871546, + "loss": 1.0691, + "step": 210 + }, + { + "epoch": 0.31, + "learning_rate": 0.00019747401279637325, + "loss": 1.1289, + "step": 211 + }, + { + 
"epoch": 0.32, + "learning_rate": 0.00019744760681760832, + "loss": 1.0834, + "step": 212 + }, + { + "epoch": 0.32, + "learning_rate": 0.00019742106531914328, + "loss": 1.0762, + "step": 213 + }, + { + "epoch": 0.32, + "learning_rate": 0.0001973943883378892, + "loss": 1.0913, + "step": 214 + }, + { + "epoch": 0.32, + "learning_rate": 0.00019736757591094558, + "loss": 1.132, + "step": 215 + }, + { + "epoch": 0.32, + "learning_rate": 0.00019734062807560027, + "loss": 1.0894, + "step": 216 + }, + { + "epoch": 0.32, + "learning_rate": 0.00019731354486932944, + "loss": 1.0327, + "step": 217 + }, + { + "epoch": 0.32, + "learning_rate": 0.00019728632632979746, + "loss": 1.112, + "step": 218 + }, + { + "epoch": 0.33, + "learning_rate": 0.00019725897249485704, + "loss": 1.0718, + "step": 219 + }, + { + "epoch": 0.33, + "learning_rate": 0.00019723148340254892, + "loss": 1.077, + "step": 220 + }, + { + "epoch": 0.33, + "learning_rate": 0.00019720385909110198, + "loss": 1.0335, + "step": 221 + }, + { + "epoch": 0.33, + "learning_rate": 0.00019717609959893318, + "loss": 1.0483, + "step": 222 + }, + { + "epoch": 0.33, + "learning_rate": 0.00019714820496464746, + "loss": 1.0901, + "step": 223 + }, + { + "epoch": 0.33, + "learning_rate": 0.00019712017522703764, + "loss": 0.9921, + "step": 224 + }, + { + "epoch": 0.34, + "learning_rate": 0.00019709201042508455, + "loss": 1.0829, + "step": 225 + }, + { + "epoch": 0.34, + "learning_rate": 0.0001970637105979567, + "loss": 1.0705, + "step": 226 + }, + { + "epoch": 0.34, + "learning_rate": 0.0001970352757850105, + "loss": 1.0481, + "step": 227 + }, + { + "epoch": 0.34, + "learning_rate": 0.00019700670602579008, + "loss": 0.9846, + "step": 228 + }, + { + "epoch": 0.34, + "learning_rate": 0.0001969780013600272, + "loss": 1.1492, + "step": 229 + }, + { + "epoch": 0.34, + "learning_rate": 0.00019694916182764113, + "loss": 1.1745, + "step": 230 + }, + { + "epoch": 0.34, + "learning_rate": 0.00019692018746873892, + "loss": 1.0451, + "step": 
231 + }, + { + "epoch": 0.35, + "learning_rate": 0.00019689107832361496, + "loss": 1.1217, + "step": 232 + }, + { + "epoch": 0.35, + "learning_rate": 0.00019686183443275116, + "loss": 1.0788, + "step": 233 + }, + { + "epoch": 0.35, + "learning_rate": 0.00019683245583681675, + "loss": 1.0703, + "step": 234 + }, + { + "epoch": 0.35, + "learning_rate": 0.00019680294257666837, + "loss": 1.1521, + "step": 235 + }, + { + "epoch": 0.35, + "learning_rate": 0.0001967732946933499, + "loss": 1.0659, + "step": 236 + }, + { + "epoch": 0.35, + "learning_rate": 0.00019674351222809242, + "loss": 1.0321, + "step": 237 + }, + { + "epoch": 0.35, + "learning_rate": 0.0001967135952223142, + "loss": 1.0555, + "step": 238 + }, + { + "epoch": 0.36, + "learning_rate": 0.00019668354371762066, + "loss": 1.0648, + "step": 239 + }, + { + "epoch": 0.36, + "learning_rate": 0.00019665335775580415, + "loss": 1.0723, + "step": 240 + }, + { + "epoch": 0.36, + "learning_rate": 0.0001966230373788441, + "loss": 1.0264, + "step": 241 + }, + { + "epoch": 0.36, + "learning_rate": 0.00019659258262890683, + "loss": 1.0331, + "step": 242 + }, + { + "epoch": 0.36, + "learning_rate": 0.00019656199354834558, + "loss": 1.1514, + "step": 243 + }, + { + "epoch": 0.36, + "learning_rate": 0.00019653127017970034, + "loss": 1.069, + "step": 244 + }, + { + "epoch": 0.37, + "learning_rate": 0.00019650041256569792, + "loss": 0.9623, + "step": 245 + }, + { + "epoch": 0.37, + "learning_rate": 0.00019646942074925172, + "loss": 1.0021, + "step": 246 + }, + { + "epoch": 0.37, + "learning_rate": 0.00019643829477346188, + "loss": 1.1131, + "step": 247 + }, + { + "epoch": 0.37, + "learning_rate": 0.0001964070346816151, + "loss": 1.1426, + "step": 248 + }, + { + "epoch": 0.37, + "learning_rate": 0.0001963756405171845, + "loss": 1.0761, + "step": 249 + }, + { + "epoch": 0.37, + "learning_rate": 0.00019634411232382978, + "loss": 1.1112, + "step": 250 + }, + { + "epoch": 0.37, + "learning_rate": 0.00019631245014539698, + "loss": 
1.081, + "step": 251 + }, + { + "epoch": 0.38, + "learning_rate": 0.00019628065402591845, + "loss": 1.1446, + "step": 252 + }, + { + "epoch": 0.38, + "learning_rate": 0.00019624872400961284, + "loss": 1.045, + "step": 253 + }, + { + "epoch": 0.38, + "learning_rate": 0.00019621666014088494, + "loss": 1.0337, + "step": 254 + }, + { + "epoch": 0.38, + "learning_rate": 0.00019618446246432583, + "loss": 1.1764, + "step": 255 + }, + { + "epoch": 0.38, + "learning_rate": 0.00019615213102471257, + "loss": 1.0323, + "step": 256 + }, + { + "epoch": 0.38, + "learning_rate": 0.00019611966586700823, + "loss": 1.0073, + "step": 257 + }, + { + "epoch": 0.38, + "learning_rate": 0.00019608706703636188, + "loss": 1.1615, + "step": 258 + }, + { + "epoch": 0.39, + "learning_rate": 0.00019605433457810855, + "loss": 1.1209, + "step": 259 + }, + { + "epoch": 0.39, + "learning_rate": 0.00019602146853776894, + "loss": 1.0721, + "step": 260 + }, + { + "epoch": 0.39, + "learning_rate": 0.0001959884689610497, + "loss": 1.0967, + "step": 261 + }, + { + "epoch": 0.39, + "learning_rate": 0.00019595533589384308, + "loss": 1.0284, + "step": 262 + }, + { + "epoch": 0.39, + "learning_rate": 0.00019592206938222703, + "loss": 1.0148, + "step": 263 + }, + { + "epoch": 0.39, + "learning_rate": 0.00019588866947246498, + "loss": 1.1434, + "step": 264 + }, + { + "epoch": 0.39, + "learning_rate": 0.00019585513621100603, + "loss": 1.1125, + "step": 265 + }, + { + "epoch": 0.4, + "learning_rate": 0.0001958214696444846, + "loss": 1.0812, + "step": 266 + }, + { + "epoch": 0.4, + "learning_rate": 0.00019578766981972058, + "loss": 1.0611, + "step": 267 + }, + { + "epoch": 0.4, + "learning_rate": 0.00019575373678371909, + "loss": 1.1029, + "step": 268 + }, + { + "epoch": 0.4, + "learning_rate": 0.00019571967058367064, + "loss": 1.0692, + "step": 269 + }, + { + "epoch": 0.4, + "learning_rate": 0.00019568547126695083, + "loss": 1.0581, + "step": 270 + }, + { + "epoch": 0.4, + "learning_rate": 0.00019565113888112036, 
+ "loss": 0.9841, + "step": 271 + }, + { + "epoch": 0.41, + "learning_rate": 0.00019561667347392508, + "loss": 1.0173, + "step": 272 + }, + { + "epoch": 0.41, + "learning_rate": 0.00019558207509329584, + "loss": 1.0805, + "step": 273 + }, + { + "epoch": 0.41, + "learning_rate": 0.00019554734378734824, + "loss": 1.088, + "step": 274 + }, + { + "epoch": 0.41, + "learning_rate": 0.00019551247960438296, + "loss": 1.0481, + "step": 275 + }, + { + "epoch": 0.41, + "learning_rate": 0.00019547748259288536, + "loss": 1.1747, + "step": 276 + }, + { + "epoch": 0.41, + "learning_rate": 0.0001954423528015255, + "loss": 1.0407, + "step": 277 + }, + { + "epoch": 0.41, + "learning_rate": 0.00019540709027915818, + "loss": 1.1412, + "step": 278 + }, + { + "epoch": 0.42, + "learning_rate": 0.0001953716950748227, + "loss": 1.075, + "step": 279 + }, + { + "epoch": 0.42, + "learning_rate": 0.00019533616723774294, + "loss": 0.9863, + "step": 280 + }, + { + "epoch": 0.42, + "learning_rate": 0.0001953005068173272, + "loss": 1.1426, + "step": 281 + }, + { + "epoch": 0.42, + "learning_rate": 0.0001952647138631682, + "loss": 1.0621, + "step": 282 + }, + { + "epoch": 0.42, + "learning_rate": 0.00019522878842504295, + "loss": 1.1007, + "step": 283 + }, + { + "epoch": 0.42, + "learning_rate": 0.00019519273055291266, + "loss": 1.0632, + "step": 284 + }, + { + "epoch": 0.42, + "learning_rate": 0.00019515654029692278, + "loss": 1.126, + "step": 285 + }, + { + "epoch": 0.43, + "learning_rate": 0.00019512021770740288, + "loss": 1.0946, + "step": 286 + }, + { + "epoch": 0.43, + "learning_rate": 0.0001950837628348665, + "loss": 1.0639, + "step": 287 + }, + { + "epoch": 0.43, + "learning_rate": 0.00019504717573001117, + "loss": 1.1432, + "step": 288 + }, + { + "epoch": 0.43, + "learning_rate": 0.00019501045644371832, + "loss": 1.0619, + "step": 289 + }, + { + "epoch": 0.43, + "learning_rate": 0.0001949736050270532, + "loss": 1.0597, + "step": 290 + }, + { + "epoch": 0.43, + "learning_rate": 
0.00019493662153126481, + "loss": 1.0743, + "step": 291 + }, + { + "epoch": 0.44, + "learning_rate": 0.0001948995060077859, + "loss": 1.1114, + "step": 292 + }, + { + "epoch": 0.44, + "learning_rate": 0.00019486225850823266, + "loss": 1.1435, + "step": 293 + }, + { + "epoch": 0.44, + "learning_rate": 0.000194824879084405, + "loss": 1.1396, + "step": 294 + }, + { + "epoch": 0.44, + "learning_rate": 0.00019478736778828624, + "loss": 1.1597, + "step": 295 + }, + { + "epoch": 0.44, + "learning_rate": 0.00019474972467204297, + "loss": 1.0976, + "step": 296 + }, + { + "epoch": 0.44, + "learning_rate": 0.00019471194978802533, + "loss": 1.0829, + "step": 297 + }, + { + "epoch": 0.44, + "learning_rate": 0.0001946740431887665, + "loss": 1.0437, + "step": 298 + }, + { + "epoch": 0.45, + "learning_rate": 0.00019463600492698296, + "loss": 1.0835, + "step": 299 + }, + { + "epoch": 0.45, + "learning_rate": 0.00019459783505557424, + "loss": 1.0558, + "step": 300 + }, + { + "epoch": 0.45, + "learning_rate": 0.0001945595336276229, + "loss": 1.0656, + "step": 301 + }, + { + "epoch": 0.45, + "learning_rate": 0.00019452110069639452, + "loss": 1.1487, + "step": 302 + }, + { + "epoch": 0.45, + "learning_rate": 0.00019448253631533744, + "loss": 1.1383, + "step": 303 + }, + { + "epoch": 0.45, + "learning_rate": 0.00019444384053808288, + "loss": 1.1582, + "step": 304 + }, + { + "epoch": 0.45, + "learning_rate": 0.00019440501341844483, + "loss": 0.9999, + "step": 305 + }, + { + "epoch": 0.46, + "learning_rate": 0.00019436605501041987, + "loss": 1.1317, + "step": 306 + }, + { + "epoch": 0.46, + "learning_rate": 0.00019432696536818717, + "loss": 1.0944, + "step": 307 + }, + { + "epoch": 0.46, + "learning_rate": 0.00019428774454610843, + "loss": 1.1624, + "step": 308 + }, + { + "epoch": 0.46, + "learning_rate": 0.00019424839259872778, + "loss": 1.1644, + "step": 309 + }, + { + "epoch": 0.46, + "learning_rate": 0.00019420890958077167, + "loss": 1.0486, + "step": 310 + }, + { + "epoch": 0.46, + 
"learning_rate": 0.00019416929554714888, + "loss": 1.0705, + "step": 311 + }, + { + "epoch": 0.46, + "learning_rate": 0.00019412955055295034, + "loss": 1.023, + "step": 312 + }, + { + "epoch": 0.47, + "learning_rate": 0.00019408967465344917, + "loss": 1.1144, + "step": 313 + }, + { + "epoch": 0.47, + "learning_rate": 0.00019404966790410047, + "loss": 1.0378, + "step": 314 + }, + { + "epoch": 0.47, + "learning_rate": 0.00019400953036054138, + "loss": 1.036, + "step": 315 + }, + { + "epoch": 0.47, + "learning_rate": 0.00019396926207859084, + "loss": 1.0735, + "step": 316 + }, + { + "epoch": 0.47, + "learning_rate": 0.00019392886311424973, + "loss": 1.0259, + "step": 317 + }, + { + "epoch": 0.47, + "learning_rate": 0.0001938883335237006, + "loss": 1.1603, + "step": 318 + }, + { + "epoch": 0.48, + "learning_rate": 0.0001938476733633076, + "loss": 1.1282, + "step": 319 + }, + { + "epoch": 0.48, + "learning_rate": 0.0001938068826896166, + "loss": 1.063, + "step": 320 + }, + { + "epoch": 0.48, + "learning_rate": 0.00019376596155935486, + "loss": 1.1176, + "step": 321 + }, + { + "epoch": 0.48, + "learning_rate": 0.00019372491002943112, + "loss": 1.1307, + "step": 322 + }, + { + "epoch": 0.48, + "learning_rate": 0.00019368372815693549, + "loss": 1.0412, + "step": 323 + }, + { + "epoch": 0.48, + "learning_rate": 0.00019364241599913924, + "loss": 1.1353, + "step": 324 + }, + { + "epoch": 0.48, + "learning_rate": 0.00019360097361349494, + "loss": 1.1293, + "step": 325 + }, + { + "epoch": 0.49, + "learning_rate": 0.0001935594010576362, + "loss": 1.0885, + "step": 326 + }, + { + "epoch": 0.49, + "learning_rate": 0.00019351769838937775, + "loss": 1.0944, + "step": 327 + }, + { + "epoch": 0.49, + "learning_rate": 0.00019347586566671512, + "loss": 1.1435, + "step": 328 + }, + { + "epoch": 0.49, + "learning_rate": 0.0001934339029478248, + "loss": 1.1217, + "step": 329 + }, + { + "epoch": 0.49, + "learning_rate": 0.00019339181029106404, + "loss": 1.1801, + "step": 330 + }, + { + 
"epoch": 0.49, + "learning_rate": 0.00019334958775497083, + "loss": 1.1846, + "step": 331 + }, + { + "epoch": 0.49, + "learning_rate": 0.00019330723539826375, + "loss": 1.0897, + "step": 332 + }, + { + "epoch": 0.5, + "learning_rate": 0.00019326475327984192, + "loss": 1.0643, + "step": 333 + }, + { + "epoch": 0.5, + "learning_rate": 0.00019322214145878487, + "loss": 1.0246, + "step": 334 + }, + { + "epoch": 0.5, + "learning_rate": 0.0001931793999943526, + "loss": 1.1108, + "step": 335 + }, + { + "epoch": 0.5, + "learning_rate": 0.00019313652894598543, + "loss": 1.0619, + "step": 336 + }, + { + "epoch": 0.5, + "eval_loss": 1.048388123512268, + "eval_runtime": 2.6045, + "eval_samples_per_second": 419.273, + "eval_steps_per_second": 26.493, + "step": 336 + }, + { + "epoch": 0.5, + "learning_rate": 0.00019309352837330372, + "loss": 1.0014, + "step": 337 + }, + { + "epoch": 0.5, + "learning_rate": 0.0001930503983361081, + "loss": 1.0786, + "step": 338 + }, + { + "epoch": 0.51, + "learning_rate": 0.00019300713889437926, + "loss": 1.014, + "step": 339 + }, + { + "epoch": 0.51, + "learning_rate": 0.00019296375010827773, + "loss": 1.1233, + "step": 340 + }, + { + "epoch": 0.51, + "learning_rate": 0.000192920232038144, + "loss": 1.1052, + "step": 341 + }, + { + "epoch": 0.51, + "learning_rate": 0.0001928765847444984, + "loss": 1.0138, + "step": 342 + }, + { + "epoch": 0.51, + "learning_rate": 0.00019283280828804081, + "loss": 1.1536, + "step": 343 + }, + { + "epoch": 0.51, + "learning_rate": 0.00019278890272965096, + "loss": 0.992, + "step": 344 + }, + { + "epoch": 0.51, + "learning_rate": 0.0001927448681303879, + "loss": 1.1165, + "step": 345 + }, + { + "epoch": 0.52, + "learning_rate": 0.0001927007045514903, + "loss": 1.0565, + "step": 346 + }, + { + "epoch": 0.52, + "learning_rate": 0.00019265641205437611, + "loss": 1.0664, + "step": 347 + }, + { + "epoch": 0.52, + "learning_rate": 0.0001926119907006426, + "loss": 1.0625, + "step": 348 + }, + { + "epoch": 0.52, + 
"learning_rate": 0.00019256744055206622, + "loss": 1.0393, + "step": 349 + }, + { + "epoch": 0.52, + "learning_rate": 0.0001925227616706026, + "loss": 1.125, + "step": 350 + }, + { + "epoch": 0.52, + "learning_rate": 0.00019247795411838627, + "loss": 1.0375, + "step": 351 + }, + { + "epoch": 0.52, + "learning_rate": 0.00019243301795773086, + "loss": 1.0648, + "step": 352 + }, + { + "epoch": 0.53, + "learning_rate": 0.0001923879532511287, + "loss": 1.0903, + "step": 353 + }, + { + "epoch": 0.53, + "learning_rate": 0.000192342760061251, + "loss": 1.1219, + "step": 354 + }, + { + "epoch": 0.53, + "learning_rate": 0.00019229743845094755, + "loss": 1.054, + "step": 355 + }, + { + "epoch": 0.53, + "learning_rate": 0.0001922519884832469, + "loss": 1.1206, + "step": 356 + }, + { + "epoch": 0.53, + "learning_rate": 0.00019220641022135588, + "loss": 1.1125, + "step": 357 + }, + { + "epoch": 0.53, + "learning_rate": 0.00019216070372865996, + "loss": 1.064, + "step": 358 + }, + { + "epoch": 0.54, + "learning_rate": 0.0001921148690687228, + "loss": 1.0843, + "step": 359 + }, + { + "epoch": 0.54, + "learning_rate": 0.00019206890630528634, + "loss": 1.1378, + "step": 360 + }, + { + "epoch": 0.54, + "learning_rate": 0.00019202281550227064, + "loss": 1.0399, + "step": 361 + }, + { + "epoch": 0.54, + "learning_rate": 0.0001919765967237739, + "loss": 1.1762, + "step": 362 + }, + { + "epoch": 0.54, + "learning_rate": 0.0001919302500340722, + "loss": 1.0538, + "step": 363 + }, + { + "epoch": 0.54, + "learning_rate": 0.00019188377549761963, + "loss": 1.0343, + "step": 364 + }, + { + "epoch": 0.54, + "learning_rate": 0.0001918371731790479, + "loss": 1.1027, + "step": 365 + }, + { + "epoch": 0.55, + "learning_rate": 0.00019179044314316664, + "loss": 1.036, + "step": 366 + }, + { + "epoch": 0.55, + "learning_rate": 0.00019174358545496288, + "loss": 1.041, + "step": 367 + }, + { + "epoch": 0.55, + "learning_rate": 0.00019169660017960137, + "loss": 1.0762, + "step": 368 + }, + { + "epoch": 
0.55, + "learning_rate": 0.00019164948738242409, + "loss": 1.0807, + "step": 369 + }, + { + "epoch": 0.55, + "learning_rate": 0.00019160224712895055, + "loss": 1.037, + "step": 370 + }, + { + "epoch": 0.55, + "learning_rate": 0.00019155487948487748, + "loss": 1.0625, + "step": 371 + }, + { + "epoch": 0.55, + "learning_rate": 0.0001915073845160786, + "loss": 1.062, + "step": 372 + }, + { + "epoch": 0.56, + "learning_rate": 0.00019145976228860496, + "loss": 1.1882, + "step": 373 + }, + { + "epoch": 0.56, + "learning_rate": 0.00019141201286868435, + "loss": 1.1338, + "step": 374 + }, + { + "epoch": 0.56, + "learning_rate": 0.00019136413632272163, + "loss": 1.0174, + "step": 375 + }, + { + "epoch": 0.56, + "learning_rate": 0.00019131613271729833, + "loss": 1.0585, + "step": 376 + }, + { + "epoch": 0.56, + "learning_rate": 0.00019126800211917276, + "loss": 1.0495, + "step": 377 + }, + { + "epoch": 0.56, + "learning_rate": 0.0001912197445952798, + "loss": 1.123, + "step": 378 + }, + { + "epoch": 0.56, + "learning_rate": 0.00019117136021273075, + "loss": 1.0517, + "step": 379 + }, + { + "epoch": 0.57, + "learning_rate": 0.0001911228490388136, + "loss": 1.0545, + "step": 380 + }, + { + "epoch": 0.57, + "learning_rate": 0.00019107421114099237, + "loss": 1.0302, + "step": 381 + }, + { + "epoch": 0.57, + "learning_rate": 0.00019102544658690748, + "loss": 1.0908, + "step": 382 + }, + { + "epoch": 0.57, + "learning_rate": 0.00019097655544437545, + "loss": 1.1425, + "step": 383 + }, + { + "epoch": 0.57, + "learning_rate": 0.00019092753778138886, + "loss": 1.0686, + "step": 384 + }, + { + "epoch": 0.57, + "learning_rate": 0.0001908783936661162, + "loss": 1.06, + "step": 385 + }, + { + "epoch": 0.58, + "learning_rate": 0.0001908291231669019, + "loss": 1.1296, + "step": 386 + }, + { + "epoch": 0.58, + "learning_rate": 0.00019077972635226604, + "loss": 1.1029, + "step": 387 + }, + { + "epoch": 0.58, + "learning_rate": 0.00019073020329090444, + "loss": 1.0469, + "step": 388 + }, + { 
+ "epoch": 0.58, + "learning_rate": 0.0001906805540516885, + "loss": 1.0427, + "step": 389 + }, + { + "epoch": 0.58, + "learning_rate": 0.000190630778703665, + "loss": 1.0075, + "step": 390 + }, + { + "epoch": 0.58, + "learning_rate": 0.00019058087731605624, + "loss": 1.1146, + "step": 391 + }, + { + "epoch": 0.58, + "learning_rate": 0.0001905308499582597, + "loss": 1.1161, + "step": 392 + }, + { + "epoch": 0.59, + "learning_rate": 0.00019048069669984802, + "loss": 1.1419, + "step": 393 + }, + { + "epoch": 0.59, + "learning_rate": 0.00019043041761056907, + "loss": 1.1586, + "step": 394 + }, + { + "epoch": 0.59, + "learning_rate": 0.00019038001276034557, + "loss": 1.0765, + "step": 395 + }, + { + "epoch": 0.59, + "learning_rate": 0.00019032948221927524, + "loss": 1.1225, + "step": 396 + }, + { + "epoch": 0.59, + "learning_rate": 0.0001902788260576305, + "loss": 1.0247, + "step": 397 + }, + { + "epoch": 0.59, + "learning_rate": 0.00019022804434585852, + "loss": 1.135, + "step": 398 + }, + { + "epoch": 0.59, + "learning_rate": 0.0001901771371545811, + "loss": 1.1122, + "step": 399 + }, + { + "epoch": 0.6, + "learning_rate": 0.00019012610455459446, + "loss": 1.075, + "step": 400 + }, + { + "epoch": 0.6, + "learning_rate": 0.00019007494661686935, + "loss": 1.1121, + "step": 401 + }, + { + "epoch": 0.6, + "learning_rate": 0.0001900236634125507, + "loss": 1.0531, + "step": 402 + }, + { + "epoch": 0.6, + "learning_rate": 0.00018997225501295772, + "loss": 1.0561, + "step": 403 + }, + { + "epoch": 0.6, + "learning_rate": 0.00018992072148958368, + "loss": 1.0803, + "step": 404 + }, + { + "epoch": 0.6, + "learning_rate": 0.00018986906291409595, + "loss": 1.0579, + "step": 405 + }, + { + "epoch": 0.61, + "learning_rate": 0.00018981727935833567, + "loss": 1.0614, + "step": 406 + }, + { + "epoch": 0.61, + "learning_rate": 0.0001897653708943179, + "loss": 0.9982, + "step": 407 + }, + { + "epoch": 0.61, + "learning_rate": 0.00018971333759423142, + "loss": 1.1498, + "step": 408 + }, 
+ { + "epoch": 0.61, + "learning_rate": 0.00018966117953043852, + "loss": 1.1165, + "step": 409 + }, + { + "epoch": 0.61, + "learning_rate": 0.00018960889677547505, + "loss": 1.1155, + "step": 410 + }, + { + "epoch": 0.61, + "learning_rate": 0.00018955648940205028, + "loss": 1.0017, + "step": 411 + }, + { + "epoch": 0.61, + "learning_rate": 0.00018950395748304678, + "loss": 1.0556, + "step": 412 + }, + { + "epoch": 0.62, + "learning_rate": 0.00018945130109152033, + "loss": 1.0248, + "step": 413 + }, + { + "epoch": 0.62, + "learning_rate": 0.00018939852030069981, + "loss": 1.0155, + "step": 414 + }, + { + "epoch": 0.62, + "learning_rate": 0.00018934561518398706, + "loss": 1.0248, + "step": 415 + }, + { + "epoch": 0.62, + "learning_rate": 0.00018929258581495685, + "loss": 0.9835, + "step": 416 + }, + { + "epoch": 0.62, + "learning_rate": 0.0001892394322673568, + "loss": 1.1602, + "step": 417 + }, + { + "epoch": 0.62, + "learning_rate": 0.0001891861546151071, + "loss": 1.021, + "step": 418 + }, + { + "epoch": 0.62, + "learning_rate": 0.00018913275293230069, + "loss": 1.0526, + "step": 419 + }, + { + "epoch": 0.63, + "learning_rate": 0.00018907922729320285, + "loss": 1.0585, + "step": 420 + }, + { + "epoch": 0.63, + "learning_rate": 0.00018902557777225135, + "loss": 1.0327, + "step": 421 + }, + { + "epoch": 0.63, + "learning_rate": 0.00018897180444405614, + "loss": 1.0448, + "step": 422 + }, + { + "epoch": 0.63, + "learning_rate": 0.0001889179073833995, + "loss": 1.0776, + "step": 423 + }, + { + "epoch": 0.63, + "learning_rate": 0.0001888638866652356, + "loss": 1.0748, + "step": 424 + }, + { + "epoch": 0.63, + "learning_rate": 0.0001888097423646907, + "loss": 1.0482, + "step": 425 + }, + { + "epoch": 0.63, + "learning_rate": 0.00018875547455706295, + "loss": 1.0394, + "step": 426 + }, + { + "epoch": 0.64, + "learning_rate": 0.00018870108331782217, + "loss": 1.0646, + "step": 427 + }, + { + "epoch": 0.64, + "learning_rate": 0.00018864656872260985, + "loss": 1.0338, + 
"step": 428 + }, + { + "epoch": 0.64, + "learning_rate": 0.00018859193084723913, + "loss": 0.9848, + "step": 429 + }, + { + "epoch": 0.64, + "learning_rate": 0.0001885371697676944, + "loss": 1.0587, + "step": 430 + }, + { + "epoch": 0.64, + "learning_rate": 0.0001884822855601316, + "loss": 1.0711, + "step": 431 + }, + { + "epoch": 0.64, + "learning_rate": 0.00018842727830087778, + "loss": 1.0964, + "step": 432 + }, + { + "epoch": 0.65, + "learning_rate": 0.00018837214806643115, + "loss": 1.0254, + "step": 433 + }, + { + "epoch": 0.65, + "learning_rate": 0.00018831689493346095, + "loss": 1.0748, + "step": 434 + }, + { + "epoch": 0.65, + "learning_rate": 0.00018826151897880728, + "loss": 1.0797, + "step": 435 + }, + { + "epoch": 0.65, + "learning_rate": 0.00018820602027948114, + "loss": 1.1068, + "step": 436 + }, + { + "epoch": 0.65, + "learning_rate": 0.00018815039891266418, + "loss": 1.081, + "step": 437 + }, + { + "epoch": 0.65, + "learning_rate": 0.0001880946549557086, + "loss": 1.0685, + "step": 438 + }, + { + "epoch": 0.65, + "learning_rate": 0.00018803878848613716, + "loss": 1.0916, + "step": 439 + }, + { + "epoch": 0.66, + "learning_rate": 0.00018798279958164295, + "loss": 1.115, + "step": 440 + }, + { + "epoch": 0.66, + "learning_rate": 0.00018792668832008936, + "loss": 1.0048, + "step": 441 + }, + { + "epoch": 0.66, + "learning_rate": 0.0001878704547795099, + "loss": 1.0386, + "step": 442 + }, + { + "epoch": 0.66, + "learning_rate": 0.00018781409903810821, + "loss": 1.0283, + "step": 443 + }, + { + "epoch": 0.66, + "learning_rate": 0.00018775762117425777, + "loss": 1.085, + "step": 444 + }, + { + "epoch": 0.66, + "learning_rate": 0.00018770102126650198, + "loss": 1.0582, + "step": 445 + }, + { + "epoch": 0.66, + "learning_rate": 0.00018764429939355392, + "loss": 1.0705, + "step": 446 + }, + { + "epoch": 0.67, + "learning_rate": 0.0001875874556342963, + "loss": 1.1426, + "step": 447 + }, + { + "epoch": 0.67, + "learning_rate": 0.00018753049006778132, + 
"loss": 1.0337, + "step": 448 + }, + { + "epoch": 0.67, + "learning_rate": 0.0001874734027732306, + "loss": 1.0993, + "step": 449 + }, + { + "epoch": 0.67, + "learning_rate": 0.00018741619383003507, + "loss": 1.0661, + "step": 450 + }, + { + "epoch": 0.67, + "learning_rate": 0.00018735886331775476, + "loss": 1.0564, + "step": 451 + }, + { + "epoch": 0.67, + "learning_rate": 0.00018730141131611882, + "loss": 1.0989, + "step": 452 + }, + { + "epoch": 0.68, + "learning_rate": 0.0001872438379050254, + "loss": 1.0984, + "step": 453 + }, + { + "epoch": 0.68, + "learning_rate": 0.00018718614316454133, + "loss": 1.1173, + "step": 454 + }, + { + "epoch": 0.68, + "learning_rate": 0.00018712832717490235, + "loss": 1.1005, + "step": 455 + }, + { + "epoch": 0.68, + "learning_rate": 0.00018707039001651277, + "loss": 1.0008, + "step": 456 + }, + { + "epoch": 0.68, + "learning_rate": 0.00018701233176994533, + "loss": 1.0701, + "step": 457 + }, + { + "epoch": 0.68, + "learning_rate": 0.00018695415251594123, + "loss": 1.0831, + "step": 458 + }, + { + "epoch": 0.68, + "learning_rate": 0.00018689585233541003, + "loss": 1.1165, + "step": 459 + }, + { + "epoch": 0.69, + "learning_rate": 0.00018683743130942928, + "loss": 1.0884, + "step": 460 + }, + { + "epoch": 0.69, + "learning_rate": 0.00018677888951924474, + "loss": 0.9882, + "step": 461 + }, + { + "epoch": 0.69, + "learning_rate": 0.00018672022704627002, + "loss": 1.086, + "step": 462 + }, + { + "epoch": 0.69, + "learning_rate": 0.00018666144397208668, + "loss": 1.0545, + "step": 463 + }, + { + "epoch": 0.69, + "learning_rate": 0.00018660254037844388, + "loss": 1.0274, + "step": 464 + }, + { + "epoch": 0.69, + "learning_rate": 0.0001865435163472584, + "loss": 1.0795, + "step": 465 + }, + { + "epoch": 0.69, + "learning_rate": 0.00018648437196061462, + "loss": 1.022, + "step": 466 + }, + { + "epoch": 0.7, + "learning_rate": 0.0001864251073007642, + "loss": 1.0717, + "step": 467 + }, + { + "epoch": 0.7, + "learning_rate": 
0.00018636572245012606, + "loss": 1.1501, + "step": 468 + }, + { + "epoch": 0.7, + "learning_rate": 0.0001863062174912863, + "loss": 1.1034, + "step": 469 + }, + { + "epoch": 0.7, + "learning_rate": 0.00018624659250699805, + "loss": 1.0784, + "step": 470 + }, + { + "epoch": 0.7, + "learning_rate": 0.00018618684758018136, + "loss": 1.1274, + "step": 471 + }, + { + "epoch": 0.7, + "learning_rate": 0.0001861269827939231, + "loss": 1.0643, + "step": 472 + }, + { + "epoch": 0.7, + "learning_rate": 0.00018606699823147676, + "loss": 1.1394, + "step": 473 + }, + { + "epoch": 0.71, + "learning_rate": 0.00018600689397626246, + "loss": 0.9665, + "step": 474 + }, + { + "epoch": 0.71, + "learning_rate": 0.00018594667011186678, + "loss": 1.058, + "step": 475 + }, + { + "epoch": 0.71, + "learning_rate": 0.00018588632672204264, + "loss": 1.0706, + "step": 476 + }, + { + "epoch": 0.71, + "learning_rate": 0.0001858258638907091, + "loss": 1.0414, + "step": 477 + }, + { + "epoch": 0.71, + "learning_rate": 0.00018576528170195146, + "loss": 1.1, + "step": 478 + }, + { + "epoch": 0.71, + "learning_rate": 0.00018570458024002093, + "loss": 1.1114, + "step": 479 + }, + { + "epoch": 0.72, + "learning_rate": 0.00018564375958933459, + "loss": 1.0596, + "step": 480 + }, + { + "epoch": 0.72, + "learning_rate": 0.0001855828198344753, + "loss": 1.0897, + "step": 481 + }, + { + "epoch": 0.72, + "learning_rate": 0.00018552176106019155, + "loss": 1.0316, + "step": 482 + }, + { + "epoch": 0.72, + "learning_rate": 0.00018546058335139733, + "loss": 1.0516, + "step": 483 + }, + { + "epoch": 0.72, + "learning_rate": 0.0001853992867931721, + "loss": 1.0477, + "step": 484 + }, + { + "epoch": 0.72, + "learning_rate": 0.00018533787147076048, + "loss": 1.0432, + "step": 485 + }, + { + "epoch": 0.72, + "learning_rate": 0.00018527633746957234, + "loss": 1.0568, + "step": 486 + }, + { + "epoch": 0.73, + "learning_rate": 0.00018521468487518264, + "loss": 1.114, + "step": 487 + }, + { + "epoch": 0.73, + 
"learning_rate": 0.00018515291377333112, + "loss": 1.0664, + "step": 488 + }, + { + "epoch": 0.73, + "learning_rate": 0.0001850910242499225, + "loss": 1.0162, + "step": 489 + }, + { + "epoch": 0.73, + "learning_rate": 0.0001850290163910261, + "loss": 1.0829, + "step": 490 + }, + { + "epoch": 0.73, + "learning_rate": 0.00018496689028287572, + "loss": 1.1078, + "step": 491 + }, + { + "epoch": 0.73, + "learning_rate": 0.0001849046460118698, + "loss": 1.0533, + "step": 492 + }, + { + "epoch": 0.73, + "learning_rate": 0.00018484228366457095, + "loss": 1.0923, + "step": 493 + }, + { + "epoch": 0.74, + "learning_rate": 0.00018477980332770607, + "loss": 1.0516, + "step": 494 + }, + { + "epoch": 0.74, + "learning_rate": 0.00018471720508816614, + "loss": 0.9826, + "step": 495 + }, + { + "epoch": 0.74, + "learning_rate": 0.00018465448903300606, + "loss": 1.1581, + "step": 496 + }, + { + "epoch": 0.74, + "learning_rate": 0.0001845916552494446, + "loss": 1.1268, + "step": 497 + }, + { + "epoch": 0.74, + "learning_rate": 0.00018452870382486432, + "loss": 1.0483, + "step": 498 + }, + { + "epoch": 0.74, + "learning_rate": 0.00018446563484681127, + "loss": 1.1792, + "step": 499 + }, + { + "epoch": 0.75, + "learning_rate": 0.00018440244840299506, + "loss": 1.0918, + "step": 500 + }, + { + "epoch": 0.75, + "learning_rate": 0.0001843391445812886, + "loss": 0.9691, + "step": 501 + }, + { + "epoch": 0.75, + "learning_rate": 0.00018427572346972805, + "loss": 1.0581, + "step": 502 + }, + { + "epoch": 0.75, + "learning_rate": 0.0001842121851565128, + "loss": 1.0072, + "step": 503 + }, + { + "epoch": 0.75, + "learning_rate": 0.00018414852973000503, + "loss": 0.9686, + "step": 504 + }, + { + "epoch": 0.75, + "eval_loss": 1.0276715755462646, + "eval_runtime": 2.6054, + "eval_samples_per_second": 419.124, + "eval_steps_per_second": 26.483, + "step": 504 + }, + { + "epoch": 0.75, + "learning_rate": 0.00018408475727872995, + "loss": 1.1221, + "step": 505 + }, + { + "epoch": 0.75, + 
"learning_rate": 0.00018402086789137546, + "loss": 1.087, + "step": 506 + }, + { + "epoch": 0.76, + "learning_rate": 0.00018395686165679202, + "loss": 1.0599, + "step": 507 + }, + { + "epoch": 0.76, + "learning_rate": 0.00018389273866399275, + "loss": 1.1844, + "step": 508 + }, + { + "epoch": 0.76, + "learning_rate": 0.00018382849900215294, + "loss": 1.046, + "step": 509 + }, + { + "epoch": 0.76, + "learning_rate": 0.00018376414276061032, + "loss": 0.9691, + "step": 510 + }, + { + "epoch": 0.76, + "learning_rate": 0.00018369967002886464, + "loss": 1.0996, + "step": 511 + }, + { + "epoch": 0.76, + "learning_rate": 0.0001836350808965776, + "loss": 1.083, + "step": 512 + }, + { + "epoch": 0.76, + "learning_rate": 0.00018357037545357297, + "loss": 1.0371, + "step": 513 + }, + { + "epoch": 0.77, + "learning_rate": 0.00018350555378983608, + "loss": 1.018, + "step": 514 + }, + { + "epoch": 0.77, + "learning_rate": 0.00018344061599551398, + "loss": 1.095, + "step": 515 + }, + { + "epoch": 0.77, + "learning_rate": 0.00018337556216091517, + "loss": 1.0871, + "step": 516 + }, + { + "epoch": 0.77, + "learning_rate": 0.0001833103923765096, + "loss": 1.0774, + "step": 517 + }, + { + "epoch": 0.77, + "learning_rate": 0.00018324510673292842, + "loss": 1.0337, + "step": 518 + }, + { + "epoch": 0.77, + "learning_rate": 0.0001831797053209639, + "loss": 1.0059, + "step": 519 + }, + { + "epoch": 0.77, + "learning_rate": 0.00018311418823156936, + "loss": 1.0744, + "step": 520 + }, + { + "epoch": 0.78, + "learning_rate": 0.00018304855555585894, + "loss": 0.9732, + "step": 521 + }, + { + "epoch": 0.78, + "learning_rate": 0.00018298280738510752, + "loss": 1.1176, + "step": 522 + }, + { + "epoch": 0.78, + "learning_rate": 0.00018291694381075056, + "loss": 1.1485, + "step": 523 + }, + { + "epoch": 0.78, + "learning_rate": 0.00018285096492438424, + "loss": 1.1044, + "step": 524 + }, + { + "epoch": 0.78, + "learning_rate": 0.00018278487081776476, + "loss": 0.9812, + "step": 525 + }, + { + 
"epoch": 0.78, + "learning_rate": 0.00018271866158280884, + "loss": 1.0966, + "step": 526 + }, + { + "epoch": 0.79, + "learning_rate": 0.0001826523373115931, + "loss": 1.2406, + "step": 527 + }, + { + "epoch": 0.79, + "learning_rate": 0.0001825858980963543, + "loss": 1.0727, + "step": 528 + }, + { + "epoch": 0.79, + "learning_rate": 0.000182519344029489, + "loss": 0.9966, + "step": 529 + }, + { + "epoch": 0.79, + "learning_rate": 0.00018245267520355346, + "loss": 1.081, + "step": 530 + }, + { + "epoch": 0.79, + "learning_rate": 0.00018238589171126353, + "loss": 1.1104, + "step": 531 + }, + { + "epoch": 0.79, + "learning_rate": 0.00018231899364549455, + "loss": 1.0535, + "step": 532 + }, + { + "epoch": 0.79, + "learning_rate": 0.00018225198109928114, + "loss": 1.0801, + "step": 533 + }, + { + "epoch": 0.8, + "learning_rate": 0.00018218485416581726, + "loss": 1.0726, + "step": 534 + }, + { + "epoch": 0.8, + "learning_rate": 0.00018211761293845585, + "loss": 1.0923, + "step": 535 + }, + { + "epoch": 0.8, + "learning_rate": 0.00018205025751070875, + "loss": 1.0551, + "step": 536 + }, + { + "epoch": 0.8, + "learning_rate": 0.00018198278797624675, + "loss": 1.0495, + "step": 537 + }, + { + "epoch": 0.8, + "learning_rate": 0.0001819152044288992, + "loss": 1.0589, + "step": 538 + }, + { + "epoch": 0.8, + "learning_rate": 0.00018184750696265408, + "loss": 1.0487, + "step": 539 + }, + { + "epoch": 0.8, + "learning_rate": 0.0001817796956716578, + "loss": 1.0491, + "step": 540 + }, + { + "epoch": 0.81, + "learning_rate": 0.000181711770650215, + "loss": 1.0981, + "step": 541 + }, + { + "epoch": 0.81, + "learning_rate": 0.00018164373199278856, + "loss": 1.1706, + "step": 542 + }, + { + "epoch": 0.81, + "learning_rate": 0.0001815755797939994, + "loss": 1.1024, + "step": 543 + }, + { + "epoch": 0.81, + "learning_rate": 0.00018150731414862622, + "loss": 1.0488, + "step": 544 + }, + { + "epoch": 0.81, + "learning_rate": 0.00018143893515160564, + "loss": 1.165, + "step": 545 + }, + { 
+ "epoch": 0.81, + "learning_rate": 0.00018137044289803181, + "loss": 1.0346, + "step": 546 + }, + { + "epoch": 0.82, + "learning_rate": 0.00018130183748315645, + "loss": 1.1179, + "step": 547 + }, + { + "epoch": 0.82, + "learning_rate": 0.0001812331190023886, + "loss": 1.0027, + "step": 548 + }, + { + "epoch": 0.82, + "learning_rate": 0.00018116428755129459, + "loss": 1.1106, + "step": 549 + }, + { + "epoch": 0.82, + "learning_rate": 0.00018109534322559783, + "loss": 1.0479, + "step": 550 + }, + { + "epoch": 0.82, + "learning_rate": 0.00018102628612117865, + "loss": 1.0046, + "step": 551 + }, + { + "epoch": 0.82, + "learning_rate": 0.0001809571163340744, + "loss": 0.9883, + "step": 552 + }, + { + "epoch": 0.82, + "learning_rate": 0.00018088783396047893, + "loss": 1.1018, + "step": 553 + }, + { + "epoch": 0.83, + "learning_rate": 0.00018081843909674276, + "loss": 1.1389, + "step": 554 + }, + { + "epoch": 0.83, + "learning_rate": 0.00018074893183937283, + "loss": 1.0751, + "step": 555 + }, + { + "epoch": 0.83, + "learning_rate": 0.00018067931228503246, + "loss": 1.1475, + "step": 556 + }, + { + "epoch": 0.83, + "learning_rate": 0.00018060958053054096, + "loss": 1.0829, + "step": 557 + }, + { + "epoch": 0.83, + "learning_rate": 0.00018053973667287387, + "loss": 1.0272, + "step": 558 + }, + { + "epoch": 0.83, + "learning_rate": 0.00018046978080916252, + "loss": 1.0668, + "step": 559 + }, + { + "epoch": 0.83, + "learning_rate": 0.00018039971303669407, + "loss": 1.0988, + "step": 560 + }, + { + "epoch": 0.84, + "learning_rate": 0.00018032953345291123, + "loss": 1.0339, + "step": 561 + }, + { + "epoch": 0.84, + "learning_rate": 0.0001802592421554123, + "loss": 1.0654, + "step": 562 + }, + { + "epoch": 0.84, + "learning_rate": 0.00018018883924195085, + "loss": 1.0157, + "step": 563 + }, + { + "epoch": 0.84, + "learning_rate": 0.00018011832481043576, + "loss": 1.0738, + "step": 564 + }, + { + "epoch": 0.84, + "learning_rate": 0.0001800476989589309, + "loss": 1.0742, + 
"step": 565 + }, + { + "epoch": 0.84, + "learning_rate": 0.0001799769617856552, + "loss": 0.9861, + "step": 566 + }, + { + "epoch": 0.85, + "learning_rate": 0.0001799061133889823, + "loss": 1.0788, + "step": 567 + }, + { + "epoch": 0.85, + "learning_rate": 0.00017983515386744061, + "loss": 1.0539, + "step": 568 + }, + { + "epoch": 0.85, + "learning_rate": 0.00017976408331971298, + "loss": 1.0875, + "step": 569 + }, + { + "epoch": 0.85, + "learning_rate": 0.0001796929018446368, + "loss": 1.0765, + "step": 570 + }, + { + "epoch": 0.85, + "learning_rate": 0.00017962160954120354, + "loss": 1.1336, + "step": 571 + }, + { + "epoch": 0.85, + "learning_rate": 0.000179550206508559, + "loss": 0.9674, + "step": 572 + }, + { + "epoch": 0.85, + "learning_rate": 0.00017947869284600282, + "loss": 1.0607, + "step": 573 + }, + { + "epoch": 0.86, + "learning_rate": 0.0001794070686529886, + "loss": 0.9959, + "step": 574 + }, + { + "epoch": 0.86, + "learning_rate": 0.00017933533402912354, + "loss": 1.038, + "step": 575 + }, + { + "epoch": 0.86, + "learning_rate": 0.0001792634890741685, + "loss": 1.1342, + "step": 576 + }, + { + "epoch": 0.86, + "learning_rate": 0.00017919153388803774, + "loss": 1.0941, + "step": 577 + }, + { + "epoch": 0.86, + "learning_rate": 0.00017911946857079888, + "loss": 1.1286, + "step": 578 + }, + { + "epoch": 0.86, + "learning_rate": 0.00017904729322267256, + "loss": 1.0354, + "step": 579 + }, + { + "epoch": 0.86, + "learning_rate": 0.0001789750079440326, + "loss": 1.1314, + "step": 580 + }, + { + "epoch": 0.87, + "learning_rate": 0.00017890261283540562, + "loss": 1.0365, + "step": 581 + }, + { + "epoch": 0.87, + "learning_rate": 0.00017883010799747099, + "loss": 1.091, + "step": 582 + }, + { + "epoch": 0.87, + "learning_rate": 0.00017875749353106062, + "loss": 0.9995, + "step": 583 + }, + { + "epoch": 0.87, + "learning_rate": 0.000178684769537159, + "loss": 1.0435, + "step": 584 + }, + { + "epoch": 0.87, + "learning_rate": 0.00017861193611690287, + "loss": 
1.0555, + "step": 585 + }, + { + "epoch": 0.87, + "learning_rate": 0.00017853899337158112, + "loss": 1.0637, + "step": 586 + }, + { + "epoch": 0.87, + "learning_rate": 0.00017846594140263474, + "loss": 1.064, + "step": 587 + }, + { + "epoch": 0.88, + "learning_rate": 0.00017839278031165658, + "loss": 0.9879, + "step": 588 + }, + { + "epoch": 0.88, + "learning_rate": 0.00017831951020039126, + "loss": 1.0846, + "step": 589 + }, + { + "epoch": 0.88, + "learning_rate": 0.000178246131170735, + "loss": 1.0373, + "step": 590 + }, + { + "epoch": 0.88, + "learning_rate": 0.00017817264332473546, + "loss": 1.0377, + "step": 591 + }, + { + "epoch": 0.88, + "learning_rate": 0.00017809904676459177, + "loss": 1.0932, + "step": 592 + }, + { + "epoch": 0.88, + "learning_rate": 0.00017802534159265404, + "loss": 1.085, + "step": 593 + }, + { + "epoch": 0.89, + "learning_rate": 0.0001779515279114236, + "loss": 1.0975, + "step": 594 + }, + { + "epoch": 0.89, + "learning_rate": 0.0001778776058235526, + "loss": 1.1283, + "step": 595 + }, + { + "epoch": 0.89, + "learning_rate": 0.00017780357543184397, + "loss": 1.0652, + "step": 596 + }, + { + "epoch": 0.89, + "learning_rate": 0.00017772943683925122, + "loss": 1.0336, + "step": 597 + }, + { + "epoch": 0.89, + "learning_rate": 0.00017765519014887842, + "loss": 0.9761, + "step": 598 + }, + { + "epoch": 0.89, + "learning_rate": 0.0001775808354639799, + "loss": 1.0688, + "step": 599 + }, + { + "epoch": 0.89, + "learning_rate": 0.00017750637288796016, + "loss": 1.1031, + "step": 600 + }, + { + "epoch": 0.9, + "learning_rate": 0.00017743180252437383, + "loss": 1.083, + "step": 601 + }, + { + "epoch": 0.9, + "learning_rate": 0.00017735712447692538, + "loss": 1.1612, + "step": 602 + }, + { + "epoch": 0.9, + "learning_rate": 0.00017728233884946903, + "loss": 1.1618, + "step": 603 + }, + { + "epoch": 0.9, + "learning_rate": 0.00017720744574600863, + "loss": 1.144, + "step": 604 + }, + { + "epoch": 0.9, + "learning_rate": 0.0001771324452706975, + 
"loss": 1.1174, + "step": 605 + }, + { + "epoch": 0.9, + "learning_rate": 0.00017705733752783825, + "loss": 0.9728, + "step": 606 + }, + { + "epoch": 0.9, + "learning_rate": 0.0001769821226218827, + "loss": 1.0599, + "step": 607 + }, + { + "epoch": 0.91, + "learning_rate": 0.0001769068006574317, + "loss": 1.0639, + "step": 608 + }, + { + "epoch": 0.91, + "learning_rate": 0.00017683137173923495, + "loss": 1.1278, + "step": 609 + }, + { + "epoch": 0.91, + "learning_rate": 0.00017675583597219095, + "loss": 0.9925, + "step": 610 + }, + { + "epoch": 0.91, + "learning_rate": 0.0001766801934613467, + "loss": 1.0457, + "step": 611 + }, + { + "epoch": 0.91, + "learning_rate": 0.0001766044443118978, + "loss": 1.0348, + "step": 612 + }, + { + "epoch": 0.91, + "learning_rate": 0.000176528588629188, + "loss": 1.022, + "step": 613 + }, + { + "epoch": 0.92, + "learning_rate": 0.00017645262651870926, + "loss": 1.0027, + "step": 614 + }, + { + "epoch": 0.92, + "learning_rate": 0.00017637655808610156, + "loss": 1.0491, + "step": 615 + }, + { + "epoch": 0.92, + "learning_rate": 0.00017630038343715275, + "loss": 1.0413, + "step": 616 + }, + { + "epoch": 0.92, + "learning_rate": 0.00017622410267779834, + "loss": 1.0358, + "step": 617 + }, + { + "epoch": 0.92, + "learning_rate": 0.00017614771591412148, + "loss": 1.1125, + "step": 618 + }, + { + "epoch": 0.92, + "learning_rate": 0.00017607122325235267, + "loss": 1.1185, + "step": 619 + }, + { + "epoch": 0.92, + "learning_rate": 0.00017599462479886974, + "loss": 1.0738, + "step": 620 + }, + { + "epoch": 0.93, + "learning_rate": 0.00017591792066019765, + "loss": 1.102, + "step": 621 + }, + { + "epoch": 0.93, + "learning_rate": 0.00017584111094300827, + "loss": 1.065, + "step": 622 + }, + { + "epoch": 0.93, + "learning_rate": 0.0001757641957541203, + "loss": 1.0514, + "step": 623 + }, + { + "epoch": 0.93, + "learning_rate": 0.0001756871752004992, + "loss": 1.0396, + "step": 624 + }, + { + "epoch": 0.93, + "learning_rate": 
0.00017561004938925688, + "loss": 1.1027, + "step": 625 + }, + { + "epoch": 0.93, + "learning_rate": 0.00017553281842765169, + "loss": 1.0223, + "step": 626 + }, + { + "epoch": 0.93, + "learning_rate": 0.00017545548242308816, + "loss": 1.1793, + "step": 627 + }, + { + "epoch": 0.94, + "learning_rate": 0.00017537804148311695, + "loss": 1.0642, + "step": 628 + }, + { + "epoch": 0.94, + "learning_rate": 0.00017530049571543464, + "loss": 1.0682, + "step": 629 + }, + { + "epoch": 0.94, + "learning_rate": 0.00017522284522788353, + "loss": 1.0476, + "step": 630 + }, + { + "epoch": 0.94, + "learning_rate": 0.00017514509012845164, + "loss": 1.1064, + "step": 631 + }, + { + "epoch": 0.94, + "learning_rate": 0.00017506723052527242, + "loss": 1.0258, + "step": 632 + }, + { + "epoch": 0.94, + "learning_rate": 0.00017498926652662476, + "loss": 1.1954, + "step": 633 + }, + { + "epoch": 0.94, + "learning_rate": 0.0001749111982409325, + "loss": 1.0637, + "step": 634 + }, + { + "epoch": 0.95, + "learning_rate": 0.00017483302577676475, + "loss": 0.9685, + "step": 635 + }, + { + "epoch": 0.95, + "learning_rate": 0.00017475474924283536, + "loss": 1.0465, + "step": 636 + }, + { + "epoch": 0.95, + "learning_rate": 0.000174676368748003, + "loss": 1.0161, + "step": 637 + }, + { + "epoch": 0.95, + "learning_rate": 0.00017459788440127083, + "loss": 1.0479, + "step": 638 + }, + { + "epoch": 0.95, + "learning_rate": 0.00017451929631178648, + "loss": 1.1166, + "step": 639 + }, + { + "epoch": 0.95, + "learning_rate": 0.0001744406045888419, + "loss": 1.0634, + "step": 640 + }, + { + "epoch": 0.96, + "learning_rate": 0.00017436180934187308, + "loss": 1.0826, + "step": 641 + }, + { + "epoch": 0.96, + "learning_rate": 0.00017428291068046, + "loss": 1.07, + "step": 642 + }, + { + "epoch": 0.96, + "learning_rate": 0.00017420390871432647, + "loss": 1.1167, + "step": 643 + }, + { + "epoch": 0.96, + "learning_rate": 0.00017412480355334005, + "loss": 1.0347, + "step": 644 + }, + { + "epoch": 0.96, + 
"learning_rate": 0.00017404559530751162, + "loss": 1.0393, + "step": 645 + }, + { + "epoch": 0.96, + "learning_rate": 0.00017396628408699555, + "loss": 1.1108, + "step": 646 + }, + { + "epoch": 0.96, + "learning_rate": 0.00017388687000208946, + "loss": 1.006, + "step": 647 + }, + { + "epoch": 0.97, + "learning_rate": 0.0001738073531632339, + "loss": 1.0932, + "step": 648 + }, + { + "epoch": 0.97, + "learning_rate": 0.0001737277336810124, + "loss": 1.0123, + "step": 649 + }, + { + "epoch": 0.97, + "learning_rate": 0.00017364801166615124, + "loss": 1.1273, + "step": 650 + }, + { + "epoch": 0.97, + "learning_rate": 0.0001735681872295192, + "loss": 0.9893, + "step": 651 + }, + { + "epoch": 0.97, + "learning_rate": 0.0001734882604821276, + "loss": 1.0699, + "step": 652 + }, + { + "epoch": 0.97, + "learning_rate": 0.00017340823153513002, + "loss": 1.0901, + "step": 653 + }, + { + "epoch": 0.97, + "learning_rate": 0.00017332810049982208, + "loss": 1.0212, + "step": 654 + }, + { + "epoch": 0.98, + "learning_rate": 0.00017324786748764155, + "loss": 0.9898, + "step": 655 + }, + { + "epoch": 0.98, + "learning_rate": 0.00017316753261016783, + "loss": 1.0899, + "step": 656 + }, + { + "epoch": 0.98, + "learning_rate": 0.00017308709597912213, + "loss": 1.085, + "step": 657 + }, + { + "epoch": 0.98, + "learning_rate": 0.00017300655770636708, + "loss": 1.091, + "step": 658 + }, + { + "epoch": 0.98, + "learning_rate": 0.00017292591790390665, + "loss": 1.0502, + "step": 659 + }, + { + "epoch": 0.98, + "learning_rate": 0.0001728451766838861, + "loss": 1.2131, + "step": 660 + }, + { + "epoch": 0.99, + "learning_rate": 0.00017276433415859167, + "loss": 1.1256, + "step": 661 + }, + { + "epoch": 0.99, + "learning_rate": 0.00017268339044045042, + "loss": 1.0577, + "step": 662 + }, + { + "epoch": 0.99, + "learning_rate": 0.00017260234564203032, + "loss": 1.0012, + "step": 663 + }, + { + "epoch": 0.99, + "learning_rate": 0.00017252119987603973, + "loss": 1.0611, + "step": 664 + }, + { + 
"epoch": 0.99, + "learning_rate": 0.00017243995325532755, + "loss": 1.1251, + "step": 665 + }, + { + "epoch": 0.99, + "learning_rate": 0.00017235860589288277, + "loss": 1.0959, + "step": 666 + }, + { + "epoch": 0.99, + "learning_rate": 0.0001722771579018347, + "loss": 1.1413, + "step": 667 + }, + { + "epoch": 1.0, + "learning_rate": 0.00017219560939545246, + "loss": 1.0728, + "step": 668 + }, + { + "epoch": 1.0, + "learning_rate": 0.00017211396048714498, + "loss": 1.0461, + "step": 669 + }, + { + "epoch": 1.0, + "learning_rate": 0.0001720322112904608, + "loss": 1.1084, + "step": 670 + }, + { + "epoch": 1.0, + "learning_rate": 0.00017195036191908797, + "loss": 1.1316, + "step": 671 + }, + { + "epoch": 1.0, + "learning_rate": 0.00017186841248685383, + "loss": 1.0816, + "step": 672 + }, + { + "epoch": 1.0, + "eval_loss": 1.0170178413391113, + "eval_runtime": 2.6119, + "eval_samples_per_second": 418.079, + "eval_steps_per_second": 26.417, + "step": 672 + }, + { + "epoch": 1.0, + "learning_rate": 0.0001717863631077249, + "loss": 1.0711, + "step": 673 + }, + { + "epoch": 1.0, + "learning_rate": 0.00017170421389580667, + "loss": 1.1245, + "step": 674 + }, + { + "epoch": 1.01, + "learning_rate": 0.00017162196496534342, + "loss": 1.0519, + "step": 675 + }, + { + "epoch": 1.01, + "learning_rate": 0.0001715396164307182, + "loss": 1.104, + "step": 676 + }, + { + "epoch": 1.01, + "learning_rate": 0.00017145716840645254, + "loss": 1.1193, + "step": 677 + }, + { + "epoch": 1.01, + "learning_rate": 0.00017137462100720631, + "loss": 1.1238, + "step": 678 + }, + { + "epoch": 1.01, + "learning_rate": 0.00017129197434777763, + "loss": 1.004, + "step": 679 + }, + { + "epoch": 1.01, + "learning_rate": 0.00017120922854310257, + "loss": 1.0426, + "step": 680 + }, + { + "epoch": 1.01, + "learning_rate": 0.00017112638370825515, + "loss": 1.0308, + "step": 681 + }, + { + "epoch": 1.02, + "learning_rate": 0.00017104343995844715, + "loss": 1.0892, + "step": 682 + }, + { + "epoch": 1.02, + 
"learning_rate": 0.00017096039740902784, + "loss": 1.0115, + "step": 683 + }, + { + "epoch": 1.02, + "learning_rate": 0.00017087725617548385, + "loss": 1.1011, + "step": 684 + }, + { + "epoch": 1.0, + "learning_rate": 0.00017079401637343914, + "loss": 0.9829, + "step": 685 + }, + { + "epoch": 1.0, + "learning_rate": 0.00017071067811865476, + "loss": 0.9738, + "step": 686 + }, + { + "epoch": 1.0, + "learning_rate": 0.0001706272415270286, + "loss": 1.0563, + "step": 687 + }, + { + "epoch": 1.01, + "learning_rate": 0.00017054370671459532, + "loss": 1.0153, + "step": 688 + }, + { + "epoch": 1.01, + "learning_rate": 0.0001704600737975262, + "loss": 1.0638, + "step": 689 + }, + { + "epoch": 1.01, + "learning_rate": 0.000170376342892129, + "loss": 1.0053, + "step": 690 + }, + { + "epoch": 1.01, + "learning_rate": 0.00017029251411484765, + "loss": 1.0178, + "step": 691 + }, + { + "epoch": 1.01, + "learning_rate": 0.00017020858758226229, + "loss": 1.0755, + "step": 692 + }, + { + "epoch": 1.01, + "learning_rate": 0.00017012456341108885, + "loss": 0.9365, + "step": 693 + }, + { + "epoch": 1.01, + "learning_rate": 0.00017004044171817925, + "loss": 1.0666, + "step": 694 + }, + { + "epoch": 1.02, + "learning_rate": 0.00016995622262052092, + "loss": 1.041, + "step": 695 + }, + { + "epoch": 1.02, + "learning_rate": 0.00016987190623523674, + "loss": 1.0387, + "step": 696 + }, + { + "epoch": 1.02, + "learning_rate": 0.00016978749267958495, + "loss": 0.9332, + "step": 697 + }, + { + "epoch": 1.02, + "learning_rate": 0.00016970298207095885, + "loss": 1.0737, + "step": 698 + }, + { + "epoch": 1.02, + "learning_rate": 0.00016961837452688676, + "loss": 0.992, + "step": 699 + }, + { + "epoch": 1.02, + "learning_rate": 0.00016953367016503182, + "loss": 0.9997, + "step": 700 + }, + { + "epoch": 1.03, + "learning_rate": 0.00016944886910319173, + "loss": 1.1054, + "step": 701 + }, + { + "epoch": 1.03, + "learning_rate": 0.00016936397145929878, + "loss": 0.9876, + "step": 702 + }, + { + 
"epoch": 1.03, + "learning_rate": 0.00016927897735141952, + "loss": 1.0158, + "step": 703 + }, + { + "epoch": 1.03, + "learning_rate": 0.00016919388689775464, + "loss": 0.9771, + "step": 704 + }, + { + "epoch": 1.03, + "learning_rate": 0.00016910870021663883, + "loss": 0.942, + "step": 705 + }, + { + "epoch": 1.03, + "learning_rate": 0.00016902341742654065, + "loss": 1.0217, + "step": 706 + }, + { + "epoch": 1.03, + "learning_rate": 0.00016893803864606222, + "loss": 1.0346, + "step": 707 + }, + { + "epoch": 1.04, + "learning_rate": 0.00016885256399393924, + "loss": 0.9891, + "step": 708 + }, + { + "epoch": 1.04, + "learning_rate": 0.00016876699358904068, + "loss": 0.9697, + "step": 709 + }, + { + "epoch": 1.04, + "learning_rate": 0.00016868132755036875, + "loss": 1.0062, + "step": 710 + }, + { + "epoch": 1.04, + "learning_rate": 0.00016859556599705856, + "loss": 0.9822, + "step": 711 + }, + { + "epoch": 1.04, + "learning_rate": 0.0001685097090483781, + "loss": 1.0921, + "step": 712 + }, + { + "epoch": 1.04, + "learning_rate": 0.00016842375682372805, + "loss": 1.0126, + "step": 713 + }, + { + "epoch": 1.04, + "learning_rate": 0.00016833770944264153, + "loss": 1.0043, + "step": 714 + }, + { + "epoch": 1.05, + "learning_rate": 0.00016825156702478407, + "loss": 0.952, + "step": 715 + }, + { + "epoch": 1.05, + "learning_rate": 0.00016816532968995328, + "loss": 1.0423, + "step": 716 + }, + { + "epoch": 1.05, + "learning_rate": 0.00016807899755807886, + "loss": 1.0465, + "step": 717 + }, + { + "epoch": 1.05, + "learning_rate": 0.00016799257074922224, + "loss": 0.9827, + "step": 718 + }, + { + "epoch": 1.05, + "learning_rate": 0.00016790604938357663, + "loss": 0.9798, + "step": 719 + }, + { + "epoch": 1.05, + "learning_rate": 0.00016781943358146664, + "loss": 1.0268, + "step": 720 + }, + { + "epoch": 1.06, + "learning_rate": 0.00016773272346334828, + "loss": 1.0007, + "step": 721 + }, + { + "epoch": 1.06, + "learning_rate": 0.0001676459191498087, + "loss": 0.9989, + 
"step": 722 + }, + { + "epoch": 1.06, + "learning_rate": 0.00016755902076156604, + "loss": 0.9374, + "step": 723 + }, + { + "epoch": 1.06, + "learning_rate": 0.00016747202841946928, + "loss": 1.0031, + "step": 724 + }, + { + "epoch": 1.06, + "learning_rate": 0.00016738494224449802, + "loss": 0.9751, + "step": 725 + }, + { + "epoch": 1.06, + "learning_rate": 0.00016729776235776246, + "loss": 1.1055, + "step": 726 + }, + { + "epoch": 1.06, + "learning_rate": 0.00016721048888050302, + "loss": 1.0527, + "step": 727 + }, + { + "epoch": 1.07, + "learning_rate": 0.0001671231219340903, + "loss": 1.0048, + "step": 728 + }, + { + "epoch": 1.07, + "learning_rate": 0.0001670356616400249, + "loss": 0.957, + "step": 729 + }, + { + "epoch": 1.07, + "learning_rate": 0.00016694810811993723, + "loss": 1.0598, + "step": 730 + }, + { + "epoch": 1.07, + "learning_rate": 0.00016686046149558736, + "loss": 1.02, + "step": 731 + }, + { + "epoch": 1.07, + "learning_rate": 0.00016677272188886483, + "loss": 0.9973, + "step": 732 + }, + { + "epoch": 1.07, + "learning_rate": 0.00016668488942178856, + "loss": 1.0685, + "step": 733 + }, + { + "epoch": 1.07, + "learning_rate": 0.00016659696421650645, + "loss": 0.9783, + "step": 734 + }, + { + "epoch": 1.08, + "learning_rate": 0.00016650894639529544, + "loss": 0.9767, + "step": 735 + }, + { + "epoch": 1.08, + "learning_rate": 0.00016642083608056141, + "loss": 1.0192, + "step": 736 + }, + { + "epoch": 1.08, + "learning_rate": 0.00016633263339483866, + "loss": 1.0121, + "step": 737 + }, + { + "epoch": 1.08, + "learning_rate": 0.00016624433846079012, + "loss": 0.9817, + "step": 738 + }, + { + "epoch": 1.08, + "learning_rate": 0.00016615595140120686, + "loss": 1.1145, + "step": 739 + }, + { + "epoch": 1.08, + "learning_rate": 0.00016606747233900815, + "loss": 0.9862, + "step": 740 + }, + { + "epoch": 1.08, + "learning_rate": 0.00016597890139724125, + "loss": 1.0606, + "step": 741 + }, + { + "epoch": 1.09, + "learning_rate": 0.0001658902386990811, + 
"loss": 1.0416, + "step": 742 + }, + { + "epoch": 1.09, + "learning_rate": 0.0001658014843678303, + "loss": 0.9971, + "step": 743 + }, + { + "epoch": 1.09, + "learning_rate": 0.00016571263852691888, + "loss": 1.0318, + "step": 744 + }, + { + "epoch": 1.09, + "learning_rate": 0.0001656237012999041, + "loss": 1.0633, + "step": 745 + }, + { + "epoch": 1.09, + "learning_rate": 0.0001655346728104704, + "loss": 1.0418, + "step": 746 + }, + { + "epoch": 1.09, + "learning_rate": 0.00016544555318242897, + "loss": 0.9308, + "step": 747 + }, + { + "epoch": 1.1, + "learning_rate": 0.00016535634253971794, + "loss": 1.1049, + "step": 748 + }, + { + "epoch": 1.1, + "learning_rate": 0.0001652670410064019, + "loss": 0.9377, + "step": 749 + }, + { + "epoch": 1.1, + "learning_rate": 0.00016517764870667182, + "loss": 0.9934, + "step": 750 + }, + { + "epoch": 1.1, + "learning_rate": 0.000165088165764845, + "loss": 1.0467, + "step": 751 + }, + { + "epoch": 1.1, + "learning_rate": 0.00016499859230536466, + "loss": 1.0172, + "step": 752 + }, + { + "epoch": 1.1, + "learning_rate": 0.0001649089284528001, + "loss": 0.9922, + "step": 753 + }, + { + "epoch": 1.1, + "learning_rate": 0.00016481917433184607, + "loss": 1.0373, + "step": 754 + }, + { + "epoch": 1.11, + "learning_rate": 0.0001647293300673231, + "loss": 1.0377, + "step": 755 + }, + { + "epoch": 1.11, + "learning_rate": 0.00016463939578417692, + "loss": 0.9991, + "step": 756 + }, + { + "epoch": 1.11, + "learning_rate": 0.00016454937160747854, + "loss": 1.0657, + "step": 757 + }, + { + "epoch": 1.11, + "learning_rate": 0.00016445925766242391, + "loss": 0.9954, + "step": 758 + }, + { + "epoch": 1.11, + "learning_rate": 0.0001643690540743339, + "loss": 1.018, + "step": 759 + }, + { + "epoch": 1.11, + "learning_rate": 0.00016427876096865394, + "loss": 1.01, + "step": 760 + }, + { + "epoch": 1.11, + "learning_rate": 0.0001641883784709541, + "loss": 0.9318, + "step": 761 + }, + { + "epoch": 1.12, + "learning_rate": 0.0001640979067069286, + 
"loss": 1.0174, + "step": 762 + }, + { + "epoch": 1.12, + "learning_rate": 0.00016400734580239594, + "loss": 1.0886, + "step": 763 + }, + { + "epoch": 1.12, + "learning_rate": 0.0001639166958832985, + "loss": 1.0316, + "step": 764 + }, + { + "epoch": 1.12, + "learning_rate": 0.0001638259570757025, + "loss": 1.0514, + "step": 765 + }, + { + "epoch": 1.12, + "learning_rate": 0.0001637351295057978, + "loss": 0.9914, + "step": 766 + }, + { + "epoch": 1.12, + "learning_rate": 0.00016364421329989755, + "loss": 1.0529, + "step": 767 + }, + { + "epoch": 1.13, + "learning_rate": 0.00016355320858443842, + "loss": 0.9689, + "step": 768 + }, + { + "epoch": 1.13, + "learning_rate": 0.00016346211548597995, + "loss": 1.0398, + "step": 769 + }, + { + "epoch": 1.13, + "learning_rate": 0.0001633709341312046, + "loss": 1.0127, + "step": 770 + }, + { + "epoch": 1.13, + "learning_rate": 0.00016327966464691778, + "loss": 1.1388, + "step": 771 + }, + { + "epoch": 1.13, + "learning_rate": 0.00016318830716004722, + "loss": 0.9659, + "step": 772 + }, + { + "epoch": 1.13, + "learning_rate": 0.00016309686179764317, + "loss": 0.9907, + "step": 773 + }, + { + "epoch": 1.13, + "learning_rate": 0.00016300532868687806, + "loss": 0.9168, + "step": 774 + }, + { + "epoch": 1.14, + "learning_rate": 0.0001629137079550463, + "loss": 1.06, + "step": 775 + }, + { + "epoch": 1.14, + "learning_rate": 0.00016282199972956425, + "loss": 0.9826, + "step": 776 + }, + { + "epoch": 1.14, + "learning_rate": 0.00016273020413796983, + "loss": 1.0496, + "step": 777 + }, + { + "epoch": 1.14, + "learning_rate": 0.0001626383213079226, + "loss": 1.0245, + "step": 778 + }, + { + "epoch": 1.14, + "learning_rate": 0.00016254635136720328, + "loss": 1.036, + "step": 779 + }, + { + "epoch": 1.14, + "learning_rate": 0.0001624542944437139, + "loss": 1.0283, + "step": 780 + }, + { + "epoch": 1.14, + "learning_rate": 0.00016236215066547734, + "loss": 1.0078, + "step": 781 + }, + { + "epoch": 1.15, + "learning_rate": 
0.00016226992016063723, + "loss": 0.9819, + "step": 782 + }, + { + "epoch": 1.15, + "learning_rate": 0.00016217760305745803, + "loss": 1.0687, + "step": 783 + }, + { + "epoch": 1.15, + "learning_rate": 0.0001620851994843244, + "loss": 1.0523, + "step": 784 + }, + { + "epoch": 1.15, + "learning_rate": 0.00016199270956974128, + "loss": 1.0279, + "step": 785 + }, + { + "epoch": 1.15, + "learning_rate": 0.00016190013344233388, + "loss": 1.0559, + "step": 786 + }, + { + "epoch": 1.15, + "learning_rate": 0.00016180747123084705, + "loss": 1.0844, + "step": 787 + }, + { + "epoch": 1.15, + "learning_rate": 0.00016171472306414554, + "loss": 1.0724, + "step": 788 + }, + { + "epoch": 1.16, + "learning_rate": 0.00016162188907121354, + "loss": 0.9696, + "step": 789 + }, + { + "epoch": 1.16, + "learning_rate": 0.00016152896938115464, + "loss": 0.9551, + "step": 790 + }, + { + "epoch": 1.16, + "learning_rate": 0.0001614359641231916, + "loss": 1.0032, + "step": 791 + }, + { + "epoch": 1.16, + "learning_rate": 0.0001613428734266662, + "loss": 1.1404, + "step": 792 + }, + { + "epoch": 1.16, + "learning_rate": 0.00016124969742103897, + "loss": 1.0329, + "step": 793 + }, + { + "epoch": 1.16, + "learning_rate": 0.00016115643623588915, + "loss": 1.039, + "step": 794 + }, + { + "epoch": 1.17, + "learning_rate": 0.0001610630900009144, + "loss": 1.0231, + "step": 795 + }, + { + "epoch": 1.17, + "learning_rate": 0.0001609696588459307, + "loss": 1.0659, + "step": 796 + }, + { + "epoch": 1.17, + "learning_rate": 0.00016087614290087208, + "loss": 1.0029, + "step": 797 + }, + { + "epoch": 1.17, + "learning_rate": 0.0001607825422957905, + "loss": 0.985, + "step": 798 + }, + { + "epoch": 1.17, + "learning_rate": 0.00016068885716085567, + "loss": 0.9392, + "step": 799 + }, + { + "epoch": 1.17, + "learning_rate": 0.00016059508762635482, + "loss": 1.006, + "step": 800 + }, + { + "epoch": 1.17, + "learning_rate": 0.00016050123382269264, + "loss": 1.0748, + "step": 801 + }, + { + "epoch": 1.18, + 
"learning_rate": 0.0001604072958803909, + "loss": 1.1378, + "step": 802 + }, + { + "epoch": 1.18, + "learning_rate": 0.00016031327393008845, + "loss": 1.058, + "step": 803 + }, + { + "epoch": 1.18, + "learning_rate": 0.00016021916810254097, + "loss": 0.9827, + "step": 804 + }, + { + "epoch": 1.18, + "learning_rate": 0.00016012497852862075, + "loss": 0.9572, + "step": 805 + }, + { + "epoch": 1.18, + "learning_rate": 0.00016003070533931657, + "loss": 1.0042, + "step": 806 + }, + { + "epoch": 1.18, + "learning_rate": 0.00015993634866573347, + "loss": 0.9521, + "step": 807 + }, + { + "epoch": 1.18, + "learning_rate": 0.0001598419086390927, + "loss": 0.9395, + "step": 808 + }, + { + "epoch": 1.19, + "learning_rate": 0.00015974738539073125, + "loss": 1.0902, + "step": 809 + }, + { + "epoch": 1.19, + "learning_rate": 0.00015965277905210195, + "loss": 1.0408, + "step": 810 + }, + { + "epoch": 1.19, + "learning_rate": 0.00015955808975477319, + "loss": 1.0436, + "step": 811 + }, + { + "epoch": 1.19, + "learning_rate": 0.00015946331763042867, + "loss": 1.0845, + "step": 812 + }, + { + "epoch": 1.19, + "learning_rate": 0.00015936846281086736, + "loss": 1.0752, + "step": 813 + }, + { + "epoch": 1.19, + "learning_rate": 0.00015927352542800317, + "loss": 1.0832, + "step": 814 + }, + { + "epoch": 1.2, + "learning_rate": 0.00015917850561386488, + "loss": 0.9901, + "step": 815 + }, + { + "epoch": 1.2, + "learning_rate": 0.00015908340350059583, + "loss": 1.0311, + "step": 816 + }, + { + "epoch": 1.2, + "learning_rate": 0.00015898821922045385, + "loss": 0.9858, + "step": 817 + }, + { + "epoch": 1.2, + "learning_rate": 0.0001588929529058111, + "loss": 0.9541, + "step": 818 + }, + { + "epoch": 1.2, + "learning_rate": 0.00015879760468915372, + "loss": 0.9516, + "step": 819 + }, + { + "epoch": 1.2, + "learning_rate": 0.00015870217470308188, + "loss": 1.0791, + "step": 820 + }, + { + "epoch": 1.2, + "learning_rate": 0.00015860666308030932, + "loss": 0.9099, + "step": 821 + }, + { + 
"epoch": 1.21, + "learning_rate": 0.00015851106995366337, + "loss": 1.0983, + "step": 822 + }, + { + "epoch": 1.21, + "learning_rate": 0.00015841539545608478, + "loss": 0.9951, + "step": 823 + }, + { + "epoch": 1.21, + "learning_rate": 0.00015831963972062733, + "loss": 0.9661, + "step": 824 + }, + { + "epoch": 1.21, + "learning_rate": 0.00015822380288045792, + "loss": 1.0111, + "step": 825 + }, + { + "epoch": 1.21, + "learning_rate": 0.0001581278850688561, + "loss": 1.0436, + "step": 826 + }, + { + "epoch": 1.21, + "learning_rate": 0.00015803188641921417, + "loss": 1.0916, + "step": 827 + }, + { + "epoch": 1.21, + "learning_rate": 0.0001579358070650367, + "loss": 1.0347, + "step": 828 + }, + { + "epoch": 1.22, + "learning_rate": 0.0001578396471399406, + "loss": 0.9978, + "step": 829 + }, + { + "epoch": 1.22, + "learning_rate": 0.0001577434067776548, + "loss": 1.0036, + "step": 830 + }, + { + "epoch": 1.22, + "learning_rate": 0.00015764708611202015, + "loss": 1.0387, + "step": 831 + }, + { + "epoch": 1.22, + "learning_rate": 0.00015755068527698902, + "loss": 1.0172, + "step": 832 + }, + { + "epoch": 1.22, + "learning_rate": 0.00015745420440662543, + "loss": 0.9723, + "step": 833 + }, + { + "epoch": 1.22, + "learning_rate": 0.0001573576436351046, + "loss": 0.9662, + "step": 834 + }, + { + "epoch": 1.23, + "learning_rate": 0.000157261003096713, + "loss": 0.9849, + "step": 835 + }, + { + "epoch": 1.23, + "learning_rate": 0.00015716428292584787, + "loss": 1.0198, + "step": 836 + }, + { + "epoch": 1.23, + "learning_rate": 0.00015706748325701732, + "loss": 0.9015, + "step": 837 + }, + { + "epoch": 1.23, + "learning_rate": 0.0001569706042248399, + "loss": 1.001, + "step": 838 + }, + { + "epoch": 1.23, + "learning_rate": 0.0001568736459640447, + "loss": 0.9681, + "step": 839 + }, + { + "epoch": 1.23, + "learning_rate": 0.00015677660860947078, + "loss": 1.0513, + "step": 840 + }, + { + "epoch": 1.23, + "eval_loss": 1.008791446685791, + "eval_runtime": 2.6133, + 
"eval_samples_per_second": 417.867, + "eval_steps_per_second": 26.404, + "step": 840 + }, + { + "epoch": 1.23, + "learning_rate": 0.0001566794922960674, + "loss": 0.9829, + "step": 841 + }, + { + "epoch": 1.24, + "learning_rate": 0.00015658229715889347, + "loss": 1.0362, + "step": 842 + }, + { + "epoch": 1.24, + "learning_rate": 0.00015648502333311757, + "loss": 0.9736, + "step": 843 + }, + { + "epoch": 1.24, + "learning_rate": 0.0001563876709540178, + "loss": 1.0457, + "step": 844 + }, + { + "epoch": 1.24, + "learning_rate": 0.00015629024015698136, + "loss": 0.9786, + "step": 845 + }, + { + "epoch": 1.24, + "learning_rate": 0.00015619273107750462, + "loss": 1.0595, + "step": 846 + }, + { + "epoch": 1.24, + "learning_rate": 0.0001560951438511927, + "loss": 1.0307, + "step": 847 + }, + { + "epoch": 1.24, + "learning_rate": 0.00015599747861375955, + "loss": 1.0386, + "step": 848 + }, + { + "epoch": 1.25, + "learning_rate": 0.00015589973550102747, + "loss": 0.9916, + "step": 849 + }, + { + "epoch": 1.25, + "learning_rate": 0.00015580191464892716, + "loss": 0.9652, + "step": 850 + }, + { + "epoch": 1.25, + "learning_rate": 0.00015570401619349736, + "loss": 0.9691, + "step": 851 + }, + { + "epoch": 1.25, + "learning_rate": 0.00015560604027088477, + "loss": 1.0006, + "step": 852 + }, + { + "epoch": 1.25, + "learning_rate": 0.00015550798701734385, + "loss": 1.0271, + "step": 853 + }, + { + "epoch": 1.25, + "learning_rate": 0.00015540985656923645, + "loss": 1.0591, + "step": 854 + }, + { + "epoch": 1.25, + "learning_rate": 0.00015531164906303207, + "loss": 0.967, + "step": 855 + }, + { + "epoch": 1.26, + "learning_rate": 0.00015521336463530705, + "loss": 1.0466, + "step": 856 + }, + { + "epoch": 1.26, + "learning_rate": 0.0001551150034227449, + "loss": 0.9953, + "step": 857 + }, + { + "epoch": 1.26, + "learning_rate": 0.0001550165655621359, + "loss": 0.9899, + "step": 858 + }, + { + "epoch": 1.26, + "learning_rate": 0.00015491805119037684, + "loss": 0.9742, + "step": 859 + 
}, + { + "epoch": 1.26, + "learning_rate": 0.00015481946044447099, + "loss": 0.9865, + "step": 860 + }, + { + "epoch": 1.26, + "learning_rate": 0.0001547207934615278, + "loss": 0.9384, + "step": 861 + }, + { + "epoch": 1.27, + "learning_rate": 0.00015462205037876275, + "loss": 1.0216, + "step": 862 + }, + { + "epoch": 1.27, + "learning_rate": 0.00015452323133349714, + "loss": 0.9467, + "step": 863 + }, + { + "epoch": 1.27, + "learning_rate": 0.0001544243364631579, + "loss": 1.0038, + "step": 864 + }, + { + "epoch": 1.27, + "learning_rate": 0.0001543253659052775, + "loss": 0.978, + "step": 865 + }, + { + "epoch": 1.27, + "learning_rate": 0.00015422631979749354, + "loss": 1.0434, + "step": 866 + }, + { + "epoch": 1.27, + "learning_rate": 0.00015412719827754873, + "loss": 1.0091, + "step": 867 + }, + { + "epoch": 1.27, + "learning_rate": 0.00015402800148329071, + "loss": 0.9598, + "step": 868 + }, + { + "epoch": 1.28, + "learning_rate": 0.00015392872955267175, + "loss": 0.9876, + "step": 869 + }, + { + "epoch": 1.28, + "learning_rate": 0.00015382938262374865, + "loss": 0.9559, + "step": 870 + }, + { + "epoch": 1.28, + "learning_rate": 0.0001537299608346824, + "loss": 0.9984, + "step": 871 + }, + { + "epoch": 1.28, + "learning_rate": 0.00015363046432373824, + "loss": 1.0171, + "step": 872 + }, + { + "epoch": 1.28, + "learning_rate": 0.0001535308932292853, + "loss": 1.0188, + "step": 873 + }, + { + "epoch": 1.28, + "learning_rate": 0.00015343124768979637, + "loss": 0.9613, + "step": 874 + }, + { + "epoch": 1.28, + "learning_rate": 0.00015333152784384777, + "loss": 0.9572, + "step": 875 + }, + { + "epoch": 1.29, + "learning_rate": 0.0001532317338301192, + "loss": 1.0093, + "step": 876 + }, + { + "epoch": 1.29, + "learning_rate": 0.00015313186578739353, + "loss": 0.9935, + "step": 877 + }, + { + "epoch": 1.29, + "learning_rate": 0.0001530319238545565, + "loss": 1.0371, + "step": 878 + }, + { + "epoch": 1.29, + "learning_rate": 0.00015293190817059667, + "loss": 1.0022, + 
"step": 879 + }, + { + "epoch": 1.29, + "learning_rate": 0.00015283181887460517, + "loss": 1.0033, + "step": 880 + }, + { + "epoch": 1.29, + "learning_rate": 0.00015273165610577542, + "loss": 0.9986, + "step": 881 + }, + { + "epoch": 1.3, + "learning_rate": 0.00015263142000340312, + "loss": 1.0495, + "step": 882 + }, + { + "epoch": 1.3, + "learning_rate": 0.0001525311107068859, + "loss": 1.017, + "step": 883 + }, + { + "epoch": 1.3, + "learning_rate": 0.00015243072835572318, + "loss": 0.9757, + "step": 884 + }, + { + "epoch": 1.3, + "learning_rate": 0.000152330273089516, + "loss": 1.0342, + "step": 885 + }, + { + "epoch": 1.3, + "learning_rate": 0.0001522297450479668, + "loss": 1.0059, + "step": 886 + }, + { + "epoch": 1.3, + "learning_rate": 0.00015212914437087922, + "loss": 0.9845, + "step": 887 + }, + { + "epoch": 1.3, + "learning_rate": 0.0001520284711981579, + "loss": 1.0365, + "step": 888 + }, + { + "epoch": 1.31, + "learning_rate": 0.0001519277256698083, + "loss": 0.9521, + "step": 889 + }, + { + "epoch": 1.31, + "learning_rate": 0.0001518269079259366, + "loss": 1.0867, + "step": 890 + }, + { + "epoch": 1.31, + "learning_rate": 0.00015172601810674915, + "loss": 1.0444, + "step": 891 + }, + { + "epoch": 1.31, + "learning_rate": 0.00015162505635255287, + "loss": 1.077, + "step": 892 + }, + { + "epoch": 1.31, + "learning_rate": 0.00015152402280375454, + "loss": 0.9883, + "step": 893 + }, + { + "epoch": 1.31, + "learning_rate": 0.0001514229176008607, + "loss": 0.9819, + "step": 894 + }, + { + "epoch": 1.31, + "learning_rate": 0.00015132174088447776, + "loss": 0.9912, + "step": 895 + }, + { + "epoch": 1.32, + "learning_rate": 0.00015122049279531143, + "loss": 0.9575, + "step": 896 + }, + { + "epoch": 1.32, + "learning_rate": 0.00015111917347416671, + "loss": 1.0356, + "step": 897 + }, + { + "epoch": 1.32, + "learning_rate": 0.00015101778306194765, + "loss": 0.9963, + "step": 898 + }, + { + "epoch": 1.32, + "learning_rate": 0.0001509163216996572, + "loss": 0.9728, 
+ "step": 899 + }, + { + "epoch": 1.32, + "learning_rate": 0.00015081478952839693, + "loss": 1.0402, + "step": 900 + }, + { + "epoch": 1.32, + "learning_rate": 0.00015071318668936695, + "loss": 1.0287, + "step": 901 + }, + { + "epoch": 1.32, + "learning_rate": 0.00015061151332386566, + "loss": 1.0505, + "step": 902 + }, + { + "epoch": 1.33, + "learning_rate": 0.00015050976957328938, + "loss": 0.9814, + "step": 903 + }, + { + "epoch": 1.33, + "learning_rate": 0.00015040795557913245, + "loss": 1.0083, + "step": 904 + }, + { + "epoch": 1.33, + "learning_rate": 0.00015030607148298696, + "loss": 1.0871, + "step": 905 + }, + { + "epoch": 1.33, + "learning_rate": 0.00015020411742654237, + "loss": 1.0943, + "step": 906 + }, + { + "epoch": 1.33, + "learning_rate": 0.0001501020935515854, + "loss": 1.0631, + "step": 907 + }, + { + "epoch": 1.33, + "learning_rate": 0.00015000000000000001, + "loss": 1.0615, + "step": 908 + }, + { + "epoch": 1.34, + "learning_rate": 0.00014989783691376696, + "loss": 0.8933, + "step": 909 + }, + { + "epoch": 1.34, + "learning_rate": 0.0001497956044349637, + "loss": 1.012, + "step": 910 + }, + { + "epoch": 1.34, + "learning_rate": 0.00014969330270576427, + "loss": 0.9215, + "step": 911 + }, + { + "epoch": 1.34, + "learning_rate": 0.00014959093186843895, + "loss": 0.9894, + "step": 912 + }, + { + "epoch": 1.34, + "learning_rate": 0.00014948849206535412, + "loss": 1.0053, + "step": 913 + }, + { + "epoch": 1.34, + "learning_rate": 0.00014938598343897214, + "loss": 1.0975, + "step": 914 + }, + { + "epoch": 1.34, + "learning_rate": 0.00014928340613185097, + "loss": 1.068, + "step": 915 + }, + { + "epoch": 1.35, + "learning_rate": 0.0001491807602866442, + "loss": 0.9838, + "step": 916 + }, + { + "epoch": 1.35, + "learning_rate": 0.00014907804604610063, + "loss": 1.1493, + "step": 917 + }, + { + "epoch": 1.35, + "learning_rate": 0.00014897526355306428, + "loss": 0.9491, + "step": 918 + }, + { + "epoch": 1.35, + "learning_rate": 0.000148872412950474, + 
"loss": 1.0252, + "step": 919 + }, + { + "epoch": 1.35, + "learning_rate": 0.00014876949438136347, + "loss": 0.9555, + "step": 920 + }, + { + "epoch": 1.35, + "learning_rate": 0.00014866650798886074, + "loss": 0.9831, + "step": 921 + }, + { + "epoch": 1.35, + "learning_rate": 0.0001485634539161883, + "loss": 1.0957, + "step": 922 + }, + { + "epoch": 1.36, + "learning_rate": 0.0001484603323066627, + "loss": 0.9606, + "step": 923 + }, + { + "epoch": 1.36, + "learning_rate": 0.00014835714330369446, + "loss": 1.0643, + "step": 924 + }, + { + "epoch": 1.36, + "learning_rate": 0.00014825388705078777, + "loss": 1.0219, + "step": 925 + }, + { + "epoch": 1.36, + "learning_rate": 0.00014815056369154038, + "loss": 1.1315, + "step": 926 + }, + { + "epoch": 1.36, + "learning_rate": 0.0001480471733696434, + "loss": 1.0406, + "step": 927 + }, + { + "epoch": 1.36, + "learning_rate": 0.000147943716228881, + "loss": 1.0284, + "step": 928 + }, + { + "epoch": 1.37, + "learning_rate": 0.00014784019241313026, + "loss": 1.035, + "step": 929 + }, + { + "epoch": 1.37, + "learning_rate": 0.00014773660206636105, + "loss": 1.0562, + "step": 930 + }, + { + "epoch": 1.37, + "learning_rate": 0.0001476329453326357, + "loss": 0.9813, + "step": 931 + }, + { + "epoch": 1.37, + "learning_rate": 0.000147529222356109, + "loss": 1.0865, + "step": 932 + }, + { + "epoch": 1.37, + "learning_rate": 0.0001474254332810277, + "loss": 1.0074, + "step": 933 + }, + { + "epoch": 1.37, + "learning_rate": 0.00014732157825173044, + "loss": 1.0855, + "step": 934 + }, + { + "epoch": 1.37, + "learning_rate": 0.00014721765741264786, + "loss": 0.9785, + "step": 935 + }, + { + "epoch": 1.38, + "learning_rate": 0.0001471136709083018, + "loss": 1.011, + "step": 936 + }, + { + "epoch": 1.38, + "learning_rate": 0.00014700961888330563, + "loss": 1.0484, + "step": 937 + }, + { + "epoch": 1.38, + "learning_rate": 0.0001469055014823637, + "loss": 1.0435, + "step": 938 + }, + { + "epoch": 1.38, + "learning_rate": 
0.00014680131885027141, + "loss": 1.0176, + "step": 939 + }, + { + "epoch": 1.38, + "learning_rate": 0.00014669707113191483, + "loss": 0.9542, + "step": 940 + }, + { + "epoch": 1.38, + "learning_rate": 0.00014659275847227042, + "loss": 0.9526, + "step": 941 + }, + { + "epoch": 1.38, + "learning_rate": 0.00014648838101640518, + "loss": 0.9681, + "step": 942 + }, + { + "epoch": 1.39, + "learning_rate": 0.00014638393890947603, + "loss": 0.9072, + "step": 943 + }, + { + "epoch": 1.39, + "learning_rate": 0.0001462794322967299, + "loss": 0.9939, + "step": 944 + }, + { + "epoch": 1.39, + "learning_rate": 0.00014617486132350343, + "loss": 1.018, + "step": 945 + }, + { + "epoch": 1.39, + "learning_rate": 0.0001460702261352226, + "loss": 0.8993, + "step": 946 + }, + { + "epoch": 1.39, + "learning_rate": 0.00014596552687740302, + "loss": 1.0134, + "step": 947 + }, + { + "epoch": 1.39, + "learning_rate": 0.00014586076369564908, + "loss": 0.947, + "step": 948 + }, + { + "epoch": 1.39, + "learning_rate": 0.00014575593673565426, + "loss": 1.0697, + "step": 949 + }, + { + "epoch": 1.4, + "learning_rate": 0.00014565104614320065, + "loss": 1.006, + "step": 950 + }, + { + "epoch": 1.4, + "learning_rate": 0.00014554609206415885, + "loss": 1.0262, + "step": 951 + }, + { + "epoch": 1.4, + "learning_rate": 0.00014544107464448775, + "loss": 0.9809, + "step": 952 + }, + { + "epoch": 1.4, + "learning_rate": 0.0001453359940302344, + "loss": 0.981, + "step": 953 + }, + { + "epoch": 1.4, + "learning_rate": 0.00014523085036753354, + "loss": 0.9925, + "step": 954 + }, + { + "epoch": 1.4, + "learning_rate": 0.00014512564380260787, + "loss": 1.0199, + "step": 955 + }, + { + "epoch": 1.41, + "learning_rate": 0.00014502037448176734, + "loss": 0.9715, + "step": 956 + }, + { + "epoch": 1.41, + "learning_rate": 0.00014491504255140927, + "loss": 1.0072, + "step": 957 + }, + { + "epoch": 1.41, + "learning_rate": 0.000144809648158018, + "loss": 1.0659, + "step": 958 + }, + { + "epoch": 1.41, + 
"learning_rate": 0.00014470419144816483, + "loss": 1.0538, + "step": 959 + }, + { + "epoch": 1.41, + "learning_rate": 0.0001445986725685076, + "loss": 1.0571, + "step": 960 + }, + { + "epoch": 1.41, + "learning_rate": 0.00014449309166579072, + "loss": 0.9701, + "step": 961 + }, + { + "epoch": 1.41, + "learning_rate": 0.00014438744888684482, + "loss": 0.9618, + "step": 962 + }, + { + "epoch": 1.42, + "learning_rate": 0.0001442817443785865, + "loss": 0.9179, + "step": 963 + }, + { + "epoch": 1.42, + "learning_rate": 0.00014417597828801832, + "loss": 1.0613, + "step": 964 + }, + { + "epoch": 1.42, + "learning_rate": 0.00014407015076222846, + "loss": 0.9558, + "step": 965 + }, + { + "epoch": 1.42, + "learning_rate": 0.00014396426194839042, + "loss": 0.9823, + "step": 966 + }, + { + "epoch": 1.42, + "learning_rate": 0.00014385831199376317, + "loss": 0.9968, + "step": 967 + }, + { + "epoch": 1.42, + "learning_rate": 0.00014375230104569044, + "loss": 0.9829, + "step": 968 + }, + { + "epoch": 1.42, + "learning_rate": 0.00014364622925160098, + "loss": 1.0552, + "step": 969 + }, + { + "epoch": 1.43, + "learning_rate": 0.00014354009675900803, + "loss": 0.993, + "step": 970 + }, + { + "epoch": 1.43, + "learning_rate": 0.00014343390371550935, + "loss": 1.0927, + "step": 971 + }, + { + "epoch": 1.43, + "learning_rate": 0.00014332765026878687, + "loss": 1.0387, + "step": 972 + }, + { + "epoch": 1.43, + "learning_rate": 0.00014322133656660647, + "loss": 0.9255, + "step": 973 + }, + { + "epoch": 1.43, + "learning_rate": 0.00014311496275681783, + "loss": 1.0093, + "step": 974 + }, + { + "epoch": 1.43, + "learning_rate": 0.00014300852898735435, + "loss": 1.0078, + "step": 975 + }, + { + "epoch": 1.44, + "learning_rate": 0.00014290203540623267, + "loss": 0.9161, + "step": 976 + }, + { + "epoch": 1.44, + "learning_rate": 0.00014279548216155266, + "loss": 1.03, + "step": 977 + }, + { + "epoch": 1.44, + "learning_rate": 0.00014268886940149714, + "loss": 1.0364, + "step": 978 + }, + { + 
"epoch": 1.44, + "learning_rate": 0.0001425821972743318, + "loss": 0.9768, + "step": 979 + }, + { + "epoch": 1.44, + "learning_rate": 0.0001424754659284048, + "loss": 1.1229, + "step": 980 + }, + { + "epoch": 1.44, + "learning_rate": 0.0001423686755121466, + "loss": 1.0362, + "step": 981 + }, + { + "epoch": 1.44, + "learning_rate": 0.00014226182617406996, + "loss": 0.9522, + "step": 982 + }, + { + "epoch": 1.45, + "learning_rate": 0.00014215491806276944, + "loss": 1.0479, + "step": 983 + }, + { + "epoch": 1.45, + "learning_rate": 0.00014204795132692144, + "loss": 1.0671, + "step": 984 + }, + { + "epoch": 1.45, + "learning_rate": 0.00014194092611528384, + "loss": 0.8983, + "step": 985 + }, + { + "epoch": 1.45, + "learning_rate": 0.00014183384257669581, + "loss": 1.004, + "step": 986 + }, + { + "epoch": 1.45, + "learning_rate": 0.00014172670086007774, + "loss": 1.0972, + "step": 987 + }, + { + "epoch": 1.45, + "learning_rate": 0.00014161950111443077, + "loss": 1.0198, + "step": 988 + }, + { + "epoch": 1.45, + "learning_rate": 0.00014151224348883692, + "loss": 1.0257, + "step": 989 + }, + { + "epoch": 1.46, + "learning_rate": 0.00014140492813245856, + "loss": 0.9717, + "step": 990 + }, + { + "epoch": 1.46, + "learning_rate": 0.0001412975551945384, + "loss": 0.9455, + "step": 991 + }, + { + "epoch": 1.46, + "learning_rate": 0.0001411901248243993, + "loss": 1.0372, + "step": 992 + }, + { + "epoch": 1.46, + "learning_rate": 0.0001410826371714438, + "loss": 0.9961, + "step": 993 + }, + { + "epoch": 1.46, + "learning_rate": 0.00014097509238515432, + "loss": 1.0599, + "step": 994 + }, + { + "epoch": 1.46, + "learning_rate": 0.00014086749061509258, + "loss": 1.0166, + "step": 995 + }, + { + "epoch": 1.46, + "learning_rate": 0.00014075983201089964, + "loss": 1.0254, + "step": 996 + }, + { + "epoch": 1.47, + "learning_rate": 0.00014065211672229555, + "loss": 0.9979, + "step": 997 + }, + { + "epoch": 1.47, + "learning_rate": 0.00014054434489907915, + "loss": 1.0365, + "step": 
998 + }, + { + "epoch": 1.47, + "learning_rate": 0.00014043651669112808, + "loss": 1.0075, + "step": 999 + }, + { + "epoch": 1.47, + "learning_rate": 0.00014032863224839814, + "loss": 0.9743, + "step": 1000 + }, + { + "epoch": 1.47, + "learning_rate": 0.00014022069172092352, + "loss": 1.0056, + "step": 1001 + }, + { + "epoch": 1.47, + "learning_rate": 0.00014011269525881636, + "loss": 0.9647, + "step": 1002 + }, + { + "epoch": 1.48, + "learning_rate": 0.00014000464301226656, + "loss": 1.0912, + "step": 1003 + }, + { + "epoch": 1.48, + "learning_rate": 0.00013989653513154165, + "loss": 0.8811, + "step": 1004 + }, + { + "epoch": 1.48, + "learning_rate": 0.00013978837176698646, + "loss": 1.0667, + "step": 1005 + }, + { + "epoch": 1.48, + "learning_rate": 0.000139680153069023, + "loss": 1.0096, + "step": 1006 + }, + { + "epoch": 1.48, + "learning_rate": 0.00013957187918815032, + "loss": 0.926, + "step": 1007 + }, + { + "epoch": 1.48, + "learning_rate": 0.0001394635502749441, + "loss": 1.0814, + "step": 1008 + }, + { + "epoch": 1.48, + "eval_loss": 1.0040607452392578, + "eval_runtime": 2.6168, + "eval_samples_per_second": 417.304, + "eval_steps_per_second": 26.368, + "step": 1008 + }, + { + "epoch": 1.48, + "learning_rate": 0.0001393551664800566, + "loss": 1.0941, + "step": 1009 + }, + { + "epoch": 1.49, + "learning_rate": 0.00013924672795421637, + "loss": 1.044, + "step": 1010 + }, + { + "epoch": 1.49, + "learning_rate": 0.00013913823484822815, + "loss": 1.049, + "step": 1011 + }, + { + "epoch": 1.49, + "learning_rate": 0.00013902968731297255, + "loss": 0.9891, + "step": 1012 + }, + { + "epoch": 1.49, + "learning_rate": 0.00013892108549940583, + "loss": 0.9663, + "step": 1013 + }, + { + "epoch": 1.49, + "learning_rate": 0.00013881242955855974, + "loss": 1.0298, + "step": 1014 + }, + { + "epoch": 1.49, + "learning_rate": 0.0001387037196415414, + "loss": 1.0083, + "step": 1015 + }, + { + "epoch": 1.49, + "learning_rate": 0.0001385949558995329, + "loss": 0.9182, + "step": 
1016 + }, + { + "epoch": 1.5, + "learning_rate": 0.00013848613848379114, + "loss": 1.013, + "step": 1017 + }, + { + "epoch": 1.5, + "learning_rate": 0.00013837726754564785, + "loss": 1.0022, + "step": 1018 + }, + { + "epoch": 1.5, + "learning_rate": 0.000138268343236509, + "loss": 0.9423, + "step": 1019 + }, + { + "epoch": 1.5, + "learning_rate": 0.00013815936570785487, + "loss": 1.058, + "step": 1020 + }, + { + "epoch": 1.5, + "learning_rate": 0.00013805033511123975, + "loss": 0.931, + "step": 1021 + }, + { + "epoch": 1.5, + "learning_rate": 0.00013794125159829172, + "loss": 1.0137, + "step": 1022 + }, + { + "epoch": 1.51, + "learning_rate": 0.00013783211532071246, + "loss": 1.0517, + "step": 1023 + }, + { + "epoch": 1.51, + "learning_rate": 0.000137722926430277, + "loss": 1.0259, + "step": 1024 + }, + { + "epoch": 1.51, + "learning_rate": 0.00013761368507883359, + "loss": 1.0263, + "step": 1025 + }, + { + "epoch": 1.51, + "learning_rate": 0.00013750439141830339, + "loss": 1.0286, + "step": 1026 + }, + { + "epoch": 1.51, + "learning_rate": 0.00013739504560068033, + "loss": 0.9749, + "step": 1027 + }, + { + "epoch": 1.51, + "learning_rate": 0.00013728564777803088, + "loss": 0.9317, + "step": 1028 + }, + { + "epoch": 1.51, + "learning_rate": 0.00013717619810249378, + "loss": 1.0653, + "step": 1029 + }, + { + "epoch": 1.52, + "learning_rate": 0.00013706669672627997, + "loss": 0.9623, + "step": 1030 + }, + { + "epoch": 1.52, + "learning_rate": 0.00013695714380167223, + "loss": 0.9911, + "step": 1031 + }, + { + "epoch": 1.52, + "learning_rate": 0.000136847539481025, + "loss": 0.9843, + "step": 1032 + }, + { + "epoch": 1.52, + "learning_rate": 0.0001367378839167643, + "loss": 0.981, + "step": 1033 + }, + { + "epoch": 1.52, + "learning_rate": 0.00013662817726138728, + "loss": 1.0651, + "step": 1034 + }, + { + "epoch": 1.52, + "learning_rate": 0.00013651841966746232, + "loss": 1.0602, + "step": 1035 + }, + { + "epoch": 1.52, + "learning_rate": 0.0001364086112876284, + 
"loss": 0.9524, + "step": 1036 + }, + { + "epoch": 1.53, + "learning_rate": 0.00013629875227459532, + "loss": 1.0264, + "step": 1037 + }, + { + "epoch": 1.53, + "learning_rate": 0.00013618884278114324, + "loss": 1.0691, + "step": 1038 + }, + { + "epoch": 1.53, + "learning_rate": 0.00013607888296012259, + "loss": 1.0527, + "step": 1039 + }, + { + "epoch": 1.53, + "learning_rate": 0.0001359688729644536, + "loss": 0.9629, + "step": 1040 + }, + { + "epoch": 1.53, + "learning_rate": 0.0001358588129471264, + "loss": 0.957, + "step": 1041 + }, + { + "epoch": 1.53, + "learning_rate": 0.00013574870306120077, + "loss": 1.0976, + "step": 1042 + }, + { + "epoch": 1.54, + "learning_rate": 0.00013563854345980569, + "loss": 0.9317, + "step": 1043 + }, + { + "epoch": 1.54, + "learning_rate": 0.00013552833429613938, + "loss": 1.0359, + "step": 1044 + }, + { + "epoch": 1.54, + "learning_rate": 0.0001354180757234689, + "loss": 1.0642, + "step": 1045 + }, + { + "epoch": 1.54, + "learning_rate": 0.0001353077678951301, + "loss": 1.0526, + "step": 1046 + }, + { + "epoch": 1.54, + "learning_rate": 0.00013519741096452726, + "loss": 1.0276, + "step": 1047 + }, + { + "epoch": 1.54, + "learning_rate": 0.00013508700508513307, + "loss": 1.0471, + "step": 1048 + }, + { + "epoch": 1.54, + "learning_rate": 0.0001349765504104881, + "loss": 1.0353, + "step": 1049 + }, + { + "epoch": 1.55, + "learning_rate": 0.00013486604709420102, + "loss": 1.0025, + "step": 1050 + }, + { + "epoch": 1.55, + "learning_rate": 0.00013475549528994786, + "loss": 0.9019, + "step": 1051 + }, + { + "epoch": 1.55, + "learning_rate": 0.00013464489515147238, + "loss": 1.0453, + "step": 1052 + }, + { + "epoch": 1.55, + "learning_rate": 0.00013453424683258528, + "loss": 1.0395, + "step": 1053 + }, + { + "epoch": 1.55, + "learning_rate": 0.0001344235504871645, + "loss": 0.8939, + "step": 1054 + }, + { + "epoch": 1.55, + "learning_rate": 0.00013431280626915467, + "loss": 0.9198, + "step": 1055 + }, + { + "epoch": 1.55, + 
"learning_rate": 0.00013420201433256689, + "loss": 1.0046, + "step": 1056 + }, + { + "epoch": 1.56, + "learning_rate": 0.0001340911748314788, + "loss": 0.9197, + "step": 1057 + }, + { + "epoch": 1.56, + "learning_rate": 0.00013398028792003413, + "loss": 0.9547, + "step": 1058 + }, + { + "epoch": 1.56, + "learning_rate": 0.00013386935375244246, + "loss": 0.968, + "step": 1059 + }, + { + "epoch": 1.56, + "learning_rate": 0.00013375837248297926, + "loss": 0.9611, + "step": 1060 + }, + { + "epoch": 1.56, + "learning_rate": 0.00013364734426598527, + "loss": 1.0125, + "step": 1061 + }, + { + "epoch": 1.56, + "learning_rate": 0.00013353626925586672, + "loss": 1.0179, + "step": 1062 + }, + { + "epoch": 1.56, + "learning_rate": 0.00013342514760709485, + "loss": 1.04, + "step": 1063 + }, + { + "epoch": 1.57, + "learning_rate": 0.00013331397947420576, + "loss": 0.9251, + "step": 1064 + }, + { + "epoch": 1.57, + "learning_rate": 0.00013320276501180015, + "loss": 1.0762, + "step": 1065 + }, + { + "epoch": 1.57, + "learning_rate": 0.00013309150437454322, + "loss": 1.0137, + "step": 1066 + }, + { + "epoch": 1.57, + "learning_rate": 0.00013298019771716435, + "loss": 0.9981, + "step": 1067 + }, + { + "epoch": 1.57, + "learning_rate": 0.0001328688451944569, + "loss": 1.003, + "step": 1068 + }, + { + "epoch": 1.57, + "learning_rate": 0.00013275744696127805, + "loss": 1.0307, + "step": 1069 + }, + { + "epoch": 1.58, + "learning_rate": 0.00013264600317254853, + "loss": 1.0257, + "step": 1070 + }, + { + "epoch": 1.58, + "learning_rate": 0.00013253451398325249, + "loss": 1.0426, + "step": 1071 + }, + { + "epoch": 1.58, + "learning_rate": 0.00013242297954843711, + "loss": 1.0167, + "step": 1072 + }, + { + "epoch": 1.58, + "learning_rate": 0.00013231140002321253, + "loss": 1.012, + "step": 1073 + }, + { + "epoch": 1.58, + "learning_rate": 0.00013219977556275163, + "loss": 1.0649, + "step": 1074 + }, + { + "epoch": 1.58, + "learning_rate": 0.00013208810632228977, + "loss": 0.9297, + "step": 
1075 + }, + { + "epoch": 1.58, + "learning_rate": 0.00013197639245712454, + "loss": 0.9772, + "step": 1076 + }, + { + "epoch": 1.59, + "learning_rate": 0.00013186463412261565, + "loss": 1.0194, + "step": 1077 + }, + { + "epoch": 1.59, + "learning_rate": 0.00013175283147418465, + "loss": 1.0596, + "step": 1078 + }, + { + "epoch": 1.59, + "learning_rate": 0.00013164098466731468, + "loss": 0.9938, + "step": 1079 + }, + { + "epoch": 1.59, + "learning_rate": 0.00013152909385755025, + "loss": 0.9405, + "step": 1080 + }, + { + "epoch": 1.59, + "learning_rate": 0.0001314171592004972, + "loss": 1.0175, + "step": 1081 + }, + { + "epoch": 1.59, + "learning_rate": 0.00013130518085182225, + "loss": 0.9994, + "step": 1082 + }, + { + "epoch": 1.59, + "learning_rate": 0.00013119315896725287, + "loss": 0.9524, + "step": 1083 + }, + { + "epoch": 1.6, + "learning_rate": 0.00013108109370257712, + "loss": 0.9112, + "step": 1084 + }, + { + "epoch": 1.6, + "learning_rate": 0.00013096898521364338, + "loss": 0.9339, + "step": 1085 + }, + { + "epoch": 1.6, + "learning_rate": 0.00013085683365636014, + "loss": 0.9718, + "step": 1086 + }, + { + "epoch": 1.6, + "learning_rate": 0.0001307446391866958, + "loss": 0.969, + "step": 1087 + }, + { + "epoch": 1.6, + "learning_rate": 0.00013063240196067836, + "loss": 1.0255, + "step": 1088 + }, + { + "epoch": 1.6, + "learning_rate": 0.00013052012213439536, + "loss": 1.0119, + "step": 1089 + }, + { + "epoch": 1.61, + "learning_rate": 0.00013040779986399362, + "loss": 1.0396, + "step": 1090 + }, + { + "epoch": 1.61, + "learning_rate": 0.00013029543530567884, + "loss": 1.0202, + "step": 1091 + }, + { + "epoch": 1.61, + "learning_rate": 0.0001301830286157157, + "loss": 0.9024, + "step": 1092 + }, + { + "epoch": 1.61, + "learning_rate": 0.00013007057995042732, + "loss": 1.0079, + "step": 1093 + }, + { + "epoch": 1.61, + "learning_rate": 0.0001299580894661953, + "loss": 0.9771, + "step": 1094 + }, + { + "epoch": 1.61, + "learning_rate": 0.0001298455573194594, 
+ "loss": 0.9942, + "step": 1095 + }, + { + "epoch": 1.61, + "learning_rate": 0.00012973298366671725, + "loss": 0.9879, + "step": 1096 + }, + { + "epoch": 1.62, + "learning_rate": 0.00012962036866452422, + "loss": 0.9365, + "step": 1097 + }, + { + "epoch": 1.62, + "learning_rate": 0.0001295077124694932, + "loss": 1.0128, + "step": 1098 + }, + { + "epoch": 1.62, + "learning_rate": 0.00012939501523829444, + "loss": 1.0707, + "step": 1099 + }, + { + "epoch": 1.62, + "learning_rate": 0.00012928227712765504, + "loss": 0.9769, + "step": 1100 + }, + { + "epoch": 1.62, + "learning_rate": 0.00012916949829435922, + "loss": 1.0208, + "step": 1101 + }, + { + "epoch": 1.62, + "learning_rate": 0.0001290566788952477, + "loss": 1.0376, + "step": 1102 + }, + { + "epoch": 1.62, + "learning_rate": 0.00012894381908721756, + "loss": 1.0588, + "step": 1103 + }, + { + "epoch": 1.63, + "learning_rate": 0.0001288309190272222, + "loss": 1.0217, + "step": 1104 + }, + { + "epoch": 1.63, + "learning_rate": 0.00012871797887227087, + "loss": 0.9684, + "step": 1105 + }, + { + "epoch": 1.63, + "learning_rate": 0.00012860499877942875, + "loss": 0.9753, + "step": 1106 + }, + { + "epoch": 1.63, + "learning_rate": 0.00012849197890581638, + "loss": 1.0094, + "step": 1107 + }, + { + "epoch": 1.63, + "learning_rate": 0.00012837891940860972, + "loss": 1.0346, + "step": 1108 + }, + { + "epoch": 1.63, + "learning_rate": 0.00012826582044503978, + "loss": 0.8741, + "step": 1109 + }, + { + "epoch": 1.63, + "learning_rate": 0.00012815268217239252, + "loss": 1.0223, + "step": 1110 + }, + { + "epoch": 1.64, + "learning_rate": 0.00012803950474800862, + "loss": 0.8748, + "step": 1111 + }, + { + "epoch": 1.64, + "learning_rate": 0.00012792628832928302, + "loss": 1.0296, + "step": 1112 + }, + { + "epoch": 1.64, + "learning_rate": 0.000127813033073665, + "loss": 0.9993, + "step": 1113 + }, + { + "epoch": 1.64, + "learning_rate": 0.00012769973913865794, + "loss": 1.0555, + "step": 1114 + }, + { + "epoch": 1.64, + 
"learning_rate": 0.00012758640668181882, + "loss": 1.0245, + "step": 1115 + }, + { + "epoch": 1.64, + "learning_rate": 0.0001274730358607583, + "loss": 0.9502, + "step": 1116 + }, + { + "epoch": 1.65, + "learning_rate": 0.00012735962683314042, + "loss": 1.0165, + "step": 1117 + }, + { + "epoch": 1.65, + "learning_rate": 0.0001272461797566823, + "loss": 1.0669, + "step": 1118 + }, + { + "epoch": 1.65, + "learning_rate": 0.000127132694789154, + "loss": 0.8676, + "step": 1119 + }, + { + "epoch": 1.65, + "learning_rate": 0.0001270191720883782, + "loss": 0.9432, + "step": 1120 + }, + { + "epoch": 1.65, + "learning_rate": 0.00012690561181223024, + "loss": 1.0614, + "step": 1121 + }, + { + "epoch": 1.65, + "learning_rate": 0.0001267920141186375, + "loss": 0.9924, + "step": 1122 + }, + { + "epoch": 1.65, + "learning_rate": 0.00012667837916557954, + "loss": 1.139, + "step": 1123 + }, + { + "epoch": 1.66, + "learning_rate": 0.00012656470711108764, + "loss": 1.0043, + "step": 1124 + }, + { + "epoch": 1.66, + "learning_rate": 0.00012645099811324476, + "loss": 1.0747, + "step": 1125 + }, + { + "epoch": 1.66, + "learning_rate": 0.0001263372523301852, + "loss": 0.9668, + "step": 1126 + }, + { + "epoch": 1.66, + "learning_rate": 0.00012622346992009447, + "loss": 0.9931, + "step": 1127 + }, + { + "epoch": 1.66, + "learning_rate": 0.00012610965104120885, + "loss": 0.9393, + "step": 1128 + }, + { + "epoch": 1.66, + "learning_rate": 0.00012599579585181552, + "loss": 0.9918, + "step": 1129 + }, + { + "epoch": 1.66, + "learning_rate": 0.00012588190451025207, + "loss": 1.0172, + "step": 1130 + }, + { + "epoch": 1.67, + "learning_rate": 0.00012576797717490644, + "loss": 1.0586, + "step": 1131 + }, + { + "epoch": 1.67, + "learning_rate": 0.00012565401400421651, + "loss": 1.0482, + "step": 1132 + }, + { + "epoch": 1.67, + "learning_rate": 0.00012554001515667008, + "loss": 1.0548, + "step": 1133 + }, + { + "epoch": 1.67, + "learning_rate": 0.00012542598079080456, + "loss": 1.0092, + "step": 
1134 + }, + { + "epoch": 1.67, + "learning_rate": 0.00012531191106520672, + "loss": 1.0162, + "step": 1135 + }, + { + "epoch": 1.67, + "learning_rate": 0.00012519780613851254, + "loss": 1.0387, + "step": 1136 + }, + { + "epoch": 1.68, + "learning_rate": 0.0001250836661694069, + "loss": 0.9607, + "step": 1137 + }, + { + "epoch": 1.68, + "learning_rate": 0.00012496949131662348, + "loss": 1.0025, + "step": 1138 + }, + { + "epoch": 1.68, + "learning_rate": 0.00012485528173894448, + "loss": 1.0014, + "step": 1139 + }, + { + "epoch": 1.68, + "learning_rate": 0.00012474103759520027, + "loss": 0.9838, + "step": 1140 + }, + { + "epoch": 1.68, + "learning_rate": 0.0001246267590442694, + "loss": 1.0384, + "step": 1141 + }, + { + "epoch": 1.68, + "learning_rate": 0.00012451244624507831, + "loss": 0.9958, + "step": 1142 + }, + { + "epoch": 1.68, + "learning_rate": 0.00012439809935660095, + "loss": 0.9927, + "step": 1143 + }, + { + "epoch": 1.69, + "learning_rate": 0.0001242837185378587, + "loss": 1.0082, + "step": 1144 + }, + { + "epoch": 1.69, + "learning_rate": 0.00012416930394792026, + "loss": 0.9729, + "step": 1145 + }, + { + "epoch": 1.69, + "learning_rate": 0.00012405485574590113, + "loss": 1.0464, + "step": 1146 + }, + { + "epoch": 1.69, + "learning_rate": 0.00012394037409096357, + "loss": 0.987, + "step": 1147 + }, + { + "epoch": 1.69, + "learning_rate": 0.0001238258591423165, + "loss": 0.9402, + "step": 1148 + }, + { + "epoch": 1.69, + "learning_rate": 0.00012371131105921504, + "loss": 1.0293, + "step": 1149 + }, + { + "epoch": 1.69, + "learning_rate": 0.00012359673000096033, + "loss": 0.9418, + "step": 1150 + }, + { + "epoch": 1.7, + "learning_rate": 0.0001234821161268995, + "loss": 0.964, + "step": 1151 + }, + { + "epoch": 1.7, + "learning_rate": 0.00012336746959642526, + "loss": 0.9982, + "step": 1152 + }, + { + "epoch": 1.7, + "learning_rate": 0.0001232527905689757, + "loss": 0.9364, + "step": 1153 + }, + { + "epoch": 1.7, + "learning_rate": 0.00012313807920403419, 
+ "loss": 0.9399, + "step": 1154 + }, + { + "epoch": 1.7, + "learning_rate": 0.0001230233356611289, + "loss": 1.015, + "step": 1155 + }, + { + "epoch": 1.7, + "learning_rate": 0.000122908560099833, + "loss": 1.0214, + "step": 1156 + }, + { + "epoch": 1.7, + "learning_rate": 0.00012279375267976398, + "loss": 1.0262, + "step": 1157 + }, + { + "epoch": 1.71, + "learning_rate": 0.00012267891356058377, + "loss": 1.0277, + "step": 1158 + }, + { + "epoch": 1.71, + "learning_rate": 0.00012256404290199825, + "loss": 1.0095, + "step": 1159 + }, + { + "epoch": 1.71, + "learning_rate": 0.00012244914086375724, + "loss": 1.0314, + "step": 1160 + }, + { + "epoch": 1.71, + "learning_rate": 0.00012233420760565428, + "loss": 0.8282, + "step": 1161 + }, + { + "epoch": 1.71, + "learning_rate": 0.00012221924328752616, + "loss": 0.9709, + "step": 1162 + }, + { + "epoch": 1.71, + "learning_rate": 0.00012210424806925301, + "loss": 0.941, + "step": 1163 + }, + { + "epoch": 1.72, + "learning_rate": 0.00012198922211075778, + "loss": 0.9716, + "step": 1164 + }, + { + "epoch": 1.72, + "learning_rate": 0.00012187416557200633, + "loss": 1.0125, + "step": 1165 + }, + { + "epoch": 1.72, + "learning_rate": 0.00012175907861300697, + "loss": 1.0159, + "step": 1166 + }, + { + "epoch": 1.72, + "learning_rate": 0.00012164396139381029, + "loss": 0.9306, + "step": 1167 + }, + { + "epoch": 1.72, + "learning_rate": 0.00012152881407450905, + "loss": 1.1056, + "step": 1168 + }, + { + "epoch": 1.72, + "learning_rate": 0.00012141363681523776, + "loss": 1.0113, + "step": 1169 + }, + { + "epoch": 1.72, + "learning_rate": 0.00012129842977617265, + "loss": 0.9983, + "step": 1170 + }, + { + "epoch": 1.73, + "learning_rate": 0.00012118319311753137, + "loss": 1.0076, + "step": 1171 + }, + { + "epoch": 1.73, + "learning_rate": 0.00012106792699957263, + "loss": 1.1181, + "step": 1172 + }, + { + "epoch": 1.73, + "learning_rate": 0.00012095263158259631, + "loss": 0.8759, + "step": 1173 + }, + { + "epoch": 1.73, + 
"learning_rate": 0.00012083730702694291, + "loss": 0.9855, + "step": 1174 + }, + { + "epoch": 1.73, + "learning_rate": 0.00012072195349299345, + "loss": 1.1361, + "step": 1175 + }, + { + "epoch": 1.73, + "learning_rate": 0.00012060657114116926, + "loss": 1.0275, + "step": 1176 + }, + { + "epoch": 1.73, + "eval_loss": 0.9928944110870361, + "eval_runtime": 2.6469, + "eval_samples_per_second": 412.56, + "eval_steps_per_second": 26.068, + "step": 1176 + }, + { + "epoch": 1.73, + "learning_rate": 0.0001204911601319318, + "loss": 1.0256, + "step": 1177 + }, + { + "epoch": 1.74, + "learning_rate": 0.00012037572062578238, + "loss": 0.9218, + "step": 1178 + }, + { + "epoch": 1.74, + "learning_rate": 0.00012026025278326187, + "loss": 1.0394, + "step": 1179 + }, + { + "epoch": 1.74, + "learning_rate": 0.00012014475676495052, + "loss": 1.0318, + "step": 1180 + }, + { + "epoch": 1.74, + "learning_rate": 0.00012002923273146794, + "loss": 1.0361, + "step": 1181 + }, + { + "epoch": 1.74, + "learning_rate": 0.00011991368084347252, + "loss": 1.0093, + "step": 1182 + }, + { + "epoch": 1.74, + "learning_rate": 0.00011979810126166151, + "loss": 0.9527, + "step": 1183 + }, + { + "epoch": 1.75, + "learning_rate": 0.00011968249414677055, + "loss": 1.0946, + "step": 1184 + }, + { + "epoch": 1.75, + "learning_rate": 0.00011956685965957368, + "loss": 1.0124, + "step": 1185 + }, + { + "epoch": 1.75, + "learning_rate": 0.000119451197960883, + "loss": 1.0074, + "step": 1186 + }, + { + "epoch": 1.75, + "learning_rate": 0.00011933550921154834, + "loss": 1.0315, + "step": 1187 + }, + { + "epoch": 1.75, + "learning_rate": 0.0001192197935724573, + "loss": 0.9915, + "step": 1188 + }, + { + "epoch": 1.75, + "learning_rate": 0.00011910405120453476, + "loss": 0.9823, + "step": 1189 + }, + { + "epoch": 1.75, + "learning_rate": 0.00011898828226874284, + "loss": 1.0294, + "step": 1190 + }, + { + "epoch": 1.76, + "learning_rate": 0.00011887248692608057, + "loss": 1.0176, + "step": 1191 + }, + { + "epoch": 
1.76, + "learning_rate": 0.00011875666533758372, + "loss": 1.0486, + "step": 1192 + }, + { + "epoch": 1.76, + "learning_rate": 0.00011864081766432456, + "loss": 1.0237, + "step": 1193 + }, + { + "epoch": 1.76, + "learning_rate": 0.00011852494406741165, + "loss": 1.0469, + "step": 1194 + }, + { + "epoch": 1.76, + "learning_rate": 0.00011840904470798955, + "loss": 0.9545, + "step": 1195 + }, + { + "epoch": 1.76, + "learning_rate": 0.00011829311974723867, + "loss": 0.9812, + "step": 1196 + }, + { + "epoch": 1.76, + "learning_rate": 0.00011817716934637509, + "loss": 1.0503, + "step": 1197 + }, + { + "epoch": 1.77, + "learning_rate": 0.0001180611936666502, + "loss": 1.0693, + "step": 1198 + }, + { + "epoch": 1.77, + "learning_rate": 0.00011794519286935055, + "loss": 0.9627, + "step": 1199 + }, + { + "epoch": 1.77, + "learning_rate": 0.00011782916711579759, + "loss": 0.9728, + "step": 1200 + }, + { + "epoch": 1.77, + "learning_rate": 0.0001177131165673476, + "loss": 1.13, + "step": 1201 + }, + { + "epoch": 1.77, + "learning_rate": 0.0001175970413853912, + "loss": 0.9756, + "step": 1202 + }, + { + "epoch": 1.77, + "learning_rate": 0.00011748094173135337, + "loss": 1.0069, + "step": 1203 + }, + { + "epoch": 1.77, + "learning_rate": 0.00011736481776669306, + "loss": 1.033, + "step": 1204 + }, + { + "epoch": 1.78, + "learning_rate": 0.00011724866965290302, + "loss": 0.9906, + "step": 1205 + }, + { + "epoch": 1.78, + "learning_rate": 0.00011713249755150965, + "loss": 1.1008, + "step": 1206 + }, + { + "epoch": 1.78, + "learning_rate": 0.00011701630162407266, + "loss": 0.9987, + "step": 1207 + }, + { + "epoch": 1.78, + "learning_rate": 0.00011690008203218493, + "loss": 1.0122, + "step": 1208 + }, + { + "epoch": 1.78, + "learning_rate": 0.0001167838389374722, + "loss": 1.0495, + "step": 1209 + }, + { + "epoch": 1.78, + "learning_rate": 0.0001166675725015929, + "loss": 0.9875, + "step": 1210 + }, + { + "epoch": 1.79, + "learning_rate": 0.00011655128288623802, + "loss": 1.0231, + 
"step": 1211 + }, + { + "epoch": 1.79, + "learning_rate": 0.00011643497025313061, + "loss": 0.9342, + "step": 1212 + }, + { + "epoch": 1.79, + "learning_rate": 0.00011631863476402594, + "loss": 1.1006, + "step": 1213 + }, + { + "epoch": 1.79, + "learning_rate": 0.00011620227658071087, + "loss": 0.9264, + "step": 1214 + }, + { + "epoch": 1.79, + "learning_rate": 0.00011608589586500391, + "loss": 1.1099, + "step": 1215 + }, + { + "epoch": 1.79, + "learning_rate": 0.00011596949277875495, + "loss": 1.0326, + "step": 1216 + }, + { + "epoch": 1.79, + "learning_rate": 0.0001158530674838449, + "loss": 0.9235, + "step": 1217 + }, + { + "epoch": 1.8, + "learning_rate": 0.00011573662014218564, + "loss": 1.0227, + "step": 1218 + }, + { + "epoch": 1.8, + "learning_rate": 0.00011562015091571963, + "loss": 1.0028, + "step": 1219 + }, + { + "epoch": 1.8, + "learning_rate": 0.00011550365996641979, + "loss": 1.0744, + "step": 1220 + }, + { + "epoch": 1.8, + "learning_rate": 0.00011538714745628931, + "loss": 0.9521, + "step": 1221 + }, + { + "epoch": 1.8, + "learning_rate": 0.00011527061354736129, + "loss": 1.0171, + "step": 1222 + }, + { + "epoch": 1.8, + "learning_rate": 0.00011515405840169861, + "loss": 1.0481, + "step": 1223 + }, + { + "epoch": 1.8, + "learning_rate": 0.00011503748218139369, + "loss": 1.0034, + "step": 1224 + }, + { + "epoch": 1.81, + "learning_rate": 0.00011492088504856826, + "loss": 1.1384, + "step": 1225 + }, + { + "epoch": 1.81, + "learning_rate": 0.00011480426716537315, + "loss": 1.0268, + "step": 1226 + }, + { + "epoch": 1.81, + "learning_rate": 0.00011468762869398802, + "loss": 1.003, + "step": 1227 + }, + { + "epoch": 1.81, + "learning_rate": 0.00011457096979662114, + "loss": 1.1087, + "step": 1228 + }, + { + "epoch": 1.81, + "learning_rate": 0.00011445429063550926, + "loss": 1.0809, + "step": 1229 + }, + { + "epoch": 1.81, + "learning_rate": 0.00011433759137291727, + "loss": 1.0054, + "step": 1230 + }, + { + "epoch": 1.82, + "learning_rate": 
0.00011422087217113795, + "loss": 0.9416, + "step": 1231 + }, + { + "epoch": 1.82, + "learning_rate": 0.00011410413319249194, + "loss": 1.0153, + "step": 1232 + }, + { + "epoch": 1.82, + "learning_rate": 0.00011398737459932728, + "loss": 1.0622, + "step": 1233 + }, + { + "epoch": 1.82, + "learning_rate": 0.00011387059655401932, + "loss": 1.0792, + "step": 1234 + }, + { + "epoch": 1.82, + "learning_rate": 0.00011375379921897051, + "loss": 0.9822, + "step": 1235 + }, + { + "epoch": 1.82, + "learning_rate": 0.00011363698275661001, + "loss": 0.9949, + "step": 1236 + }, + { + "epoch": 1.82, + "learning_rate": 0.00011352014732939369, + "loss": 0.9653, + "step": 1237 + }, + { + "epoch": 1.83, + "learning_rate": 0.00011340329309980377, + "loss": 1.0694, + "step": 1238 + }, + { + "epoch": 1.83, + "learning_rate": 0.00011328642023034857, + "loss": 0.9925, + "step": 1239 + }, + { + "epoch": 1.83, + "learning_rate": 0.00011316952888356237, + "loss": 0.9829, + "step": 1240 + }, + { + "epoch": 1.83, + "learning_rate": 0.00011305261922200519, + "loss": 0.9659, + "step": 1241 + }, + { + "epoch": 1.83, + "learning_rate": 0.00011293569140826239, + "loss": 1.109, + "step": 1242 + }, + { + "epoch": 1.83, + "learning_rate": 0.00011281874560494472, + "loss": 1.0614, + "step": 1243 + }, + { + "epoch": 1.83, + "learning_rate": 0.00011270178197468789, + "loss": 0.9013, + "step": 1244 + }, + { + "epoch": 1.84, + "learning_rate": 0.00011258480068015235, + "loss": 1.0049, + "step": 1245 + }, + { + "epoch": 1.84, + "learning_rate": 0.00011246780188402322, + "loss": 0.9746, + "step": 1246 + }, + { + "epoch": 1.84, + "learning_rate": 0.00011235078574900984, + "loss": 1.1433, + "step": 1247 + }, + { + "epoch": 1.84, + "learning_rate": 0.00011223375243784573, + "loss": 1.0196, + "step": 1248 + }, + { + "epoch": 1.84, + "learning_rate": 0.00011211670211328833, + "loss": 0.9859, + "step": 1249 + }, + { + "epoch": 1.84, + "learning_rate": 0.0001119996349381187, + "loss": 0.9037, + "step": 1250 + }, + 
{ + "epoch": 1.85, + "learning_rate": 0.0001118825510751413, + "loss": 1.0481, + "step": 1251 + }, + { + "epoch": 1.85, + "learning_rate": 0.00011176545068718385, + "loss": 1.0324, + "step": 1252 + }, + { + "epoch": 1.85, + "learning_rate": 0.00011164833393709706, + "loss": 1.0155, + "step": 1253 + }, + { + "epoch": 1.85, + "learning_rate": 0.00011153120098775434, + "loss": 0.967, + "step": 1254 + }, + { + "epoch": 1.85, + "learning_rate": 0.00011141405200205166, + "loss": 0.9766, + "step": 1255 + }, + { + "epoch": 1.85, + "learning_rate": 0.00011129688714290729, + "loss": 1.0075, + "step": 1256 + }, + { + "epoch": 1.85, + "learning_rate": 0.00011117970657326158, + "loss": 0.9472, + "step": 1257 + }, + { + "epoch": 1.86, + "learning_rate": 0.00011106251045607674, + "loss": 0.9949, + "step": 1258 + }, + { + "epoch": 1.86, + "learning_rate": 0.00011094529895433652, + "loss": 1.0302, + "step": 1259 + }, + { + "epoch": 1.86, + "learning_rate": 0.0001108280722310462, + "loss": 1.0538, + "step": 1260 + }, + { + "epoch": 1.86, + "learning_rate": 0.00011071083044923214, + "loss": 1.0025, + "step": 1261 + }, + { + "epoch": 1.86, + "learning_rate": 0.00011059357377194161, + "loss": 1.024, + "step": 1262 + }, + { + "epoch": 1.86, + "learning_rate": 0.00011047630236224271, + "loss": 0.9452, + "step": 1263 + }, + { + "epoch": 1.86, + "learning_rate": 0.00011035901638322392, + "loss": 1.0055, + "step": 1264 + }, + { + "epoch": 1.87, + "learning_rate": 0.00011024171599799409, + "loss": 0.9875, + "step": 1265 + }, + { + "epoch": 1.87, + "learning_rate": 0.00011012440136968196, + "loss": 0.9582, + "step": 1266 + }, + { + "epoch": 1.87, + "learning_rate": 0.00011000707266143617, + "loss": 0.9986, + "step": 1267 + }, + { + "epoch": 1.87, + "learning_rate": 0.00010988973003642499, + "loss": 1.0328, + "step": 1268 + }, + { + "epoch": 1.87, + "learning_rate": 0.0001097723736578359, + "loss": 1.0108, + "step": 1269 + }, + { + "epoch": 1.87, + "learning_rate": 0.00010965500368887567, + 
"loss": 0.9941, + "step": 1270 + }, + { + "epoch": 1.87, + "learning_rate": 0.00010953762029276982, + "loss": 1.0842, + "step": 1271 + }, + { + "epoch": 1.88, + "learning_rate": 0.00010942022363276264, + "loss": 0.994, + "step": 1272 + }, + { + "epoch": 1.88, + "learning_rate": 0.00010930281387211683, + "loss": 1.0151, + "step": 1273 + }, + { + "epoch": 1.88, + "learning_rate": 0.00010918539117411333, + "loss": 1.0172, + "step": 1274 + }, + { + "epoch": 1.88, + "learning_rate": 0.00010906795570205104, + "loss": 1.0698, + "step": 1275 + }, + { + "epoch": 1.88, + "learning_rate": 0.00010895050761924668, + "loss": 0.9835, + "step": 1276 + }, + { + "epoch": 1.88, + "learning_rate": 0.0001088330470890344, + "loss": 0.9461, + "step": 1277 + }, + { + "epoch": 1.89, + "learning_rate": 0.00010871557427476583, + "loss": 1.0394, + "step": 1278 + }, + { + "epoch": 1.89, + "learning_rate": 0.00010859808933980948, + "loss": 0.9639, + "step": 1279 + }, + { + "epoch": 1.89, + "learning_rate": 0.00010848059244755093, + "loss": 0.9863, + "step": 1280 + }, + { + "epoch": 1.89, + "learning_rate": 0.00010836308376139221, + "loss": 1.0728, + "step": 1281 + }, + { + "epoch": 1.89, + "learning_rate": 0.00010824556344475181, + "loss": 0.9989, + "step": 1282 + }, + { + "epoch": 1.89, + "learning_rate": 0.00010812803166106444, + "loss": 0.962, + "step": 1283 + }, + { + "epoch": 1.89, + "learning_rate": 0.00010801048857378071, + "loss": 0.8658, + "step": 1284 + }, + { + "epoch": 1.9, + "learning_rate": 0.00010789293434636698, + "loss": 1.0488, + "step": 1285 + }, + { + "epoch": 1.9, + "learning_rate": 0.00010777536914230508, + "loss": 1.0183, + "step": 1286 + }, + { + "epoch": 1.9, + "learning_rate": 0.00010765779312509208, + "loss": 0.9535, + "step": 1287 + }, + { + "epoch": 1.9, + "learning_rate": 0.00010754020645824017, + "loss": 0.9978, + "step": 1288 + }, + { + "epoch": 1.9, + "learning_rate": 0.00010742260930527625, + "loss": 0.8929, + "step": 1289 + }, + { + "epoch": 1.9, + 
"learning_rate": 0.0001073050018297419, + "loss": 0.9762, + "step": 1290 + }, + { + "epoch": 1.9, + "learning_rate": 0.00010718738419519297, + "loss": 1.0559, + "step": 1291 + }, + { + "epoch": 1.91, + "learning_rate": 0.00010706975656519946, + "loss": 1.0327, + "step": 1292 + }, + { + "epoch": 1.91, + "learning_rate": 0.00010695211910334537, + "loss": 1.0322, + "step": 1293 + }, + { + "epoch": 1.91, + "learning_rate": 0.00010683447197322817, + "loss": 1.0542, + "step": 1294 + }, + { + "epoch": 1.91, + "learning_rate": 0.00010671681533845899, + "loss": 1.0521, + "step": 1295 + }, + { + "epoch": 1.91, + "learning_rate": 0.00010659914936266206, + "loss": 0.9967, + "step": 1296 + }, + { + "epoch": 1.91, + "learning_rate": 0.00010648147420947461, + "loss": 1.0491, + "step": 1297 + }, + { + "epoch": 1.92, + "learning_rate": 0.00010636379004254664, + "loss": 0.9035, + "step": 1298 + }, + { + "epoch": 1.92, + "learning_rate": 0.00010624609702554069, + "loss": 1.0704, + "step": 1299 + }, + { + "epoch": 1.92, + "learning_rate": 0.00010612839532213164, + "loss": 0.9533, + "step": 1300 + }, + { + "epoch": 1.92, + "learning_rate": 0.00010601068509600642, + "loss": 1.0396, + "step": 1301 + }, + { + "epoch": 1.92, + "learning_rate": 0.00010589296651086376, + "loss": 0.9543, + "step": 1302 + }, + { + "epoch": 1.92, + "learning_rate": 0.0001057752397304141, + "loss": 1.0591, + "step": 1303 + }, + { + "epoch": 1.92, + "learning_rate": 0.00010565750491837925, + "loss": 1.1191, + "step": 1304 + }, + { + "epoch": 1.93, + "learning_rate": 0.00010553976223849218, + "loss": 0.916, + "step": 1305 + }, + { + "epoch": 1.93, + "learning_rate": 0.00010542201185449678, + "loss": 0.9732, + "step": 1306 + }, + { + "epoch": 1.93, + "learning_rate": 0.00010530425393014774, + "loss": 1.01, + "step": 1307 + }, + { + "epoch": 1.93, + "learning_rate": 0.00010518648862921012, + "loss": 0.9849, + "step": 1308 + }, + { + "epoch": 1.93, + "learning_rate": 0.0001050687161154593, + "loss": 1.0519, + "step": 
1309 + }, + { + "epoch": 1.93, + "learning_rate": 0.00010495093655268071, + "loss": 1.0539, + "step": 1310 + }, + { + "epoch": 1.93, + "learning_rate": 0.00010483315010466952, + "loss": 0.9922, + "step": 1311 + }, + { + "epoch": 1.94, + "learning_rate": 0.00010471535693523057, + "loss": 1.0048, + "step": 1312 + }, + { + "epoch": 1.94, + "learning_rate": 0.00010459755720817797, + "loss": 1.0576, + "step": 1313 + }, + { + "epoch": 1.94, + "learning_rate": 0.00010447975108733492, + "loss": 1.0268, + "step": 1314 + }, + { + "epoch": 1.94, + "learning_rate": 0.00010436193873653361, + "loss": 1.0566, + "step": 1315 + }, + { + "epoch": 1.94, + "learning_rate": 0.00010424412031961484, + "loss": 1.0294, + "step": 1316 + }, + { + "epoch": 1.94, + "learning_rate": 0.00010412629600042785, + "loss": 1.0808, + "step": 1317 + }, + { + "epoch": 1.94, + "learning_rate": 0.00010400846594283012, + "loss": 1.0487, + "step": 1318 + }, + { + "epoch": 1.95, + "learning_rate": 0.00010389063031068698, + "loss": 1.04, + "step": 1319 + }, + { + "epoch": 1.95, + "learning_rate": 0.00010377278926787173, + "loss": 1.033, + "step": 1320 + }, + { + "epoch": 1.95, + "learning_rate": 0.000103654942978265, + "loss": 0.9637, + "step": 1321 + }, + { + "epoch": 1.95, + "learning_rate": 0.00010353709160575489, + "loss": 0.9665, + "step": 1322 + }, + { + "epoch": 1.95, + "learning_rate": 0.00010341923531423634, + "loss": 1.0079, + "step": 1323 + }, + { + "epoch": 1.95, + "learning_rate": 0.00010330137426761135, + "loss": 0.9989, + "step": 1324 + }, + { + "epoch": 1.96, + "learning_rate": 0.00010318350862978848, + "loss": 1.0103, + "step": 1325 + }, + { + "epoch": 1.96, + "learning_rate": 0.00010306563856468253, + "loss": 0.9872, + "step": 1326 + }, + { + "epoch": 1.96, + "learning_rate": 0.00010294776423621464, + "loss": 0.9684, + "step": 1327 + }, + { + "epoch": 1.96, + "learning_rate": 0.00010282988580831183, + "loss": 0.9745, + "step": 1328 + }, + { + "epoch": 1.96, + "learning_rate": 
0.00010271200344490674, + "loss": 1.0621, + "step": 1329 + }, + { + "epoch": 1.96, + "learning_rate": 0.0001025941173099376, + "loss": 1.0639, + "step": 1330 + }, + { + "epoch": 1.96, + "learning_rate": 0.00010247622756734774, + "loss": 0.914, + "step": 1331 + }, + { + "epoch": 1.97, + "learning_rate": 0.00010235833438108571, + "loss": 1.0135, + "step": 1332 + }, + { + "epoch": 1.97, + "learning_rate": 0.00010224043791510465, + "loss": 1.0132, + "step": 1333 + }, + { + "epoch": 1.97, + "learning_rate": 0.00010212253833336237, + "loss": 0.9912, + "step": 1334 + }, + { + "epoch": 1.97, + "learning_rate": 0.00010200463579982098, + "loss": 0.9869, + "step": 1335 + }, + { + "epoch": 1.97, + "learning_rate": 0.0001018867304784467, + "loss": 0.9784, + "step": 1336 + }, + { + "epoch": 1.97, + "learning_rate": 0.00010176882253320967, + "loss": 0.9837, + "step": 1337 + }, + { + "epoch": 1.97, + "learning_rate": 0.0001016509121280836, + "loss": 1.039, + "step": 1338 + }, + { + "epoch": 1.98, + "learning_rate": 0.00010153299942704566, + "loss": 0.9984, + "step": 1339 + }, + { + "epoch": 1.98, + "learning_rate": 0.00010141508459407623, + "loss": 1.0526, + "step": 1340 + }, + { + "epoch": 1.98, + "learning_rate": 0.00010129716779315862, + "loss": 1.0581, + "step": 1341 + }, + { + "epoch": 1.98, + "learning_rate": 0.0001011792491882789, + "loss": 1.0607, + "step": 1342 + } + ], + "logging_steps": 1, + "max_steps": 2684, + "num_input_tokens_seen": 0, + "num_train_epochs": 4, + "save_steps": 671, + "total_flos": 4.1689736495602074e+17, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-1342/training_args.bin b/checkpoint-1342/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..439fe237329d4c6dab9a083d1f0b3c5d2e07ff34 --- /dev/null +++ b/checkpoint-1342/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:c9f074b3cd0fbc5cecae753dfd6c83754f9e22c6bc7af03db47b3beb5a1a41c9 +size 4923 diff --git a/checkpoint-2013/README.md b/checkpoint-2013/README.md new file mode 100644 index 0000000000000000000000000000000000000000..c40158a9bf29b5b6a4b1c7d97250d59a2f05ed92 --- /dev/null +++ b/checkpoint-2013/README.md @@ -0,0 +1,204 @@ +--- +library_name: peft +base_model: openlm-research/open_llama_3b_v2 +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. 
+ +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] + + +### Framework versions + +- PEFT 0.8.2 \ No newline at end of file diff --git a/checkpoint-2013/adapter_config.json b/checkpoint-2013/adapter_config.json new file mode 100644 index 
0000000000000000000000000000000000000000..a75a5db14cc030f9130cc346972670dcccc55fe2 --- /dev/null +++ b/checkpoint-2013/adapter_config.json @@ -0,0 +1,32 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "openlm-research/open_llama_3b_v2", + "bias": "none", + "fan_in_fan_out": null, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 16, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "gate_proj", + "q_proj", + "down_proj", + "up_proj", + "k_proj", + "o_proj", + "v_proj" + ], + "task_type": "CAUSAL_LM", + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoint-2013/adapter_model.safetensors b/checkpoint-2013/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f5c893d5e092879e777cf8d0192050f469568daa --- /dev/null +++ b/checkpoint-2013/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5f113695fed016ee64ca9bcbff0b3211f7f097fcdc83be77c2df07653d441c40 +size 50899792 diff --git a/checkpoint-2013/optimizer.pt b/checkpoint-2013/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..4aa40286fe38a9804132f9942931b6c071b2b3c1 --- /dev/null +++ b/checkpoint-2013/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e446180ec8d6f61cc314e69c08626a0f4bcbc838673728643184980c1c1c96c2 +size 25871439 diff --git a/checkpoint-2013/rng_state_0.pth b/checkpoint-2013/rng_state_0.pth new file mode 100644 index 0000000000000000000000000000000000000000..f94882b9a9ba628f46402e80ffd2e44d82d3e0ca --- /dev/null +++ b/checkpoint-2013/rng_state_0.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:a1993bb3feead7fcc4fe2fa15e1397ca90a31b83f8dca34ca7026b26be0743f3 +size 21687 diff --git a/checkpoint-2013/rng_state_1.pth b/checkpoint-2013/rng_state_1.pth new file mode 100644 index 0000000000000000000000000000000000000000..04a0bac685bdbe903cf4edd9d568edcdd0900ed6 --- /dev/null +++ b/checkpoint-2013/rng_state_1.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7f6108ec1b316f043b5b3a9c65faa36d2c3135b362cbc7a9c25ee1363131d2ce +size 21687 diff --git a/checkpoint-2013/rng_state_2.pth b/checkpoint-2013/rng_state_2.pth new file mode 100644 index 0000000000000000000000000000000000000000..edc8ece1594d6fd4d53ef224e51e260b1b667990 --- /dev/null +++ b/checkpoint-2013/rng_state_2.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3f9c0c652900b2a758eddc193fd0aa176f06cd04c76fd10700ea8050507f2fae +size 21687 diff --git a/checkpoint-2013/rng_state_3.pth b/checkpoint-2013/rng_state_3.pth new file mode 100644 index 0000000000000000000000000000000000000000..165e2e414cc80d80397af0096268bf1ce5847f62 --- /dev/null +++ b/checkpoint-2013/rng_state_3.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2b58556a56241659eca33c1364b615026573d3cd3f8bda10ab5f6f401362665c +size 21687 diff --git a/checkpoint-2013/rng_state_4.pth b/checkpoint-2013/rng_state_4.pth new file mode 100644 index 0000000000000000000000000000000000000000..2a39be3e1c848abee546bd954c6ea0d9206165b1 --- /dev/null +++ b/checkpoint-2013/rng_state_4.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:305784ccd1f10dfbe45bc98368ca94463017a1333f740826d48bbb6fe98085a9 +size 21687 diff --git a/checkpoint-2013/rng_state_5.pth b/checkpoint-2013/rng_state_5.pth new file mode 100644 index 0000000000000000000000000000000000000000..b29edcdc1c9d46117ec055070e5cb4816f673024 --- /dev/null +++ b/checkpoint-2013/rng_state_5.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:b3eb219f2f8e0e20d1f13cdfdb817582af94e9d1ed554fe92a13e9b414c30a5b +size 21687 diff --git a/checkpoint-2013/rng_state_6.pth b/checkpoint-2013/rng_state_6.pth new file mode 100644 index 0000000000000000000000000000000000000000..6a3eac1cd2ff532c031e67474ec062a8a2413d91 --- /dev/null +++ b/checkpoint-2013/rng_state_6.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:75147ce6fd434a85eda10b30d429158f8d4d6a59d4b133ed42a4bcdb96deaaa8 +size 21687 diff --git a/checkpoint-2013/rng_state_7.pth b/checkpoint-2013/rng_state_7.pth new file mode 100644 index 0000000000000000000000000000000000000000..e8ac5a959775cd1ead0f246d42dd1c60557ec462 --- /dev/null +++ b/checkpoint-2013/rng_state_7.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:da65ff9bc06bcab79348801da57ef25b8eb83b108fd5a008c39ab3e3162351fb +size 21687 diff --git a/checkpoint-2013/scheduler.pt b/checkpoint-2013/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..600ff587a92eccca95a1a4514da4cd38149b4c55 --- /dev/null +++ b/checkpoint-2013/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b79bf06f2ad72d5930a915899610bb509611e58176d077f2633bf3b0f74b6232 +size 627 diff --git a/checkpoint-2013/trainer_state.json b/checkpoint-2013/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..2e47e12b7df4101eb6a0f9a9cb8fd7b545cd9235 --- /dev/null +++ b/checkpoint-2013/trainer_state.json @@ -0,0 +1,12195 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 2.959761549925484, + "eval_steps": 168, + "global_step": 2013, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 1e-05, + "loss": 1.3745, + "step": 1 + }, + { + "epoch": 0.0, + "eval_loss": 1.6296857595443726, + "eval_runtime": 2.6662, + "eval_samples_per_second": 409.572, + "eval_steps_per_second": 25.88, 
+ "step": 1 + }, + { + "epoch": 0.0, + "learning_rate": 2e-05, + "loss": 1.42, + "step": 2 + }, + { + "epoch": 0.0, + "learning_rate": 3e-05, + "loss": 1.3057, + "step": 3 + }, + { + "epoch": 0.01, + "learning_rate": 4e-05, + "loss": 1.2307, + "step": 4 + }, + { + "epoch": 0.01, + "learning_rate": 5e-05, + "loss": 1.289, + "step": 5 + }, + { + "epoch": 0.01, + "learning_rate": 6e-05, + "loss": 1.4111, + "step": 6 + }, + { + "epoch": 0.01, + "learning_rate": 7e-05, + "loss": 1.3089, + "step": 7 + }, + { + "epoch": 0.01, + "learning_rate": 8e-05, + "loss": 1.3204, + "step": 8 + }, + { + "epoch": 0.01, + "learning_rate": 9e-05, + "loss": 1.3575, + "step": 9 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001, + "loss": 1.3279, + "step": 10 + }, + { + "epoch": 0.02, + "learning_rate": 0.00011000000000000002, + "loss": 1.3149, + "step": 11 + }, + { + "epoch": 0.02, + "learning_rate": 0.00012, + "loss": 1.2578, + "step": 12 + }, + { + "epoch": 0.02, + "learning_rate": 0.00013000000000000002, + "loss": 1.2849, + "step": 13 + }, + { + "epoch": 0.02, + "learning_rate": 0.00014, + "loss": 1.2971, + "step": 14 + }, + { + "epoch": 0.02, + "learning_rate": 0.00015000000000000001, + "loss": 1.1473, + "step": 15 + }, + { + "epoch": 0.02, + "learning_rate": 0.00016, + "loss": 1.1943, + "step": 16 + }, + { + "epoch": 0.03, + "learning_rate": 0.00017, + "loss": 1.1877, + "step": 17 + }, + { + "epoch": 0.03, + "learning_rate": 0.00018, + "loss": 1.1984, + "step": 18 + }, + { + "epoch": 0.03, + "learning_rate": 0.00019, + "loss": 1.2647, + "step": 19 + }, + { + "epoch": 0.03, + "learning_rate": 0.0002, + "loss": 1.217, + "step": 20 + }, + { + "epoch": 0.03, + "learning_rate": 0.00019999993046535236, + "loss": 1.0274, + "step": 21 + }, + { + "epoch": 0.03, + "learning_rate": 0.00019999972186150606, + "loss": 1.2122, + "step": 22 + }, + { + "epoch": 0.03, + "learning_rate": 0.00019999937418875124, + "loss": 1.1868, + "step": 23 + }, + { + "epoch": 0.04, + "learning_rate": 
0.00019999888744757143, + "loss": 1.2345, + "step": 24 + }, + { + "epoch": 0.04, + "learning_rate": 0.00019999826163864348, + "loss": 1.2127, + "step": 25 + }, + { + "epoch": 0.04, + "learning_rate": 0.00019999749676283775, + "loss": 1.2114, + "step": 26 + }, + { + "epoch": 0.04, + "learning_rate": 0.00019999659282121792, + "loss": 1.2224, + "step": 27 + }, + { + "epoch": 0.04, + "learning_rate": 0.0001999955498150411, + "loss": 1.1517, + "step": 28 + }, + { + "epoch": 0.04, + "learning_rate": 0.0001999943677457578, + "loss": 1.1631, + "step": 29 + }, + { + "epoch": 0.04, + "learning_rate": 0.0001999930466150119, + "loss": 1.0465, + "step": 30 + }, + { + "epoch": 0.05, + "learning_rate": 0.0001999915864246407, + "loss": 1.1847, + "step": 31 + }, + { + "epoch": 0.05, + "learning_rate": 0.0001999899871766749, + "loss": 1.1238, + "step": 32 + }, + { + "epoch": 0.05, + "learning_rate": 0.0001999882488733385, + "loss": 1.1491, + "step": 33 + }, + { + "epoch": 0.05, + "learning_rate": 0.000199986371517049, + "loss": 1.276, + "step": 34 + }, + { + "epoch": 0.05, + "learning_rate": 0.0001999843551104172, + "loss": 1.0911, + "step": 35 + }, + { + "epoch": 0.05, + "learning_rate": 0.00019998219965624734, + "loss": 1.1276, + "step": 36 + }, + { + "epoch": 0.06, + "learning_rate": 0.00019997990515753693, + "loss": 1.0981, + "step": 37 + }, + { + "epoch": 0.06, + "learning_rate": 0.00019997747161747695, + "loss": 1.0901, + "step": 38 + }, + { + "epoch": 0.06, + "learning_rate": 0.0001999748990394517, + "loss": 1.096, + "step": 39 + }, + { + "epoch": 0.06, + "learning_rate": 0.00019997218742703887, + "loss": 1.122, + "step": 40 + }, + { + "epoch": 0.06, + "learning_rate": 0.00019996933678400946, + "loss": 1.1132, + "step": 41 + }, + { + "epoch": 0.06, + "learning_rate": 0.00019996634711432786, + "loss": 1.1498, + "step": 42 + }, + { + "epoch": 0.06, + "learning_rate": 0.00019996321842215173, + "loss": 1.0708, + "step": 43 + }, + { + "epoch": 0.07, + "learning_rate": 
0.0001999599507118322, + "loss": 1.1154, + "step": 44 + }, + { + "epoch": 0.07, + "learning_rate": 0.00019995654398791355, + "loss": 1.2118, + "step": 45 + }, + { + "epoch": 0.07, + "learning_rate": 0.00019995299825513357, + "loss": 1.0919, + "step": 46 + }, + { + "epoch": 0.07, + "learning_rate": 0.00019994931351842327, + "loss": 1.1364, + "step": 47 + }, + { + "epoch": 0.07, + "learning_rate": 0.00019994548978290695, + "loss": 1.1442, + "step": 48 + }, + { + "epoch": 0.07, + "learning_rate": 0.0001999415270539023, + "loss": 1.1248, + "step": 49 + }, + { + "epoch": 0.07, + "learning_rate": 0.00019993742533692022, + "loss": 1.1366, + "step": 50 + }, + { + "epoch": 0.08, + "learning_rate": 0.00019993318463766495, + "loss": 1.1437, + "step": 51 + }, + { + "epoch": 0.08, + "learning_rate": 0.000199928804962034, + "loss": 1.1191, + "step": 52 + }, + { + "epoch": 0.08, + "learning_rate": 0.0001999242863161182, + "loss": 1.0786, + "step": 53 + }, + { + "epoch": 0.08, + "learning_rate": 0.00019991962870620153, + "loss": 1.1951, + "step": 54 + }, + { + "epoch": 0.08, + "learning_rate": 0.00019991483213876134, + "loss": 1.1321, + "step": 55 + }, + { + "epoch": 0.08, + "learning_rate": 0.00019990989662046818, + "loss": 1.0876, + "step": 56 + }, + { + "epoch": 0.08, + "learning_rate": 0.0001999048221581858, + "loss": 1.1794, + "step": 57 + }, + { + "epoch": 0.09, + "learning_rate": 0.00019989960875897126, + "loss": 1.1796, + "step": 58 + }, + { + "epoch": 0.09, + "learning_rate": 0.00019989425643007476, + "loss": 1.1165, + "step": 59 + }, + { + "epoch": 0.09, + "learning_rate": 0.0001998887651789398, + "loss": 1.1978, + "step": 60 + }, + { + "epoch": 0.09, + "learning_rate": 0.00019988313501320297, + "loss": 1.1693, + "step": 61 + }, + { + "epoch": 0.09, + "learning_rate": 0.00019987736594069414, + "loss": 1.1553, + "step": 62 + }, + { + "epoch": 0.09, + "learning_rate": 0.0001998714579694363, + "loss": 1.1959, + "step": 63 + }, + { + "epoch": 0.1, + "learning_rate": 
0.00019986541110764565, + "loss": 1.1945, + "step": 64 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019985922536373146, + "loss": 1.121, + "step": 65 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019985290074629627, + "loss": 1.122, + "step": 66 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019984643726413565, + "loss": 1.1435, + "step": 67 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019983983492623833, + "loss": 1.0413, + "step": 68 + }, + { + "epoch": 0.1, + "learning_rate": 0.0001998330937417861, + "loss": 1.078, + "step": 69 + }, + { + "epoch": 0.1, + "learning_rate": 0.0001998262137201539, + "loss": 1.0811, + "step": 70 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019981919487090972, + "loss": 1.1639, + "step": 71 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019981203720381463, + "loss": 1.164, + "step": 72 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019980474072882277, + "loss": 1.1006, + "step": 73 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019979730545608126, + "loss": 1.1926, + "step": 74 + }, + { + "epoch": 0.11, + "learning_rate": 0.0001997897313959303, + "loss": 1.1129, + "step": 75 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019978201855890308, + "loss": 1.1367, + "step": 76 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019977416695572578, + "loss": 1.1495, + "step": 77 + }, + { + "epoch": 0.12, + "learning_rate": 0.0001997661765973176, + "loss": 1.1567, + "step": 78 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019975804749479062, + "loss": 1.2102, + "step": 79 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019974977965945, + "loss": 1.1175, + "step": 80 + }, + { + "epoch": 0.12, + "learning_rate": 0.0001997413731027937, + "loss": 1.1243, + "step": 81 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019973282783651263, + "loss": 1.1406, + "step": 82 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019972414387249072, + "loss": 1.09, + "step": 83 + }, + { + "epoch": 0.13, + "learning_rate": 
0.00019971532122280464, + "loss": 1.0115, + "step": 84 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019970635989972402, + "loss": 1.0328, + "step": 85 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019969725991571128, + "loss": 1.1226, + "step": 86 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019968802128342172, + "loss": 1.0747, + "step": 87 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019967864401570343, + "loss": 1.119, + "step": 88 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019966912812559732, + "loss": 1.1125, + "step": 89 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019965947362633708, + "loss": 1.0734, + "step": 90 + }, + { + "epoch": 0.14, + "learning_rate": 0.0001996496805313491, + "loss": 1.1798, + "step": 91 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019963974885425266, + "loss": 1.1461, + "step": 92 + }, + { + "epoch": 0.14, + "learning_rate": 0.0001996296786088596, + "loss": 1.0397, + "step": 93 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019961946980917456, + "loss": 1.17, + "step": 94 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019960912246939485, + "loss": 1.0679, + "step": 95 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019959863660391045, + "loss": 1.0839, + "step": 96 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019958801222730394, + "loss": 1.0937, + "step": 97 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019957724935435063, + "loss": 1.1668, + "step": 98 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019956634800001832, + "loss": 1.0858, + "step": 99 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019955530817946748, + "loss": 1.0935, + "step": 100 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019954412990805107, + "loss": 1.1046, + "step": 101 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019953281320131468, + "loss": 1.1319, + "step": 102 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019952135807499633, + "loss": 1.1108, + "step": 103 + }, + { + "epoch": 0.15, + "learning_rate": 
0.0001995097645450266, + "loss": 1.0485, + "step": 104 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019949803262752855, + "loss": 1.0862, + "step": 105 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019948616233881768, + "loss": 1.268, + "step": 106 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019947415369540189, + "loss": 1.0926, + "step": 107 + }, + { + "epoch": 0.16, + "learning_rate": 0.0001994620067139815, + "loss": 1.1427, + "step": 108 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019944972141144928, + "loss": 1.0754, + "step": 109 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019943729780489027, + "loss": 1.0044, + "step": 110 + }, + { + "epoch": 0.17, + "learning_rate": 0.0001994247359115819, + "loss": 1.1304, + "step": 111 + }, + { + "epoch": 0.17, + "learning_rate": 0.00019941203574899393, + "loss": 1.1683, + "step": 112 + }, + { + "epoch": 0.17, + "learning_rate": 0.00019939919733478838, + "loss": 1.1559, + "step": 113 + }, + { + "epoch": 0.17, + "learning_rate": 0.00019938622068681953, + "loss": 1.1879, + "step": 114 + }, + { + "epoch": 0.17, + "learning_rate": 0.00019937310582313392, + "loss": 1.0613, + "step": 115 + }, + { + "epoch": 0.17, + "learning_rate": 0.0001993598527619703, + "loss": 1.1196, + "step": 116 + }, + { + "epoch": 0.17, + "learning_rate": 0.0001993464615217596, + "loss": 1.0762, + "step": 117 + }, + { + "epoch": 0.18, + "learning_rate": 0.00019933293212112495, + "loss": 1.1059, + "step": 118 + }, + { + "epoch": 0.18, + "learning_rate": 0.00019931926457888156, + "loss": 1.0831, + "step": 119 + }, + { + "epoch": 0.18, + "learning_rate": 0.00019930545891403678, + "loss": 1.0552, + "step": 120 + }, + { + "epoch": 0.18, + "learning_rate": 0.00019929151514579008, + "loss": 1.15, + "step": 121 + }, + { + "epoch": 0.18, + "learning_rate": 0.00019927743329353295, + "loss": 1.1038, + "step": 122 + }, + { + "epoch": 0.18, + "learning_rate": 0.0001992632133768489, + "loss": 1.067, + "step": 123 + }, + { + "epoch": 0.18, + 
"learning_rate": 0.0001992488554155135, + "loss": 1.1311, + "step": 124 + }, + { + "epoch": 0.19, + "learning_rate": 0.00019923435942949426, + "loss": 1.1402, + "step": 125 + }, + { + "epoch": 0.19, + "learning_rate": 0.00019921972543895066, + "loss": 1.0453, + "step": 126 + }, + { + "epoch": 0.19, + "learning_rate": 0.00019920495346423402, + "loss": 1.1567, + "step": 127 + }, + { + "epoch": 0.19, + "learning_rate": 0.00019919004352588767, + "loss": 1.137, + "step": 128 + }, + { + "epoch": 0.19, + "learning_rate": 0.0001991749956446468, + "loss": 0.9986, + "step": 129 + }, + { + "epoch": 0.19, + "learning_rate": 0.00019915980984143832, + "loss": 1.083, + "step": 130 + }, + { + "epoch": 0.2, + "learning_rate": 0.00019914448613738106, + "loss": 1.0619, + "step": 131 + }, + { + "epoch": 0.2, + "learning_rate": 0.00019912902455378556, + "loss": 1.1294, + "step": 132 + }, + { + "epoch": 0.2, + "learning_rate": 0.00019911342511215414, + "loss": 1.0965, + "step": 133 + }, + { + "epoch": 0.2, + "learning_rate": 0.00019909768783418086, + "loss": 1.0216, + "step": 134 + }, + { + "epoch": 0.2, + "learning_rate": 0.00019908181274175138, + "loss": 1.0081, + "step": 135 + }, + { + "epoch": 0.2, + "learning_rate": 0.0001990657998569432, + "loss": 1.0246, + "step": 136 + }, + { + "epoch": 0.2, + "learning_rate": 0.0001990496492020252, + "loss": 1.1249, + "step": 137 + }, + { + "epoch": 0.21, + "learning_rate": 0.00019903336079945804, + "loss": 1.0518, + "step": 138 + }, + { + "epoch": 0.21, + "learning_rate": 0.00019901693467189386, + "loss": 1.189, + "step": 139 + }, + { + "epoch": 0.21, + "learning_rate": 0.00019900037084217637, + "loss": 1.1475, + "step": 140 + }, + { + "epoch": 0.21, + "learning_rate": 0.0001989836693333408, + "loss": 1.2259, + "step": 141 + }, + { + "epoch": 0.21, + "learning_rate": 0.0001989668301686138, + "loss": 1.0399, + "step": 142 + }, + { + "epoch": 0.21, + "learning_rate": 0.0001989498533714135, + "loss": 1.128, + "step": 143 + }, + { + "epoch": 0.21, 
+ "learning_rate": 0.00019893273896534936, + "loss": 1.014, + "step": 144 + }, + { + "epoch": 0.22, + "learning_rate": 0.0001989154869742223, + "loss": 1.1552, + "step": 145 + }, + { + "epoch": 0.22, + "learning_rate": 0.00019889809742202455, + "loss": 1.1159, + "step": 146 + }, + { + "epoch": 0.22, + "learning_rate": 0.0001988805703329396, + "loss": 1.0218, + "step": 147 + }, + { + "epoch": 0.22, + "learning_rate": 0.00019886290573134228, + "loss": 1.1723, + "step": 148 + }, + { + "epoch": 0.22, + "learning_rate": 0.0001988451036417986, + "loss": 1.2132, + "step": 149 + }, + { + "epoch": 0.22, + "learning_rate": 0.00019882716408906585, + "loss": 1.112, + "step": 150 + }, + { + "epoch": 0.23, + "learning_rate": 0.0001988090870980924, + "loss": 1.0856, + "step": 151 + }, + { + "epoch": 0.23, + "learning_rate": 0.0001987908726940178, + "loss": 1.0951, + "step": 152 + }, + { + "epoch": 0.23, + "learning_rate": 0.00019877252090217271, + "loss": 1.0218, + "step": 153 + }, + { + "epoch": 0.23, + "learning_rate": 0.00019875403174807882, + "loss": 1.0552, + "step": 154 + }, + { + "epoch": 0.23, + "learning_rate": 0.00019873540525744887, + "loss": 1.1481, + "step": 155 + }, + { + "epoch": 0.23, + "learning_rate": 0.00019871664145618657, + "loss": 1.169, + "step": 156 + }, + { + "epoch": 0.23, + "learning_rate": 0.00019869774037038665, + "loss": 1.0802, + "step": 157 + }, + { + "epoch": 0.24, + "learning_rate": 0.0001986787020263347, + "loss": 1.0871, + "step": 158 + }, + { + "epoch": 0.24, + "learning_rate": 0.0001986595264505072, + "loss": 1.1022, + "step": 159 + }, + { + "epoch": 0.24, + "learning_rate": 0.00019864021366957147, + "loss": 1.0257, + "step": 160 + }, + { + "epoch": 0.24, + "learning_rate": 0.0001986207637103857, + "loss": 1.0986, + "step": 161 + }, + { + "epoch": 0.24, + "learning_rate": 0.00019860117659999878, + "loss": 1.0837, + "step": 162 + }, + { + "epoch": 0.24, + "learning_rate": 0.00019858145236565037, + "loss": 1.1895, + "step": 163 + }, + { + 
"epoch": 0.24, + "learning_rate": 0.00019856159103477086, + "loss": 1.052, + "step": 164 + }, + { + "epoch": 0.25, + "learning_rate": 0.00019854159263498123, + "loss": 1.1184, + "step": 165 + }, + { + "epoch": 0.25, + "learning_rate": 0.0001985214571940931, + "loss": 1.0895, + "step": 166 + }, + { + "epoch": 0.25, + "learning_rate": 0.00019850118474010872, + "loss": 1.0764, + "step": 167 + }, + { + "epoch": 0.25, + "learning_rate": 0.00019848077530122083, + "loss": 1.1387, + "step": 168 + }, + { + "epoch": 0.25, + "eval_loss": 1.084919810295105, + "eval_runtime": 2.6029, + "eval_samples_per_second": 419.538, + "eval_steps_per_second": 26.509, + "step": 168 + }, + { + "epoch": 0.25, + "learning_rate": 0.00019846022890581267, + "loss": 1.0826, + "step": 169 + }, + { + "epoch": 0.25, + "learning_rate": 0.000198439545582458, + "loss": 1.1366, + "step": 170 + }, + { + "epoch": 0.25, + "learning_rate": 0.000198418725359921, + "loss": 1.1349, + "step": 171 + }, + { + "epoch": 0.26, + "learning_rate": 0.00019839776826715614, + "loss": 1.0636, + "step": 172 + }, + { + "epoch": 0.26, + "learning_rate": 0.00019837667433330838, + "loss": 1.1216, + "step": 173 + }, + { + "epoch": 0.26, + "learning_rate": 0.0001983554435877128, + "loss": 1.1051, + "step": 174 + }, + { + "epoch": 0.26, + "learning_rate": 0.00019833407605989494, + "loss": 1.1558, + "step": 175 + }, + { + "epoch": 0.26, + "learning_rate": 0.00019831257177957044, + "loss": 1.0364, + "step": 176 + }, + { + "epoch": 0.26, + "learning_rate": 0.00019829093077664513, + "loss": 1.0665, + "step": 177 + }, + { + "epoch": 0.27, + "learning_rate": 0.00019826915308121504, + "loss": 1.1994, + "step": 178 + }, + { + "epoch": 0.27, + "learning_rate": 0.0001982472387235662, + "loss": 1.1434, + "step": 179 + }, + { + "epoch": 0.27, + "learning_rate": 0.0001982251877341748, + "loss": 1.081, + "step": 180 + }, + { + "epoch": 0.27, + "learning_rate": 0.000198203000143707, + "loss": 1.0653, + "step": 181 + }, + { + "epoch": 0.27, + 
"learning_rate": 0.0001981806759830189, + "loss": 1.0269, + "step": 182 + }, + { + "epoch": 0.27, + "learning_rate": 0.0001981582152831566, + "loss": 1.1167, + "step": 183 + }, + { + "epoch": 0.27, + "learning_rate": 0.00019813561807535598, + "loss": 1.0608, + "step": 184 + }, + { + "epoch": 0.28, + "learning_rate": 0.0001981128843910428, + "loss": 1.0989, + "step": 185 + }, + { + "epoch": 0.28, + "learning_rate": 0.0001980900142618327, + "loss": 1.1405, + "step": 186 + }, + { + "epoch": 0.28, + "learning_rate": 0.00019806700771953097, + "loss": 1.0359, + "step": 187 + }, + { + "epoch": 0.28, + "learning_rate": 0.0001980438647961327, + "loss": 1.1073, + "step": 188 + }, + { + "epoch": 0.28, + "learning_rate": 0.0001980205855238225, + "loss": 1.0338, + "step": 189 + }, + { + "epoch": 0.28, + "learning_rate": 0.00019799716993497475, + "loss": 1.1285, + "step": 190 + }, + { + "epoch": 0.28, + "learning_rate": 0.00019797361806215332, + "loss": 1.1277, + "step": 191 + }, + { + "epoch": 0.29, + "learning_rate": 0.00019794992993811165, + "loss": 1.119, + "step": 192 + }, + { + "epoch": 0.29, + "learning_rate": 0.00019792610559579265, + "loss": 1.1224, + "step": 193 + }, + { + "epoch": 0.29, + "learning_rate": 0.00019790214506832868, + "loss": 1.1438, + "step": 194 + }, + { + "epoch": 0.29, + "learning_rate": 0.0001978780483890414, + "loss": 1.1462, + "step": 195 + }, + { + "epoch": 0.29, + "learning_rate": 0.00019785381559144196, + "loss": 1.042, + "step": 196 + }, + { + "epoch": 0.29, + "learning_rate": 0.00019782944670923076, + "loss": 1.1022, + "step": 197 + }, + { + "epoch": 0.3, + "learning_rate": 0.00019780494177629735, + "loss": 1.0564, + "step": 198 + }, + { + "epoch": 0.3, + "learning_rate": 0.00019778030082672068, + "loss": 1.0471, + "step": 199 + }, + { + "epoch": 0.3, + "learning_rate": 0.00019775552389476864, + "loss": 1.0636, + "step": 200 + }, + { + "epoch": 0.3, + "learning_rate": 0.0001977306110148984, + "loss": 1.0917, + "step": 201 + }, + { + "epoch": 
0.3, + "learning_rate": 0.00019770556222175608, + "loss": 1.1965, + "step": 202 + }, + { + "epoch": 0.3, + "learning_rate": 0.00019768037755017685, + "loss": 1.073, + "step": 203 + }, + { + "epoch": 0.3, + "learning_rate": 0.00019765505703518496, + "loss": 1.0636, + "step": 204 + }, + { + "epoch": 0.31, + "learning_rate": 0.00019762960071199333, + "loss": 1.087, + "step": 205 + }, + { + "epoch": 0.31, + "learning_rate": 0.000197604008616004, + "loss": 1.0569, + "step": 206 + }, + { + "epoch": 0.31, + "learning_rate": 0.00019757828078280766, + "loss": 1.08, + "step": 207 + }, + { + "epoch": 0.31, + "learning_rate": 0.00019755241724818387, + "loss": 1.1536, + "step": 208 + }, + { + "epoch": 0.31, + "learning_rate": 0.00019752641804810084, + "loss": 1.1514, + "step": 209 + }, + { + "epoch": 0.31, + "learning_rate": 0.00019750028321871546, + "loss": 1.0691, + "step": 210 + }, + { + "epoch": 0.31, + "learning_rate": 0.00019747401279637325, + "loss": 1.1289, + "step": 211 + }, + { + "epoch": 0.32, + "learning_rate": 0.00019744760681760832, + "loss": 1.0834, + "step": 212 + }, + { + "epoch": 0.32, + "learning_rate": 0.00019742106531914328, + "loss": 1.0762, + "step": 213 + }, + { + "epoch": 0.32, + "learning_rate": 0.0001973943883378892, + "loss": 1.0913, + "step": 214 + }, + { + "epoch": 0.32, + "learning_rate": 0.00019736757591094558, + "loss": 1.132, + "step": 215 + }, + { + "epoch": 0.32, + "learning_rate": 0.00019734062807560027, + "loss": 1.0894, + "step": 216 + }, + { + "epoch": 0.32, + "learning_rate": 0.00019731354486932944, + "loss": 1.0327, + "step": 217 + }, + { + "epoch": 0.32, + "learning_rate": 0.00019728632632979746, + "loss": 1.112, + "step": 218 + }, + { + "epoch": 0.33, + "learning_rate": 0.00019725897249485704, + "loss": 1.0718, + "step": 219 + }, + { + "epoch": 0.33, + "learning_rate": 0.00019723148340254892, + "loss": 1.077, + "step": 220 + }, + { + "epoch": 0.33, + "learning_rate": 0.00019720385909110198, + "loss": 1.0335, + "step": 221 + }, + { + 
"epoch": 0.33, + "learning_rate": 0.00019717609959893318, + "loss": 1.0483, + "step": 222 + }, + { + "epoch": 0.33, + "learning_rate": 0.00019714820496464746, + "loss": 1.0901, + "step": 223 + }, + { + "epoch": 0.33, + "learning_rate": 0.00019712017522703764, + "loss": 0.9921, + "step": 224 + }, + { + "epoch": 0.34, + "learning_rate": 0.00019709201042508455, + "loss": 1.0829, + "step": 225 + }, + { + "epoch": 0.34, + "learning_rate": 0.0001970637105979567, + "loss": 1.0705, + "step": 226 + }, + { + "epoch": 0.34, + "learning_rate": 0.0001970352757850105, + "loss": 1.0481, + "step": 227 + }, + { + "epoch": 0.34, + "learning_rate": 0.00019700670602579008, + "loss": 0.9846, + "step": 228 + }, + { + "epoch": 0.34, + "learning_rate": 0.0001969780013600272, + "loss": 1.1492, + "step": 229 + }, + { + "epoch": 0.34, + "learning_rate": 0.00019694916182764113, + "loss": 1.1745, + "step": 230 + }, + { + "epoch": 0.34, + "learning_rate": 0.00019692018746873892, + "loss": 1.0451, + "step": 231 + }, + { + "epoch": 0.35, + "learning_rate": 0.00019689107832361496, + "loss": 1.1217, + "step": 232 + }, + { + "epoch": 0.35, + "learning_rate": 0.00019686183443275116, + "loss": 1.0788, + "step": 233 + }, + { + "epoch": 0.35, + "learning_rate": 0.00019683245583681675, + "loss": 1.0703, + "step": 234 + }, + { + "epoch": 0.35, + "learning_rate": 0.00019680294257666837, + "loss": 1.1521, + "step": 235 + }, + { + "epoch": 0.35, + "learning_rate": 0.0001967732946933499, + "loss": 1.0659, + "step": 236 + }, + { + "epoch": 0.35, + "learning_rate": 0.00019674351222809242, + "loss": 1.0321, + "step": 237 + }, + { + "epoch": 0.35, + "learning_rate": 0.0001967135952223142, + "loss": 1.0555, + "step": 238 + }, + { + "epoch": 0.36, + "learning_rate": 0.00019668354371762066, + "loss": 1.0648, + "step": 239 + }, + { + "epoch": 0.36, + "learning_rate": 0.00019665335775580415, + "loss": 1.0723, + "step": 240 + }, + { + "epoch": 0.36, + "learning_rate": 0.0001966230373788441, + "loss": 1.0264, + "step": 
241 + }, + { + "epoch": 0.36, + "learning_rate": 0.00019659258262890683, + "loss": 1.0331, + "step": 242 + }, + { + "epoch": 0.36, + "learning_rate": 0.00019656199354834558, + "loss": 1.1514, + "step": 243 + }, + { + "epoch": 0.36, + "learning_rate": 0.00019653127017970034, + "loss": 1.069, + "step": 244 + }, + { + "epoch": 0.37, + "learning_rate": 0.00019650041256569792, + "loss": 0.9623, + "step": 245 + }, + { + "epoch": 0.37, + "learning_rate": 0.00019646942074925172, + "loss": 1.0021, + "step": 246 + }, + { + "epoch": 0.37, + "learning_rate": 0.00019643829477346188, + "loss": 1.1131, + "step": 247 + }, + { + "epoch": 0.37, + "learning_rate": 0.0001964070346816151, + "loss": 1.1426, + "step": 248 + }, + { + "epoch": 0.37, + "learning_rate": 0.0001963756405171845, + "loss": 1.0761, + "step": 249 + }, + { + "epoch": 0.37, + "learning_rate": 0.00019634411232382978, + "loss": 1.1112, + "step": 250 + }, + { + "epoch": 0.37, + "learning_rate": 0.00019631245014539698, + "loss": 1.081, + "step": 251 + }, + { + "epoch": 0.38, + "learning_rate": 0.00019628065402591845, + "loss": 1.1446, + "step": 252 + }, + { + "epoch": 0.38, + "learning_rate": 0.00019624872400961284, + "loss": 1.045, + "step": 253 + }, + { + "epoch": 0.38, + "learning_rate": 0.00019621666014088494, + "loss": 1.0337, + "step": 254 + }, + { + "epoch": 0.38, + "learning_rate": 0.00019618446246432583, + "loss": 1.1764, + "step": 255 + }, + { + "epoch": 0.38, + "learning_rate": 0.00019615213102471257, + "loss": 1.0323, + "step": 256 + }, + { + "epoch": 0.38, + "learning_rate": 0.00019611966586700823, + "loss": 1.0073, + "step": 257 + }, + { + "epoch": 0.38, + "learning_rate": 0.00019608706703636188, + "loss": 1.1615, + "step": 258 + }, + { + "epoch": 0.39, + "learning_rate": 0.00019605433457810855, + "loss": 1.1209, + "step": 259 + }, + { + "epoch": 0.39, + "learning_rate": 0.00019602146853776894, + "loss": 1.0721, + "step": 260 + }, + { + "epoch": 0.39, + "learning_rate": 0.0001959884689610497, + "loss": 
1.0967, + "step": 261 + }, + { + "epoch": 0.39, + "learning_rate": 0.00019595533589384308, + "loss": 1.0284, + "step": 262 + }, + { + "epoch": 0.39, + "learning_rate": 0.00019592206938222703, + "loss": 1.0148, + "step": 263 + }, + { + "epoch": 0.39, + "learning_rate": 0.00019588866947246498, + "loss": 1.1434, + "step": 264 + }, + { + "epoch": 0.39, + "learning_rate": 0.00019585513621100603, + "loss": 1.1125, + "step": 265 + }, + { + "epoch": 0.4, + "learning_rate": 0.0001958214696444846, + "loss": 1.0812, + "step": 266 + }, + { + "epoch": 0.4, + "learning_rate": 0.00019578766981972058, + "loss": 1.0611, + "step": 267 + }, + { + "epoch": 0.4, + "learning_rate": 0.00019575373678371909, + "loss": 1.1029, + "step": 268 + }, + { + "epoch": 0.4, + "learning_rate": 0.00019571967058367064, + "loss": 1.0692, + "step": 269 + }, + { + "epoch": 0.4, + "learning_rate": 0.00019568547126695083, + "loss": 1.0581, + "step": 270 + }, + { + "epoch": 0.4, + "learning_rate": 0.00019565113888112036, + "loss": 0.9841, + "step": 271 + }, + { + "epoch": 0.41, + "learning_rate": 0.00019561667347392508, + "loss": 1.0173, + "step": 272 + }, + { + "epoch": 0.41, + "learning_rate": 0.00019558207509329584, + "loss": 1.0805, + "step": 273 + }, + { + "epoch": 0.41, + "learning_rate": 0.00019554734378734824, + "loss": 1.088, + "step": 274 + }, + { + "epoch": 0.41, + "learning_rate": 0.00019551247960438296, + "loss": 1.0481, + "step": 275 + }, + { + "epoch": 0.41, + "learning_rate": 0.00019547748259288536, + "loss": 1.1747, + "step": 276 + }, + { + "epoch": 0.41, + "learning_rate": 0.0001954423528015255, + "loss": 1.0407, + "step": 277 + }, + { + "epoch": 0.41, + "learning_rate": 0.00019540709027915818, + "loss": 1.1412, + "step": 278 + }, + { + "epoch": 0.42, + "learning_rate": 0.0001953716950748227, + "loss": 1.075, + "step": 279 + }, + { + "epoch": 0.42, + "learning_rate": 0.00019533616723774294, + "loss": 0.9863, + "step": 280 + }, + { + "epoch": 0.42, + "learning_rate": 0.0001953005068173272, + 
"loss": 1.1426, + "step": 281 + }, + { + "epoch": 0.42, + "learning_rate": 0.0001952647138631682, + "loss": 1.0621, + "step": 282 + }, + { + "epoch": 0.42, + "learning_rate": 0.00019522878842504295, + "loss": 1.1007, + "step": 283 + }, + { + "epoch": 0.42, + "learning_rate": 0.00019519273055291266, + "loss": 1.0632, + "step": 284 + }, + { + "epoch": 0.42, + "learning_rate": 0.00019515654029692278, + "loss": 1.126, + "step": 285 + }, + { + "epoch": 0.43, + "learning_rate": 0.00019512021770740288, + "loss": 1.0946, + "step": 286 + }, + { + "epoch": 0.43, + "learning_rate": 0.0001950837628348665, + "loss": 1.0639, + "step": 287 + }, + { + "epoch": 0.43, + "learning_rate": 0.00019504717573001117, + "loss": 1.1432, + "step": 288 + }, + { + "epoch": 0.43, + "learning_rate": 0.00019501045644371832, + "loss": 1.0619, + "step": 289 + }, + { + "epoch": 0.43, + "learning_rate": 0.0001949736050270532, + "loss": 1.0597, + "step": 290 + }, + { + "epoch": 0.43, + "learning_rate": 0.00019493662153126481, + "loss": 1.0743, + "step": 291 + }, + { + "epoch": 0.44, + "learning_rate": 0.0001948995060077859, + "loss": 1.1114, + "step": 292 + }, + { + "epoch": 0.44, + "learning_rate": 0.00019486225850823266, + "loss": 1.1435, + "step": 293 + }, + { + "epoch": 0.44, + "learning_rate": 0.000194824879084405, + "loss": 1.1396, + "step": 294 + }, + { + "epoch": 0.44, + "learning_rate": 0.00019478736778828624, + "loss": 1.1597, + "step": 295 + }, + { + "epoch": 0.44, + "learning_rate": 0.00019474972467204297, + "loss": 1.0976, + "step": 296 + }, + { + "epoch": 0.44, + "learning_rate": 0.00019471194978802533, + "loss": 1.0829, + "step": 297 + }, + { + "epoch": 0.44, + "learning_rate": 0.0001946740431887665, + "loss": 1.0437, + "step": 298 + }, + { + "epoch": 0.45, + "learning_rate": 0.00019463600492698296, + "loss": 1.0835, + "step": 299 + }, + { + "epoch": 0.45, + "learning_rate": 0.00019459783505557424, + "loss": 1.0558, + "step": 300 + }, + { + "epoch": 0.45, + "learning_rate": 
0.0001945595336276229, + "loss": 1.0656, + "step": 301 + }, + { + "epoch": 0.45, + "learning_rate": 0.00019452110069639452, + "loss": 1.1487, + "step": 302 + }, + { + "epoch": 0.45, + "learning_rate": 0.00019448253631533744, + "loss": 1.1383, + "step": 303 + }, + { + "epoch": 0.45, + "learning_rate": 0.00019444384053808288, + "loss": 1.1582, + "step": 304 + }, + { + "epoch": 0.45, + "learning_rate": 0.00019440501341844483, + "loss": 0.9999, + "step": 305 + }, + { + "epoch": 0.46, + "learning_rate": 0.00019436605501041987, + "loss": 1.1317, + "step": 306 + }, + { + "epoch": 0.46, + "learning_rate": 0.00019432696536818717, + "loss": 1.0944, + "step": 307 + }, + { + "epoch": 0.46, + "learning_rate": 0.00019428774454610843, + "loss": 1.1624, + "step": 308 + }, + { + "epoch": 0.46, + "learning_rate": 0.00019424839259872778, + "loss": 1.1644, + "step": 309 + }, + { + "epoch": 0.46, + "learning_rate": 0.00019420890958077167, + "loss": 1.0486, + "step": 310 + }, + { + "epoch": 0.46, + "learning_rate": 0.00019416929554714888, + "loss": 1.0705, + "step": 311 + }, + { + "epoch": 0.46, + "learning_rate": 0.00019412955055295034, + "loss": 1.023, + "step": 312 + }, + { + "epoch": 0.47, + "learning_rate": 0.00019408967465344917, + "loss": 1.1144, + "step": 313 + }, + { + "epoch": 0.47, + "learning_rate": 0.00019404966790410047, + "loss": 1.0378, + "step": 314 + }, + { + "epoch": 0.47, + "learning_rate": 0.00019400953036054138, + "loss": 1.036, + "step": 315 + }, + { + "epoch": 0.47, + "learning_rate": 0.00019396926207859084, + "loss": 1.0735, + "step": 316 + }, + { + "epoch": 0.47, + "learning_rate": 0.00019392886311424973, + "loss": 1.0259, + "step": 317 + }, + { + "epoch": 0.47, + "learning_rate": 0.0001938883335237006, + "loss": 1.1603, + "step": 318 + }, + { + "epoch": 0.48, + "learning_rate": 0.0001938476733633076, + "loss": 1.1282, + "step": 319 + }, + { + "epoch": 0.48, + "learning_rate": 0.0001938068826896166, + "loss": 1.063, + "step": 320 + }, + { + "epoch": 0.48, + 
"learning_rate": 0.00019376596155935486, + "loss": 1.1176, + "step": 321 + }, + { + "epoch": 0.48, + "learning_rate": 0.00019372491002943112, + "loss": 1.1307, + "step": 322 + }, + { + "epoch": 0.48, + "learning_rate": 0.00019368372815693549, + "loss": 1.0412, + "step": 323 + }, + { + "epoch": 0.48, + "learning_rate": 0.00019364241599913924, + "loss": 1.1353, + "step": 324 + }, + { + "epoch": 0.48, + "learning_rate": 0.00019360097361349494, + "loss": 1.1293, + "step": 325 + }, + { + "epoch": 0.49, + "learning_rate": 0.0001935594010576362, + "loss": 1.0885, + "step": 326 + }, + { + "epoch": 0.49, + "learning_rate": 0.00019351769838937775, + "loss": 1.0944, + "step": 327 + }, + { + "epoch": 0.49, + "learning_rate": 0.00019347586566671512, + "loss": 1.1435, + "step": 328 + }, + { + "epoch": 0.49, + "learning_rate": 0.0001934339029478248, + "loss": 1.1217, + "step": 329 + }, + { + "epoch": 0.49, + "learning_rate": 0.00019339181029106404, + "loss": 1.1801, + "step": 330 + }, + { + "epoch": 0.49, + "learning_rate": 0.00019334958775497083, + "loss": 1.1846, + "step": 331 + }, + { + "epoch": 0.49, + "learning_rate": 0.00019330723539826375, + "loss": 1.0897, + "step": 332 + }, + { + "epoch": 0.5, + "learning_rate": 0.00019326475327984192, + "loss": 1.0643, + "step": 333 + }, + { + "epoch": 0.5, + "learning_rate": 0.00019322214145878487, + "loss": 1.0246, + "step": 334 + }, + { + "epoch": 0.5, + "learning_rate": 0.0001931793999943526, + "loss": 1.1108, + "step": 335 + }, + { + "epoch": 0.5, + "learning_rate": 0.00019313652894598543, + "loss": 1.0619, + "step": 336 + }, + { + "epoch": 0.5, + "eval_loss": 1.048388123512268, + "eval_runtime": 2.6045, + "eval_samples_per_second": 419.273, + "eval_steps_per_second": 26.493, + "step": 336 + }, + { + "epoch": 0.5, + "learning_rate": 0.00019309352837330372, + "loss": 1.0014, + "step": 337 + }, + { + "epoch": 0.5, + "learning_rate": 0.0001930503983361081, + "loss": 1.0786, + "step": 338 + }, + { + "epoch": 0.51, + "learning_rate": 
0.00019300713889437926, + "loss": 1.014, + "step": 339 + }, + { + "epoch": 0.51, + "learning_rate": 0.00019296375010827773, + "loss": 1.1233, + "step": 340 + }, + { + "epoch": 0.51, + "learning_rate": 0.000192920232038144, + "loss": 1.1052, + "step": 341 + }, + { + "epoch": 0.51, + "learning_rate": 0.0001928765847444984, + "loss": 1.0138, + "step": 342 + }, + { + "epoch": 0.51, + "learning_rate": 0.00019283280828804081, + "loss": 1.1536, + "step": 343 + }, + { + "epoch": 0.51, + "learning_rate": 0.00019278890272965096, + "loss": 0.992, + "step": 344 + }, + { + "epoch": 0.51, + "learning_rate": 0.0001927448681303879, + "loss": 1.1165, + "step": 345 + }, + { + "epoch": 0.52, + "learning_rate": 0.0001927007045514903, + "loss": 1.0565, + "step": 346 + }, + { + "epoch": 0.52, + "learning_rate": 0.00019265641205437611, + "loss": 1.0664, + "step": 347 + }, + { + "epoch": 0.52, + "learning_rate": 0.0001926119907006426, + "loss": 1.0625, + "step": 348 + }, + { + "epoch": 0.52, + "learning_rate": 0.00019256744055206622, + "loss": 1.0393, + "step": 349 + }, + { + "epoch": 0.52, + "learning_rate": 0.0001925227616706026, + "loss": 1.125, + "step": 350 + }, + { + "epoch": 0.52, + "learning_rate": 0.00019247795411838627, + "loss": 1.0375, + "step": 351 + }, + { + "epoch": 0.52, + "learning_rate": 0.00019243301795773086, + "loss": 1.0648, + "step": 352 + }, + { + "epoch": 0.53, + "learning_rate": 0.0001923879532511287, + "loss": 1.0903, + "step": 353 + }, + { + "epoch": 0.53, + "learning_rate": 0.000192342760061251, + "loss": 1.1219, + "step": 354 + }, + { + "epoch": 0.53, + "learning_rate": 0.00019229743845094755, + "loss": 1.054, + "step": 355 + }, + { + "epoch": 0.53, + "learning_rate": 0.0001922519884832469, + "loss": 1.1206, + "step": 356 + }, + { + "epoch": 0.53, + "learning_rate": 0.00019220641022135588, + "loss": 1.1125, + "step": 357 + }, + { + "epoch": 0.53, + "learning_rate": 0.00019216070372865996, + "loss": 1.064, + "step": 358 + }, + { + "epoch": 0.54, + 
"learning_rate": 0.0001921148690687228, + "loss": 1.0843, + "step": 359 + }, + { + "epoch": 0.54, + "learning_rate": 0.00019206890630528634, + "loss": 1.1378, + "step": 360 + }, + { + "epoch": 0.54, + "learning_rate": 0.00019202281550227064, + "loss": 1.0399, + "step": 361 + }, + { + "epoch": 0.54, + "learning_rate": 0.0001919765967237739, + "loss": 1.1762, + "step": 362 + }, + { + "epoch": 0.54, + "learning_rate": 0.0001919302500340722, + "loss": 1.0538, + "step": 363 + }, + { + "epoch": 0.54, + "learning_rate": 0.00019188377549761963, + "loss": 1.0343, + "step": 364 + }, + { + "epoch": 0.54, + "learning_rate": 0.0001918371731790479, + "loss": 1.1027, + "step": 365 + }, + { + "epoch": 0.55, + "learning_rate": 0.00019179044314316664, + "loss": 1.036, + "step": 366 + }, + { + "epoch": 0.55, + "learning_rate": 0.00019174358545496288, + "loss": 1.041, + "step": 367 + }, + { + "epoch": 0.55, + "learning_rate": 0.00019169660017960137, + "loss": 1.0762, + "step": 368 + }, + { + "epoch": 0.55, + "learning_rate": 0.00019164948738242409, + "loss": 1.0807, + "step": 369 + }, + { + "epoch": 0.55, + "learning_rate": 0.00019160224712895055, + "loss": 1.037, + "step": 370 + }, + { + "epoch": 0.55, + "learning_rate": 0.00019155487948487748, + "loss": 1.0625, + "step": 371 + }, + { + "epoch": 0.55, + "learning_rate": 0.0001915073845160786, + "loss": 1.062, + "step": 372 + }, + { + "epoch": 0.56, + "learning_rate": 0.00019145976228860496, + "loss": 1.1882, + "step": 373 + }, + { + "epoch": 0.56, + "learning_rate": 0.00019141201286868435, + "loss": 1.1338, + "step": 374 + }, + { + "epoch": 0.56, + "learning_rate": 0.00019136413632272163, + "loss": 1.0174, + "step": 375 + }, + { + "epoch": 0.56, + "learning_rate": 0.00019131613271729833, + "loss": 1.0585, + "step": 376 + }, + { + "epoch": 0.56, + "learning_rate": 0.00019126800211917276, + "loss": 1.0495, + "step": 377 + }, + { + "epoch": 0.56, + "learning_rate": 0.0001912197445952798, + "loss": 1.123, + "step": 378 + }, + { + 
"epoch": 0.56, + "learning_rate": 0.00019117136021273075, + "loss": 1.0517, + "step": 379 + }, + { + "epoch": 0.57, + "learning_rate": 0.0001911228490388136, + "loss": 1.0545, + "step": 380 + }, + { + "epoch": 0.57, + "learning_rate": 0.00019107421114099237, + "loss": 1.0302, + "step": 381 + }, + { + "epoch": 0.57, + "learning_rate": 0.00019102544658690748, + "loss": 1.0908, + "step": 382 + }, + { + "epoch": 0.57, + "learning_rate": 0.00019097655544437545, + "loss": 1.1425, + "step": 383 + }, + { + "epoch": 0.57, + "learning_rate": 0.00019092753778138886, + "loss": 1.0686, + "step": 384 + }, + { + "epoch": 0.57, + "learning_rate": 0.0001908783936661162, + "loss": 1.06, + "step": 385 + }, + { + "epoch": 0.58, + "learning_rate": 0.0001908291231669019, + "loss": 1.1296, + "step": 386 + }, + { + "epoch": 0.58, + "learning_rate": 0.00019077972635226604, + "loss": 1.1029, + "step": 387 + }, + { + "epoch": 0.58, + "learning_rate": 0.00019073020329090444, + "loss": 1.0469, + "step": 388 + }, + { + "epoch": 0.58, + "learning_rate": 0.0001906805540516885, + "loss": 1.0427, + "step": 389 + }, + { + "epoch": 0.58, + "learning_rate": 0.000190630778703665, + "loss": 1.0075, + "step": 390 + }, + { + "epoch": 0.58, + "learning_rate": 0.00019058087731605624, + "loss": 1.1146, + "step": 391 + }, + { + "epoch": 0.58, + "learning_rate": 0.0001905308499582597, + "loss": 1.1161, + "step": 392 + }, + { + "epoch": 0.59, + "learning_rate": 0.00019048069669984802, + "loss": 1.1419, + "step": 393 + }, + { + "epoch": 0.59, + "learning_rate": 0.00019043041761056907, + "loss": 1.1586, + "step": 394 + }, + { + "epoch": 0.59, + "learning_rate": 0.00019038001276034557, + "loss": 1.0765, + "step": 395 + }, + { + "epoch": 0.59, + "learning_rate": 0.00019032948221927524, + "loss": 1.1225, + "step": 396 + }, + { + "epoch": 0.59, + "learning_rate": 0.0001902788260576305, + "loss": 1.0247, + "step": 397 + }, + { + "epoch": 0.59, + "learning_rate": 0.00019022804434585852, + "loss": 1.135, + "step": 398 + 
}, + { + "epoch": 0.59, + "learning_rate": 0.0001901771371545811, + "loss": 1.1122, + "step": 399 + }, + { + "epoch": 0.6, + "learning_rate": 0.00019012610455459446, + "loss": 1.075, + "step": 400 + }, + { + "epoch": 0.6, + "learning_rate": 0.00019007494661686935, + "loss": 1.1121, + "step": 401 + }, + { + "epoch": 0.6, + "learning_rate": 0.0001900236634125507, + "loss": 1.0531, + "step": 402 + }, + { + "epoch": 0.6, + "learning_rate": 0.00018997225501295772, + "loss": 1.0561, + "step": 403 + }, + { + "epoch": 0.6, + "learning_rate": 0.00018992072148958368, + "loss": 1.0803, + "step": 404 + }, + { + "epoch": 0.6, + "learning_rate": 0.00018986906291409595, + "loss": 1.0579, + "step": 405 + }, + { + "epoch": 0.61, + "learning_rate": 0.00018981727935833567, + "loss": 1.0614, + "step": 406 + }, + { + "epoch": 0.61, + "learning_rate": 0.0001897653708943179, + "loss": 0.9982, + "step": 407 + }, + { + "epoch": 0.61, + "learning_rate": 0.00018971333759423142, + "loss": 1.1498, + "step": 408 + }, + { + "epoch": 0.61, + "learning_rate": 0.00018966117953043852, + "loss": 1.1165, + "step": 409 + }, + { + "epoch": 0.61, + "learning_rate": 0.00018960889677547505, + "loss": 1.1155, + "step": 410 + }, + { + "epoch": 0.61, + "learning_rate": 0.00018955648940205028, + "loss": 1.0017, + "step": 411 + }, + { + "epoch": 0.61, + "learning_rate": 0.00018950395748304678, + "loss": 1.0556, + "step": 412 + }, + { + "epoch": 0.62, + "learning_rate": 0.00018945130109152033, + "loss": 1.0248, + "step": 413 + }, + { + "epoch": 0.62, + "learning_rate": 0.00018939852030069981, + "loss": 1.0155, + "step": 414 + }, + { + "epoch": 0.62, + "learning_rate": 0.00018934561518398706, + "loss": 1.0248, + "step": 415 + }, + { + "epoch": 0.62, + "learning_rate": 0.00018929258581495685, + "loss": 0.9835, + "step": 416 + }, + { + "epoch": 0.62, + "learning_rate": 0.0001892394322673568, + "loss": 1.1602, + "step": 417 + }, + { + "epoch": 0.62, + "learning_rate": 0.0001891861546151071, + "loss": 1.021, + 
"step": 418 + }, + { + "epoch": 0.62, + "learning_rate": 0.00018913275293230069, + "loss": 1.0526, + "step": 419 + }, + { + "epoch": 0.63, + "learning_rate": 0.00018907922729320285, + "loss": 1.0585, + "step": 420 + }, + { + "epoch": 0.63, + "learning_rate": 0.00018902557777225135, + "loss": 1.0327, + "step": 421 + }, + { + "epoch": 0.63, + "learning_rate": 0.00018897180444405614, + "loss": 1.0448, + "step": 422 + }, + { + "epoch": 0.63, + "learning_rate": 0.0001889179073833995, + "loss": 1.0776, + "step": 423 + }, + { + "epoch": 0.63, + "learning_rate": 0.0001888638866652356, + "loss": 1.0748, + "step": 424 + }, + { + "epoch": 0.63, + "learning_rate": 0.0001888097423646907, + "loss": 1.0482, + "step": 425 + }, + { + "epoch": 0.63, + "learning_rate": 0.00018875547455706295, + "loss": 1.0394, + "step": 426 + }, + { + "epoch": 0.64, + "learning_rate": 0.00018870108331782217, + "loss": 1.0646, + "step": 427 + }, + { + "epoch": 0.64, + "learning_rate": 0.00018864656872260985, + "loss": 1.0338, + "step": 428 + }, + { + "epoch": 0.64, + "learning_rate": 0.00018859193084723913, + "loss": 0.9848, + "step": 429 + }, + { + "epoch": 0.64, + "learning_rate": 0.0001885371697676944, + "loss": 1.0587, + "step": 430 + }, + { + "epoch": 0.64, + "learning_rate": 0.0001884822855601316, + "loss": 1.0711, + "step": 431 + }, + { + "epoch": 0.64, + "learning_rate": 0.00018842727830087778, + "loss": 1.0964, + "step": 432 + }, + { + "epoch": 0.65, + "learning_rate": 0.00018837214806643115, + "loss": 1.0254, + "step": 433 + }, + { + "epoch": 0.65, + "learning_rate": 0.00018831689493346095, + "loss": 1.0748, + "step": 434 + }, + { + "epoch": 0.65, + "learning_rate": 0.00018826151897880728, + "loss": 1.0797, + "step": 435 + }, + { + "epoch": 0.65, + "learning_rate": 0.00018820602027948114, + "loss": 1.1068, + "step": 436 + }, + { + "epoch": 0.65, + "learning_rate": 0.00018815039891266418, + "loss": 1.081, + "step": 437 + }, + { + "epoch": 0.65, + "learning_rate": 0.0001880946549557086, + 
"loss": 1.0685, + "step": 438 + }, + { + "epoch": 0.65, + "learning_rate": 0.00018803878848613716, + "loss": 1.0916, + "step": 439 + }, + { + "epoch": 0.66, + "learning_rate": 0.00018798279958164295, + "loss": 1.115, + "step": 440 + }, + { + "epoch": 0.66, + "learning_rate": 0.00018792668832008936, + "loss": 1.0048, + "step": 441 + }, + { + "epoch": 0.66, + "learning_rate": 0.0001878704547795099, + "loss": 1.0386, + "step": 442 + }, + { + "epoch": 0.66, + "learning_rate": 0.00018781409903810821, + "loss": 1.0283, + "step": 443 + }, + { + "epoch": 0.66, + "learning_rate": 0.00018775762117425777, + "loss": 1.085, + "step": 444 + }, + { + "epoch": 0.66, + "learning_rate": 0.00018770102126650198, + "loss": 1.0582, + "step": 445 + }, + { + "epoch": 0.66, + "learning_rate": 0.00018764429939355392, + "loss": 1.0705, + "step": 446 + }, + { + "epoch": 0.67, + "learning_rate": 0.0001875874556342963, + "loss": 1.1426, + "step": 447 + }, + { + "epoch": 0.67, + "learning_rate": 0.00018753049006778132, + "loss": 1.0337, + "step": 448 + }, + { + "epoch": 0.67, + "learning_rate": 0.0001874734027732306, + "loss": 1.0993, + "step": 449 + }, + { + "epoch": 0.67, + "learning_rate": 0.00018741619383003507, + "loss": 1.0661, + "step": 450 + }, + { + "epoch": 0.67, + "learning_rate": 0.00018735886331775476, + "loss": 1.0564, + "step": 451 + }, + { + "epoch": 0.67, + "learning_rate": 0.00018730141131611882, + "loss": 1.0989, + "step": 452 + }, + { + "epoch": 0.68, + "learning_rate": 0.0001872438379050254, + "loss": 1.0984, + "step": 453 + }, + { + "epoch": 0.68, + "learning_rate": 0.00018718614316454133, + "loss": 1.1173, + "step": 454 + }, + { + "epoch": 0.68, + "learning_rate": 0.00018712832717490235, + "loss": 1.1005, + "step": 455 + }, + { + "epoch": 0.68, + "learning_rate": 0.00018707039001651277, + "loss": 1.0008, + "step": 456 + }, + { + "epoch": 0.68, + "learning_rate": 0.00018701233176994533, + "loss": 1.0701, + "step": 457 + }, + { + "epoch": 0.68, + "learning_rate": 
0.00018695415251594123, + "loss": 1.0831, + "step": 458 + }, + { + "epoch": 0.68, + "learning_rate": 0.00018689585233541003, + "loss": 1.1165, + "step": 459 + }, + { + "epoch": 0.69, + "learning_rate": 0.00018683743130942928, + "loss": 1.0884, + "step": 460 + }, + { + "epoch": 0.69, + "learning_rate": 0.00018677888951924474, + "loss": 0.9882, + "step": 461 + }, + { + "epoch": 0.69, + "learning_rate": 0.00018672022704627002, + "loss": 1.086, + "step": 462 + }, + { + "epoch": 0.69, + "learning_rate": 0.00018666144397208668, + "loss": 1.0545, + "step": 463 + }, + { + "epoch": 0.69, + "learning_rate": 0.00018660254037844388, + "loss": 1.0274, + "step": 464 + }, + { + "epoch": 0.69, + "learning_rate": 0.0001865435163472584, + "loss": 1.0795, + "step": 465 + }, + { + "epoch": 0.69, + "learning_rate": 0.00018648437196061462, + "loss": 1.022, + "step": 466 + }, + { + "epoch": 0.7, + "learning_rate": 0.0001864251073007642, + "loss": 1.0717, + "step": 467 + }, + { + "epoch": 0.7, + "learning_rate": 0.00018636572245012606, + "loss": 1.1501, + "step": 468 + }, + { + "epoch": 0.7, + "learning_rate": 0.0001863062174912863, + "loss": 1.1034, + "step": 469 + }, + { + "epoch": 0.7, + "learning_rate": 0.00018624659250699805, + "loss": 1.0784, + "step": 470 + }, + { + "epoch": 0.7, + "learning_rate": 0.00018618684758018136, + "loss": 1.1274, + "step": 471 + }, + { + "epoch": 0.7, + "learning_rate": 0.0001861269827939231, + "loss": 1.0643, + "step": 472 + }, + { + "epoch": 0.7, + "learning_rate": 0.00018606699823147676, + "loss": 1.1394, + "step": 473 + }, + { + "epoch": 0.71, + "learning_rate": 0.00018600689397626246, + "loss": 0.9665, + "step": 474 + }, + { + "epoch": 0.71, + "learning_rate": 0.00018594667011186678, + "loss": 1.058, + "step": 475 + }, + { + "epoch": 0.71, + "learning_rate": 0.00018588632672204264, + "loss": 1.0706, + "step": 476 + }, + { + "epoch": 0.71, + "learning_rate": 0.0001858258638907091, + "loss": 1.0414, + "step": 477 + }, + { + "epoch": 0.71, + 
"learning_rate": 0.00018576528170195146, + "loss": 1.1, + "step": 478 + }, + { + "epoch": 0.71, + "learning_rate": 0.00018570458024002093, + "loss": 1.1114, + "step": 479 + }, + { + "epoch": 0.72, + "learning_rate": 0.00018564375958933459, + "loss": 1.0596, + "step": 480 + }, + { + "epoch": 0.72, + "learning_rate": 0.0001855828198344753, + "loss": 1.0897, + "step": 481 + }, + { + "epoch": 0.72, + "learning_rate": 0.00018552176106019155, + "loss": 1.0316, + "step": 482 + }, + { + "epoch": 0.72, + "learning_rate": 0.00018546058335139733, + "loss": 1.0516, + "step": 483 + }, + { + "epoch": 0.72, + "learning_rate": 0.0001853992867931721, + "loss": 1.0477, + "step": 484 + }, + { + "epoch": 0.72, + "learning_rate": 0.00018533787147076048, + "loss": 1.0432, + "step": 485 + }, + { + "epoch": 0.72, + "learning_rate": 0.00018527633746957234, + "loss": 1.0568, + "step": 486 + }, + { + "epoch": 0.73, + "learning_rate": 0.00018521468487518264, + "loss": 1.114, + "step": 487 + }, + { + "epoch": 0.73, + "learning_rate": 0.00018515291377333112, + "loss": 1.0664, + "step": 488 + }, + { + "epoch": 0.73, + "learning_rate": 0.0001850910242499225, + "loss": 1.0162, + "step": 489 + }, + { + "epoch": 0.73, + "learning_rate": 0.0001850290163910261, + "loss": 1.0829, + "step": 490 + }, + { + "epoch": 0.73, + "learning_rate": 0.00018496689028287572, + "loss": 1.1078, + "step": 491 + }, + { + "epoch": 0.73, + "learning_rate": 0.0001849046460118698, + "loss": 1.0533, + "step": 492 + }, + { + "epoch": 0.73, + "learning_rate": 0.00018484228366457095, + "loss": 1.0923, + "step": 493 + }, + { + "epoch": 0.74, + "learning_rate": 0.00018477980332770607, + "loss": 1.0516, + "step": 494 + }, + { + "epoch": 0.74, + "learning_rate": 0.00018471720508816614, + "loss": 0.9826, + "step": 495 + }, + { + "epoch": 0.74, + "learning_rate": 0.00018465448903300606, + "loss": 1.1581, + "step": 496 + }, + { + "epoch": 0.74, + "learning_rate": 0.0001845916552494446, + "loss": 1.1268, + "step": 497 + }, + { + 
"epoch": 0.74, + "learning_rate": 0.00018452870382486432, + "loss": 1.0483, + "step": 498 + }, + { + "epoch": 0.74, + "learning_rate": 0.00018446563484681127, + "loss": 1.1792, + "step": 499 + }, + { + "epoch": 0.75, + "learning_rate": 0.00018440244840299506, + "loss": 1.0918, + "step": 500 + }, + { + "epoch": 0.75, + "learning_rate": 0.0001843391445812886, + "loss": 0.9691, + "step": 501 + }, + { + "epoch": 0.75, + "learning_rate": 0.00018427572346972805, + "loss": 1.0581, + "step": 502 + }, + { + "epoch": 0.75, + "learning_rate": 0.0001842121851565128, + "loss": 1.0072, + "step": 503 + }, + { + "epoch": 0.75, + "learning_rate": 0.00018414852973000503, + "loss": 0.9686, + "step": 504 + }, + { + "epoch": 0.75, + "eval_loss": 1.0276715755462646, + "eval_runtime": 2.6054, + "eval_samples_per_second": 419.124, + "eval_steps_per_second": 26.483, + "step": 504 + }, + { + "epoch": 0.75, + "learning_rate": 0.00018408475727872995, + "loss": 1.1221, + "step": 505 + }, + { + "epoch": 0.75, + "learning_rate": 0.00018402086789137546, + "loss": 1.087, + "step": 506 + }, + { + "epoch": 0.76, + "learning_rate": 0.00018395686165679202, + "loss": 1.0599, + "step": 507 + }, + { + "epoch": 0.76, + "learning_rate": 0.00018389273866399275, + "loss": 1.1844, + "step": 508 + }, + { + "epoch": 0.76, + "learning_rate": 0.00018382849900215294, + "loss": 1.046, + "step": 509 + }, + { + "epoch": 0.76, + "learning_rate": 0.00018376414276061032, + "loss": 0.9691, + "step": 510 + }, + { + "epoch": 0.76, + "learning_rate": 0.00018369967002886464, + "loss": 1.0996, + "step": 511 + }, + { + "epoch": 0.76, + "learning_rate": 0.0001836350808965776, + "loss": 1.083, + "step": 512 + }, + { + "epoch": 0.76, + "learning_rate": 0.00018357037545357297, + "loss": 1.0371, + "step": 513 + }, + { + "epoch": 0.77, + "learning_rate": 0.00018350555378983608, + "loss": 1.018, + "step": 514 + }, + { + "epoch": 0.77, + "learning_rate": 0.00018344061599551398, + "loss": 1.095, + "step": 515 + }, + { + "epoch": 0.77, 
+ "learning_rate": 0.00018337556216091517, + "loss": 1.0871, + "step": 516 + }, + { + "epoch": 0.77, + "learning_rate": 0.0001833103923765096, + "loss": 1.0774, + "step": 517 + }, + { + "epoch": 0.77, + "learning_rate": 0.00018324510673292842, + "loss": 1.0337, + "step": 518 + }, + { + "epoch": 0.77, + "learning_rate": 0.0001831797053209639, + "loss": 1.0059, + "step": 519 + }, + { + "epoch": 0.77, + "learning_rate": 0.00018311418823156936, + "loss": 1.0744, + "step": 520 + }, + { + "epoch": 0.78, + "learning_rate": 0.00018304855555585894, + "loss": 0.9732, + "step": 521 + }, + { + "epoch": 0.78, + "learning_rate": 0.00018298280738510752, + "loss": 1.1176, + "step": 522 + }, + { + "epoch": 0.78, + "learning_rate": 0.00018291694381075056, + "loss": 1.1485, + "step": 523 + }, + { + "epoch": 0.78, + "learning_rate": 0.00018285096492438424, + "loss": 1.1044, + "step": 524 + }, + { + "epoch": 0.78, + "learning_rate": 0.00018278487081776476, + "loss": 0.9812, + "step": 525 + }, + { + "epoch": 0.78, + "learning_rate": 0.00018271866158280884, + "loss": 1.0966, + "step": 526 + }, + { + "epoch": 0.79, + "learning_rate": 0.0001826523373115931, + "loss": 1.2406, + "step": 527 + }, + { + "epoch": 0.79, + "learning_rate": 0.0001825858980963543, + "loss": 1.0727, + "step": 528 + }, + { + "epoch": 0.79, + "learning_rate": 0.000182519344029489, + "loss": 0.9966, + "step": 529 + }, + { + "epoch": 0.79, + "learning_rate": 0.00018245267520355346, + "loss": 1.081, + "step": 530 + }, + { + "epoch": 0.79, + "learning_rate": 0.00018238589171126353, + "loss": 1.1104, + "step": 531 + }, + { + "epoch": 0.79, + "learning_rate": 0.00018231899364549455, + "loss": 1.0535, + "step": 532 + }, + { + "epoch": 0.79, + "learning_rate": 0.00018225198109928114, + "loss": 1.0801, + "step": 533 + }, + { + "epoch": 0.8, + "learning_rate": 0.00018218485416581726, + "loss": 1.0726, + "step": 534 + }, + { + "epoch": 0.8, + "learning_rate": 0.00018211761293845585, + "loss": 1.0923, + "step": 535 + }, + { + 
"epoch": 0.8, + "learning_rate": 0.00018205025751070875, + "loss": 1.0551, + "step": 536 + }, + { + "epoch": 0.8, + "learning_rate": 0.00018198278797624675, + "loss": 1.0495, + "step": 537 + }, + { + "epoch": 0.8, + "learning_rate": 0.0001819152044288992, + "loss": 1.0589, + "step": 538 + }, + { + "epoch": 0.8, + "learning_rate": 0.00018184750696265408, + "loss": 1.0487, + "step": 539 + }, + { + "epoch": 0.8, + "learning_rate": 0.0001817796956716578, + "loss": 1.0491, + "step": 540 + }, + { + "epoch": 0.81, + "learning_rate": 0.000181711770650215, + "loss": 1.0981, + "step": 541 + }, + { + "epoch": 0.81, + "learning_rate": 0.00018164373199278856, + "loss": 1.1706, + "step": 542 + }, + { + "epoch": 0.81, + "learning_rate": 0.0001815755797939994, + "loss": 1.1024, + "step": 543 + }, + { + "epoch": 0.81, + "learning_rate": 0.00018150731414862622, + "loss": 1.0488, + "step": 544 + }, + { + "epoch": 0.81, + "learning_rate": 0.00018143893515160564, + "loss": 1.165, + "step": 545 + }, + { + "epoch": 0.81, + "learning_rate": 0.00018137044289803181, + "loss": 1.0346, + "step": 546 + }, + { + "epoch": 0.82, + "learning_rate": 0.00018130183748315645, + "loss": 1.1179, + "step": 547 + }, + { + "epoch": 0.82, + "learning_rate": 0.0001812331190023886, + "loss": 1.0027, + "step": 548 + }, + { + "epoch": 0.82, + "learning_rate": 0.00018116428755129459, + "loss": 1.1106, + "step": 549 + }, + { + "epoch": 0.82, + "learning_rate": 0.00018109534322559783, + "loss": 1.0479, + "step": 550 + }, + { + "epoch": 0.82, + "learning_rate": 0.00018102628612117865, + "loss": 1.0046, + "step": 551 + }, + { + "epoch": 0.82, + "learning_rate": 0.0001809571163340744, + "loss": 0.9883, + "step": 552 + }, + { + "epoch": 0.82, + "learning_rate": 0.00018088783396047893, + "loss": 1.1018, + "step": 553 + }, + { + "epoch": 0.83, + "learning_rate": 0.00018081843909674276, + "loss": 1.1389, + "step": 554 + }, + { + "epoch": 0.83, + "learning_rate": 0.00018074893183937283, + "loss": 1.0751, + "step": 555 + 
}, + { + "epoch": 0.83, + "learning_rate": 0.00018067931228503246, + "loss": 1.1475, + "step": 556 + }, + { + "epoch": 0.83, + "learning_rate": 0.00018060958053054096, + "loss": 1.0829, + "step": 557 + }, + { + "epoch": 0.83, + "learning_rate": 0.00018053973667287387, + "loss": 1.0272, + "step": 558 + }, + { + "epoch": 0.83, + "learning_rate": 0.00018046978080916252, + "loss": 1.0668, + "step": 559 + }, + { + "epoch": 0.83, + "learning_rate": 0.00018039971303669407, + "loss": 1.0988, + "step": 560 + }, + { + "epoch": 0.84, + "learning_rate": 0.00018032953345291123, + "loss": 1.0339, + "step": 561 + }, + { + "epoch": 0.84, + "learning_rate": 0.0001802592421554123, + "loss": 1.0654, + "step": 562 + }, + { + "epoch": 0.84, + "learning_rate": 0.00018018883924195085, + "loss": 1.0157, + "step": 563 + }, + { + "epoch": 0.84, + "learning_rate": 0.00018011832481043576, + "loss": 1.0738, + "step": 564 + }, + { + "epoch": 0.84, + "learning_rate": 0.0001800476989589309, + "loss": 1.0742, + "step": 565 + }, + { + "epoch": 0.84, + "learning_rate": 0.0001799769617856552, + "loss": 0.9861, + "step": 566 + }, + { + "epoch": 0.85, + "learning_rate": 0.0001799061133889823, + "loss": 1.0788, + "step": 567 + }, + { + "epoch": 0.85, + "learning_rate": 0.00017983515386744061, + "loss": 1.0539, + "step": 568 + }, + { + "epoch": 0.85, + "learning_rate": 0.00017976408331971298, + "loss": 1.0875, + "step": 569 + }, + { + "epoch": 0.85, + "learning_rate": 0.0001796929018446368, + "loss": 1.0765, + "step": 570 + }, + { + "epoch": 0.85, + "learning_rate": 0.00017962160954120354, + "loss": 1.1336, + "step": 571 + }, + { + "epoch": 0.85, + "learning_rate": 0.000179550206508559, + "loss": 0.9674, + "step": 572 + }, + { + "epoch": 0.85, + "learning_rate": 0.00017947869284600282, + "loss": 1.0607, + "step": 573 + }, + { + "epoch": 0.86, + "learning_rate": 0.0001794070686529886, + "loss": 0.9959, + "step": 574 + }, + { + "epoch": 0.86, + "learning_rate": 0.00017933533402912354, + "loss": 1.038, + 
"step": 575 + }, + { + "epoch": 0.86, + "learning_rate": 0.0001792634890741685, + "loss": 1.1342, + "step": 576 + }, + { + "epoch": 0.86, + "learning_rate": 0.00017919153388803774, + "loss": 1.0941, + "step": 577 + }, + { + "epoch": 0.86, + "learning_rate": 0.00017911946857079888, + "loss": 1.1286, + "step": 578 + }, + { + "epoch": 0.86, + "learning_rate": 0.00017904729322267256, + "loss": 1.0354, + "step": 579 + }, + { + "epoch": 0.86, + "learning_rate": 0.0001789750079440326, + "loss": 1.1314, + "step": 580 + }, + { + "epoch": 0.87, + "learning_rate": 0.00017890261283540562, + "loss": 1.0365, + "step": 581 + }, + { + "epoch": 0.87, + "learning_rate": 0.00017883010799747099, + "loss": 1.091, + "step": 582 + }, + { + "epoch": 0.87, + "learning_rate": 0.00017875749353106062, + "loss": 0.9995, + "step": 583 + }, + { + "epoch": 0.87, + "learning_rate": 0.000178684769537159, + "loss": 1.0435, + "step": 584 + }, + { + "epoch": 0.87, + "learning_rate": 0.00017861193611690287, + "loss": 1.0555, + "step": 585 + }, + { + "epoch": 0.87, + "learning_rate": 0.00017853899337158112, + "loss": 1.0637, + "step": 586 + }, + { + "epoch": 0.87, + "learning_rate": 0.00017846594140263474, + "loss": 1.064, + "step": 587 + }, + { + "epoch": 0.88, + "learning_rate": 0.00017839278031165658, + "loss": 0.9879, + "step": 588 + }, + { + "epoch": 0.88, + "learning_rate": 0.00017831951020039126, + "loss": 1.0846, + "step": 589 + }, + { + "epoch": 0.88, + "learning_rate": 0.000178246131170735, + "loss": 1.0373, + "step": 590 + }, + { + "epoch": 0.88, + "learning_rate": 0.00017817264332473546, + "loss": 1.0377, + "step": 591 + }, + { + "epoch": 0.88, + "learning_rate": 0.00017809904676459177, + "loss": 1.0932, + "step": 592 + }, + { + "epoch": 0.88, + "learning_rate": 0.00017802534159265404, + "loss": 1.085, + "step": 593 + }, + { + "epoch": 0.89, + "learning_rate": 0.0001779515279114236, + "loss": 1.0975, + "step": 594 + }, + { + "epoch": 0.89, + "learning_rate": 0.0001778776058235526, + "loss": 
1.1283, + "step": 595 + }, + { + "epoch": 0.89, + "learning_rate": 0.00017780357543184397, + "loss": 1.0652, + "step": 596 + }, + { + "epoch": 0.89, + "learning_rate": 0.00017772943683925122, + "loss": 1.0336, + "step": 597 + }, + { + "epoch": 0.89, + "learning_rate": 0.00017765519014887842, + "loss": 0.9761, + "step": 598 + }, + { + "epoch": 0.89, + "learning_rate": 0.0001775808354639799, + "loss": 1.0688, + "step": 599 + }, + { + "epoch": 0.89, + "learning_rate": 0.00017750637288796016, + "loss": 1.1031, + "step": 600 + }, + { + "epoch": 0.9, + "learning_rate": 0.00017743180252437383, + "loss": 1.083, + "step": 601 + }, + { + "epoch": 0.9, + "learning_rate": 0.00017735712447692538, + "loss": 1.1612, + "step": 602 + }, + { + "epoch": 0.9, + "learning_rate": 0.00017728233884946903, + "loss": 1.1618, + "step": 603 + }, + { + "epoch": 0.9, + "learning_rate": 0.00017720744574600863, + "loss": 1.144, + "step": 604 + }, + { + "epoch": 0.9, + "learning_rate": 0.0001771324452706975, + "loss": 1.1174, + "step": 605 + }, + { + "epoch": 0.9, + "learning_rate": 0.00017705733752783825, + "loss": 0.9728, + "step": 606 + }, + { + "epoch": 0.9, + "learning_rate": 0.0001769821226218827, + "loss": 1.0599, + "step": 607 + }, + { + "epoch": 0.91, + "learning_rate": 0.0001769068006574317, + "loss": 1.0639, + "step": 608 + }, + { + "epoch": 0.91, + "learning_rate": 0.00017683137173923495, + "loss": 1.1278, + "step": 609 + }, + { + "epoch": 0.91, + "learning_rate": 0.00017675583597219095, + "loss": 0.9925, + "step": 610 + }, + { + "epoch": 0.91, + "learning_rate": 0.0001766801934613467, + "loss": 1.0457, + "step": 611 + }, + { + "epoch": 0.91, + "learning_rate": 0.0001766044443118978, + "loss": 1.0348, + "step": 612 + }, + { + "epoch": 0.91, + "learning_rate": 0.000176528588629188, + "loss": 1.022, + "step": 613 + }, + { + "epoch": 0.92, + "learning_rate": 0.00017645262651870926, + "loss": 1.0027, + "step": 614 + }, + { + "epoch": 0.92, + "learning_rate": 0.00017637655808610156, + 
"loss": 1.0491, + "step": 615 + }, + { + "epoch": 0.92, + "learning_rate": 0.00017630038343715275, + "loss": 1.0413, + "step": 616 + }, + { + "epoch": 0.92, + "learning_rate": 0.00017622410267779834, + "loss": 1.0358, + "step": 617 + }, + { + "epoch": 0.92, + "learning_rate": 0.00017614771591412148, + "loss": 1.1125, + "step": 618 + }, + { + "epoch": 0.92, + "learning_rate": 0.00017607122325235267, + "loss": 1.1185, + "step": 619 + }, + { + "epoch": 0.92, + "learning_rate": 0.00017599462479886974, + "loss": 1.0738, + "step": 620 + }, + { + "epoch": 0.93, + "learning_rate": 0.00017591792066019765, + "loss": 1.102, + "step": 621 + }, + { + "epoch": 0.93, + "learning_rate": 0.00017584111094300827, + "loss": 1.065, + "step": 622 + }, + { + "epoch": 0.93, + "learning_rate": 0.0001757641957541203, + "loss": 1.0514, + "step": 623 + }, + { + "epoch": 0.93, + "learning_rate": 0.0001756871752004992, + "loss": 1.0396, + "step": 624 + }, + { + "epoch": 0.93, + "learning_rate": 0.00017561004938925688, + "loss": 1.1027, + "step": 625 + }, + { + "epoch": 0.93, + "learning_rate": 0.00017553281842765169, + "loss": 1.0223, + "step": 626 + }, + { + "epoch": 0.93, + "learning_rate": 0.00017545548242308816, + "loss": 1.1793, + "step": 627 + }, + { + "epoch": 0.94, + "learning_rate": 0.00017537804148311695, + "loss": 1.0642, + "step": 628 + }, + { + "epoch": 0.94, + "learning_rate": 0.00017530049571543464, + "loss": 1.0682, + "step": 629 + }, + { + "epoch": 0.94, + "learning_rate": 0.00017522284522788353, + "loss": 1.0476, + "step": 630 + }, + { + "epoch": 0.94, + "learning_rate": 0.00017514509012845164, + "loss": 1.1064, + "step": 631 + }, + { + "epoch": 0.94, + "learning_rate": 0.00017506723052527242, + "loss": 1.0258, + "step": 632 + }, + { + "epoch": 0.94, + "learning_rate": 0.00017498926652662476, + "loss": 1.1954, + "step": 633 + }, + { + "epoch": 0.94, + "learning_rate": 0.0001749111982409325, + "loss": 1.0637, + "step": 634 + }, + { + "epoch": 0.95, + "learning_rate": 
0.00017483302577676475, + "loss": 0.9685, + "step": 635 + }, + { + "epoch": 0.95, + "learning_rate": 0.00017475474924283536, + "loss": 1.0465, + "step": 636 + }, + { + "epoch": 0.95, + "learning_rate": 0.000174676368748003, + "loss": 1.0161, + "step": 637 + }, + { + "epoch": 0.95, + "learning_rate": 0.00017459788440127083, + "loss": 1.0479, + "step": 638 + }, + { + "epoch": 0.95, + "learning_rate": 0.00017451929631178648, + "loss": 1.1166, + "step": 639 + }, + { + "epoch": 0.95, + "learning_rate": 0.0001744406045888419, + "loss": 1.0634, + "step": 640 + }, + { + "epoch": 0.96, + "learning_rate": 0.00017436180934187308, + "loss": 1.0826, + "step": 641 + }, + { + "epoch": 0.96, + "learning_rate": 0.00017428291068046, + "loss": 1.07, + "step": 642 + }, + { + "epoch": 0.96, + "learning_rate": 0.00017420390871432647, + "loss": 1.1167, + "step": 643 + }, + { + "epoch": 0.96, + "learning_rate": 0.00017412480355334005, + "loss": 1.0347, + "step": 644 + }, + { + "epoch": 0.96, + "learning_rate": 0.00017404559530751162, + "loss": 1.0393, + "step": 645 + }, + { + "epoch": 0.96, + "learning_rate": 0.00017396628408699555, + "loss": 1.1108, + "step": 646 + }, + { + "epoch": 0.96, + "learning_rate": 0.00017388687000208946, + "loss": 1.006, + "step": 647 + }, + { + "epoch": 0.97, + "learning_rate": 0.0001738073531632339, + "loss": 1.0932, + "step": 648 + }, + { + "epoch": 0.97, + "learning_rate": 0.0001737277336810124, + "loss": 1.0123, + "step": 649 + }, + { + "epoch": 0.97, + "learning_rate": 0.00017364801166615124, + "loss": 1.1273, + "step": 650 + }, + { + "epoch": 0.97, + "learning_rate": 0.0001735681872295192, + "loss": 0.9893, + "step": 651 + }, + { + "epoch": 0.97, + "learning_rate": 0.0001734882604821276, + "loss": 1.0699, + "step": 652 + }, + { + "epoch": 0.97, + "learning_rate": 0.00017340823153513002, + "loss": 1.0901, + "step": 653 + }, + { + "epoch": 0.97, + "learning_rate": 0.00017332810049982208, + "loss": 1.0212, + "step": 654 + }, + { + "epoch": 0.98, + 
"learning_rate": 0.00017324786748764155, + "loss": 0.9898, + "step": 655 + }, + { + "epoch": 0.98, + "learning_rate": 0.00017316753261016783, + "loss": 1.0899, + "step": 656 + }, + { + "epoch": 0.98, + "learning_rate": 0.00017308709597912213, + "loss": 1.085, + "step": 657 + }, + { + "epoch": 0.98, + "learning_rate": 0.00017300655770636708, + "loss": 1.091, + "step": 658 + }, + { + "epoch": 0.98, + "learning_rate": 0.00017292591790390665, + "loss": 1.0502, + "step": 659 + }, + { + "epoch": 0.98, + "learning_rate": 0.0001728451766838861, + "loss": 1.2131, + "step": 660 + }, + { + "epoch": 0.99, + "learning_rate": 0.00017276433415859167, + "loss": 1.1256, + "step": 661 + }, + { + "epoch": 0.99, + "learning_rate": 0.00017268339044045042, + "loss": 1.0577, + "step": 662 + }, + { + "epoch": 0.99, + "learning_rate": 0.00017260234564203032, + "loss": 1.0012, + "step": 663 + }, + { + "epoch": 0.99, + "learning_rate": 0.00017252119987603973, + "loss": 1.0611, + "step": 664 + }, + { + "epoch": 0.99, + "learning_rate": 0.00017243995325532755, + "loss": 1.1251, + "step": 665 + }, + { + "epoch": 0.99, + "learning_rate": 0.00017235860589288277, + "loss": 1.0959, + "step": 666 + }, + { + "epoch": 0.99, + "learning_rate": 0.0001722771579018347, + "loss": 1.1413, + "step": 667 + }, + { + "epoch": 1.0, + "learning_rate": 0.00017219560939545246, + "loss": 1.0728, + "step": 668 + }, + { + "epoch": 1.0, + "learning_rate": 0.00017211396048714498, + "loss": 1.0461, + "step": 669 + }, + { + "epoch": 1.0, + "learning_rate": 0.0001720322112904608, + "loss": 1.1084, + "step": 670 + }, + { + "epoch": 1.0, + "learning_rate": 0.00017195036191908797, + "loss": 1.1316, + "step": 671 + }, + { + "epoch": 1.0, + "learning_rate": 0.00017186841248685383, + "loss": 1.0816, + "step": 672 + }, + { + "epoch": 1.0, + "eval_loss": 1.0170178413391113, + "eval_runtime": 2.6119, + "eval_samples_per_second": 418.079, + "eval_steps_per_second": 26.417, + "step": 672 + }, + { + "epoch": 1.0, + "learning_rate": 
0.0001717863631077249, + "loss": 1.0711, + "step": 673 + }, + { + "epoch": 1.0, + "learning_rate": 0.00017170421389580667, + "loss": 1.1245, + "step": 674 + }, + { + "epoch": 1.01, + "learning_rate": 0.00017162196496534342, + "loss": 1.0519, + "step": 675 + }, + { + "epoch": 1.01, + "learning_rate": 0.0001715396164307182, + "loss": 1.104, + "step": 676 + }, + { + "epoch": 1.01, + "learning_rate": 0.00017145716840645254, + "loss": 1.1193, + "step": 677 + }, + { + "epoch": 1.01, + "learning_rate": 0.00017137462100720631, + "loss": 1.1238, + "step": 678 + }, + { + "epoch": 1.01, + "learning_rate": 0.00017129197434777763, + "loss": 1.004, + "step": 679 + }, + { + "epoch": 1.01, + "learning_rate": 0.00017120922854310257, + "loss": 1.0426, + "step": 680 + }, + { + "epoch": 1.01, + "learning_rate": 0.00017112638370825515, + "loss": 1.0308, + "step": 681 + }, + { + "epoch": 1.02, + "learning_rate": 0.00017104343995844715, + "loss": 1.0892, + "step": 682 + }, + { + "epoch": 1.02, + "learning_rate": 0.00017096039740902784, + "loss": 1.0115, + "step": 683 + }, + { + "epoch": 1.02, + "learning_rate": 0.00017087725617548385, + "loss": 1.1011, + "step": 684 + }, + { + "epoch": 1.0, + "learning_rate": 0.00017079401637343914, + "loss": 0.9829, + "step": 685 + }, + { + "epoch": 1.0, + "learning_rate": 0.00017071067811865476, + "loss": 0.9738, + "step": 686 + }, + { + "epoch": 1.0, + "learning_rate": 0.0001706272415270286, + "loss": 1.0563, + "step": 687 + }, + { + "epoch": 1.01, + "learning_rate": 0.00017054370671459532, + "loss": 1.0153, + "step": 688 + }, + { + "epoch": 1.01, + "learning_rate": 0.0001704600737975262, + "loss": 1.0638, + "step": 689 + }, + { + "epoch": 1.01, + "learning_rate": 0.000170376342892129, + "loss": 1.0053, + "step": 690 + }, + { + "epoch": 1.01, + "learning_rate": 0.00017029251411484765, + "loss": 1.0178, + "step": 691 + }, + { + "epoch": 1.01, + "learning_rate": 0.00017020858758226229, + "loss": 1.0755, + "step": 692 + }, + { + "epoch": 1.01, + 
"learning_rate": 0.00017012456341108885, + "loss": 0.9365, + "step": 693 + }, + { + "epoch": 1.01, + "learning_rate": 0.00017004044171817925, + "loss": 1.0666, + "step": 694 + }, + { + "epoch": 1.02, + "learning_rate": 0.00016995622262052092, + "loss": 1.041, + "step": 695 + }, + { + "epoch": 1.02, + "learning_rate": 0.00016987190623523674, + "loss": 1.0387, + "step": 696 + }, + { + "epoch": 1.02, + "learning_rate": 0.00016978749267958495, + "loss": 0.9332, + "step": 697 + }, + { + "epoch": 1.02, + "learning_rate": 0.00016970298207095885, + "loss": 1.0737, + "step": 698 + }, + { + "epoch": 1.02, + "learning_rate": 0.00016961837452688676, + "loss": 0.992, + "step": 699 + }, + { + "epoch": 1.02, + "learning_rate": 0.00016953367016503182, + "loss": 0.9997, + "step": 700 + }, + { + "epoch": 1.03, + "learning_rate": 0.00016944886910319173, + "loss": 1.1054, + "step": 701 + }, + { + "epoch": 1.03, + "learning_rate": 0.00016936397145929878, + "loss": 0.9876, + "step": 702 + }, + { + "epoch": 1.03, + "learning_rate": 0.00016927897735141952, + "loss": 1.0158, + "step": 703 + }, + { + "epoch": 1.03, + "learning_rate": 0.00016919388689775464, + "loss": 0.9771, + "step": 704 + }, + { + "epoch": 1.03, + "learning_rate": 0.00016910870021663883, + "loss": 0.942, + "step": 705 + }, + { + "epoch": 1.03, + "learning_rate": 0.00016902341742654065, + "loss": 1.0217, + "step": 706 + }, + { + "epoch": 1.03, + "learning_rate": 0.00016893803864606222, + "loss": 1.0346, + "step": 707 + }, + { + "epoch": 1.04, + "learning_rate": 0.00016885256399393924, + "loss": 0.9891, + "step": 708 + }, + { + "epoch": 1.04, + "learning_rate": 0.00016876699358904068, + "loss": 0.9697, + "step": 709 + }, + { + "epoch": 1.04, + "learning_rate": 0.00016868132755036875, + "loss": 1.0062, + "step": 710 + }, + { + "epoch": 1.04, + "learning_rate": 0.00016859556599705856, + "loss": 0.9822, + "step": 711 + }, + { + "epoch": 1.04, + "learning_rate": 0.0001685097090483781, + "loss": 1.0921, + "step": 712 + }, + { + 
"epoch": 1.04, + "learning_rate": 0.00016842375682372805, + "loss": 1.0126, + "step": 713 + }, + { + "epoch": 1.04, + "learning_rate": 0.00016833770944264153, + "loss": 1.0043, + "step": 714 + }, + { + "epoch": 1.05, + "learning_rate": 0.00016825156702478407, + "loss": 0.952, + "step": 715 + }, + { + "epoch": 1.05, + "learning_rate": 0.00016816532968995328, + "loss": 1.0423, + "step": 716 + }, + { + "epoch": 1.05, + "learning_rate": 0.00016807899755807886, + "loss": 1.0465, + "step": 717 + }, + { + "epoch": 1.05, + "learning_rate": 0.00016799257074922224, + "loss": 0.9827, + "step": 718 + }, + { + "epoch": 1.05, + "learning_rate": 0.00016790604938357663, + "loss": 0.9798, + "step": 719 + }, + { + "epoch": 1.05, + "learning_rate": 0.00016781943358146664, + "loss": 1.0268, + "step": 720 + }, + { + "epoch": 1.06, + "learning_rate": 0.00016773272346334828, + "loss": 1.0007, + "step": 721 + }, + { + "epoch": 1.06, + "learning_rate": 0.0001676459191498087, + "loss": 0.9989, + "step": 722 + }, + { + "epoch": 1.06, + "learning_rate": 0.00016755902076156604, + "loss": 0.9374, + "step": 723 + }, + { + "epoch": 1.06, + "learning_rate": 0.00016747202841946928, + "loss": 1.0031, + "step": 724 + }, + { + "epoch": 1.06, + "learning_rate": 0.00016738494224449802, + "loss": 0.9751, + "step": 725 + }, + { + "epoch": 1.06, + "learning_rate": 0.00016729776235776246, + "loss": 1.1055, + "step": 726 + }, + { + "epoch": 1.06, + "learning_rate": 0.00016721048888050302, + "loss": 1.0527, + "step": 727 + }, + { + "epoch": 1.07, + "learning_rate": 0.0001671231219340903, + "loss": 1.0048, + "step": 728 + }, + { + "epoch": 1.07, + "learning_rate": 0.0001670356616400249, + "loss": 0.957, + "step": 729 + }, + { + "epoch": 1.07, + "learning_rate": 0.00016694810811993723, + "loss": 1.0598, + "step": 730 + }, + { + "epoch": 1.07, + "learning_rate": 0.00016686046149558736, + "loss": 1.02, + "step": 731 + }, + { + "epoch": 1.07, + "learning_rate": 0.00016677272188886483, + "loss": 0.9973, + "step": 
732 + }, + { + "epoch": 1.07, + "learning_rate": 0.00016668488942178856, + "loss": 1.0685, + "step": 733 + }, + { + "epoch": 1.07, + "learning_rate": 0.00016659696421650645, + "loss": 0.9783, + "step": 734 + }, + { + "epoch": 1.08, + "learning_rate": 0.00016650894639529544, + "loss": 0.9767, + "step": 735 + }, + { + "epoch": 1.08, + "learning_rate": 0.00016642083608056141, + "loss": 1.0192, + "step": 736 + }, + { + "epoch": 1.08, + "learning_rate": 0.00016633263339483866, + "loss": 1.0121, + "step": 737 + }, + { + "epoch": 1.08, + "learning_rate": 0.00016624433846079012, + "loss": 0.9817, + "step": 738 + }, + { + "epoch": 1.08, + "learning_rate": 0.00016615595140120686, + "loss": 1.1145, + "step": 739 + }, + { + "epoch": 1.08, + "learning_rate": 0.00016606747233900815, + "loss": 0.9862, + "step": 740 + }, + { + "epoch": 1.08, + "learning_rate": 0.00016597890139724125, + "loss": 1.0606, + "step": 741 + }, + { + "epoch": 1.09, + "learning_rate": 0.0001658902386990811, + "loss": 1.0416, + "step": 742 + }, + { + "epoch": 1.09, + "learning_rate": 0.0001658014843678303, + "loss": 0.9971, + "step": 743 + }, + { + "epoch": 1.09, + "learning_rate": 0.00016571263852691888, + "loss": 1.0318, + "step": 744 + }, + { + "epoch": 1.09, + "learning_rate": 0.0001656237012999041, + "loss": 1.0633, + "step": 745 + }, + { + "epoch": 1.09, + "learning_rate": 0.0001655346728104704, + "loss": 1.0418, + "step": 746 + }, + { + "epoch": 1.09, + "learning_rate": 0.00016544555318242897, + "loss": 0.9308, + "step": 747 + }, + { + "epoch": 1.1, + "learning_rate": 0.00016535634253971794, + "loss": 1.1049, + "step": 748 + }, + { + "epoch": 1.1, + "learning_rate": 0.0001652670410064019, + "loss": 0.9377, + "step": 749 + }, + { + "epoch": 1.1, + "learning_rate": 0.00016517764870667182, + "loss": 0.9934, + "step": 750 + }, + { + "epoch": 1.1, + "learning_rate": 0.000165088165764845, + "loss": 1.0467, + "step": 751 + }, + { + "epoch": 1.1, + "learning_rate": 0.00016499859230536466, + "loss": 1.0172, + 
"step": 752 + }, + { + "epoch": 1.1, + "learning_rate": 0.0001649089284528001, + "loss": 0.9922, + "step": 753 + }, + { + "epoch": 1.1, + "learning_rate": 0.00016481917433184607, + "loss": 1.0373, + "step": 754 + }, + { + "epoch": 1.11, + "learning_rate": 0.0001647293300673231, + "loss": 1.0377, + "step": 755 + }, + { + "epoch": 1.11, + "learning_rate": 0.00016463939578417692, + "loss": 0.9991, + "step": 756 + }, + { + "epoch": 1.11, + "learning_rate": 0.00016454937160747854, + "loss": 1.0657, + "step": 757 + }, + { + "epoch": 1.11, + "learning_rate": 0.00016445925766242391, + "loss": 0.9954, + "step": 758 + }, + { + "epoch": 1.11, + "learning_rate": 0.0001643690540743339, + "loss": 1.018, + "step": 759 + }, + { + "epoch": 1.11, + "learning_rate": 0.00016427876096865394, + "loss": 1.01, + "step": 760 + }, + { + "epoch": 1.11, + "learning_rate": 0.0001641883784709541, + "loss": 0.9318, + "step": 761 + }, + { + "epoch": 1.12, + "learning_rate": 0.0001640979067069286, + "loss": 1.0174, + "step": 762 + }, + { + "epoch": 1.12, + "learning_rate": 0.00016400734580239594, + "loss": 1.0886, + "step": 763 + }, + { + "epoch": 1.12, + "learning_rate": 0.0001639166958832985, + "loss": 1.0316, + "step": 764 + }, + { + "epoch": 1.12, + "learning_rate": 0.0001638259570757025, + "loss": 1.0514, + "step": 765 + }, + { + "epoch": 1.12, + "learning_rate": 0.0001637351295057978, + "loss": 0.9914, + "step": 766 + }, + { + "epoch": 1.12, + "learning_rate": 0.00016364421329989755, + "loss": 1.0529, + "step": 767 + }, + { + "epoch": 1.13, + "learning_rate": 0.00016355320858443842, + "loss": 0.9689, + "step": 768 + }, + { + "epoch": 1.13, + "learning_rate": 0.00016346211548597995, + "loss": 1.0398, + "step": 769 + }, + { + "epoch": 1.13, + "learning_rate": 0.0001633709341312046, + "loss": 1.0127, + "step": 770 + }, + { + "epoch": 1.13, + "learning_rate": 0.00016327966464691778, + "loss": 1.1388, + "step": 771 + }, + { + "epoch": 1.13, + "learning_rate": 0.00016318830716004722, + "loss": 
0.9659, + "step": 772 + }, + { + "epoch": 1.13, + "learning_rate": 0.00016309686179764317, + "loss": 0.9907, + "step": 773 + }, + { + "epoch": 1.13, + "learning_rate": 0.00016300532868687806, + "loss": 0.9168, + "step": 774 + }, + { + "epoch": 1.14, + "learning_rate": 0.0001629137079550463, + "loss": 1.06, + "step": 775 + }, + { + "epoch": 1.14, + "learning_rate": 0.00016282199972956425, + "loss": 0.9826, + "step": 776 + }, + { + "epoch": 1.14, + "learning_rate": 0.00016273020413796983, + "loss": 1.0496, + "step": 777 + }, + { + "epoch": 1.14, + "learning_rate": 0.0001626383213079226, + "loss": 1.0245, + "step": 778 + }, + { + "epoch": 1.14, + "learning_rate": 0.00016254635136720328, + "loss": 1.036, + "step": 779 + }, + { + "epoch": 1.14, + "learning_rate": 0.0001624542944437139, + "loss": 1.0283, + "step": 780 + }, + { + "epoch": 1.14, + "learning_rate": 0.00016236215066547734, + "loss": 1.0078, + "step": 781 + }, + { + "epoch": 1.15, + "learning_rate": 0.00016226992016063723, + "loss": 0.9819, + "step": 782 + }, + { + "epoch": 1.15, + "learning_rate": 0.00016217760305745803, + "loss": 1.0687, + "step": 783 + }, + { + "epoch": 1.15, + "learning_rate": 0.0001620851994843244, + "loss": 1.0523, + "step": 784 + }, + { + "epoch": 1.15, + "learning_rate": 0.00016199270956974128, + "loss": 1.0279, + "step": 785 + }, + { + "epoch": 1.15, + "learning_rate": 0.00016190013344233388, + "loss": 1.0559, + "step": 786 + }, + { + "epoch": 1.15, + "learning_rate": 0.00016180747123084705, + "loss": 1.0844, + "step": 787 + }, + { + "epoch": 1.15, + "learning_rate": 0.00016171472306414554, + "loss": 1.0724, + "step": 788 + }, + { + "epoch": 1.16, + "learning_rate": 0.00016162188907121354, + "loss": 0.9696, + "step": 789 + }, + { + "epoch": 1.16, + "learning_rate": 0.00016152896938115464, + "loss": 0.9551, + "step": 790 + }, + { + "epoch": 1.16, + "learning_rate": 0.0001614359641231916, + "loss": 1.0032, + "step": 791 + }, + { + "epoch": 1.16, + "learning_rate": 
0.0001613428734266662, + "loss": 1.1404, + "step": 792 + }, + { + "epoch": 1.16, + "learning_rate": 0.00016124969742103897, + "loss": 1.0329, + "step": 793 + }, + { + "epoch": 1.16, + "learning_rate": 0.00016115643623588915, + "loss": 1.039, + "step": 794 + }, + { + "epoch": 1.17, + "learning_rate": 0.0001610630900009144, + "loss": 1.0231, + "step": 795 + }, + { + "epoch": 1.17, + "learning_rate": 0.0001609696588459307, + "loss": 1.0659, + "step": 796 + }, + { + "epoch": 1.17, + "learning_rate": 0.00016087614290087208, + "loss": 1.0029, + "step": 797 + }, + { + "epoch": 1.17, + "learning_rate": 0.0001607825422957905, + "loss": 0.985, + "step": 798 + }, + { + "epoch": 1.17, + "learning_rate": 0.00016068885716085567, + "loss": 0.9392, + "step": 799 + }, + { + "epoch": 1.17, + "learning_rate": 0.00016059508762635482, + "loss": 1.006, + "step": 800 + }, + { + "epoch": 1.17, + "learning_rate": 0.00016050123382269264, + "loss": 1.0748, + "step": 801 + }, + { + "epoch": 1.18, + "learning_rate": 0.0001604072958803909, + "loss": 1.1378, + "step": 802 + }, + { + "epoch": 1.18, + "learning_rate": 0.00016031327393008845, + "loss": 1.058, + "step": 803 + }, + { + "epoch": 1.18, + "learning_rate": 0.00016021916810254097, + "loss": 0.9827, + "step": 804 + }, + { + "epoch": 1.18, + "learning_rate": 0.00016012497852862075, + "loss": 0.9572, + "step": 805 + }, + { + "epoch": 1.18, + "learning_rate": 0.00016003070533931657, + "loss": 1.0042, + "step": 806 + }, + { + "epoch": 1.18, + "learning_rate": 0.00015993634866573347, + "loss": 0.9521, + "step": 807 + }, + { + "epoch": 1.18, + "learning_rate": 0.0001598419086390927, + "loss": 0.9395, + "step": 808 + }, + { + "epoch": 1.19, + "learning_rate": 0.00015974738539073125, + "loss": 1.0902, + "step": 809 + }, + { + "epoch": 1.19, + "learning_rate": 0.00015965277905210195, + "loss": 1.0408, + "step": 810 + }, + { + "epoch": 1.19, + "learning_rate": 0.00015955808975477319, + "loss": 1.0436, + "step": 811 + }, + { + "epoch": 1.19, + 
"learning_rate": 0.00015946331763042867, + "loss": 1.0845, + "step": 812 + }, + { + "epoch": 1.19, + "learning_rate": 0.00015936846281086736, + "loss": 1.0752, + "step": 813 + }, + { + "epoch": 1.19, + "learning_rate": 0.00015927352542800317, + "loss": 1.0832, + "step": 814 + }, + { + "epoch": 1.2, + "learning_rate": 0.00015917850561386488, + "loss": 0.9901, + "step": 815 + }, + { + "epoch": 1.2, + "learning_rate": 0.00015908340350059583, + "loss": 1.0311, + "step": 816 + }, + { + "epoch": 1.2, + "learning_rate": 0.00015898821922045385, + "loss": 0.9858, + "step": 817 + }, + { + "epoch": 1.2, + "learning_rate": 0.0001588929529058111, + "loss": 0.9541, + "step": 818 + }, + { + "epoch": 1.2, + "learning_rate": 0.00015879760468915372, + "loss": 0.9516, + "step": 819 + }, + { + "epoch": 1.2, + "learning_rate": 0.00015870217470308188, + "loss": 1.0791, + "step": 820 + }, + { + "epoch": 1.2, + "learning_rate": 0.00015860666308030932, + "loss": 0.9099, + "step": 821 + }, + { + "epoch": 1.21, + "learning_rate": 0.00015851106995366337, + "loss": 1.0983, + "step": 822 + }, + { + "epoch": 1.21, + "learning_rate": 0.00015841539545608478, + "loss": 0.9951, + "step": 823 + }, + { + "epoch": 1.21, + "learning_rate": 0.00015831963972062733, + "loss": 0.9661, + "step": 824 + }, + { + "epoch": 1.21, + "learning_rate": 0.00015822380288045792, + "loss": 1.0111, + "step": 825 + }, + { + "epoch": 1.21, + "learning_rate": 0.0001581278850688561, + "loss": 1.0436, + "step": 826 + }, + { + "epoch": 1.21, + "learning_rate": 0.00015803188641921417, + "loss": 1.0916, + "step": 827 + }, + { + "epoch": 1.21, + "learning_rate": 0.0001579358070650367, + "loss": 1.0347, + "step": 828 + }, + { + "epoch": 1.22, + "learning_rate": 0.0001578396471399406, + "loss": 0.9978, + "step": 829 + }, + { + "epoch": 1.22, + "learning_rate": 0.0001577434067776548, + "loss": 1.0036, + "step": 830 + }, + { + "epoch": 1.22, + "learning_rate": 0.00015764708611202015, + "loss": 1.0387, + "step": 831 + }, + { + "epoch": 
1.22, + "learning_rate": 0.00015755068527698902, + "loss": 1.0172, + "step": 832 + }, + { + "epoch": 1.22, + "learning_rate": 0.00015745420440662543, + "loss": 0.9723, + "step": 833 + }, + { + "epoch": 1.22, + "learning_rate": 0.0001573576436351046, + "loss": 0.9662, + "step": 834 + }, + { + "epoch": 1.23, + "learning_rate": 0.000157261003096713, + "loss": 0.9849, + "step": 835 + }, + { + "epoch": 1.23, + "learning_rate": 0.00015716428292584787, + "loss": 1.0198, + "step": 836 + }, + { + "epoch": 1.23, + "learning_rate": 0.00015706748325701732, + "loss": 0.9015, + "step": 837 + }, + { + "epoch": 1.23, + "learning_rate": 0.0001569706042248399, + "loss": 1.001, + "step": 838 + }, + { + "epoch": 1.23, + "learning_rate": 0.0001568736459640447, + "loss": 0.9681, + "step": 839 + }, + { + "epoch": 1.23, + "learning_rate": 0.00015677660860947078, + "loss": 1.0513, + "step": 840 + }, + { + "epoch": 1.23, + "eval_loss": 1.008791446685791, + "eval_runtime": 2.6133, + "eval_samples_per_second": 417.867, + "eval_steps_per_second": 26.404, + "step": 840 + }, + { + "epoch": 1.23, + "learning_rate": 0.0001566794922960674, + "loss": 0.9829, + "step": 841 + }, + { + "epoch": 1.24, + "learning_rate": 0.00015658229715889347, + "loss": 1.0362, + "step": 842 + }, + { + "epoch": 1.24, + "learning_rate": 0.00015648502333311757, + "loss": 0.9736, + "step": 843 + }, + { + "epoch": 1.24, + "learning_rate": 0.0001563876709540178, + "loss": 1.0457, + "step": 844 + }, + { + "epoch": 1.24, + "learning_rate": 0.00015629024015698136, + "loss": 0.9786, + "step": 845 + }, + { + "epoch": 1.24, + "learning_rate": 0.00015619273107750462, + "loss": 1.0595, + "step": 846 + }, + { + "epoch": 1.24, + "learning_rate": 0.0001560951438511927, + "loss": 1.0307, + "step": 847 + }, + { + "epoch": 1.24, + "learning_rate": 0.00015599747861375955, + "loss": 1.0386, + "step": 848 + }, + { + "epoch": 1.25, + "learning_rate": 0.00015589973550102747, + "loss": 0.9916, + "step": 849 + }, + { + "epoch": 1.25, + 
"learning_rate": 0.00015580191464892716, + "loss": 0.9652, + "step": 850 + }, + { + "epoch": 1.25, + "learning_rate": 0.00015570401619349736, + "loss": 0.9691, + "step": 851 + }, + { + "epoch": 1.25, + "learning_rate": 0.00015560604027088477, + "loss": 1.0006, + "step": 852 + }, + { + "epoch": 1.25, + "learning_rate": 0.00015550798701734385, + "loss": 1.0271, + "step": 853 + }, + { + "epoch": 1.25, + "learning_rate": 0.00015540985656923645, + "loss": 1.0591, + "step": 854 + }, + { + "epoch": 1.25, + "learning_rate": 0.00015531164906303207, + "loss": 0.967, + "step": 855 + }, + { + "epoch": 1.26, + "learning_rate": 0.00015521336463530705, + "loss": 1.0466, + "step": 856 + }, + { + "epoch": 1.26, + "learning_rate": 0.0001551150034227449, + "loss": 0.9953, + "step": 857 + }, + { + "epoch": 1.26, + "learning_rate": 0.0001550165655621359, + "loss": 0.9899, + "step": 858 + }, + { + "epoch": 1.26, + "learning_rate": 0.00015491805119037684, + "loss": 0.9742, + "step": 859 + }, + { + "epoch": 1.26, + "learning_rate": 0.00015481946044447099, + "loss": 0.9865, + "step": 860 + }, + { + "epoch": 1.26, + "learning_rate": 0.0001547207934615278, + "loss": 0.9384, + "step": 861 + }, + { + "epoch": 1.27, + "learning_rate": 0.00015462205037876275, + "loss": 1.0216, + "step": 862 + }, + { + "epoch": 1.27, + "learning_rate": 0.00015452323133349714, + "loss": 0.9467, + "step": 863 + }, + { + "epoch": 1.27, + "learning_rate": 0.0001544243364631579, + "loss": 1.0038, + "step": 864 + }, + { + "epoch": 1.27, + "learning_rate": 0.0001543253659052775, + "loss": 0.978, + "step": 865 + }, + { + "epoch": 1.27, + "learning_rate": 0.00015422631979749354, + "loss": 1.0434, + "step": 866 + }, + { + "epoch": 1.27, + "learning_rate": 0.00015412719827754873, + "loss": 1.0091, + "step": 867 + }, + { + "epoch": 1.27, + "learning_rate": 0.00015402800148329071, + "loss": 0.9598, + "step": 868 + }, + { + "epoch": 1.28, + "learning_rate": 0.00015392872955267175, + "loss": 0.9876, + "step": 869 + }, + { + 
"epoch": 1.28, + "learning_rate": 0.00015382938262374865, + "loss": 0.9559, + "step": 870 + }, + { + "epoch": 1.28, + "learning_rate": 0.0001537299608346824, + "loss": 0.9984, + "step": 871 + }, + { + "epoch": 1.28, + "learning_rate": 0.00015363046432373824, + "loss": 1.0171, + "step": 872 + }, + { + "epoch": 1.28, + "learning_rate": 0.0001535308932292853, + "loss": 1.0188, + "step": 873 + }, + { + "epoch": 1.28, + "learning_rate": 0.00015343124768979637, + "loss": 0.9613, + "step": 874 + }, + { + "epoch": 1.28, + "learning_rate": 0.00015333152784384777, + "loss": 0.9572, + "step": 875 + }, + { + "epoch": 1.29, + "learning_rate": 0.0001532317338301192, + "loss": 1.0093, + "step": 876 + }, + { + "epoch": 1.29, + "learning_rate": 0.00015313186578739353, + "loss": 0.9935, + "step": 877 + }, + { + "epoch": 1.29, + "learning_rate": 0.0001530319238545565, + "loss": 1.0371, + "step": 878 + }, + { + "epoch": 1.29, + "learning_rate": 0.00015293190817059667, + "loss": 1.0022, + "step": 879 + }, + { + "epoch": 1.29, + "learning_rate": 0.00015283181887460517, + "loss": 1.0033, + "step": 880 + }, + { + "epoch": 1.29, + "learning_rate": 0.00015273165610577542, + "loss": 0.9986, + "step": 881 + }, + { + "epoch": 1.3, + "learning_rate": 0.00015263142000340312, + "loss": 1.0495, + "step": 882 + }, + { + "epoch": 1.3, + "learning_rate": 0.0001525311107068859, + "loss": 1.017, + "step": 883 + }, + { + "epoch": 1.3, + "learning_rate": 0.00015243072835572318, + "loss": 0.9757, + "step": 884 + }, + { + "epoch": 1.3, + "learning_rate": 0.000152330273089516, + "loss": 1.0342, + "step": 885 + }, + { + "epoch": 1.3, + "learning_rate": 0.0001522297450479668, + "loss": 1.0059, + "step": 886 + }, + { + "epoch": 1.3, + "learning_rate": 0.00015212914437087922, + "loss": 0.9845, + "step": 887 + }, + { + "epoch": 1.3, + "learning_rate": 0.0001520284711981579, + "loss": 1.0365, + "step": 888 + }, + { + "epoch": 1.31, + "learning_rate": 0.0001519277256698083, + "loss": 0.9521, + "step": 889 + }, + { 
+ "epoch": 1.31, + "learning_rate": 0.0001518269079259366, + "loss": 1.0867, + "step": 890 + }, + { + "epoch": 1.31, + "learning_rate": 0.00015172601810674915, + "loss": 1.0444, + "step": 891 + }, + { + "epoch": 1.31, + "learning_rate": 0.00015162505635255287, + "loss": 1.077, + "step": 892 + }, + { + "epoch": 1.31, + "learning_rate": 0.00015152402280375454, + "loss": 0.9883, + "step": 893 + }, + { + "epoch": 1.31, + "learning_rate": 0.0001514229176008607, + "loss": 0.9819, + "step": 894 + }, + { + "epoch": 1.31, + "learning_rate": 0.00015132174088447776, + "loss": 0.9912, + "step": 895 + }, + { + "epoch": 1.32, + "learning_rate": 0.00015122049279531143, + "loss": 0.9575, + "step": 896 + }, + { + "epoch": 1.32, + "learning_rate": 0.00015111917347416671, + "loss": 1.0356, + "step": 897 + }, + { + "epoch": 1.32, + "learning_rate": 0.00015101778306194765, + "loss": 0.9963, + "step": 898 + }, + { + "epoch": 1.32, + "learning_rate": 0.0001509163216996572, + "loss": 0.9728, + "step": 899 + }, + { + "epoch": 1.32, + "learning_rate": 0.00015081478952839693, + "loss": 1.0402, + "step": 900 + }, + { + "epoch": 1.32, + "learning_rate": 0.00015071318668936695, + "loss": 1.0287, + "step": 901 + }, + { + "epoch": 1.32, + "learning_rate": 0.00015061151332386566, + "loss": 1.0505, + "step": 902 + }, + { + "epoch": 1.33, + "learning_rate": 0.00015050976957328938, + "loss": 0.9814, + "step": 903 + }, + { + "epoch": 1.33, + "learning_rate": 0.00015040795557913245, + "loss": 1.0083, + "step": 904 + }, + { + "epoch": 1.33, + "learning_rate": 0.00015030607148298696, + "loss": 1.0871, + "step": 905 + }, + { + "epoch": 1.33, + "learning_rate": 0.00015020411742654237, + "loss": 1.0943, + "step": 906 + }, + { + "epoch": 1.33, + "learning_rate": 0.0001501020935515854, + "loss": 1.0631, + "step": 907 + }, + { + "epoch": 1.33, + "learning_rate": 0.00015000000000000001, + "loss": 1.0615, + "step": 908 + }, + { + "epoch": 1.34, + "learning_rate": 0.00014989783691376696, + "loss": 0.8933, + 
"step": 909 + }, + { + "epoch": 1.34, + "learning_rate": 0.0001497956044349637, + "loss": 1.012, + "step": 910 + }, + { + "epoch": 1.34, + "learning_rate": 0.00014969330270576427, + "loss": 0.9215, + "step": 911 + }, + { + "epoch": 1.34, + "learning_rate": 0.00014959093186843895, + "loss": 0.9894, + "step": 912 + }, + { + "epoch": 1.34, + "learning_rate": 0.00014948849206535412, + "loss": 1.0053, + "step": 913 + }, + { + "epoch": 1.34, + "learning_rate": 0.00014938598343897214, + "loss": 1.0975, + "step": 914 + }, + { + "epoch": 1.34, + "learning_rate": 0.00014928340613185097, + "loss": 1.068, + "step": 915 + }, + { + "epoch": 1.35, + "learning_rate": 0.0001491807602866442, + "loss": 0.9838, + "step": 916 + }, + { + "epoch": 1.35, + "learning_rate": 0.00014907804604610063, + "loss": 1.1493, + "step": 917 + }, + { + "epoch": 1.35, + "learning_rate": 0.00014897526355306428, + "loss": 0.9491, + "step": 918 + }, + { + "epoch": 1.35, + "learning_rate": 0.000148872412950474, + "loss": 1.0252, + "step": 919 + }, + { + "epoch": 1.35, + "learning_rate": 0.00014876949438136347, + "loss": 0.9555, + "step": 920 + }, + { + "epoch": 1.35, + "learning_rate": 0.00014866650798886074, + "loss": 0.9831, + "step": 921 + }, + { + "epoch": 1.35, + "learning_rate": 0.0001485634539161883, + "loss": 1.0957, + "step": 922 + }, + { + "epoch": 1.36, + "learning_rate": 0.0001484603323066627, + "loss": 0.9606, + "step": 923 + }, + { + "epoch": 1.36, + "learning_rate": 0.00014835714330369446, + "loss": 1.0643, + "step": 924 + }, + { + "epoch": 1.36, + "learning_rate": 0.00014825388705078777, + "loss": 1.0219, + "step": 925 + }, + { + "epoch": 1.36, + "learning_rate": 0.00014815056369154038, + "loss": 1.1315, + "step": 926 + }, + { + "epoch": 1.36, + "learning_rate": 0.0001480471733696434, + "loss": 1.0406, + "step": 927 + }, + { + "epoch": 1.36, + "learning_rate": 0.000147943716228881, + "loss": 1.0284, + "step": 928 + }, + { + "epoch": 1.37, + "learning_rate": 0.00014784019241313026, + "loss": 
1.035, + "step": 929 + }, + { + "epoch": 1.37, + "learning_rate": 0.00014773660206636105, + "loss": 1.0562, + "step": 930 + }, + { + "epoch": 1.37, + "learning_rate": 0.0001476329453326357, + "loss": 0.9813, + "step": 931 + }, + { + "epoch": 1.37, + "learning_rate": 0.000147529222356109, + "loss": 1.0865, + "step": 932 + }, + { + "epoch": 1.37, + "learning_rate": 0.0001474254332810277, + "loss": 1.0074, + "step": 933 + }, + { + "epoch": 1.37, + "learning_rate": 0.00014732157825173044, + "loss": 1.0855, + "step": 934 + }, + { + "epoch": 1.37, + "learning_rate": 0.00014721765741264786, + "loss": 0.9785, + "step": 935 + }, + { + "epoch": 1.38, + "learning_rate": 0.0001471136709083018, + "loss": 1.011, + "step": 936 + }, + { + "epoch": 1.38, + "learning_rate": 0.00014700961888330563, + "loss": 1.0484, + "step": 937 + }, + { + "epoch": 1.38, + "learning_rate": 0.0001469055014823637, + "loss": 1.0435, + "step": 938 + }, + { + "epoch": 1.38, + "learning_rate": 0.00014680131885027141, + "loss": 1.0176, + "step": 939 + }, + { + "epoch": 1.38, + "learning_rate": 0.00014669707113191483, + "loss": 0.9542, + "step": 940 + }, + { + "epoch": 1.38, + "learning_rate": 0.00014659275847227042, + "loss": 0.9526, + "step": 941 + }, + { + "epoch": 1.38, + "learning_rate": 0.00014648838101640518, + "loss": 0.9681, + "step": 942 + }, + { + "epoch": 1.39, + "learning_rate": 0.00014638393890947603, + "loss": 0.9072, + "step": 943 + }, + { + "epoch": 1.39, + "learning_rate": 0.0001462794322967299, + "loss": 0.9939, + "step": 944 + }, + { + "epoch": 1.39, + "learning_rate": 0.00014617486132350343, + "loss": 1.018, + "step": 945 + }, + { + "epoch": 1.39, + "learning_rate": 0.0001460702261352226, + "loss": 0.8993, + "step": 946 + }, + { + "epoch": 1.39, + "learning_rate": 0.00014596552687740302, + "loss": 1.0134, + "step": 947 + }, + { + "epoch": 1.39, + "learning_rate": 0.00014586076369564908, + "loss": 0.947, + "step": 948 + }, + { + "epoch": 1.39, + "learning_rate": 0.00014575593673565426, + 
"loss": 1.0697, + "step": 949 + }, + { + "epoch": 1.4, + "learning_rate": 0.00014565104614320065, + "loss": 1.006, + "step": 950 + }, + { + "epoch": 1.4, + "learning_rate": 0.00014554609206415885, + "loss": 1.0262, + "step": 951 + }, + { + "epoch": 1.4, + "learning_rate": 0.00014544107464448775, + "loss": 0.9809, + "step": 952 + }, + { + "epoch": 1.4, + "learning_rate": 0.0001453359940302344, + "loss": 0.981, + "step": 953 + }, + { + "epoch": 1.4, + "learning_rate": 0.00014523085036753354, + "loss": 0.9925, + "step": 954 + }, + { + "epoch": 1.4, + "learning_rate": 0.00014512564380260787, + "loss": 1.0199, + "step": 955 + }, + { + "epoch": 1.41, + "learning_rate": 0.00014502037448176734, + "loss": 0.9715, + "step": 956 + }, + { + "epoch": 1.41, + "learning_rate": 0.00014491504255140927, + "loss": 1.0072, + "step": 957 + }, + { + "epoch": 1.41, + "learning_rate": 0.000144809648158018, + "loss": 1.0659, + "step": 958 + }, + { + "epoch": 1.41, + "learning_rate": 0.00014470419144816483, + "loss": 1.0538, + "step": 959 + }, + { + "epoch": 1.41, + "learning_rate": 0.0001445986725685076, + "loss": 1.0571, + "step": 960 + }, + { + "epoch": 1.41, + "learning_rate": 0.00014449309166579072, + "loss": 0.9701, + "step": 961 + }, + { + "epoch": 1.41, + "learning_rate": 0.00014438744888684482, + "loss": 0.9618, + "step": 962 + }, + { + "epoch": 1.42, + "learning_rate": 0.0001442817443785865, + "loss": 0.9179, + "step": 963 + }, + { + "epoch": 1.42, + "learning_rate": 0.00014417597828801832, + "loss": 1.0613, + "step": 964 + }, + { + "epoch": 1.42, + "learning_rate": 0.00014407015076222846, + "loss": 0.9558, + "step": 965 + }, + { + "epoch": 1.42, + "learning_rate": 0.00014396426194839042, + "loss": 0.9823, + "step": 966 + }, + { + "epoch": 1.42, + "learning_rate": 0.00014385831199376317, + "loss": 0.9968, + "step": 967 + }, + { + "epoch": 1.42, + "learning_rate": 0.00014375230104569044, + "loss": 0.9829, + "step": 968 + }, + { + "epoch": 1.42, + "learning_rate": 
0.00014364622925160098, + "loss": 1.0552, + "step": 969 + }, + { + "epoch": 1.43, + "learning_rate": 0.00014354009675900803, + "loss": 0.993, + "step": 970 + }, + { + "epoch": 1.43, + "learning_rate": 0.00014343390371550935, + "loss": 1.0927, + "step": 971 + }, + { + "epoch": 1.43, + "learning_rate": 0.00014332765026878687, + "loss": 1.0387, + "step": 972 + }, + { + "epoch": 1.43, + "learning_rate": 0.00014322133656660647, + "loss": 0.9255, + "step": 973 + }, + { + "epoch": 1.43, + "learning_rate": 0.00014311496275681783, + "loss": 1.0093, + "step": 974 + }, + { + "epoch": 1.43, + "learning_rate": 0.00014300852898735435, + "loss": 1.0078, + "step": 975 + }, + { + "epoch": 1.44, + "learning_rate": 0.00014290203540623267, + "loss": 0.9161, + "step": 976 + }, + { + "epoch": 1.44, + "learning_rate": 0.00014279548216155266, + "loss": 1.03, + "step": 977 + }, + { + "epoch": 1.44, + "learning_rate": 0.00014268886940149714, + "loss": 1.0364, + "step": 978 + }, + { + "epoch": 1.44, + "learning_rate": 0.0001425821972743318, + "loss": 0.9768, + "step": 979 + }, + { + "epoch": 1.44, + "learning_rate": 0.0001424754659284048, + "loss": 1.1229, + "step": 980 + }, + { + "epoch": 1.44, + "learning_rate": 0.0001423686755121466, + "loss": 1.0362, + "step": 981 + }, + { + "epoch": 1.44, + "learning_rate": 0.00014226182617406996, + "loss": 0.9522, + "step": 982 + }, + { + "epoch": 1.45, + "learning_rate": 0.00014215491806276944, + "loss": 1.0479, + "step": 983 + }, + { + "epoch": 1.45, + "learning_rate": 0.00014204795132692144, + "loss": 1.0671, + "step": 984 + }, + { + "epoch": 1.45, + "learning_rate": 0.00014194092611528384, + "loss": 0.8983, + "step": 985 + }, + { + "epoch": 1.45, + "learning_rate": 0.00014183384257669581, + "loss": 1.004, + "step": 986 + }, + { + "epoch": 1.45, + "learning_rate": 0.00014172670086007774, + "loss": 1.0972, + "step": 987 + }, + { + "epoch": 1.45, + "learning_rate": 0.00014161950111443077, + "loss": 1.0198, + "step": 988 + }, + { + "epoch": 1.45, + 
"learning_rate": 0.00014151224348883692, + "loss": 1.0257, + "step": 989 + }, + { + "epoch": 1.46, + "learning_rate": 0.00014140492813245856, + "loss": 0.9717, + "step": 990 + }, + { + "epoch": 1.46, + "learning_rate": 0.0001412975551945384, + "loss": 0.9455, + "step": 991 + }, + { + "epoch": 1.46, + "learning_rate": 0.0001411901248243993, + "loss": 1.0372, + "step": 992 + }, + { + "epoch": 1.46, + "learning_rate": 0.0001410826371714438, + "loss": 0.9961, + "step": 993 + }, + { + "epoch": 1.46, + "learning_rate": 0.00014097509238515432, + "loss": 1.0599, + "step": 994 + }, + { + "epoch": 1.46, + "learning_rate": 0.00014086749061509258, + "loss": 1.0166, + "step": 995 + }, + { + "epoch": 1.46, + "learning_rate": 0.00014075983201089964, + "loss": 1.0254, + "step": 996 + }, + { + "epoch": 1.47, + "learning_rate": 0.00014065211672229555, + "loss": 0.9979, + "step": 997 + }, + { + "epoch": 1.47, + "learning_rate": 0.00014054434489907915, + "loss": 1.0365, + "step": 998 + }, + { + "epoch": 1.47, + "learning_rate": 0.00014043651669112808, + "loss": 1.0075, + "step": 999 + }, + { + "epoch": 1.47, + "learning_rate": 0.00014032863224839814, + "loss": 0.9743, + "step": 1000 + }, + { + "epoch": 1.47, + "learning_rate": 0.00014022069172092352, + "loss": 1.0056, + "step": 1001 + }, + { + "epoch": 1.47, + "learning_rate": 0.00014011269525881636, + "loss": 0.9647, + "step": 1002 + }, + { + "epoch": 1.48, + "learning_rate": 0.00014000464301226656, + "loss": 1.0912, + "step": 1003 + }, + { + "epoch": 1.48, + "learning_rate": 0.00013989653513154165, + "loss": 0.8811, + "step": 1004 + }, + { + "epoch": 1.48, + "learning_rate": 0.00013978837176698646, + "loss": 1.0667, + "step": 1005 + }, + { + "epoch": 1.48, + "learning_rate": 0.000139680153069023, + "loss": 1.0096, + "step": 1006 + }, + { + "epoch": 1.48, + "learning_rate": 0.00013957187918815032, + "loss": 0.926, + "step": 1007 + }, + { + "epoch": 1.48, + "learning_rate": 0.0001394635502749441, + "loss": 1.0814, + "step": 1008 + }, 
+ { + "epoch": 1.48, + "eval_loss": 1.0040607452392578, + "eval_runtime": 2.6168, + "eval_samples_per_second": 417.304, + "eval_steps_per_second": 26.368, + "step": 1008 + }, + { + "epoch": 1.48, + "learning_rate": 0.0001393551664800566, + "loss": 1.0941, + "step": 1009 + }, + { + "epoch": 1.49, + "learning_rate": 0.00013924672795421637, + "loss": 1.044, + "step": 1010 + }, + { + "epoch": 1.49, + "learning_rate": 0.00013913823484822815, + "loss": 1.049, + "step": 1011 + }, + { + "epoch": 1.49, + "learning_rate": 0.00013902968731297255, + "loss": 0.9891, + "step": 1012 + }, + { + "epoch": 1.49, + "learning_rate": 0.00013892108549940583, + "loss": 0.9663, + "step": 1013 + }, + { + "epoch": 1.49, + "learning_rate": 0.00013881242955855974, + "loss": 1.0298, + "step": 1014 + }, + { + "epoch": 1.49, + "learning_rate": 0.0001387037196415414, + "loss": 1.0083, + "step": 1015 + }, + { + "epoch": 1.49, + "learning_rate": 0.0001385949558995329, + "loss": 0.9182, + "step": 1016 + }, + { + "epoch": 1.5, + "learning_rate": 0.00013848613848379114, + "loss": 1.013, + "step": 1017 + }, + { + "epoch": 1.5, + "learning_rate": 0.00013837726754564785, + "loss": 1.0022, + "step": 1018 + }, + { + "epoch": 1.5, + "learning_rate": 0.000138268343236509, + "loss": 0.9423, + "step": 1019 + }, + { + "epoch": 1.5, + "learning_rate": 0.00013815936570785487, + "loss": 1.058, + "step": 1020 + }, + { + "epoch": 1.5, + "learning_rate": 0.00013805033511123975, + "loss": 0.931, + "step": 1021 + }, + { + "epoch": 1.5, + "learning_rate": 0.00013794125159829172, + "loss": 1.0137, + "step": 1022 + }, + { + "epoch": 1.51, + "learning_rate": 0.00013783211532071246, + "loss": 1.0517, + "step": 1023 + }, + { + "epoch": 1.51, + "learning_rate": 0.000137722926430277, + "loss": 1.0259, + "step": 1024 + }, + { + "epoch": 1.51, + "learning_rate": 0.00013761368507883359, + "loss": 1.0263, + "step": 1025 + }, + { + "epoch": 1.51, + "learning_rate": 0.00013750439141830339, + "loss": 1.0286, + "step": 1026 + }, + { + 
"epoch": 1.51, + "learning_rate": 0.00013739504560068033, + "loss": 0.9749, + "step": 1027 + }, + { + "epoch": 1.51, + "learning_rate": 0.00013728564777803088, + "loss": 0.9317, + "step": 1028 + }, + { + "epoch": 1.51, + "learning_rate": 0.00013717619810249378, + "loss": 1.0653, + "step": 1029 + }, + { + "epoch": 1.52, + "learning_rate": 0.00013706669672627997, + "loss": 0.9623, + "step": 1030 + }, + { + "epoch": 1.52, + "learning_rate": 0.00013695714380167223, + "loss": 0.9911, + "step": 1031 + }, + { + "epoch": 1.52, + "learning_rate": 0.000136847539481025, + "loss": 0.9843, + "step": 1032 + }, + { + "epoch": 1.52, + "learning_rate": 0.0001367378839167643, + "loss": 0.981, + "step": 1033 + }, + { + "epoch": 1.52, + "learning_rate": 0.00013662817726138728, + "loss": 1.0651, + "step": 1034 + }, + { + "epoch": 1.52, + "learning_rate": 0.00013651841966746232, + "loss": 1.0602, + "step": 1035 + }, + { + "epoch": 1.52, + "learning_rate": 0.0001364086112876284, + "loss": 0.9524, + "step": 1036 + }, + { + "epoch": 1.53, + "learning_rate": 0.00013629875227459532, + "loss": 1.0264, + "step": 1037 + }, + { + "epoch": 1.53, + "learning_rate": 0.00013618884278114324, + "loss": 1.0691, + "step": 1038 + }, + { + "epoch": 1.53, + "learning_rate": 0.00013607888296012259, + "loss": 1.0527, + "step": 1039 + }, + { + "epoch": 1.53, + "learning_rate": 0.0001359688729644536, + "loss": 0.9629, + "step": 1040 + }, + { + "epoch": 1.53, + "learning_rate": 0.0001358588129471264, + "loss": 0.957, + "step": 1041 + }, + { + "epoch": 1.53, + "learning_rate": 0.00013574870306120077, + "loss": 1.0976, + "step": 1042 + }, + { + "epoch": 1.54, + "learning_rate": 0.00013563854345980569, + "loss": 0.9317, + "step": 1043 + }, + { + "epoch": 1.54, + "learning_rate": 0.00013552833429613938, + "loss": 1.0359, + "step": 1044 + }, + { + "epoch": 1.54, + "learning_rate": 0.0001354180757234689, + "loss": 1.0642, + "step": 1045 + }, + { + "epoch": 1.54, + "learning_rate": 0.0001353077678951301, + "loss": 
1.0526, + "step": 1046 + }, + { + "epoch": 1.54, + "learning_rate": 0.00013519741096452726, + "loss": 1.0276, + "step": 1047 + }, + { + "epoch": 1.54, + "learning_rate": 0.00013508700508513307, + "loss": 1.0471, + "step": 1048 + }, + { + "epoch": 1.54, + "learning_rate": 0.0001349765504104881, + "loss": 1.0353, + "step": 1049 + }, + { + "epoch": 1.55, + "learning_rate": 0.00013486604709420102, + "loss": 1.0025, + "step": 1050 + }, + { + "epoch": 1.55, + "learning_rate": 0.00013475549528994786, + "loss": 0.9019, + "step": 1051 + }, + { + "epoch": 1.55, + "learning_rate": 0.00013464489515147238, + "loss": 1.0453, + "step": 1052 + }, + { + "epoch": 1.55, + "learning_rate": 0.00013453424683258528, + "loss": 1.0395, + "step": 1053 + }, + { + "epoch": 1.55, + "learning_rate": 0.0001344235504871645, + "loss": 0.8939, + "step": 1054 + }, + { + "epoch": 1.55, + "learning_rate": 0.00013431280626915467, + "loss": 0.9198, + "step": 1055 + }, + { + "epoch": 1.55, + "learning_rate": 0.00013420201433256689, + "loss": 1.0046, + "step": 1056 + }, + { + "epoch": 1.56, + "learning_rate": 0.0001340911748314788, + "loss": 0.9197, + "step": 1057 + }, + { + "epoch": 1.56, + "learning_rate": 0.00013398028792003413, + "loss": 0.9547, + "step": 1058 + }, + { + "epoch": 1.56, + "learning_rate": 0.00013386935375244246, + "loss": 0.968, + "step": 1059 + }, + { + "epoch": 1.56, + "learning_rate": 0.00013375837248297926, + "loss": 0.9611, + "step": 1060 + }, + { + "epoch": 1.56, + "learning_rate": 0.00013364734426598527, + "loss": 1.0125, + "step": 1061 + }, + { + "epoch": 1.56, + "learning_rate": 0.00013353626925586672, + "loss": 1.0179, + "step": 1062 + }, + { + "epoch": 1.56, + "learning_rate": 0.00013342514760709485, + "loss": 1.04, + "step": 1063 + }, + { + "epoch": 1.57, + "learning_rate": 0.00013331397947420576, + "loss": 0.9251, + "step": 1064 + }, + { + "epoch": 1.57, + "learning_rate": 0.00013320276501180015, + "loss": 1.0762, + "step": 1065 + }, + { + "epoch": 1.57, + "learning_rate": 
0.00013309150437454322, + "loss": 1.0137, + "step": 1066 + }, + { + "epoch": 1.57, + "learning_rate": 0.00013298019771716435, + "loss": 0.9981, + "step": 1067 + }, + { + "epoch": 1.57, + "learning_rate": 0.0001328688451944569, + "loss": 1.003, + "step": 1068 + }, + { + "epoch": 1.57, + "learning_rate": 0.00013275744696127805, + "loss": 1.0307, + "step": 1069 + }, + { + "epoch": 1.58, + "learning_rate": 0.00013264600317254853, + "loss": 1.0257, + "step": 1070 + }, + { + "epoch": 1.58, + "learning_rate": 0.00013253451398325249, + "loss": 1.0426, + "step": 1071 + }, + { + "epoch": 1.58, + "learning_rate": 0.00013242297954843711, + "loss": 1.0167, + "step": 1072 + }, + { + "epoch": 1.58, + "learning_rate": 0.00013231140002321253, + "loss": 1.012, + "step": 1073 + }, + { + "epoch": 1.58, + "learning_rate": 0.00013219977556275163, + "loss": 1.0649, + "step": 1074 + }, + { + "epoch": 1.58, + "learning_rate": 0.00013208810632228977, + "loss": 0.9297, + "step": 1075 + }, + { + "epoch": 1.58, + "learning_rate": 0.00013197639245712454, + "loss": 0.9772, + "step": 1076 + }, + { + "epoch": 1.59, + "learning_rate": 0.00013186463412261565, + "loss": 1.0194, + "step": 1077 + }, + { + "epoch": 1.59, + "learning_rate": 0.00013175283147418465, + "loss": 1.0596, + "step": 1078 + }, + { + "epoch": 1.59, + "learning_rate": 0.00013164098466731468, + "loss": 0.9938, + "step": 1079 + }, + { + "epoch": 1.59, + "learning_rate": 0.00013152909385755025, + "loss": 0.9405, + "step": 1080 + }, + { + "epoch": 1.59, + "learning_rate": 0.0001314171592004972, + "loss": 1.0175, + "step": 1081 + }, + { + "epoch": 1.59, + "learning_rate": 0.00013130518085182225, + "loss": 0.9994, + "step": 1082 + }, + { + "epoch": 1.59, + "learning_rate": 0.00013119315896725287, + "loss": 0.9524, + "step": 1083 + }, + { + "epoch": 1.6, + "learning_rate": 0.00013108109370257712, + "loss": 0.9112, + "step": 1084 + }, + { + "epoch": 1.6, + "learning_rate": 0.00013096898521364338, + "loss": 0.9339, + "step": 1085 + }, + { + 
"epoch": 1.6, + "learning_rate": 0.00013085683365636014, + "loss": 0.9718, + "step": 1086 + }, + { + "epoch": 1.6, + "learning_rate": 0.0001307446391866958, + "loss": 0.969, + "step": 1087 + }, + { + "epoch": 1.6, + "learning_rate": 0.00013063240196067836, + "loss": 1.0255, + "step": 1088 + }, + { + "epoch": 1.6, + "learning_rate": 0.00013052012213439536, + "loss": 1.0119, + "step": 1089 + }, + { + "epoch": 1.61, + "learning_rate": 0.00013040779986399362, + "loss": 1.0396, + "step": 1090 + }, + { + "epoch": 1.61, + "learning_rate": 0.00013029543530567884, + "loss": 1.0202, + "step": 1091 + }, + { + "epoch": 1.61, + "learning_rate": 0.0001301830286157157, + "loss": 0.9024, + "step": 1092 + }, + { + "epoch": 1.61, + "learning_rate": 0.00013007057995042732, + "loss": 1.0079, + "step": 1093 + }, + { + "epoch": 1.61, + "learning_rate": 0.0001299580894661953, + "loss": 0.9771, + "step": 1094 + }, + { + "epoch": 1.61, + "learning_rate": 0.0001298455573194594, + "loss": 0.9942, + "step": 1095 + }, + { + "epoch": 1.61, + "learning_rate": 0.00012973298366671725, + "loss": 0.9879, + "step": 1096 + }, + { + "epoch": 1.62, + "learning_rate": 0.00012962036866452422, + "loss": 0.9365, + "step": 1097 + }, + { + "epoch": 1.62, + "learning_rate": 0.0001295077124694932, + "loss": 1.0128, + "step": 1098 + }, + { + "epoch": 1.62, + "learning_rate": 0.00012939501523829444, + "loss": 1.0707, + "step": 1099 + }, + { + "epoch": 1.62, + "learning_rate": 0.00012928227712765504, + "loss": 0.9769, + "step": 1100 + }, + { + "epoch": 1.62, + "learning_rate": 0.00012916949829435922, + "loss": 1.0208, + "step": 1101 + }, + { + "epoch": 1.62, + "learning_rate": 0.0001290566788952477, + "loss": 1.0376, + "step": 1102 + }, + { + "epoch": 1.62, + "learning_rate": 0.00012894381908721756, + "loss": 1.0588, + "step": 1103 + }, + { + "epoch": 1.63, + "learning_rate": 0.0001288309190272222, + "loss": 1.0217, + "step": 1104 + }, + { + "epoch": 1.63, + "learning_rate": 0.00012871797887227087, + "loss": 
0.9684, + "step": 1105 + }, + { + "epoch": 1.63, + "learning_rate": 0.00012860499877942875, + "loss": 0.9753, + "step": 1106 + }, + { + "epoch": 1.63, + "learning_rate": 0.00012849197890581638, + "loss": 1.0094, + "step": 1107 + }, + { + "epoch": 1.63, + "learning_rate": 0.00012837891940860972, + "loss": 1.0346, + "step": 1108 + }, + { + "epoch": 1.63, + "learning_rate": 0.00012826582044503978, + "loss": 0.8741, + "step": 1109 + }, + { + "epoch": 1.63, + "learning_rate": 0.00012815268217239252, + "loss": 1.0223, + "step": 1110 + }, + { + "epoch": 1.64, + "learning_rate": 0.00012803950474800862, + "loss": 0.8748, + "step": 1111 + }, + { + "epoch": 1.64, + "learning_rate": 0.00012792628832928302, + "loss": 1.0296, + "step": 1112 + }, + { + "epoch": 1.64, + "learning_rate": 0.000127813033073665, + "loss": 0.9993, + "step": 1113 + }, + { + "epoch": 1.64, + "learning_rate": 0.00012769973913865794, + "loss": 1.0555, + "step": 1114 + }, + { + "epoch": 1.64, + "learning_rate": 0.00012758640668181882, + "loss": 1.0245, + "step": 1115 + }, + { + "epoch": 1.64, + "learning_rate": 0.0001274730358607583, + "loss": 0.9502, + "step": 1116 + }, + { + "epoch": 1.65, + "learning_rate": 0.00012735962683314042, + "loss": 1.0165, + "step": 1117 + }, + { + "epoch": 1.65, + "learning_rate": 0.0001272461797566823, + "loss": 1.0669, + "step": 1118 + }, + { + "epoch": 1.65, + "learning_rate": 0.000127132694789154, + "loss": 0.8676, + "step": 1119 + }, + { + "epoch": 1.65, + "learning_rate": 0.0001270191720883782, + "loss": 0.9432, + "step": 1120 + }, + { + "epoch": 1.65, + "learning_rate": 0.00012690561181223024, + "loss": 1.0614, + "step": 1121 + }, + { + "epoch": 1.65, + "learning_rate": 0.0001267920141186375, + "loss": 0.9924, + "step": 1122 + }, + { + "epoch": 1.65, + "learning_rate": 0.00012667837916557954, + "loss": 1.139, + "step": 1123 + }, + { + "epoch": 1.66, + "learning_rate": 0.00012656470711108764, + "loss": 1.0043, + "step": 1124 + }, + { + "epoch": 1.66, + "learning_rate": 
0.00012645099811324476, + "loss": 1.0747, + "step": 1125 + }, + { + "epoch": 1.66, + "learning_rate": 0.0001263372523301852, + "loss": 0.9668, + "step": 1126 + }, + { + "epoch": 1.66, + "learning_rate": 0.00012622346992009447, + "loss": 0.9931, + "step": 1127 + }, + { + "epoch": 1.66, + "learning_rate": 0.00012610965104120885, + "loss": 0.9393, + "step": 1128 + }, + { + "epoch": 1.66, + "learning_rate": 0.00012599579585181552, + "loss": 0.9918, + "step": 1129 + }, + { + "epoch": 1.66, + "learning_rate": 0.00012588190451025207, + "loss": 1.0172, + "step": 1130 + }, + { + "epoch": 1.67, + "learning_rate": 0.00012576797717490644, + "loss": 1.0586, + "step": 1131 + }, + { + "epoch": 1.67, + "learning_rate": 0.00012565401400421651, + "loss": 1.0482, + "step": 1132 + }, + { + "epoch": 1.67, + "learning_rate": 0.00012554001515667008, + "loss": 1.0548, + "step": 1133 + }, + { + "epoch": 1.67, + "learning_rate": 0.00012542598079080456, + "loss": 1.0092, + "step": 1134 + }, + { + "epoch": 1.67, + "learning_rate": 0.00012531191106520672, + "loss": 1.0162, + "step": 1135 + }, + { + "epoch": 1.67, + "learning_rate": 0.00012519780613851254, + "loss": 1.0387, + "step": 1136 + }, + { + "epoch": 1.68, + "learning_rate": 0.0001250836661694069, + "loss": 0.9607, + "step": 1137 + }, + { + "epoch": 1.68, + "learning_rate": 0.00012496949131662348, + "loss": 1.0025, + "step": 1138 + }, + { + "epoch": 1.68, + "learning_rate": 0.00012485528173894448, + "loss": 1.0014, + "step": 1139 + }, + { + "epoch": 1.68, + "learning_rate": 0.00012474103759520027, + "loss": 0.9838, + "step": 1140 + }, + { + "epoch": 1.68, + "learning_rate": 0.0001246267590442694, + "loss": 1.0384, + "step": 1141 + }, + { + "epoch": 1.68, + "learning_rate": 0.00012451244624507831, + "loss": 0.9958, + "step": 1142 + }, + { + "epoch": 1.68, + "learning_rate": 0.00012439809935660095, + "loss": 0.9927, + "step": 1143 + }, + { + "epoch": 1.69, + "learning_rate": 0.0001242837185378587, + "loss": 1.0082, + "step": 1144 + }, + { 
+ "epoch": 1.69, + "learning_rate": 0.00012416930394792026, + "loss": 0.9729, + "step": 1145 + }, + { + "epoch": 1.69, + "learning_rate": 0.00012405485574590113, + "loss": 1.0464, + "step": 1146 + }, + { + "epoch": 1.69, + "learning_rate": 0.00012394037409096357, + "loss": 0.987, + "step": 1147 + }, + { + "epoch": 1.69, + "learning_rate": 0.0001238258591423165, + "loss": 0.9402, + "step": 1148 + }, + { + "epoch": 1.69, + "learning_rate": 0.00012371131105921504, + "loss": 1.0293, + "step": 1149 + }, + { + "epoch": 1.69, + "learning_rate": 0.00012359673000096033, + "loss": 0.9418, + "step": 1150 + }, + { + "epoch": 1.7, + "learning_rate": 0.0001234821161268995, + "loss": 0.964, + "step": 1151 + }, + { + "epoch": 1.7, + "learning_rate": 0.00012336746959642526, + "loss": 0.9982, + "step": 1152 + }, + { + "epoch": 1.7, + "learning_rate": 0.0001232527905689757, + "loss": 0.9364, + "step": 1153 + }, + { + "epoch": 1.7, + "learning_rate": 0.00012313807920403419, + "loss": 0.9399, + "step": 1154 + }, + { + "epoch": 1.7, + "learning_rate": 0.0001230233356611289, + "loss": 1.015, + "step": 1155 + }, + { + "epoch": 1.7, + "learning_rate": 0.000122908560099833, + "loss": 1.0214, + "step": 1156 + }, + { + "epoch": 1.7, + "learning_rate": 0.00012279375267976398, + "loss": 1.0262, + "step": 1157 + }, + { + "epoch": 1.71, + "learning_rate": 0.00012267891356058377, + "loss": 1.0277, + "step": 1158 + }, + { + "epoch": 1.71, + "learning_rate": 0.00012256404290199825, + "loss": 1.0095, + "step": 1159 + }, + { + "epoch": 1.71, + "learning_rate": 0.00012244914086375724, + "loss": 1.0314, + "step": 1160 + }, + { + "epoch": 1.71, + "learning_rate": 0.00012233420760565428, + "loss": 0.8282, + "step": 1161 + }, + { + "epoch": 1.71, + "learning_rate": 0.00012221924328752616, + "loss": 0.9709, + "step": 1162 + }, + { + "epoch": 1.71, + "learning_rate": 0.00012210424806925301, + "loss": 0.941, + "step": 1163 + }, + { + "epoch": 1.72, + "learning_rate": 0.00012198922211075778, + "loss": 0.9716, 
+ "step": 1164 + }, + { + "epoch": 1.72, + "learning_rate": 0.00012187416557200633, + "loss": 1.0125, + "step": 1165 + }, + { + "epoch": 1.72, + "learning_rate": 0.00012175907861300697, + "loss": 1.0159, + "step": 1166 + }, + { + "epoch": 1.72, + "learning_rate": 0.00012164396139381029, + "loss": 0.9306, + "step": 1167 + }, + { + "epoch": 1.72, + "learning_rate": 0.00012152881407450905, + "loss": 1.1056, + "step": 1168 + }, + { + "epoch": 1.72, + "learning_rate": 0.00012141363681523776, + "loss": 1.0113, + "step": 1169 + }, + { + "epoch": 1.72, + "learning_rate": 0.00012129842977617265, + "loss": 0.9983, + "step": 1170 + }, + { + "epoch": 1.73, + "learning_rate": 0.00012118319311753137, + "loss": 1.0076, + "step": 1171 + }, + { + "epoch": 1.73, + "learning_rate": 0.00012106792699957263, + "loss": 1.1181, + "step": 1172 + }, + { + "epoch": 1.73, + "learning_rate": 0.00012095263158259631, + "loss": 0.8759, + "step": 1173 + }, + { + "epoch": 1.73, + "learning_rate": 0.00012083730702694291, + "loss": 0.9855, + "step": 1174 + }, + { + "epoch": 1.73, + "learning_rate": 0.00012072195349299345, + "loss": 1.1361, + "step": 1175 + }, + { + "epoch": 1.73, + "learning_rate": 0.00012060657114116926, + "loss": 1.0275, + "step": 1176 + }, + { + "epoch": 1.73, + "eval_loss": 0.9928944110870361, + "eval_runtime": 2.6469, + "eval_samples_per_second": 412.56, + "eval_steps_per_second": 26.068, + "step": 1176 + }, + { + "epoch": 1.73, + "learning_rate": 0.0001204911601319318, + "loss": 1.0256, + "step": 1177 + }, + { + "epoch": 1.74, + "learning_rate": 0.00012037572062578238, + "loss": 0.9218, + "step": 1178 + }, + { + "epoch": 1.74, + "learning_rate": 0.00012026025278326187, + "loss": 1.0394, + "step": 1179 + }, + { + "epoch": 1.74, + "learning_rate": 0.00012014475676495052, + "loss": 1.0318, + "step": 1180 + }, + { + "epoch": 1.74, + "learning_rate": 0.00012002923273146794, + "loss": 1.0361, + "step": 1181 + }, + { + "epoch": 1.74, + "learning_rate": 0.00011991368084347252, + 
"loss": 1.0093, + "step": 1182 + }, + { + "epoch": 1.74, + "learning_rate": 0.00011979810126166151, + "loss": 0.9527, + "step": 1183 + }, + { + "epoch": 1.75, + "learning_rate": 0.00011968249414677055, + "loss": 1.0946, + "step": 1184 + }, + { + "epoch": 1.75, + "learning_rate": 0.00011956685965957368, + "loss": 1.0124, + "step": 1185 + }, + { + "epoch": 1.75, + "learning_rate": 0.000119451197960883, + "loss": 1.0074, + "step": 1186 + }, + { + "epoch": 1.75, + "learning_rate": 0.00011933550921154834, + "loss": 1.0315, + "step": 1187 + }, + { + "epoch": 1.75, + "learning_rate": 0.0001192197935724573, + "loss": 0.9915, + "step": 1188 + }, + { + "epoch": 1.75, + "learning_rate": 0.00011910405120453476, + "loss": 0.9823, + "step": 1189 + }, + { + "epoch": 1.75, + "learning_rate": 0.00011898828226874284, + "loss": 1.0294, + "step": 1190 + }, + { + "epoch": 1.76, + "learning_rate": 0.00011887248692608057, + "loss": 1.0176, + "step": 1191 + }, + { + "epoch": 1.76, + "learning_rate": 0.00011875666533758372, + "loss": 1.0486, + "step": 1192 + }, + { + "epoch": 1.76, + "learning_rate": 0.00011864081766432456, + "loss": 1.0237, + "step": 1193 + }, + { + "epoch": 1.76, + "learning_rate": 0.00011852494406741165, + "loss": 1.0469, + "step": 1194 + }, + { + "epoch": 1.76, + "learning_rate": 0.00011840904470798955, + "loss": 0.9545, + "step": 1195 + }, + { + "epoch": 1.76, + "learning_rate": 0.00011829311974723867, + "loss": 0.9812, + "step": 1196 + }, + { + "epoch": 1.76, + "learning_rate": 0.00011817716934637509, + "loss": 1.0503, + "step": 1197 + }, + { + "epoch": 1.77, + "learning_rate": 0.0001180611936666502, + "loss": 1.0693, + "step": 1198 + }, + { + "epoch": 1.77, + "learning_rate": 0.00011794519286935055, + "loss": 0.9627, + "step": 1199 + }, + { + "epoch": 1.77, + "learning_rate": 0.00011782916711579759, + "loss": 0.9728, + "step": 1200 + }, + { + "epoch": 1.77, + "learning_rate": 0.0001177131165673476, + "loss": 1.13, + "step": 1201 + }, + { + "epoch": 1.77, + 
"learning_rate": 0.0001175970413853912, + "loss": 0.9756, + "step": 1202 + }, + { + "epoch": 1.77, + "learning_rate": 0.00011748094173135337, + "loss": 1.0069, + "step": 1203 + }, + { + "epoch": 1.77, + "learning_rate": 0.00011736481776669306, + "loss": 1.033, + "step": 1204 + }, + { + "epoch": 1.78, + "learning_rate": 0.00011724866965290302, + "loss": 0.9906, + "step": 1205 + }, + { + "epoch": 1.78, + "learning_rate": 0.00011713249755150965, + "loss": 1.1008, + "step": 1206 + }, + { + "epoch": 1.78, + "learning_rate": 0.00011701630162407266, + "loss": 0.9987, + "step": 1207 + }, + { + "epoch": 1.78, + "learning_rate": 0.00011690008203218493, + "loss": 1.0122, + "step": 1208 + }, + { + "epoch": 1.78, + "learning_rate": 0.0001167838389374722, + "loss": 1.0495, + "step": 1209 + }, + { + "epoch": 1.78, + "learning_rate": 0.0001166675725015929, + "loss": 0.9875, + "step": 1210 + }, + { + "epoch": 1.79, + "learning_rate": 0.00011655128288623802, + "loss": 1.0231, + "step": 1211 + }, + { + "epoch": 1.79, + "learning_rate": 0.00011643497025313061, + "loss": 0.9342, + "step": 1212 + }, + { + "epoch": 1.79, + "learning_rate": 0.00011631863476402594, + "loss": 1.1006, + "step": 1213 + }, + { + "epoch": 1.79, + "learning_rate": 0.00011620227658071087, + "loss": 0.9264, + "step": 1214 + }, + { + "epoch": 1.79, + "learning_rate": 0.00011608589586500391, + "loss": 1.1099, + "step": 1215 + }, + { + "epoch": 1.79, + "learning_rate": 0.00011596949277875495, + "loss": 1.0326, + "step": 1216 + }, + { + "epoch": 1.79, + "learning_rate": 0.0001158530674838449, + "loss": 0.9235, + "step": 1217 + }, + { + "epoch": 1.8, + "learning_rate": 0.00011573662014218564, + "loss": 1.0227, + "step": 1218 + }, + { + "epoch": 1.8, + "learning_rate": 0.00011562015091571963, + "loss": 1.0028, + "step": 1219 + }, + { + "epoch": 1.8, + "learning_rate": 0.00011550365996641979, + "loss": 1.0744, + "step": 1220 + }, + { + "epoch": 1.8, + "learning_rate": 0.00011538714745628931, + "loss": 0.9521, + "step": 
1221 + }, + { + "epoch": 1.8, + "learning_rate": 0.00011527061354736129, + "loss": 1.0171, + "step": 1222 + }, + { + "epoch": 1.8, + "learning_rate": 0.00011515405840169861, + "loss": 1.0481, + "step": 1223 + }, + { + "epoch": 1.8, + "learning_rate": 0.00011503748218139369, + "loss": 1.0034, + "step": 1224 + }, + { + "epoch": 1.81, + "learning_rate": 0.00011492088504856826, + "loss": 1.1384, + "step": 1225 + }, + { + "epoch": 1.81, + "learning_rate": 0.00011480426716537315, + "loss": 1.0268, + "step": 1226 + }, + { + "epoch": 1.81, + "learning_rate": 0.00011468762869398802, + "loss": 1.003, + "step": 1227 + }, + { + "epoch": 1.81, + "learning_rate": 0.00011457096979662114, + "loss": 1.1087, + "step": 1228 + }, + { + "epoch": 1.81, + "learning_rate": 0.00011445429063550926, + "loss": 1.0809, + "step": 1229 + }, + { + "epoch": 1.81, + "learning_rate": 0.00011433759137291727, + "loss": 1.0054, + "step": 1230 + }, + { + "epoch": 1.82, + "learning_rate": 0.00011422087217113795, + "loss": 0.9416, + "step": 1231 + }, + { + "epoch": 1.82, + "learning_rate": 0.00011410413319249194, + "loss": 1.0153, + "step": 1232 + }, + { + "epoch": 1.82, + "learning_rate": 0.00011398737459932728, + "loss": 1.0622, + "step": 1233 + }, + { + "epoch": 1.82, + "learning_rate": 0.00011387059655401932, + "loss": 1.0792, + "step": 1234 + }, + { + "epoch": 1.82, + "learning_rate": 0.00011375379921897051, + "loss": 0.9822, + "step": 1235 + }, + { + "epoch": 1.82, + "learning_rate": 0.00011363698275661001, + "loss": 0.9949, + "step": 1236 + }, + { + "epoch": 1.82, + "learning_rate": 0.00011352014732939369, + "loss": 0.9653, + "step": 1237 + }, + { + "epoch": 1.83, + "learning_rate": 0.00011340329309980377, + "loss": 1.0694, + "step": 1238 + }, + { + "epoch": 1.83, + "learning_rate": 0.00011328642023034857, + "loss": 0.9925, + "step": 1239 + }, + { + "epoch": 1.83, + "learning_rate": 0.00011316952888356237, + "loss": 0.9829, + "step": 1240 + }, + { + "epoch": 1.83, + "learning_rate": 
0.00011305261922200519, + "loss": 0.9659, + "step": 1241 + }, + { + "epoch": 1.83, + "learning_rate": 0.00011293569140826239, + "loss": 1.109, + "step": 1242 + }, + { + "epoch": 1.83, + "learning_rate": 0.00011281874560494472, + "loss": 1.0614, + "step": 1243 + }, + { + "epoch": 1.83, + "learning_rate": 0.00011270178197468789, + "loss": 0.9013, + "step": 1244 + }, + { + "epoch": 1.84, + "learning_rate": 0.00011258480068015235, + "loss": 1.0049, + "step": 1245 + }, + { + "epoch": 1.84, + "learning_rate": 0.00011246780188402322, + "loss": 0.9746, + "step": 1246 + }, + { + "epoch": 1.84, + "learning_rate": 0.00011235078574900984, + "loss": 1.1433, + "step": 1247 + }, + { + "epoch": 1.84, + "learning_rate": 0.00011223375243784573, + "loss": 1.0196, + "step": 1248 + }, + { + "epoch": 1.84, + "learning_rate": 0.00011211670211328833, + "loss": 0.9859, + "step": 1249 + }, + { + "epoch": 1.84, + "learning_rate": 0.0001119996349381187, + "loss": 0.9037, + "step": 1250 + }, + { + "epoch": 1.85, + "learning_rate": 0.0001118825510751413, + "loss": 1.0481, + "step": 1251 + }, + { + "epoch": 1.85, + "learning_rate": 0.00011176545068718385, + "loss": 1.0324, + "step": 1252 + }, + { + "epoch": 1.85, + "learning_rate": 0.00011164833393709706, + "loss": 1.0155, + "step": 1253 + }, + { + "epoch": 1.85, + "learning_rate": 0.00011153120098775434, + "loss": 0.967, + "step": 1254 + }, + { + "epoch": 1.85, + "learning_rate": 0.00011141405200205166, + "loss": 0.9766, + "step": 1255 + }, + { + "epoch": 1.85, + "learning_rate": 0.00011129688714290729, + "loss": 1.0075, + "step": 1256 + }, + { + "epoch": 1.85, + "learning_rate": 0.00011117970657326158, + "loss": 0.9472, + "step": 1257 + }, + { + "epoch": 1.86, + "learning_rate": 0.00011106251045607674, + "loss": 0.9949, + "step": 1258 + }, + { + "epoch": 1.86, + "learning_rate": 0.00011094529895433652, + "loss": 1.0302, + "step": 1259 + }, + { + "epoch": 1.86, + "learning_rate": 0.0001108280722310462, + "loss": 1.0538, + "step": 1260 + }, + { 
+ "epoch": 1.86, + "learning_rate": 0.00011071083044923214, + "loss": 1.0025, + "step": 1261 + }, + { + "epoch": 1.86, + "learning_rate": 0.00011059357377194161, + "loss": 1.024, + "step": 1262 + }, + { + "epoch": 1.86, + "learning_rate": 0.00011047630236224271, + "loss": 0.9452, + "step": 1263 + }, + { + "epoch": 1.86, + "learning_rate": 0.00011035901638322392, + "loss": 1.0055, + "step": 1264 + }, + { + "epoch": 1.87, + "learning_rate": 0.00011024171599799409, + "loss": 0.9875, + "step": 1265 + }, + { + "epoch": 1.87, + "learning_rate": 0.00011012440136968196, + "loss": 0.9582, + "step": 1266 + }, + { + "epoch": 1.87, + "learning_rate": 0.00011000707266143617, + "loss": 0.9986, + "step": 1267 + }, + { + "epoch": 1.87, + "learning_rate": 0.00010988973003642499, + "loss": 1.0328, + "step": 1268 + }, + { + "epoch": 1.87, + "learning_rate": 0.0001097723736578359, + "loss": 1.0108, + "step": 1269 + }, + { + "epoch": 1.87, + "learning_rate": 0.00010965500368887567, + "loss": 0.9941, + "step": 1270 + }, + { + "epoch": 1.87, + "learning_rate": 0.00010953762029276982, + "loss": 1.0842, + "step": 1271 + }, + { + "epoch": 1.88, + "learning_rate": 0.00010942022363276264, + "loss": 0.994, + "step": 1272 + }, + { + "epoch": 1.88, + "learning_rate": 0.00010930281387211683, + "loss": 1.0151, + "step": 1273 + }, + { + "epoch": 1.88, + "learning_rate": 0.00010918539117411333, + "loss": 1.0172, + "step": 1274 + }, + { + "epoch": 1.88, + "learning_rate": 0.00010906795570205104, + "loss": 1.0698, + "step": 1275 + }, + { + "epoch": 1.88, + "learning_rate": 0.00010895050761924668, + "loss": 0.9835, + "step": 1276 + }, + { + "epoch": 1.88, + "learning_rate": 0.0001088330470890344, + "loss": 0.9461, + "step": 1277 + }, + { + "epoch": 1.89, + "learning_rate": 0.00010871557427476583, + "loss": 1.0394, + "step": 1278 + }, + { + "epoch": 1.89, + "learning_rate": 0.00010859808933980948, + "loss": 0.9639, + "step": 1279 + }, + { + "epoch": 1.89, + "learning_rate": 0.00010848059244755093, + 
"loss": 0.9863, + "step": 1280 + }, + { + "epoch": 1.89, + "learning_rate": 0.00010836308376139221, + "loss": 1.0728, + "step": 1281 + }, + { + "epoch": 1.89, + "learning_rate": 0.00010824556344475181, + "loss": 0.9989, + "step": 1282 + }, + { + "epoch": 1.89, + "learning_rate": 0.00010812803166106444, + "loss": 0.962, + "step": 1283 + }, + { + "epoch": 1.89, + "learning_rate": 0.00010801048857378071, + "loss": 0.8658, + "step": 1284 + }, + { + "epoch": 1.9, + "learning_rate": 0.00010789293434636698, + "loss": 1.0488, + "step": 1285 + }, + { + "epoch": 1.9, + "learning_rate": 0.00010777536914230508, + "loss": 1.0183, + "step": 1286 + }, + { + "epoch": 1.9, + "learning_rate": 0.00010765779312509208, + "loss": 0.9535, + "step": 1287 + }, + { + "epoch": 1.9, + "learning_rate": 0.00010754020645824017, + "loss": 0.9978, + "step": 1288 + }, + { + "epoch": 1.9, + "learning_rate": 0.00010742260930527625, + "loss": 0.8929, + "step": 1289 + }, + { + "epoch": 1.9, + "learning_rate": 0.0001073050018297419, + "loss": 0.9762, + "step": 1290 + }, + { + "epoch": 1.9, + "learning_rate": 0.00010718738419519297, + "loss": 1.0559, + "step": 1291 + }, + { + "epoch": 1.91, + "learning_rate": 0.00010706975656519946, + "loss": 1.0327, + "step": 1292 + }, + { + "epoch": 1.91, + "learning_rate": 0.00010695211910334537, + "loss": 1.0322, + "step": 1293 + }, + { + "epoch": 1.91, + "learning_rate": 0.00010683447197322817, + "loss": 1.0542, + "step": 1294 + }, + { + "epoch": 1.91, + "learning_rate": 0.00010671681533845899, + "loss": 1.0521, + "step": 1295 + }, + { + "epoch": 1.91, + "learning_rate": 0.00010659914936266206, + "loss": 0.9967, + "step": 1296 + }, + { + "epoch": 1.91, + "learning_rate": 0.00010648147420947461, + "loss": 1.0491, + "step": 1297 + }, + { + "epoch": 1.92, + "learning_rate": 0.00010636379004254664, + "loss": 0.9035, + "step": 1298 + }, + { + "epoch": 1.92, + "learning_rate": 0.00010624609702554069, + "loss": 1.0704, + "step": 1299 + }, + { + "epoch": 1.92, + 
"learning_rate": 0.00010612839532213164, + "loss": 0.9533, + "step": 1300 + }, + { + "epoch": 1.92, + "learning_rate": 0.00010601068509600642, + "loss": 1.0396, + "step": 1301 + }, + { + "epoch": 1.92, + "learning_rate": 0.00010589296651086376, + "loss": 0.9543, + "step": 1302 + }, + { + "epoch": 1.92, + "learning_rate": 0.0001057752397304141, + "loss": 1.0591, + "step": 1303 + }, + { + "epoch": 1.92, + "learning_rate": 0.00010565750491837925, + "loss": 1.1191, + "step": 1304 + }, + { + "epoch": 1.93, + "learning_rate": 0.00010553976223849218, + "loss": 0.916, + "step": 1305 + }, + { + "epoch": 1.93, + "learning_rate": 0.00010542201185449678, + "loss": 0.9732, + "step": 1306 + }, + { + "epoch": 1.93, + "learning_rate": 0.00010530425393014774, + "loss": 1.01, + "step": 1307 + }, + { + "epoch": 1.93, + "learning_rate": 0.00010518648862921012, + "loss": 0.9849, + "step": 1308 + }, + { + "epoch": 1.93, + "learning_rate": 0.0001050687161154593, + "loss": 1.0519, + "step": 1309 + }, + { + "epoch": 1.93, + "learning_rate": 0.00010495093655268071, + "loss": 1.0539, + "step": 1310 + }, + { + "epoch": 1.93, + "learning_rate": 0.00010483315010466952, + "loss": 0.9922, + "step": 1311 + }, + { + "epoch": 1.94, + "learning_rate": 0.00010471535693523057, + "loss": 1.0048, + "step": 1312 + }, + { + "epoch": 1.94, + "learning_rate": 0.00010459755720817797, + "loss": 1.0576, + "step": 1313 + }, + { + "epoch": 1.94, + "learning_rate": 0.00010447975108733492, + "loss": 1.0268, + "step": 1314 + }, + { + "epoch": 1.94, + "learning_rate": 0.00010436193873653361, + "loss": 1.0566, + "step": 1315 + }, + { + "epoch": 1.94, + "learning_rate": 0.00010424412031961484, + "loss": 1.0294, + "step": 1316 + }, + { + "epoch": 1.94, + "learning_rate": 0.00010412629600042785, + "loss": 1.0808, + "step": 1317 + }, + { + "epoch": 1.94, + "learning_rate": 0.00010400846594283012, + "loss": 1.0487, + "step": 1318 + }, + { + "epoch": 1.95, + "learning_rate": 0.00010389063031068698, + "loss": 1.04, + "step": 
1319 + }, + { + "epoch": 1.95, + "learning_rate": 0.00010377278926787173, + "loss": 1.033, + "step": 1320 + }, + { + "epoch": 1.95, + "learning_rate": 0.000103654942978265, + "loss": 0.9637, + "step": 1321 + }, + { + "epoch": 1.95, + "learning_rate": 0.00010353709160575489, + "loss": 0.9665, + "step": 1322 + }, + { + "epoch": 1.95, + "learning_rate": 0.00010341923531423634, + "loss": 1.0079, + "step": 1323 + }, + { + "epoch": 1.95, + "learning_rate": 0.00010330137426761135, + "loss": 0.9989, + "step": 1324 + }, + { + "epoch": 1.96, + "learning_rate": 0.00010318350862978848, + "loss": 1.0103, + "step": 1325 + }, + { + "epoch": 1.96, + "learning_rate": 0.00010306563856468253, + "loss": 0.9872, + "step": 1326 + }, + { + "epoch": 1.96, + "learning_rate": 0.00010294776423621464, + "loss": 0.9684, + "step": 1327 + }, + { + "epoch": 1.96, + "learning_rate": 0.00010282988580831183, + "loss": 0.9745, + "step": 1328 + }, + { + "epoch": 1.96, + "learning_rate": 0.00010271200344490674, + "loss": 1.0621, + "step": 1329 + }, + { + "epoch": 1.96, + "learning_rate": 0.0001025941173099376, + "loss": 1.0639, + "step": 1330 + }, + { + "epoch": 1.96, + "learning_rate": 0.00010247622756734774, + "loss": 0.914, + "step": 1331 + }, + { + "epoch": 1.97, + "learning_rate": 0.00010235833438108571, + "loss": 1.0135, + "step": 1332 + }, + { + "epoch": 1.97, + "learning_rate": 0.00010224043791510465, + "loss": 1.0132, + "step": 1333 + }, + { + "epoch": 1.97, + "learning_rate": 0.00010212253833336237, + "loss": 0.9912, + "step": 1334 + }, + { + "epoch": 1.97, + "learning_rate": 0.00010200463579982098, + "loss": 0.9869, + "step": 1335 + }, + { + "epoch": 1.97, + "learning_rate": 0.0001018867304784467, + "loss": 0.9784, + "step": 1336 + }, + { + "epoch": 1.97, + "learning_rate": 0.00010176882253320967, + "loss": 0.9837, + "step": 1337 + }, + { + "epoch": 1.97, + "learning_rate": 0.0001016509121280836, + "loss": 1.039, + "step": 1338 + }, + { + "epoch": 1.98, + "learning_rate": 
0.00010153299942704566, + "loss": 0.9984, + "step": 1339 + }, + { + "epoch": 1.98, + "learning_rate": 0.00010141508459407623, + "loss": 1.0526, + "step": 1340 + }, + { + "epoch": 1.98, + "learning_rate": 0.00010129716779315862, + "loss": 1.0581, + "step": 1341 + }, + { + "epoch": 1.98, + "learning_rate": 0.0001011792491882789, + "loss": 1.0607, + "step": 1342 + }, + { + "epoch": 1.98, + "learning_rate": 0.00010106132894342564, + "loss": 0.9734, + "step": 1343 + }, + { + "epoch": 1.98, + "learning_rate": 0.00010094340722258969, + "loss": 0.8872, + "step": 1344 + }, + { + "epoch": 1.98, + "eval_loss": 0.9883129000663757, + "eval_runtime": 2.6191, + "eval_samples_per_second": 416.937, + "eval_steps_per_second": 26.345, + "step": 1344 + }, + { + "epoch": 1.99, + "learning_rate": 0.00010082548418976399, + "loss": 1.0538, + "step": 1345 + }, + { + "epoch": 1.99, + "learning_rate": 0.00010070756000894321, + "loss": 0.9638, + "step": 1346 + }, + { + "epoch": 1.99, + "learning_rate": 0.00010058963484412372, + "loss": 1.0556, + "step": 1347 + }, + { + "epoch": 1.99, + "learning_rate": 0.00010047170885930324, + "loss": 1.0462, + "step": 1348 + }, + { + "epoch": 1.99, + "learning_rate": 0.00010035378221848053, + "loss": 0.9999, + "step": 1349 + }, + { + "epoch": 1.99, + "learning_rate": 0.00010023585508565538, + "loss": 0.9644, + "step": 1350 + }, + { + "epoch": 1.99, + "learning_rate": 0.00010011792762482826, + "loss": 1.0601, + "step": 1351 + }, + { + "epoch": 2.0, + "learning_rate": 0.0001, + "loss": 0.9745, + "step": 1352 + }, + { + "epoch": 2.0, + "learning_rate": 9.988207237517178e-05, + "loss": 1.0911, + "step": 1353 + }, + { + "epoch": 2.0, + "learning_rate": 9.976414491434463e-05, + "loss": 0.9607, + "step": 1354 + }, + { + "epoch": 2.0, + "learning_rate": 9.964621778151946e-05, + "loss": 1.0151, + "step": 1355 + }, + { + "epoch": 2.0, + "learning_rate": 9.95282911406968e-05, + "loss": 1.036, + "step": 1356 + }, + { + "epoch": 2.0, + "learning_rate": 
9.94103651558763e-05, + "loss": 1.0518, + "step": 1357 + }, + { + "epoch": 2.0, + "learning_rate": 9.929243999105682e-05, + "loss": 0.9251, + "step": 1358 + }, + { + "epoch": 2.01, + "learning_rate": 9.917451581023607e-05, + "loss": 0.9899, + "step": 1359 + }, + { + "epoch": 2.01, + "learning_rate": 9.905659277741032e-05, + "loss": 0.9737, + "step": 1360 + }, + { + "epoch": 2.01, + "learning_rate": 9.89386710565744e-05, + "loss": 1.0575, + "step": 1361 + }, + { + "epoch": 2.01, + "learning_rate": 9.882075081172112e-05, + "loss": 1.0074, + "step": 1362 + }, + { + "epoch": 2.01, + "learning_rate": 9.870283220684142e-05, + "loss": 0.9959, + "step": 1363 + }, + { + "epoch": 2.01, + "learning_rate": 9.858491540592382e-05, + "loss": 1.039, + "step": 1364 + }, + { + "epoch": 2.01, + "learning_rate": 9.846700057295435e-05, + "loss": 1.0228, + "step": 1365 + }, + { + "epoch": 2.02, + "learning_rate": 9.834908787191642e-05, + "loss": 1.014, + "step": 1366 + }, + { + "epoch": 2.02, + "learning_rate": 9.823117746679034e-05, + "loss": 0.9919, + "step": 1367 + }, + { + "epoch": 2.02, + "learning_rate": 9.811326952155331e-05, + "loss": 1.0975, + "step": 1368 + }, + { + "epoch": 2.02, + "learning_rate": 9.799536420017906e-05, + "loss": 0.934, + "step": 1369 + }, + { + "epoch": 2.0, + "learning_rate": 9.787746166663764e-05, + "loss": 0.9068, + "step": 1370 + }, + { + "epoch": 2.0, + "learning_rate": 9.775956208489536e-05, + "loss": 0.8948, + "step": 1371 + }, + { + "epoch": 2.0, + "learning_rate": 9.764166561891432e-05, + "loss": 0.9547, + "step": 1372 + }, + { + "epoch": 2.01, + "learning_rate": 9.752377243265229e-05, + "loss": 0.933, + "step": 1373 + }, + { + "epoch": 2.01, + "learning_rate": 9.740588269006246e-05, + "loss": 0.9899, + "step": 1374 + }, + { + "epoch": 2.01, + "learning_rate": 9.728799655509327e-05, + "loss": 0.9059, + "step": 1375 + }, + { + "epoch": 2.01, + "learning_rate": 9.71701141916882e-05, + "loss": 0.9062, + "step": 1376 + }, + { + "epoch": 2.01, + 
"learning_rate": 9.705223576378539e-05, + "loss": 0.9101, + "step": 1377 + }, + { + "epoch": 2.01, + "learning_rate": 9.69343614353175e-05, + "loss": 0.9635, + "step": 1378 + }, + { + "epoch": 2.01, + "learning_rate": 9.681649137021158e-05, + "loss": 0.8867, + "step": 1379 + }, + { + "epoch": 2.02, + "learning_rate": 9.669862573238863e-05, + "loss": 0.968, + "step": 1380 + }, + { + "epoch": 2.02, + "learning_rate": 9.658076468576368e-05, + "loss": 0.9432, + "step": 1381 + }, + { + "epoch": 2.02, + "learning_rate": 9.646290839424515e-05, + "loss": 0.9186, + "step": 1382 + }, + { + "epoch": 2.02, + "learning_rate": 9.6345057021735e-05, + "loss": 1.038, + "step": 1383 + }, + { + "epoch": 2.02, + "learning_rate": 9.622721073212832e-05, + "loss": 0.9284, + "step": 1384 + }, + { + "epoch": 2.02, + "learning_rate": 9.610936968931303e-05, + "loss": 0.9076, + "step": 1385 + }, + { + "epoch": 2.03, + "learning_rate": 9.599153405716992e-05, + "loss": 0.9196, + "step": 1386 + }, + { + "epoch": 2.03, + "learning_rate": 9.587370399957217e-05, + "loss": 0.9589, + "step": 1387 + }, + { + "epoch": 2.03, + "learning_rate": 9.57558796803852e-05, + "loss": 0.9658, + "step": 1388 + }, + { + "epoch": 2.03, + "learning_rate": 9.563806126346642e-05, + "loss": 0.8864, + "step": 1389 + }, + { + "epoch": 2.03, + "learning_rate": 9.55202489126651e-05, + "loss": 1.0115, + "step": 1390 + }, + { + "epoch": 2.03, + "learning_rate": 9.540244279182205e-05, + "loss": 0.9058, + "step": 1391 + }, + { + "epoch": 2.03, + "learning_rate": 9.528464306476945e-05, + "loss": 0.8686, + "step": 1392 + }, + { + "epoch": 2.04, + "learning_rate": 9.516684989533051e-05, + "loss": 0.9784, + "step": 1393 + }, + { + "epoch": 2.04, + "learning_rate": 9.504906344731932e-05, + "loss": 1.0002, + "step": 1394 + }, + { + "epoch": 2.04, + "learning_rate": 9.49312838845407e-05, + "loss": 0.9873, + "step": 1395 + }, + { + "epoch": 2.04, + "learning_rate": 9.48135113707899e-05, + "loss": 0.9026, + "step": 1396 + }, + { + 
"epoch": 2.04, + "learning_rate": 9.46957460698523e-05, + "loss": 0.9592, + "step": 1397 + }, + { + "epoch": 2.04, + "learning_rate": 9.457798814550323e-05, + "loss": 0.8996, + "step": 1398 + }, + { + "epoch": 2.04, + "learning_rate": 9.446023776150787e-05, + "loss": 1.0221, + "step": 1399 + }, + { + "epoch": 2.05, + "learning_rate": 9.434249508162076e-05, + "loss": 0.9343, + "step": 1400 + }, + { + "epoch": 2.05, + "learning_rate": 9.422476026958593e-05, + "loss": 0.9033, + "step": 1401 + }, + { + "epoch": 2.05, + "learning_rate": 9.410703348913627e-05, + "loss": 0.9904, + "step": 1402 + }, + { + "epoch": 2.05, + "learning_rate": 9.398931490399363e-05, + "loss": 0.8898, + "step": 1403 + }, + { + "epoch": 2.05, + "learning_rate": 9.38716046778684e-05, + "loss": 0.926, + "step": 1404 + }, + { + "epoch": 2.05, + "learning_rate": 9.375390297445932e-05, + "loss": 0.9515, + "step": 1405 + }, + { + "epoch": 2.06, + "learning_rate": 9.363620995745337e-05, + "loss": 0.9963, + "step": 1406 + }, + { + "epoch": 2.06, + "learning_rate": 9.351852579052543e-05, + "loss": 0.8657, + "step": 1407 + }, + { + "epoch": 2.06, + "learning_rate": 9.340085063733797e-05, + "loss": 0.9137, + "step": 1408 + }, + { + "epoch": 2.06, + "learning_rate": 9.328318466154102e-05, + "loss": 0.9354, + "step": 1409 + }, + { + "epoch": 2.06, + "learning_rate": 9.316552802677184e-05, + "loss": 0.8961, + "step": 1410 + }, + { + "epoch": 2.06, + "learning_rate": 9.304788089665466e-05, + "loss": 0.9796, + "step": 1411 + }, + { + "epoch": 2.06, + "learning_rate": 9.293024343480055e-05, + "loss": 0.8937, + "step": 1412 + }, + { + "epoch": 2.07, + "learning_rate": 9.281261580480705e-05, + "loss": 0.9589, + "step": 1413 + }, + { + "epoch": 2.07, + "learning_rate": 9.269499817025814e-05, + "loss": 1.025, + "step": 1414 + }, + { + "epoch": 2.07, + "learning_rate": 9.257739069472374e-05, + "loss": 0.9527, + "step": 1415 + }, + { + "epoch": 2.07, + "learning_rate": 9.245979354175985e-05, + "loss": 0.9236, + "step": 
1416 + }, + { + "epoch": 2.07, + "learning_rate": 9.234220687490794e-05, + "loss": 0.8775, + "step": 1417 + }, + { + "epoch": 2.07, + "learning_rate": 9.222463085769494e-05, + "loss": 0.9171, + "step": 1418 + }, + { + "epoch": 2.07, + "learning_rate": 9.210706565363305e-05, + "loss": 0.8984, + "step": 1419 + }, + { + "epoch": 2.08, + "learning_rate": 9.198951142621929e-05, + "loss": 0.8724, + "step": 1420 + }, + { + "epoch": 2.08, + "learning_rate": 9.187196833893558e-05, + "loss": 0.918, + "step": 1421 + }, + { + "epoch": 2.08, + "learning_rate": 9.175443655524821e-05, + "loss": 0.9457, + "step": 1422 + }, + { + "epoch": 2.08, + "learning_rate": 9.163691623860784e-05, + "loss": 0.9508, + "step": 1423 + }, + { + "epoch": 2.08, + "learning_rate": 9.151940755244912e-05, + "loss": 0.8994, + "step": 1424 + }, + { + "epoch": 2.08, + "learning_rate": 9.14019106601905e-05, + "loss": 0.9118, + "step": 1425 + }, + { + "epoch": 2.08, + "learning_rate": 9.128442572523417e-05, + "loss": 0.8908, + "step": 1426 + }, + { + "epoch": 2.09, + "learning_rate": 9.11669529109656e-05, + "loss": 0.9755, + "step": 1427 + }, + { + "epoch": 2.09, + "learning_rate": 9.104949238075336e-05, + "loss": 0.9706, + "step": 1428 + }, + { + "epoch": 2.09, + "learning_rate": 9.093204429794898e-05, + "loss": 0.9156, + "step": 1429 + }, + { + "epoch": 2.09, + "learning_rate": 9.081460882588667e-05, + "loss": 0.9797, + "step": 1430 + }, + { + "epoch": 2.09, + "learning_rate": 9.069718612788318e-05, + "loss": 0.9402, + "step": 1431 + }, + { + "epoch": 2.09, + "learning_rate": 9.057977636723739e-05, + "loss": 0.9339, + "step": 1432 + }, + { + "epoch": 2.1, + "learning_rate": 9.04623797072302e-05, + "loss": 0.9026, + "step": 1433 + }, + { + "epoch": 2.1, + "learning_rate": 9.034499631112437e-05, + "loss": 0.9424, + "step": 1434 + }, + { + "epoch": 2.1, + "learning_rate": 9.022762634216409e-05, + "loss": 0.948, + "step": 1435 + }, + { + "epoch": 2.1, + "learning_rate": 9.011026996357503e-05, + "loss": 
0.9323, + "step": 1436 + }, + { + "epoch": 2.1, + "learning_rate": 8.999292733856385e-05, + "loss": 1.0037, + "step": 1437 + }, + { + "epoch": 2.1, + "learning_rate": 8.987559863031808e-05, + "loss": 0.9549, + "step": 1438 + }, + { + "epoch": 2.1, + "learning_rate": 8.975828400200592e-05, + "loss": 0.915, + "step": 1439 + }, + { + "epoch": 2.11, + "learning_rate": 8.964098361677607e-05, + "loss": 0.9074, + "step": 1440 + }, + { + "epoch": 2.11, + "learning_rate": 8.952369763775733e-05, + "loss": 0.843, + "step": 1441 + }, + { + "epoch": 2.11, + "learning_rate": 8.94064262280584e-05, + "loss": 0.8654, + "step": 1442 + }, + { + "epoch": 2.11, + "learning_rate": 8.928916955076792e-05, + "loss": 0.9349, + "step": 1443 + }, + { + "epoch": 2.11, + "learning_rate": 8.917192776895382e-05, + "loss": 0.9353, + "step": 1444 + }, + { + "epoch": 2.11, + "learning_rate": 8.90547010456635e-05, + "loss": 0.9137, + "step": 1445 + }, + { + "epoch": 2.11, + "learning_rate": 8.89374895439233e-05, + "loss": 0.9528, + "step": 1446 + }, + { + "epoch": 2.12, + "learning_rate": 8.882029342673844e-05, + "loss": 0.8906, + "step": 1447 + }, + { + "epoch": 2.12, + "learning_rate": 8.870311285709274e-05, + "loss": 0.9351, + "step": 1448 + }, + { + "epoch": 2.12, + "learning_rate": 8.858594799794835e-05, + "loss": 1.0043, + "step": 1449 + }, + { + "epoch": 2.12, + "learning_rate": 8.846879901224566e-05, + "loss": 0.8462, + "step": 1450 + }, + { + "epoch": 2.12, + "learning_rate": 8.835166606290295e-05, + "loss": 1.01, + "step": 1451 + }, + { + "epoch": 2.12, + "learning_rate": 8.823454931281616e-05, + "loss": 0.8858, + "step": 1452 + }, + { + "epoch": 2.13, + "learning_rate": 8.811744892485871e-05, + "loss": 0.9954, + "step": 1453 + }, + { + "epoch": 2.13, + "learning_rate": 8.800036506188129e-05, + "loss": 0.9609, + "step": 1454 + }, + { + "epoch": 2.13, + "learning_rate": 8.788329788671167e-05, + "loss": 0.8891, + "step": 1455 + }, + { + "epoch": 2.13, + "learning_rate": 8.776624756215429e-05, 
+ "loss": 0.9669, + "step": 1456 + }, + { + "epoch": 2.13, + "learning_rate": 8.76492142509902e-05, + "loss": 0.981, + "step": 1457 + }, + { + "epoch": 2.13, + "learning_rate": 8.753219811597683e-05, + "loss": 0.9607, + "step": 1458 + }, + { + "epoch": 2.13, + "learning_rate": 8.741519931984766e-05, + "loss": 0.8752, + "step": 1459 + }, + { + "epoch": 2.14, + "learning_rate": 8.729821802531212e-05, + "loss": 0.8649, + "step": 1460 + }, + { + "epoch": 2.14, + "learning_rate": 8.71812543950553e-05, + "loss": 0.9491, + "step": 1461 + }, + { + "epoch": 2.14, + "learning_rate": 8.706430859173763e-05, + "loss": 0.9335, + "step": 1462 + }, + { + "epoch": 2.14, + "learning_rate": 8.694738077799488e-05, + "loss": 0.9752, + "step": 1463 + }, + { + "epoch": 2.14, + "learning_rate": 8.683047111643763e-05, + "loss": 0.987, + "step": 1464 + }, + { + "epoch": 2.14, + "learning_rate": 8.671357976965147e-05, + "loss": 0.9728, + "step": 1465 + }, + { + "epoch": 2.14, + "learning_rate": 8.659670690019625e-05, + "loss": 0.8742, + "step": 1466 + }, + { + "epoch": 2.15, + "learning_rate": 8.647985267060635e-05, + "loss": 0.8658, + "step": 1467 + }, + { + "epoch": 2.15, + "learning_rate": 8.636301724339004e-05, + "loss": 1.0111, + "step": 1468 + }, + { + "epoch": 2.15, + "learning_rate": 8.624620078102951e-05, + "loss": 0.865, + "step": 1469 + }, + { + "epoch": 2.15, + "learning_rate": 8.612940344598067e-05, + "loss": 0.9406, + "step": 1470 + }, + { + "epoch": 2.15, + "learning_rate": 8.601262540067274e-05, + "loss": 0.9647, + "step": 1471 + }, + { + "epoch": 2.15, + "learning_rate": 8.58958668075081e-05, + "loss": 0.9346, + "step": 1472 + }, + { + "epoch": 2.15, + "learning_rate": 8.577912782886206e-05, + "loss": 0.9357, + "step": 1473 + }, + { + "epoch": 2.16, + "learning_rate": 8.566240862708274e-05, + "loss": 0.9226, + "step": 1474 + }, + { + "epoch": 2.16, + "learning_rate": 8.554570936449074e-05, + "loss": 0.8791, + "step": 1475 + }, + { + "epoch": 2.16, + "learning_rate": 
8.542903020337887e-05, + "loss": 0.9111, + "step": 1476 + }, + { + "epoch": 2.16, + "learning_rate": 8.531237130601199e-05, + "loss": 0.928, + "step": 1477 + }, + { + "epoch": 2.16, + "learning_rate": 8.519573283462687e-05, + "loss": 0.9075, + "step": 1478 + }, + { + "epoch": 2.16, + "learning_rate": 8.507911495143173e-05, + "loss": 0.9911, + "step": 1479 + }, + { + "epoch": 2.17, + "learning_rate": 8.496251781860633e-05, + "loss": 0.9687, + "step": 1480 + }, + { + "epoch": 2.17, + "learning_rate": 8.484594159830141e-05, + "loss": 0.8915, + "step": 1481 + }, + { + "epoch": 2.17, + "learning_rate": 8.472938645263875e-05, + "loss": 0.9551, + "step": 1482 + }, + { + "epoch": 2.17, + "learning_rate": 8.461285254371073e-05, + "loss": 0.902, + "step": 1483 + }, + { + "epoch": 2.17, + "learning_rate": 8.449634003358022e-05, + "loss": 0.963, + "step": 1484 + }, + { + "epoch": 2.17, + "learning_rate": 8.437984908428041e-05, + "loss": 0.8784, + "step": 1485 + }, + { + "epoch": 2.17, + "learning_rate": 8.426337985781438e-05, + "loss": 0.9793, + "step": 1486 + }, + { + "epoch": 2.18, + "learning_rate": 8.414693251615512e-05, + "loss": 0.9435, + "step": 1487 + }, + { + "epoch": 2.18, + "learning_rate": 8.403050722124509e-05, + "loss": 0.9944, + "step": 1488 + }, + { + "epoch": 2.18, + "learning_rate": 8.39141041349961e-05, + "loss": 0.9912, + "step": 1489 + }, + { + "epoch": 2.18, + "learning_rate": 8.379772341928915e-05, + "loss": 0.8934, + "step": 1490 + }, + { + "epoch": 2.18, + "learning_rate": 8.36813652359741e-05, + "loss": 0.9868, + "step": 1491 + }, + { + "epoch": 2.18, + "learning_rate": 8.356502974686941e-05, + "loss": 0.8958, + "step": 1492 + }, + { + "epoch": 2.18, + "learning_rate": 8.344871711376203e-05, + "loss": 0.9075, + "step": 1493 + }, + { + "epoch": 2.19, + "learning_rate": 8.33324274984071e-05, + "loss": 0.954, + "step": 1494 + }, + { + "epoch": 2.19, + "learning_rate": 8.321616106252783e-05, + "loss": 0.9316, + "step": 1495 + }, + { + "epoch": 2.19, + 
"learning_rate": 8.309991796781511e-05, + "loss": 0.9949, + "step": 1496 + }, + { + "epoch": 2.19, + "learning_rate": 8.298369837592735e-05, + "loss": 1.0344, + "step": 1497 + }, + { + "epoch": 2.19, + "learning_rate": 8.286750244849038e-05, + "loss": 0.9034, + "step": 1498 + }, + { + "epoch": 2.19, + "learning_rate": 8.275133034709699e-05, + "loss": 0.9102, + "step": 1499 + }, + { + "epoch": 2.2, + "learning_rate": 8.263518223330697e-05, + "loss": 0.9013, + "step": 1500 + }, + { + "epoch": 2.2, + "learning_rate": 8.251905826864665e-05, + "loss": 1.0105, + "step": 1501 + }, + { + "epoch": 2.2, + "learning_rate": 8.240295861460881e-05, + "loss": 0.9215, + "step": 1502 + }, + { + "epoch": 2.2, + "learning_rate": 8.228688343265242e-05, + "loss": 0.8733, + "step": 1503 + }, + { + "epoch": 2.2, + "learning_rate": 8.217083288420241e-05, + "loss": 0.9214, + "step": 1504 + }, + { + "epoch": 2.2, + "learning_rate": 8.205480713064946e-05, + "loss": 0.9535, + "step": 1505 + }, + { + "epoch": 2.2, + "learning_rate": 8.193880633334982e-05, + "loss": 0.9155, + "step": 1506 + }, + { + "epoch": 2.21, + "learning_rate": 8.182283065362493e-05, + "loss": 0.9045, + "step": 1507 + }, + { + "epoch": 2.21, + "learning_rate": 8.170688025276134e-05, + "loss": 1.0051, + "step": 1508 + }, + { + "epoch": 2.21, + "learning_rate": 8.159095529201049e-05, + "loss": 0.9856, + "step": 1509 + }, + { + "epoch": 2.21, + "learning_rate": 8.147505593258837e-05, + "loss": 0.9189, + "step": 1510 + }, + { + "epoch": 2.21, + "learning_rate": 8.135918233567545e-05, + "loss": 0.9455, + "step": 1511 + }, + { + "epoch": 2.21, + "learning_rate": 8.124333466241632e-05, + "loss": 0.9351, + "step": 1512 + }, + { + "epoch": 2.21, + "eval_loss": 0.9985308051109314, + "eval_runtime": 2.6183, + "eval_samples_per_second": 417.062, + "eval_steps_per_second": 26.353, + "step": 1512 + }, + { + "epoch": 2.21, + "learning_rate": 8.112751307391946e-05, + "loss": 0.9408, + "step": 1513 + }, + { + "epoch": 2.22, + 
"learning_rate": 8.101171773125716e-05, + "loss": 0.8725, + "step": 1514 + }, + { + "epoch": 2.22, + "learning_rate": 8.089594879546525e-05, + "loss": 0.9437, + "step": 1515 + }, + { + "epoch": 2.22, + "learning_rate": 8.078020642754274e-05, + "loss": 0.904, + "step": 1516 + }, + { + "epoch": 2.22, + "learning_rate": 8.066449078845168e-05, + "loss": 0.93, + "step": 1517 + }, + { + "epoch": 2.22, + "learning_rate": 8.054880203911705e-05, + "loss": 1.0044, + "step": 1518 + }, + { + "epoch": 2.22, + "learning_rate": 8.043314034042631e-05, + "loss": 0.957, + "step": 1519 + }, + { + "epoch": 2.23, + "learning_rate": 8.031750585322947e-05, + "loss": 0.9136, + "step": 1520 + }, + { + "epoch": 2.23, + "learning_rate": 8.020189873833852e-05, + "loss": 0.9619, + "step": 1521 + }, + { + "epoch": 2.23, + "learning_rate": 8.00863191565275e-05, + "loss": 0.961, + "step": 1522 + }, + { + "epoch": 2.23, + "learning_rate": 7.99707672685321e-05, + "loss": 0.8426, + "step": 1523 + }, + { + "epoch": 2.23, + "learning_rate": 7.985524323504948e-05, + "loss": 0.9201, + "step": 1524 + }, + { + "epoch": 2.23, + "learning_rate": 7.973974721673815e-05, + "loss": 0.8758, + "step": 1525 + }, + { + "epoch": 2.23, + "learning_rate": 7.962427937421763e-05, + "loss": 0.8926, + "step": 1526 + }, + { + "epoch": 2.24, + "learning_rate": 7.950883986806821e-05, + "loss": 0.9891, + "step": 1527 + }, + { + "epoch": 2.24, + "learning_rate": 7.939342885883076e-05, + "loss": 0.9544, + "step": 1528 + }, + { + "epoch": 2.24, + "learning_rate": 7.927804650700659e-05, + "loss": 0.9669, + "step": 1529 + }, + { + "epoch": 2.24, + "learning_rate": 7.916269297305712e-05, + "loss": 0.9712, + "step": 1530 + }, + { + "epoch": 2.24, + "learning_rate": 7.90473684174037e-05, + "loss": 0.952, + "step": 1531 + }, + { + "epoch": 2.24, + "learning_rate": 7.89320730004274e-05, + "loss": 0.9196, + "step": 1532 + }, + { + "epoch": 2.24, + "learning_rate": 7.881680688246869e-05, + "loss": 1.0468, + "step": 1533 + }, + { + 
"epoch": 2.25, + "learning_rate": 7.870157022382735e-05, + "loss": 0.9278, + "step": 1534 + }, + { + "epoch": 2.25, + "learning_rate": 7.858636318476226e-05, + "loss": 0.9395, + "step": 1535 + }, + { + "epoch": 2.25, + "learning_rate": 7.847118592549099e-05, + "loss": 0.9261, + "step": 1536 + }, + { + "epoch": 2.25, + "learning_rate": 7.835603860618972e-05, + "loss": 0.8694, + "step": 1537 + }, + { + "epoch": 2.25, + "learning_rate": 7.824092138699307e-05, + "loss": 1.0352, + "step": 1538 + }, + { + "epoch": 2.25, + "learning_rate": 7.812583442799368e-05, + "loss": 0.8773, + "step": 1539 + }, + { + "epoch": 2.25, + "learning_rate": 7.801077788924224e-05, + "loss": 0.8136, + "step": 1540 + }, + { + "epoch": 2.26, + "learning_rate": 7.789575193074704e-05, + "loss": 0.9875, + "step": 1541 + }, + { + "epoch": 2.26, + "learning_rate": 7.778075671247385e-05, + "loss": 0.9006, + "step": 1542 + }, + { + "epoch": 2.26, + "learning_rate": 7.766579239434575e-05, + "loss": 0.9503, + "step": 1543 + }, + { + "epoch": 2.26, + "learning_rate": 7.755085913624274e-05, + "loss": 0.9282, + "step": 1544 + }, + { + "epoch": 2.26, + "learning_rate": 7.743595709800176e-05, + "loss": 0.9057, + "step": 1545 + }, + { + "epoch": 2.26, + "learning_rate": 7.732108643941625e-05, + "loss": 0.8577, + "step": 1546 + }, + { + "epoch": 2.27, + "learning_rate": 7.720624732023603e-05, + "loss": 1.0027, + "step": 1547 + }, + { + "epoch": 2.27, + "learning_rate": 7.709143990016702e-05, + "loss": 0.8771, + "step": 1548 + }, + { + "epoch": 2.27, + "learning_rate": 7.697666433887108e-05, + "loss": 0.9449, + "step": 1549 + }, + { + "epoch": 2.27, + "learning_rate": 7.686192079596586e-05, + "loss": 0.9362, + "step": 1550 + }, + { + "epoch": 2.27, + "learning_rate": 7.674720943102432e-05, + "loss": 0.9937, + "step": 1551 + }, + { + "epoch": 2.27, + "learning_rate": 7.663253040357476e-05, + "loss": 0.9218, + "step": 1552 + }, + { + "epoch": 2.27, + "learning_rate": 7.651788387310052e-05, + "loss": 0.9557, + 
"step": 1553 + }, + { + "epoch": 2.28, + "learning_rate": 7.640326999903967e-05, + "loss": 0.9602, + "step": 1554 + }, + { + "epoch": 2.28, + "learning_rate": 7.628868894078501e-05, + "loss": 0.9414, + "step": 1555 + }, + { + "epoch": 2.28, + "learning_rate": 7.617414085768351e-05, + "loss": 0.8552, + "step": 1556 + }, + { + "epoch": 2.28, + "learning_rate": 7.605962590903643e-05, + "loss": 0.9802, + "step": 1557 + }, + { + "epoch": 2.28, + "learning_rate": 7.594514425409893e-05, + "loss": 0.9204, + "step": 1558 + }, + { + "epoch": 2.28, + "learning_rate": 7.583069605207975e-05, + "loss": 0.9143, + "step": 1559 + }, + { + "epoch": 2.28, + "learning_rate": 7.571628146214129e-05, + "loss": 0.9383, + "step": 1560 + }, + { + "epoch": 2.29, + "learning_rate": 7.560190064339908e-05, + "loss": 0.9052, + "step": 1561 + }, + { + "epoch": 2.29, + "learning_rate": 7.548755375492172e-05, + "loss": 0.9063, + "step": 1562 + }, + { + "epoch": 2.29, + "learning_rate": 7.537324095573064e-05, + "loss": 0.9549, + "step": 1563 + }, + { + "epoch": 2.29, + "learning_rate": 7.525896240479976e-05, + "loss": 0.9499, + "step": 1564 + }, + { + "epoch": 2.29, + "learning_rate": 7.514471826105556e-05, + "loss": 0.8888, + "step": 1565 + }, + { + "epoch": 2.29, + "learning_rate": 7.503050868337653e-05, + "loss": 0.9178, + "step": 1566 + }, + { + "epoch": 2.3, + "learning_rate": 7.491633383059313e-05, + "loss": 0.9875, + "step": 1567 + }, + { + "epoch": 2.3, + "learning_rate": 7.48021938614875e-05, + "loss": 0.8715, + "step": 1568 + }, + { + "epoch": 2.3, + "learning_rate": 7.468808893479327e-05, + "loss": 0.9246, + "step": 1569 + }, + { + "epoch": 2.3, + "learning_rate": 7.457401920919544e-05, + "loss": 0.8635, + "step": 1570 + }, + { + "epoch": 2.3, + "learning_rate": 7.445998484332993e-05, + "loss": 0.95, + "step": 1571 + }, + { + "epoch": 2.3, + "learning_rate": 7.434598599578351e-05, + "loss": 0.9175, + "step": 1572 + }, + { + "epoch": 2.3, + "learning_rate": 7.42320228250936e-05, + "loss": 
0.9439, + "step": 1573 + }, + { + "epoch": 2.31, + "learning_rate": 7.411809548974792e-05, + "loss": 0.9417, + "step": 1574 + }, + { + "epoch": 2.31, + "learning_rate": 7.400420414818451e-05, + "loss": 0.9001, + "step": 1575 + }, + { + "epoch": 2.31, + "learning_rate": 7.389034895879118e-05, + "loss": 0.8844, + "step": 1576 + }, + { + "epoch": 2.31, + "learning_rate": 7.37765300799056e-05, + "loss": 0.9377, + "step": 1577 + }, + { + "epoch": 2.31, + "learning_rate": 7.366274766981483e-05, + "loss": 0.9851, + "step": 1578 + }, + { + "epoch": 2.31, + "learning_rate": 7.354900188675525e-05, + "loss": 0.965, + "step": 1579 + }, + { + "epoch": 2.31, + "learning_rate": 7.343529288891239e-05, + "loss": 0.9737, + "step": 1580 + }, + { + "epoch": 2.32, + "learning_rate": 7.332162083442049e-05, + "loss": 0.9686, + "step": 1581 + }, + { + "epoch": 2.32, + "learning_rate": 7.320798588136253e-05, + "loss": 0.968, + "step": 1582 + }, + { + "epoch": 2.32, + "learning_rate": 7.309438818776981e-05, + "loss": 0.8647, + "step": 1583 + }, + { + "epoch": 2.32, + "learning_rate": 7.29808279116218e-05, + "loss": 0.8572, + "step": 1584 + }, + { + "epoch": 2.32, + "learning_rate": 7.286730521084602e-05, + "loss": 0.9264, + "step": 1585 + }, + { + "epoch": 2.32, + "learning_rate": 7.275382024331772e-05, + "loss": 0.9601, + "step": 1586 + }, + { + "epoch": 2.32, + "learning_rate": 7.264037316685962e-05, + "loss": 0.9996, + "step": 1587 + }, + { + "epoch": 2.33, + "learning_rate": 7.252696413924174e-05, + "loss": 0.9598, + "step": 1588 + }, + { + "epoch": 2.33, + "learning_rate": 7.24135933181812e-05, + "loss": 0.8675, + "step": 1589 + }, + { + "epoch": 2.33, + "learning_rate": 7.23002608613421e-05, + "loss": 0.9298, + "step": 1590 + }, + { + "epoch": 2.33, + "learning_rate": 7.218696692633501e-05, + "loss": 1.0448, + "step": 1591 + }, + { + "epoch": 2.33, + "learning_rate": 7.2073711670717e-05, + "loss": 0.9525, + "step": 1592 + }, + { + "epoch": 2.33, + "learning_rate": 
7.196049525199142e-05, + "loss": 0.9621, + "step": 1593 + }, + { + "epoch": 2.34, + "learning_rate": 7.184731782760746e-05, + "loss": 0.933, + "step": 1594 + }, + { + "epoch": 2.34, + "learning_rate": 7.173417955496024e-05, + "loss": 1.0498, + "step": 1595 + }, + { + "epoch": 2.34, + "learning_rate": 7.162108059139032e-05, + "loss": 0.9508, + "step": 1596 + }, + { + "epoch": 2.34, + "learning_rate": 7.150802109418366e-05, + "loss": 0.9854, + "step": 1597 + }, + { + "epoch": 2.34, + "learning_rate": 7.13950012205713e-05, + "loss": 0.971, + "step": 1598 + }, + { + "epoch": 2.34, + "learning_rate": 7.128202112772912e-05, + "loss": 0.9346, + "step": 1599 + }, + { + "epoch": 2.34, + "learning_rate": 7.116908097277781e-05, + "loss": 1.0434, + "step": 1600 + }, + { + "epoch": 2.35, + "learning_rate": 7.105618091278245e-05, + "loss": 0.9354, + "step": 1601 + }, + { + "epoch": 2.35, + "learning_rate": 7.094332110475234e-05, + "loss": 0.9009, + "step": 1602 + }, + { + "epoch": 2.35, + "learning_rate": 7.083050170564077e-05, + "loss": 0.9379, + "step": 1603 + }, + { + "epoch": 2.35, + "learning_rate": 7.071772287234497e-05, + "loss": 0.9488, + "step": 1604 + }, + { + "epoch": 2.35, + "learning_rate": 7.06049847617056e-05, + "loss": 0.8778, + "step": 1605 + }, + { + "epoch": 2.35, + "learning_rate": 7.049228753050681e-05, + "loss": 0.8452, + "step": 1606 + }, + { + "epoch": 2.35, + "learning_rate": 7.037963133547583e-05, + "loss": 0.8394, + "step": 1607 + }, + { + "epoch": 2.36, + "learning_rate": 7.026701633328276e-05, + "loss": 0.8895, + "step": 1608 + }, + { + "epoch": 2.36, + "learning_rate": 7.015444268054059e-05, + "loss": 0.9667, + "step": 1609 + }, + { + "epoch": 2.36, + "learning_rate": 7.004191053380469e-05, + "loss": 0.9573, + "step": 1610 + }, + { + "epoch": 2.36, + "learning_rate": 6.992942004957271e-05, + "loss": 1.0102, + "step": 1611 + }, + { + "epoch": 2.36, + "learning_rate": 6.981697138428434e-05, + "loss": 0.9507, + "step": 1612 + }, + { + "epoch": 2.36, + 
"learning_rate": 6.970456469432117e-05, + "loss": 0.9597, + "step": 1613 + }, + { + "epoch": 2.37, + "learning_rate": 6.959220013600641e-05, + "loss": 0.8432, + "step": 1614 + }, + { + "epoch": 2.37, + "learning_rate": 6.947987786560466e-05, + "loss": 0.9855, + "step": 1615 + }, + { + "epoch": 2.37, + "learning_rate": 6.936759803932167e-05, + "loss": 0.922, + "step": 1616 + }, + { + "epoch": 2.37, + "learning_rate": 6.925536081330424e-05, + "loss": 0.9261, + "step": 1617 + }, + { + "epoch": 2.37, + "learning_rate": 6.914316634363984e-05, + "loss": 0.9046, + "step": 1618 + }, + { + "epoch": 2.37, + "learning_rate": 6.903101478635662e-05, + "loss": 0.93, + "step": 1619 + }, + { + "epoch": 2.37, + "learning_rate": 6.891890629742288e-05, + "loss": 0.8668, + "step": 1620 + }, + { + "epoch": 2.38, + "learning_rate": 6.880684103274715e-05, + "loss": 0.9852, + "step": 1621 + }, + { + "epoch": 2.38, + "learning_rate": 6.869481914817779e-05, + "loss": 0.976, + "step": 1622 + }, + { + "epoch": 2.38, + "learning_rate": 6.85828407995028e-05, + "loss": 0.9287, + "step": 1623 + }, + { + "epoch": 2.38, + "learning_rate": 6.847090614244977e-05, + "loss": 0.9192, + "step": 1624 + }, + { + "epoch": 2.38, + "learning_rate": 6.835901533268536e-05, + "loss": 0.9999, + "step": 1625 + }, + { + "epoch": 2.38, + "learning_rate": 6.824716852581539e-05, + "loss": 0.8986, + "step": 1626 + }, + { + "epoch": 2.38, + "learning_rate": 6.813536587738436e-05, + "loss": 0.9818, + "step": 1627 + }, + { + "epoch": 2.39, + "learning_rate": 6.802360754287547e-05, + "loss": 0.9658, + "step": 1628 + }, + { + "epoch": 2.39, + "learning_rate": 6.791189367771025e-05, + "loss": 0.8793, + "step": 1629 + }, + { + "epoch": 2.39, + "learning_rate": 6.780022443724839e-05, + "loss": 0.9363, + "step": 1630 + }, + { + "epoch": 2.39, + "learning_rate": 6.768859997678751e-05, + "loss": 0.9108, + "step": 1631 + }, + { + "epoch": 2.39, + "learning_rate": 6.757702045156292e-05, + "loss": 0.9561, + "step": 1632 + }, + { + 
"epoch": 2.39, + "learning_rate": 6.74654860167475e-05, + "loss": 0.8761, + "step": 1633 + }, + { + "epoch": 2.39, + "learning_rate": 6.735399682745145e-05, + "loss": 0.9307, + "step": 1634 + }, + { + "epoch": 2.4, + "learning_rate": 6.724255303872197e-05, + "loss": 0.8416, + "step": 1635 + }, + { + "epoch": 2.4, + "learning_rate": 6.713115480554313e-05, + "loss": 0.908, + "step": 1636 + }, + { + "epoch": 2.4, + "learning_rate": 6.701980228283568e-05, + "loss": 0.9147, + "step": 1637 + }, + { + "epoch": 2.4, + "learning_rate": 6.690849562545678e-05, + "loss": 0.9877, + "step": 1638 + }, + { + "epoch": 2.4, + "learning_rate": 6.679723498819986e-05, + "loss": 0.9294, + "step": 1639 + }, + { + "epoch": 2.4, + "learning_rate": 6.668602052579424e-05, + "loss": 0.929, + "step": 1640 + }, + { + "epoch": 2.41, + "learning_rate": 6.657485239290515e-05, + "loss": 0.8996, + "step": 1641 + }, + { + "epoch": 2.41, + "learning_rate": 6.64637307441333e-05, + "loss": 0.8994, + "step": 1642 + }, + { + "epoch": 2.41, + "learning_rate": 6.635265573401474e-05, + "loss": 0.9253, + "step": 1643 + }, + { + "epoch": 2.41, + "learning_rate": 6.624162751702076e-05, + "loss": 0.8867, + "step": 1644 + }, + { + "epoch": 2.41, + "learning_rate": 6.613064624755753e-05, + "loss": 0.987, + "step": 1645 + }, + { + "epoch": 2.41, + "learning_rate": 6.601971207996591e-05, + "loss": 0.9079, + "step": 1646 + }, + { + "epoch": 2.41, + "learning_rate": 6.590882516852122e-05, + "loss": 0.9696, + "step": 1647 + }, + { + "epoch": 2.42, + "learning_rate": 6.579798566743314e-05, + "loss": 0.964, + "step": 1648 + }, + { + "epoch": 2.42, + "learning_rate": 6.568719373084538e-05, + "loss": 0.9736, + "step": 1649 + }, + { + "epoch": 2.42, + "learning_rate": 6.557644951283551e-05, + "loss": 0.9806, + "step": 1650 + }, + { + "epoch": 2.42, + "learning_rate": 6.546575316741474e-05, + "loss": 1.0062, + "step": 1651 + }, + { + "epoch": 2.42, + "learning_rate": 6.535510484852767e-05, + "loss": 0.8794, + "step": 1652 + 
}, + { + "epoch": 2.42, + "learning_rate": 6.524450471005213e-05, + "loss": 0.8359, + "step": 1653 + }, + { + "epoch": 2.42, + "learning_rate": 6.513395290579901e-05, + "loss": 0.9442, + "step": 1654 + }, + { + "epoch": 2.43, + "learning_rate": 6.50234495895119e-05, + "loss": 0.8903, + "step": 1655 + }, + { + "epoch": 2.43, + "learning_rate": 6.491299491486695e-05, + "loss": 0.9625, + "step": 1656 + }, + { + "epoch": 2.43, + "learning_rate": 6.480258903547276e-05, + "loss": 0.929, + "step": 1657 + }, + { + "epoch": 2.43, + "learning_rate": 6.469223210486992e-05, + "loss": 0.9751, + "step": 1658 + }, + { + "epoch": 2.43, + "learning_rate": 6.458192427653112e-05, + "loss": 0.982, + "step": 1659 + }, + { + "epoch": 2.43, + "learning_rate": 6.447166570386063e-05, + "loss": 0.8681, + "step": 1660 + }, + { + "epoch": 2.44, + "learning_rate": 6.436145654019432e-05, + "loss": 0.9149, + "step": 1661 + }, + { + "epoch": 2.44, + "learning_rate": 6.425129693879925e-05, + "loss": 0.9411, + "step": 1662 + }, + { + "epoch": 2.44, + "learning_rate": 6.414118705287359e-05, + "loss": 0.9072, + "step": 1663 + }, + { + "epoch": 2.44, + "learning_rate": 6.403112703554643e-05, + "loss": 0.9784, + "step": 1664 + }, + { + "epoch": 2.44, + "learning_rate": 6.392111703987744e-05, + "loss": 0.8754, + "step": 1665 + }, + { + "epoch": 2.44, + "learning_rate": 6.381115721885675e-05, + "loss": 1.0216, + "step": 1666 + }, + { + "epoch": 2.44, + "learning_rate": 6.370124772540469e-05, + "loss": 0.8493, + "step": 1667 + }, + { + "epoch": 2.45, + "learning_rate": 6.35913887123716e-05, + "loss": 0.9215, + "step": 1668 + }, + { + "epoch": 2.45, + "learning_rate": 6.348158033253773e-05, + "loss": 0.919, + "step": 1669 + }, + { + "epoch": 2.45, + "learning_rate": 6.337182273861273e-05, + "loss": 0.9128, + "step": 1670 + }, + { + "epoch": 2.45, + "learning_rate": 6.326211608323573e-05, + "loss": 0.9469, + "step": 1671 + }, + { + "epoch": 2.45, + "learning_rate": 6.315246051897503e-05, + "loss": 0.8582, + 
"step": 1672 + }, + { + "epoch": 2.45, + "learning_rate": 6.30428561983278e-05, + "loss": 0.8487, + "step": 1673 + }, + { + "epoch": 2.45, + "learning_rate": 6.293330327372005e-05, + "loss": 0.8687, + "step": 1674 + }, + { + "epoch": 2.46, + "learning_rate": 6.282380189750625e-05, + "loss": 0.9028, + "step": 1675 + }, + { + "epoch": 2.46, + "learning_rate": 6.271435222196916e-05, + "loss": 0.9105, + "step": 1676 + }, + { + "epoch": 2.46, + "learning_rate": 6.26049543993197e-05, + "loss": 0.9384, + "step": 1677 + }, + { + "epoch": 2.46, + "learning_rate": 6.249560858169661e-05, + "loss": 0.9658, + "step": 1678 + }, + { + "epoch": 2.46, + "learning_rate": 6.238631492116644e-05, + "loss": 0.9193, + "step": 1679 + }, + { + "epoch": 2.46, + "learning_rate": 6.227707356972301e-05, + "loss": 0.9077, + "step": 1680 + }, + { + "epoch": 2.46, + "eval_loss": 0.9968231916427612, + "eval_runtime": 2.6101, + "eval_samples_per_second": 418.377, + "eval_steps_per_second": 26.436, + "step": 1680 + }, + { + "epoch": 2.46, + "learning_rate": 6.216788467928758e-05, + "loss": 0.9083, + "step": 1681 + }, + { + "epoch": 2.47, + "learning_rate": 6.205874840170833e-05, + "loss": 0.9397, + "step": 1682 + }, + { + "epoch": 2.47, + "learning_rate": 6.194966488876027e-05, + "loss": 0.9631, + "step": 1683 + }, + { + "epoch": 2.47, + "learning_rate": 6.184063429214515e-05, + "loss": 0.9214, + "step": 1684 + }, + { + "epoch": 2.47, + "learning_rate": 6.173165676349103e-05, + "loss": 0.9544, + "step": 1685 + }, + { + "epoch": 2.47, + "learning_rate": 6.162273245435219e-05, + "loss": 0.9303, + "step": 1686 + }, + { + "epoch": 2.47, + "learning_rate": 6.151386151620887e-05, + "loss": 0.8605, + "step": 1687 + }, + { + "epoch": 2.48, + "learning_rate": 6.140504410046712e-05, + "loss": 0.9125, + "step": 1688 + }, + { + "epoch": 2.48, + "learning_rate": 6.129628035845861e-05, + "loss": 0.9202, + "step": 1689 + }, + { + "epoch": 2.48, + "learning_rate": 6.118757044144025e-05, + "loss": 0.937, + "step": 
1690 + }, + { + "epoch": 2.48, + "learning_rate": 6.107891450059419e-05, + "loss": 0.8941, + "step": 1691 + }, + { + "epoch": 2.48, + "learning_rate": 6.097031268702746e-05, + "loss": 0.9205, + "step": 1692 + }, + { + "epoch": 2.48, + "learning_rate": 6.086176515177182e-05, + "loss": 0.9164, + "step": 1693 + }, + { + "epoch": 2.48, + "learning_rate": 6.0753272045783625e-05, + "loss": 0.9261, + "step": 1694 + }, + { + "epoch": 2.49, + "learning_rate": 6.0644833519943425e-05, + "loss": 0.8642, + "step": 1695 + }, + { + "epoch": 2.49, + "learning_rate": 6.053644972505593e-05, + "loss": 0.9648, + "step": 1696 + }, + { + "epoch": 2.49, + "learning_rate": 6.042812081184972e-05, + "loss": 0.9213, + "step": 1697 + }, + { + "epoch": 2.49, + "learning_rate": 6.0319846930977e-05, + "loss": 0.9314, + "step": 1698 + }, + { + "epoch": 2.49, + "learning_rate": 6.021162823301358e-05, + "loss": 0.9055, + "step": 1699 + }, + { + "epoch": 2.49, + "learning_rate": 6.010346486845837e-05, + "loss": 0.9335, + "step": 1700 + }, + { + "epoch": 2.49, + "learning_rate": 5.9995356987733466e-05, + "loss": 0.9365, + "step": 1701 + }, + { + "epoch": 2.5, + "learning_rate": 5.988730474118367e-05, + "loss": 0.9817, + "step": 1702 + }, + { + "epoch": 2.5, + "learning_rate": 5.977930827907649e-05, + "loss": 0.9145, + "step": 1703 + }, + { + "epoch": 2.5, + "learning_rate": 5.967136775160187e-05, + "loss": 0.9974, + "step": 1704 + }, + { + "epoch": 2.5, + "learning_rate": 5.956348330887196e-05, + "loss": 0.9577, + "step": 1705 + }, + { + "epoch": 2.5, + "learning_rate": 5.945565510092086e-05, + "loss": 0.8716, + "step": 1706 + }, + { + "epoch": 2.5, + "learning_rate": 5.93478832777045e-05, + "loss": 0.9541, + "step": 1707 + }, + { + "epoch": 2.51, + "learning_rate": 5.924016798910037e-05, + "loss": 0.9464, + "step": 1708 + }, + { + "epoch": 2.51, + "learning_rate": 5.913250938490744e-05, + "loss": 0.9774, + "step": 1709 + }, + { + "epoch": 2.51, + "learning_rate": 5.9024907614845716e-05, + "loss": 
0.8654, + "step": 1710 + }, + { + "epoch": 2.51, + "learning_rate": 5.891736282855622e-05, + "loss": 0.8559, + "step": 1711 + }, + { + "epoch": 2.51, + "learning_rate": 5.880987517560075e-05, + "loss": 1.0032, + "step": 1712 + }, + { + "epoch": 2.51, + "learning_rate": 5.870244480546159e-05, + "loss": 0.9373, + "step": 1713 + }, + { + "epoch": 2.51, + "learning_rate": 5.859507186754146e-05, + "loss": 0.8892, + "step": 1714 + }, + { + "epoch": 2.52, + "learning_rate": 5.848775651116309e-05, + "loss": 0.8809, + "step": 1715 + }, + { + "epoch": 2.52, + "learning_rate": 5.838049888556925e-05, + "loss": 0.9596, + "step": 1716 + }, + { + "epoch": 2.52, + "learning_rate": 5.827329913992232e-05, + "loss": 0.95, + "step": 1717 + }, + { + "epoch": 2.52, + "learning_rate": 5.81661574233042e-05, + "loss": 0.9694, + "step": 1718 + }, + { + "epoch": 2.52, + "learning_rate": 5.80590738847162e-05, + "loss": 0.9333, + "step": 1719 + }, + { + "epoch": 2.52, + "learning_rate": 5.79520486730786e-05, + "loss": 0.943, + "step": 1720 + }, + { + "epoch": 2.52, + "learning_rate": 5.784508193723057e-05, + "loss": 0.906, + "step": 1721 + }, + { + "epoch": 2.53, + "learning_rate": 5.773817382593008e-05, + "loss": 0.9575, + "step": 1722 + }, + { + "epoch": 2.53, + "learning_rate": 5.763132448785339e-05, + "loss": 0.8638, + "step": 1723 + }, + { + "epoch": 2.53, + "learning_rate": 5.752453407159522e-05, + "loss": 0.9667, + "step": 1724 + }, + { + "epoch": 2.53, + "learning_rate": 5.741780272566821e-05, + "loss": 1.0119, + "step": 1725 + }, + { + "epoch": 2.53, + "learning_rate": 5.7311130598502885e-05, + "loss": 0.8563, + "step": 1726 + }, + { + "epoch": 2.53, + "learning_rate": 5.7204517838447405e-05, + "loss": 0.9852, + "step": 1727 + }, + { + "epoch": 2.54, + "learning_rate": 5.7097964593767375e-05, + "loss": 0.8659, + "step": 1728 + }, + { + "epoch": 2.54, + "learning_rate": 5.699147101264566e-05, + "loss": 0.9579, + "step": 1729 + }, + { + "epoch": 2.54, + "learning_rate": 
5.688503724318217e-05, + "loss": 0.9226, + "step": 1730 + }, + { + "epoch": 2.54, + "learning_rate": 5.6778663433393574e-05, + "loss": 0.9771, + "step": 1731 + }, + { + "epoch": 2.54, + "learning_rate": 5.667234973121317e-05, + "loss": 0.8307, + "step": 1732 + }, + { + "epoch": 2.54, + "learning_rate": 5.6566096284490635e-05, + "loss": 0.9012, + "step": 1733 + }, + { + "epoch": 2.54, + "learning_rate": 5.645990324099197e-05, + "loss": 0.8566, + "step": 1734 + }, + { + "epoch": 2.55, + "learning_rate": 5.635377074839907e-05, + "loss": 0.941, + "step": 1735 + }, + { + "epoch": 2.55, + "learning_rate": 5.624769895430961e-05, + "loss": 0.8925, + "step": 1736 + }, + { + "epoch": 2.55, + "learning_rate": 5.614168800623687e-05, + "loss": 0.9387, + "step": 1737 + }, + { + "epoch": 2.55, + "learning_rate": 5.6035738051609555e-05, + "loss": 0.9025, + "step": 1738 + }, + { + "epoch": 2.55, + "learning_rate": 5.5929849237771556e-05, + "loss": 0.9394, + "step": 1739 + }, + { + "epoch": 2.55, + "learning_rate": 5.5824021711981686e-05, + "loss": 0.9743, + "step": 1740 + }, + { + "epoch": 2.55, + "learning_rate": 5.5718255621413526e-05, + "loss": 0.9604, + "step": 1741 + }, + { + "epoch": 2.56, + "learning_rate": 5.561255111315524e-05, + "loss": 0.9223, + "step": 1742 + }, + { + "epoch": 2.56, + "learning_rate": 5.550690833420928e-05, + "loss": 0.8748, + "step": 1743 + }, + { + "epoch": 2.56, + "learning_rate": 5.540132743149242e-05, + "loss": 1.0009, + "step": 1744 + }, + { + "epoch": 2.56, + "learning_rate": 5.5295808551835184e-05, + "loss": 0.9533, + "step": 1745 + }, + { + "epoch": 2.56, + "learning_rate": 5.5190351841982014e-05, + "loss": 0.9026, + "step": 1746 + }, + { + "epoch": 2.56, + "learning_rate": 5.508495744859077e-05, + "loss": 0.9514, + "step": 1747 + }, + { + "epoch": 2.56, + "learning_rate": 5.497962551823266e-05, + "loss": 0.9328, + "step": 1748 + }, + { + "epoch": 2.57, + "learning_rate": 5.487435619739214e-05, + "loss": 0.9765, + "step": 1749 + }, + { + 
"epoch": 2.57, + "learning_rate": 5.476914963246647e-05, + "loss": 0.8417, + "step": 1750 + }, + { + "epoch": 2.57, + "learning_rate": 5.4664005969765674e-05, + "loss": 0.93, + "step": 1751 + }, + { + "epoch": 2.57, + "learning_rate": 5.4558925355512256e-05, + "loss": 0.8757, + "step": 1752 + }, + { + "epoch": 2.57, + "learning_rate": 5.445390793584115e-05, + "loss": 0.9529, + "step": 1753 + }, + { + "epoch": 2.57, + "learning_rate": 5.434895385679937e-05, + "loss": 0.8961, + "step": 1754 + }, + { + "epoch": 2.58, + "learning_rate": 5.4244063264345745e-05, + "loss": 0.844, + "step": 1755 + }, + { + "epoch": 2.58, + "learning_rate": 5.4139236304350935e-05, + "loss": 0.9071, + "step": 1756 + }, + { + "epoch": 2.58, + "learning_rate": 5.403447312259702e-05, + "loss": 0.8767, + "step": 1757 + }, + { + "epoch": 2.58, + "learning_rate": 5.392977386477738e-05, + "loss": 0.9043, + "step": 1758 + }, + { + "epoch": 2.58, + "learning_rate": 5.382513867649663e-05, + "loss": 1.0033, + "step": 1759 + }, + { + "epoch": 2.58, + "learning_rate": 5.372056770327013e-05, + "loss": 0.9496, + "step": 1760 + }, + { + "epoch": 2.58, + "learning_rate": 5.361606109052397e-05, + "loss": 0.9717, + "step": 1761 + }, + { + "epoch": 2.59, + "learning_rate": 5.3511618983594845e-05, + "loss": 0.9332, + "step": 1762 + }, + { + "epoch": 2.59, + "learning_rate": 5.340724152772956e-05, + "loss": 0.8972, + "step": 1763 + }, + { + "epoch": 2.59, + "learning_rate": 5.33029288680852e-05, + "loss": 1.0025, + "step": 1764 + }, + { + "epoch": 2.59, + "learning_rate": 5.31986811497286e-05, + "loss": 0.9639, + "step": 1765 + }, + { + "epoch": 2.59, + "learning_rate": 5.309449851763633e-05, + "loss": 0.9181, + "step": 1766 + }, + { + "epoch": 2.59, + "learning_rate": 5.299038111669444e-05, + "loss": 0.9608, + "step": 1767 + }, + { + "epoch": 2.59, + "learning_rate": 5.288632909169823e-05, + "loss": 0.9961, + "step": 1768 + }, + { + "epoch": 2.6, + "learning_rate": 5.2782342587352154e-05, + "loss": 0.9709, + 
"step": 1769 + }, + { + "epoch": 2.6, + "learning_rate": 5.267842174826955e-05, + "loss": 1.0208, + "step": 1770 + }, + { + "epoch": 2.6, + "learning_rate": 5.2574566718972364e-05, + "loss": 0.8965, + "step": 1771 + }, + { + "epoch": 2.6, + "learning_rate": 5.247077764389099e-05, + "loss": 0.9577, + "step": 1772 + }, + { + "epoch": 2.6, + "learning_rate": 5.236705466736428e-05, + "loss": 0.9513, + "step": 1773 + }, + { + "epoch": 2.6, + "learning_rate": 5.226339793363898e-05, + "loss": 0.995, + "step": 1774 + }, + { + "epoch": 2.61, + "learning_rate": 5.215980758686978e-05, + "loss": 0.8956, + "step": 1775 + }, + { + "epoch": 2.61, + "learning_rate": 5.205628377111902e-05, + "loss": 0.9213, + "step": 1776 + }, + { + "epoch": 2.61, + "learning_rate": 5.195282663035661e-05, + "loss": 0.9413, + "step": 1777 + }, + { + "epoch": 2.61, + "learning_rate": 5.18494363084596e-05, + "loss": 0.9791, + "step": 1778 + }, + { + "epoch": 2.61, + "learning_rate": 5.174611294921224e-05, + "loss": 0.9651, + "step": 1779 + }, + { + "epoch": 2.61, + "learning_rate": 5.1642856696305575e-05, + "loss": 0.9441, + "step": 1780 + }, + { + "epoch": 2.61, + "learning_rate": 5.1539667693337335e-05, + "loss": 0.9206, + "step": 1781 + }, + { + "epoch": 2.62, + "learning_rate": 5.143654608381172e-05, + "loss": 0.8989, + "step": 1782 + }, + { + "epoch": 2.62, + "learning_rate": 5.133349201113929e-05, + "loss": 0.9114, + "step": 1783 + }, + { + "epoch": 2.62, + "learning_rate": 5.123050561863657e-05, + "loss": 0.8995, + "step": 1784 + }, + { + "epoch": 2.62, + "learning_rate": 5.112758704952598e-05, + "loss": 0.952, + "step": 1785 + }, + { + "epoch": 2.62, + "learning_rate": 5.1024736446935754e-05, + "loss": 0.8444, + "step": 1786 + }, + { + "epoch": 2.62, + "learning_rate": 5.092195395389937e-05, + "loss": 0.9613, + "step": 1787 + }, + { + "epoch": 2.62, + "learning_rate": 5.081923971335582e-05, + "loss": 0.9385, + "step": 1788 + }, + { + "epoch": 2.63, + "learning_rate": 5.071659386814907e-05, + 
"loss": 0.8304, + "step": 1789 + }, + { + "epoch": 2.63, + "learning_rate": 5.061401656102791e-05, + "loss": 0.9589, + "step": 1790 + }, + { + "epoch": 2.63, + "learning_rate": 5.051150793464592e-05, + "loss": 0.9628, + "step": 1791 + }, + { + "epoch": 2.63, + "learning_rate": 5.0409068131561067e-05, + "loss": 0.9054, + "step": 1792 + }, + { + "epoch": 2.63, + "learning_rate": 5.0306697294235714e-05, + "loss": 0.8631, + "step": 1793 + }, + { + "epoch": 2.63, + "learning_rate": 5.020439556503629e-05, + "loss": 0.8949, + "step": 1794 + }, + { + "epoch": 2.63, + "learning_rate": 5.0102163086233065e-05, + "loss": 1.0095, + "step": 1795 + }, + { + "epoch": 2.64, + "learning_rate": 5.000000000000002e-05, + "loss": 1.0535, + "step": 1796 + }, + { + "epoch": 2.64, + "learning_rate": 4.98979064484146e-05, + "loss": 0.9593, + "step": 1797 + }, + { + "epoch": 2.64, + "learning_rate": 4.979588257345766e-05, + "loss": 0.9763, + "step": 1798 + }, + { + "epoch": 2.64, + "learning_rate": 4.969392851701305e-05, + "loss": 0.9788, + "step": 1799 + }, + { + "epoch": 2.64, + "learning_rate": 4.959204442086753e-05, + "loss": 0.9516, + "step": 1800 + }, + { + "epoch": 2.64, + "learning_rate": 4.949023042671066e-05, + "loss": 0.9403, + "step": 1801 + }, + { + "epoch": 2.65, + "learning_rate": 4.938848667613436e-05, + "loss": 0.9843, + "step": 1802 + }, + { + "epoch": 2.65, + "learning_rate": 4.928681331063304e-05, + "loss": 1.0064, + "step": 1803 + }, + { + "epoch": 2.65, + "learning_rate": 4.918521047160308e-05, + "loss": 0.9603, + "step": 1804 + }, + { + "epoch": 2.65, + "learning_rate": 4.908367830034284e-05, + "loss": 0.8546, + "step": 1805 + }, + { + "epoch": 2.65, + "learning_rate": 4.8982216938052394e-05, + "loss": 0.9444, + "step": 1806 + }, + { + "epoch": 2.65, + "learning_rate": 4.888082652583331e-05, + "loss": 0.9918, + "step": 1807 + }, + { + "epoch": 2.65, + "learning_rate": 4.877950720468859e-05, + "loss": 0.8489, + "step": 1808 + }, + { + "epoch": 2.66, + "learning_rate": 
4.8678259115522215e-05, + "loss": 1.0195, + "step": 1809 + }, + { + "epoch": 2.66, + "learning_rate": 4.8577082399139296e-05, + "loss": 0.9962, + "step": 1810 + }, + { + "epoch": 2.66, + "learning_rate": 4.8475977196245504e-05, + "loss": 0.9836, + "step": 1811 + }, + { + "epoch": 2.66, + "learning_rate": 4.837494364744711e-05, + "loss": 0.8597, + "step": 1812 + }, + { + "epoch": 2.66, + "learning_rate": 4.827398189325085e-05, + "loss": 1.002, + "step": 1813 + }, + { + "epoch": 2.66, + "learning_rate": 4.817309207406346e-05, + "loss": 0.9228, + "step": 1814 + }, + { + "epoch": 2.66, + "learning_rate": 4.8072274330191725e-05, + "loss": 0.9954, + "step": 1815 + }, + { + "epoch": 2.67, + "learning_rate": 4.7971528801842116e-05, + "loss": 0.9124, + "step": 1816 + }, + { + "epoch": 2.67, + "learning_rate": 4.787085562912076e-05, + "loss": 0.925, + "step": 1817 + }, + { + "epoch": 2.67, + "learning_rate": 4.777025495203319e-05, + "loss": 0.9245, + "step": 1818 + }, + { + "epoch": 2.67, + "learning_rate": 4.7669726910484e-05, + "loss": 0.8668, + "step": 1819 + }, + { + "epoch": 2.67, + "learning_rate": 4.756927164427685e-05, + "loss": 1.0186, + "step": 1820 + }, + { + "epoch": 2.67, + "learning_rate": 4.746888929311415e-05, + "loss": 0.95, + "step": 1821 + }, + { + "epoch": 2.68, + "learning_rate": 4.7368579996596904e-05, + "loss": 0.9373, + "step": 1822 + }, + { + "epoch": 2.68, + "learning_rate": 4.726834389422461e-05, + "loss": 0.9235, + "step": 1823 + }, + { + "epoch": 2.68, + "learning_rate": 4.716818112539485e-05, + "loss": 0.8682, + "step": 1824 + }, + { + "epoch": 2.68, + "learning_rate": 4.706809182940334e-05, + "loss": 0.9166, + "step": 1825 + }, + { + "epoch": 2.68, + "learning_rate": 4.6968076145443515e-05, + "loss": 0.9526, + "step": 1826 + }, + { + "epoch": 2.68, + "learning_rate": 4.686813421260646e-05, + "loss": 0.8961, + "step": 1827 + }, + { + "epoch": 2.68, + "learning_rate": 4.6768266169880804e-05, + "loss": 0.9258, + "step": 1828 + }, + { + "epoch": 
2.69, + "learning_rate": 4.666847215615226e-05, + "loss": 0.9526, + "step": 1829 + }, + { + "epoch": 2.69, + "learning_rate": 4.656875231020368e-05, + "loss": 0.9606, + "step": 1830 + }, + { + "epoch": 2.69, + "learning_rate": 4.6469106770714745e-05, + "loss": 0.9352, + "step": 1831 + }, + { + "epoch": 2.69, + "learning_rate": 4.636953567626177e-05, + "loss": 0.9786, + "step": 1832 + }, + { + "epoch": 2.69, + "learning_rate": 4.6270039165317605e-05, + "loss": 0.8797, + "step": 1833 + }, + { + "epoch": 2.69, + "learning_rate": 4.617061737625139e-05, + "loss": 0.9609, + "step": 1834 + }, + { + "epoch": 2.69, + "learning_rate": 4.6071270447328276e-05, + "loss": 0.8972, + "step": 1835 + }, + { + "epoch": 2.7, + "learning_rate": 4.597199851670932e-05, + "loss": 0.9233, + "step": 1836 + }, + { + "epoch": 2.7, + "learning_rate": 4.587280172245129e-05, + "loss": 0.9455, + "step": 1837 + }, + { + "epoch": 2.7, + "learning_rate": 4.57736802025065e-05, + "loss": 0.8729, + "step": 1838 + }, + { + "epoch": 2.7, + "learning_rate": 4.567463409472255e-05, + "loss": 0.8894, + "step": 1839 + }, + { + "epoch": 2.7, + "learning_rate": 4.557566353684209e-05, + "loss": 0.9784, + "step": 1840 + }, + { + "epoch": 2.7, + "learning_rate": 4.547676866650289e-05, + "loss": 0.9586, + "step": 1841 + }, + { + "epoch": 2.7, + "learning_rate": 4.537794962123726e-05, + "loss": 0.978, + "step": 1842 + }, + { + "epoch": 2.71, + "learning_rate": 4.527920653847221e-05, + "loss": 0.9431, + "step": 1843 + }, + { + "epoch": 2.71, + "learning_rate": 4.518053955552903e-05, + "loss": 0.8657, + "step": 1844 + }, + { + "epoch": 2.71, + "learning_rate": 4.50819488096232e-05, + "loss": 0.7781, + "step": 1845 + }, + { + "epoch": 2.71, + "learning_rate": 4.498343443786416e-05, + "loss": 0.9086, + "step": 1846 + }, + { + "epoch": 2.71, + "learning_rate": 4.488499657725511e-05, + "loss": 0.897, + "step": 1847 + }, + { + "epoch": 2.71, + "learning_rate": 4.478663536469296e-05, + "loss": 0.9494, + "step": 1848 + }, + 
{ + "epoch": 2.71, + "eval_loss": 0.9907075762748718, + "eval_runtime": 2.6095, + "eval_samples_per_second": 418.473, + "eval_steps_per_second": 26.442, + "step": 1848 + }, + { + "epoch": 2.72, + "learning_rate": 4.468835093696796e-05, + "loss": 0.9201, + "step": 1849 + }, + { + "epoch": 2.72, + "learning_rate": 4.4590143430763555e-05, + "loss": 0.989, + "step": 1850 + }, + { + "epoch": 2.72, + "learning_rate": 4.449201298265622e-05, + "loss": 0.8615, + "step": 1851 + }, + { + "epoch": 2.72, + "learning_rate": 4.4393959729115244e-05, + "loss": 0.9388, + "step": 1852 + }, + { + "epoch": 2.72, + "learning_rate": 4.429598380650266e-05, + "loss": 0.9179, + "step": 1853 + }, + { + "epoch": 2.72, + "learning_rate": 4.419808535107287e-05, + "loss": 0.9471, + "step": 1854 + }, + { + "epoch": 2.72, + "learning_rate": 4.4100264498972564e-05, + "loss": 0.9584, + "step": 1855 + }, + { + "epoch": 2.73, + "learning_rate": 4.4002521386240466e-05, + "loss": 0.9077, + "step": 1856 + }, + { + "epoch": 2.73, + "learning_rate": 4.3904856148807284e-05, + "loss": 0.876, + "step": 1857 + }, + { + "epoch": 2.73, + "learning_rate": 4.3807268922495406e-05, + "loss": 0.9785, + "step": 1858 + }, + { + "epoch": 2.73, + "learning_rate": 4.370975984301866e-05, + "loss": 0.8762, + "step": 1859 + }, + { + "epoch": 2.73, + "learning_rate": 4.3612329045982236e-05, + "loss": 0.8797, + "step": 1860 + }, + { + "epoch": 2.73, + "learning_rate": 4.351497666688246e-05, + "loss": 1.05, + "step": 1861 + }, + { + "epoch": 2.73, + "learning_rate": 4.341770284110656e-05, + "loss": 0.9844, + "step": 1862 + }, + { + "epoch": 2.74, + "learning_rate": 4.332050770393263e-05, + "loss": 0.9814, + "step": 1863 + }, + { + "epoch": 2.74, + "learning_rate": 4.322339139052921e-05, + "loss": 0.7994, + "step": 1864 + }, + { + "epoch": 2.74, + "learning_rate": 4.312635403595532e-05, + "loss": 0.8213, + "step": 1865 + }, + { + "epoch": 2.74, + "learning_rate": 4.3029395775160106e-05, + "loss": 0.9036, + "step": 1866 + }, + { 
+ "epoch": 2.74, + "learning_rate": 4.293251674298269e-05, + "loss": 0.9382, + "step": 1867 + }, + { + "epoch": 2.74, + "learning_rate": 4.283571707415214e-05, + "loss": 0.9933, + "step": 1868 + }, + { + "epoch": 2.75, + "learning_rate": 4.273899690328702e-05, + "loss": 0.897, + "step": 1869 + }, + { + "epoch": 2.75, + "learning_rate": 4.264235636489542e-05, + "loss": 0.9293, + "step": 1870 + }, + { + "epoch": 2.75, + "learning_rate": 4.2545795593374594e-05, + "loss": 0.9417, + "step": 1871 + }, + { + "epoch": 2.75, + "learning_rate": 4.244931472301098e-05, + "loss": 0.8857, + "step": 1872 + }, + { + "epoch": 2.75, + "learning_rate": 4.235291388797986e-05, + "loss": 0.8865, + "step": 1873 + }, + { + "epoch": 2.75, + "learning_rate": 4.2256593222345185e-05, + "loss": 0.8899, + "step": 1874 + }, + { + "epoch": 2.75, + "learning_rate": 4.216035286005942e-05, + "loss": 0.9032, + "step": 1875 + }, + { + "epoch": 2.76, + "learning_rate": 4.206419293496333e-05, + "loss": 0.9283, + "step": 1876 + }, + { + "epoch": 2.76, + "learning_rate": 4.196811358078585e-05, + "loss": 0.852, + "step": 1877 + }, + { + "epoch": 2.76, + "learning_rate": 4.18721149311439e-05, + "loss": 0.9111, + "step": 1878 + }, + { + "epoch": 2.76, + "learning_rate": 4.177619711954211e-05, + "loss": 0.9561, + "step": 1879 + }, + { + "epoch": 2.76, + "learning_rate": 4.168036027937267e-05, + "loss": 0.8661, + "step": 1880 + }, + { + "epoch": 2.76, + "learning_rate": 4.1584604543915254e-05, + "loss": 0.9675, + "step": 1881 + }, + { + "epoch": 2.76, + "learning_rate": 4.148893004633663e-05, + "loss": 0.9231, + "step": 1882 + }, + { + "epoch": 2.77, + "learning_rate": 4.139333691969071e-05, + "loss": 0.8708, + "step": 1883 + }, + { + "epoch": 2.77, + "learning_rate": 4.129782529691815e-05, + "loss": 0.9466, + "step": 1884 + }, + { + "epoch": 2.77, + "learning_rate": 4.1202395310846296e-05, + "loss": 0.8492, + "step": 1885 + }, + { + "epoch": 2.77, + "learning_rate": 4.1107047094188946e-05, + "loss": 0.8113, + 
"step": 1886 + }, + { + "epoch": 2.77, + "learning_rate": 4.101178077954617e-05, + "loss": 0.8231, + "step": 1887 + }, + { + "epoch": 2.77, + "learning_rate": 4.091659649940419e-05, + "loss": 1.025, + "step": 1888 + }, + { + "epoch": 2.77, + "learning_rate": 4.082149438613514e-05, + "loss": 0.9734, + "step": 1889 + }, + { + "epoch": 2.78, + "learning_rate": 4.072647457199684e-05, + "loss": 0.9829, + "step": 1890 + }, + { + "epoch": 2.78, + "learning_rate": 4.063153718913267e-05, + "loss": 0.9899, + "step": 1891 + }, + { + "epoch": 2.78, + "learning_rate": 4.053668236957134e-05, + "loss": 0.9136, + "step": 1892 + }, + { + "epoch": 2.78, + "learning_rate": 4.044191024522686e-05, + "loss": 0.955, + "step": 1893 + }, + { + "epoch": 2.78, + "learning_rate": 4.034722094789809e-05, + "loss": 0.9592, + "step": 1894 + }, + { + "epoch": 2.78, + "learning_rate": 4.0252614609268766e-05, + "loss": 0.9513, + "step": 1895 + }, + { + "epoch": 2.79, + "learning_rate": 4.015809136090732e-05, + "loss": 0.8921, + "step": 1896 + }, + { + "epoch": 2.79, + "learning_rate": 4.0063651334266496e-05, + "loss": 0.9401, + "step": 1897 + }, + { + "epoch": 2.79, + "learning_rate": 3.996929466068344e-05, + "loss": 0.9658, + "step": 1898 + }, + { + "epoch": 2.79, + "learning_rate": 3.987502147137928e-05, + "loss": 1.0133, + "step": 1899 + }, + { + "epoch": 2.79, + "learning_rate": 3.978083189745907e-05, + "loss": 0.8799, + "step": 1900 + }, + { + "epoch": 2.79, + "learning_rate": 3.96867260699116e-05, + "loss": 0.9336, + "step": 1901 + }, + { + "epoch": 2.79, + "learning_rate": 3.9592704119609125e-05, + "loss": 0.9089, + "step": 1902 + }, + { + "epoch": 2.8, + "learning_rate": 3.9498766177307403e-05, + "loss": 0.9544, + "step": 1903 + }, + { + "epoch": 2.8, + "learning_rate": 3.9404912373645185e-05, + "loss": 0.877, + "step": 1904 + }, + { + "epoch": 2.8, + "learning_rate": 3.9311142839144365e-05, + "loss": 0.9766, + "step": 1905 + }, + { + "epoch": 2.8, + "learning_rate": 3.9217457704209536e-05, 
+ "loss": 0.9153, + "step": 1906 + }, + { + "epoch": 2.8, + "learning_rate": 3.9123857099127936e-05, + "loss": 0.8915, + "step": 1907 + }, + { + "epoch": 2.8, + "learning_rate": 3.903034115406931e-05, + "loss": 0.8892, + "step": 1908 + }, + { + "epoch": 2.8, + "learning_rate": 3.893690999908562e-05, + "loss": 0.9365, + "step": 1909 + }, + { + "epoch": 2.81, + "learning_rate": 3.884356376411089e-05, + "loss": 0.9644, + "step": 1910 + }, + { + "epoch": 2.81, + "learning_rate": 3.875030257896105e-05, + "loss": 0.8724, + "step": 1911 + }, + { + "epoch": 2.81, + "learning_rate": 3.8657126573333804e-05, + "loss": 0.9454, + "step": 1912 + }, + { + "epoch": 2.81, + "learning_rate": 3.85640358768084e-05, + "loss": 0.9356, + "step": 1913 + }, + { + "epoch": 2.81, + "learning_rate": 3.8471030618845375e-05, + "loss": 0.9073, + "step": 1914 + }, + { + "epoch": 2.81, + "learning_rate": 3.837811092878649e-05, + "loss": 0.9627, + "step": 1915 + }, + { + "epoch": 2.82, + "learning_rate": 3.828527693585451e-05, + "loss": 0.9601, + "step": 1916 + }, + { + "epoch": 2.82, + "learning_rate": 3.819252876915297e-05, + "loss": 0.9001, + "step": 1917 + }, + { + "epoch": 2.82, + "learning_rate": 3.809986655766616e-05, + "loss": 0.9256, + "step": 1918 + }, + { + "epoch": 2.82, + "learning_rate": 3.800729043025871e-05, + "loss": 0.9052, + "step": 1919 + }, + { + "epoch": 2.82, + "learning_rate": 3.791480051567564e-05, + "loss": 1.0201, + "step": 1920 + }, + { + "epoch": 2.82, + "learning_rate": 3.7822396942542005e-05, + "loss": 0.9417, + "step": 1921 + }, + { + "epoch": 2.82, + "learning_rate": 3.7730079839362755e-05, + "loss": 0.8914, + "step": 1922 + }, + { + "epoch": 2.83, + "learning_rate": 3.76378493345227e-05, + "loss": 0.9132, + "step": 1923 + }, + { + "epoch": 2.83, + "learning_rate": 3.7545705556286126e-05, + "loss": 0.8873, + "step": 1924 + }, + { + "epoch": 2.83, + "learning_rate": 3.745364863279675e-05, + "loss": 0.9416, + "step": 1925 + }, + { + "epoch": 2.83, + "learning_rate": 
3.7361678692077416e-05, + "loss": 0.9625, + "step": 1926 + }, + { + "epoch": 2.83, + "learning_rate": 3.726979586203019e-05, + "loss": 0.9371, + "step": 1927 + }, + { + "epoch": 2.83, + "learning_rate": 3.717800027043576e-05, + "loss": 0.9022, + "step": 1928 + }, + { + "epoch": 2.83, + "learning_rate": 3.708629204495371e-05, + "loss": 0.9038, + "step": 1929 + }, + { + "epoch": 2.84, + "learning_rate": 3.699467131312197e-05, + "loss": 0.9053, + "step": 1930 + }, + { + "epoch": 2.84, + "learning_rate": 3.6903138202356855e-05, + "loss": 1.0199, + "step": 1931 + }, + { + "epoch": 2.84, + "learning_rate": 3.681169283995279e-05, + "loss": 0.9524, + "step": 1932 + }, + { + "epoch": 2.84, + "learning_rate": 3.6720335353082246e-05, + "loss": 0.8613, + "step": 1933 + }, + { + "epoch": 2.84, + "learning_rate": 3.662906586879542e-05, + "loss": 0.9004, + "step": 1934 + }, + { + "epoch": 2.84, + "learning_rate": 3.653788451402009e-05, + "loss": 0.9132, + "step": 1935 + }, + { + "epoch": 2.85, + "learning_rate": 3.6446791415561574e-05, + "loss": 0.866, + "step": 1936 + }, + { + "epoch": 2.85, + "learning_rate": 3.635578670010242e-05, + "loss": 0.975, + "step": 1937 + }, + { + "epoch": 2.85, + "learning_rate": 3.626487049420223e-05, + "loss": 0.9129, + "step": 1938 + }, + { + "epoch": 2.85, + "learning_rate": 3.61740429242975e-05, + "loss": 0.9127, + "step": 1939 + }, + { + "epoch": 2.85, + "learning_rate": 3.6083304116701535e-05, + "loss": 0.9235, + "step": 1940 + }, + { + "epoch": 2.85, + "learning_rate": 3.599265419760408e-05, + "loss": 0.9644, + "step": 1941 + }, + { + "epoch": 2.85, + "learning_rate": 3.5902093293071425e-05, + "loss": 0.9893, + "step": 1942 + }, + { + "epoch": 2.86, + "learning_rate": 3.581162152904592e-05, + "loss": 0.967, + "step": 1943 + }, + { + "epoch": 2.86, + "learning_rate": 3.5721239031346066e-05, + "loss": 0.8631, + "step": 1944 + }, + { + "epoch": 2.86, + "learning_rate": 3.5630945925666134e-05, + "loss": 0.9066, + "step": 1945 + }, + { + "epoch": 
2.86, + "learning_rate": 3.554074233757608e-05, + "loss": 1.0079, + "step": 1946 + }, + { + "epoch": 2.86, + "learning_rate": 3.545062839252147e-05, + "loss": 0.8998, + "step": 1947 + }, + { + "epoch": 2.86, + "learning_rate": 3.536060421582309e-05, + "loss": 0.9351, + "step": 1948 + }, + { + "epoch": 2.86, + "learning_rate": 3.5270669932676926e-05, + "loss": 1.0081, + "step": 1949 + }, + { + "epoch": 2.87, + "learning_rate": 3.518082566815396e-05, + "loss": 0.9089, + "step": 1950 + }, + { + "epoch": 2.87, + "learning_rate": 3.509107154719994e-05, + "loss": 0.9823, + "step": 1951 + }, + { + "epoch": 2.87, + "learning_rate": 3.500140769463533e-05, + "loss": 0.9625, + "step": 1952 + }, + { + "epoch": 2.87, + "learning_rate": 3.491183423515503e-05, + "loss": 0.8762, + "step": 1953 + }, + { + "epoch": 2.87, + "learning_rate": 3.48223512933282e-05, + "loss": 0.9355, + "step": 1954 + }, + { + "epoch": 2.87, + "learning_rate": 3.4732958993598154e-05, + "loss": 0.9384, + "step": 1955 + }, + { + "epoch": 2.87, + "learning_rate": 3.464365746028208e-05, + "loss": 0.9222, + "step": 1956 + }, + { + "epoch": 2.88, + "learning_rate": 3.455444681757105e-05, + "loss": 0.9472, + "step": 1957 + }, + { + "epoch": 2.88, + "learning_rate": 3.4465327189529664e-05, + "loss": 0.9375, + "step": 1958 + }, + { + "epoch": 2.88, + "learning_rate": 3.437629870009591e-05, + "loss": 0.8795, + "step": 1959 + }, + { + "epoch": 2.88, + "learning_rate": 3.428736147308115e-05, + "loss": 0.8276, + "step": 1960 + }, + { + "epoch": 2.88, + "learning_rate": 3.41985156321697e-05, + "loss": 1.0247, + "step": 1961 + }, + { + "epoch": 2.88, + "learning_rate": 3.410976130091892e-05, + "loss": 0.9808, + "step": 1962 + }, + { + "epoch": 2.89, + "learning_rate": 3.402109860275877e-05, + "loss": 0.9254, + "step": 1963 + }, + { + "epoch": 2.89, + "learning_rate": 3.393252766099187e-05, + "loss": 0.8803, + "step": 1964 + }, + { + "epoch": 2.89, + "learning_rate": 3.38440485987932e-05, + "loss": 0.9022, + "step": 1965 
+ }, + { + "epoch": 2.89, + "learning_rate": 3.375566153920992e-05, + "loss": 0.8599, + "step": 1966 + }, + { + "epoch": 2.89, + "learning_rate": 3.366736660516132e-05, + "loss": 0.9316, + "step": 1967 + }, + { + "epoch": 2.89, + "learning_rate": 3.3579163919438595e-05, + "loss": 0.993, + "step": 1968 + }, + { + "epoch": 2.89, + "learning_rate": 3.349105360470456e-05, + "loss": 0.984, + "step": 1969 + }, + { + "epoch": 2.9, + "learning_rate": 3.3403035783493605e-05, + "loss": 0.9208, + "step": 1970 + }, + { + "epoch": 2.9, + "learning_rate": 3.331511057821146e-05, + "loss": 0.9139, + "step": 1971 + }, + { + "epoch": 2.9, + "learning_rate": 3.322727811113516e-05, + "loss": 0.9367, + "step": 1972 + }, + { + "epoch": 2.9, + "learning_rate": 3.313953850441266e-05, + "loss": 0.9278, + "step": 1973 + }, + { + "epoch": 2.9, + "learning_rate": 3.305189188006281e-05, + "loss": 0.9714, + "step": 1974 + }, + { + "epoch": 2.9, + "learning_rate": 3.2964338359975134e-05, + "loss": 0.8818, + "step": 1975 + }, + { + "epoch": 2.9, + "learning_rate": 3.287687806590971e-05, + "loss": 1.0362, + "step": 1976 + }, + { + "epoch": 2.91, + "learning_rate": 3.2789511119496994e-05, + "loss": 0.9065, + "step": 1977 + }, + { + "epoch": 2.91, + "learning_rate": 3.270223764223755e-05, + "loss": 0.9571, + "step": 1978 + }, + { + "epoch": 2.91, + "learning_rate": 3.2615057755502e-05, + "loss": 0.9128, + "step": 1979 + }, + { + "epoch": 2.91, + "learning_rate": 3.252797158053077e-05, + "loss": 0.8249, + "step": 1980 + }, + { + "epoch": 2.91, + "learning_rate": 3.244097923843398e-05, + "loss": 1.012, + "step": 1981 + }, + { + "epoch": 2.91, + "learning_rate": 3.2354080850191324e-05, + "loss": 0.854, + "step": 1982 + }, + { + "epoch": 2.92, + "learning_rate": 3.226727653665171e-05, + "loss": 0.8708, + "step": 1983 + }, + { + "epoch": 2.92, + "learning_rate": 3.218056641853337e-05, + "loss": 0.8986, + "step": 1984 + }, + { + "epoch": 2.92, + "learning_rate": 3.2093950616423394e-05, + "loss": 0.9326, + 
"step": 1985 + }, + { + "epoch": 2.92, + "learning_rate": 3.200742925077775e-05, + "loss": 0.941, + "step": 1986 + }, + { + "epoch": 2.92, + "learning_rate": 3.192100244192116e-05, + "loss": 1.0446, + "step": 1987 + }, + { + "epoch": 2.92, + "learning_rate": 3.1834670310046734e-05, + "loss": 0.971, + "step": 1988 + }, + { + "epoch": 2.92, + "learning_rate": 3.174843297521596e-05, + "loss": 0.9548, + "step": 1989 + }, + { + "epoch": 2.93, + "learning_rate": 3.166229055735848e-05, + "loss": 0.8474, + "step": 1990 + }, + { + "epoch": 2.93, + "learning_rate": 3.157624317627195e-05, + "loss": 0.8806, + "step": 1991 + }, + { + "epoch": 2.93, + "learning_rate": 3.1490290951621904e-05, + "loss": 0.9161, + "step": 1992 + }, + { + "epoch": 2.93, + "learning_rate": 3.140443400294146e-05, + "loss": 0.9025, + "step": 1993 + }, + { + "epoch": 2.93, + "learning_rate": 3.1318672449631284e-05, + "loss": 0.9566, + "step": 1994 + }, + { + "epoch": 2.93, + "learning_rate": 3.123300641095935e-05, + "loss": 0.8958, + "step": 1995 + }, + { + "epoch": 2.93, + "learning_rate": 3.114743600606078e-05, + "loss": 0.9001, + "step": 1996 + }, + { + "epoch": 2.94, + "learning_rate": 3.106196135393782e-05, + "loss": 0.993, + "step": 1997 + }, + { + "epoch": 2.94, + "learning_rate": 3.09765825734594e-05, + "loss": 0.932, + "step": 1998 + }, + { + "epoch": 2.94, + "learning_rate": 3.089129978336118e-05, + "loss": 0.9425, + "step": 1999 + }, + { + "epoch": 2.94, + "learning_rate": 3.080611310224539e-05, + "loss": 0.8835, + "step": 2000 + }, + { + "epoch": 2.94, + "learning_rate": 3.0721022648580486e-05, + "loss": 0.9171, + "step": 2001 + }, + { + "epoch": 2.94, + "learning_rate": 3.063602854070123e-05, + "loss": 0.8977, + "step": 2002 + }, + { + "epoch": 2.94, + "learning_rate": 3.055113089680829e-05, + "loss": 1.0178, + "step": 2003 + }, + { + "epoch": 2.95, + "learning_rate": 3.0466329834968233e-05, + "loss": 0.906, + "step": 2004 + }, + { + "epoch": 2.95, + "learning_rate": 3.0381625473113284e-05, 
+ "loss": 0.9512, + "step": 2005 + }, + { + "epoch": 2.95, + "learning_rate": 3.029701792904117e-05, + "loss": 0.9653, + "step": 2006 + }, + { + "epoch": 2.95, + "learning_rate": 3.0212507320415052e-05, + "loss": 0.9445, + "step": 2007 + }, + { + "epoch": 2.95, + "learning_rate": 3.0128093764763254e-05, + "loss": 0.9406, + "step": 2008 + }, + { + "epoch": 2.95, + "learning_rate": 3.0043777379479098e-05, + "loss": 0.9888, + "step": 2009 + }, + { + "epoch": 2.96, + "learning_rate": 2.9959558281820766e-05, + "loss": 0.8496, + "step": 2010 + }, + { + "epoch": 2.96, + "learning_rate": 2.9875436588911153e-05, + "loss": 0.9583, + "step": 2011 + }, + { + "epoch": 2.96, + "learning_rate": 2.979141241773775e-05, + "loss": 0.9488, + "step": 2012 + }, + { + "epoch": 2.96, + "learning_rate": 2.9707485885152363e-05, + "loss": 0.9339, + "step": 2013 + } + ], + "logging_steps": 1, + "max_steps": 2684, + "num_input_tokens_seen": 0, + "num_train_epochs": 4, + "save_steps": 671, + "total_flos": 6.253562983968932e+17, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-2013/training_args.bin b/checkpoint-2013/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..439fe237329d4c6dab9a083d1f0b3c5d2e07ff34 --- /dev/null +++ b/checkpoint-2013/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c9f074b3cd0fbc5cecae753dfd6c83754f9e22c6bc7af03db47b3beb5a1a41c9 +size 4923 diff --git a/checkpoint-2684/README.md b/checkpoint-2684/README.md new file mode 100644 index 0000000000000000000000000000000000000000..c40158a9bf29b5b6a4b1c7d97250d59a2f05ed92 --- /dev/null +++ b/checkpoint-2684/README.md @@ -0,0 +1,204 @@ +--- +library_name: peft +base_model: openlm-research/open_llama_3b_v2 +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- 
**Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). 
+ +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] + + +### Framework versions + +- PEFT 0.8.2 \ No newline at end of file diff --git a/checkpoint-2684/adapter_config.json b/checkpoint-2684/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..a75a5db14cc030f9130cc346972670dcccc55fe2 --- /dev/null +++ b/checkpoint-2684/adapter_config.json @@ -0,0 +1,32 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "openlm-research/open_llama_3b_v2", + "bias": "none", + "fan_in_fan_out": null, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 16, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "gate_proj", + "q_proj", + "down_proj", + "up_proj", + "k_proj", + "o_proj", + "v_proj" + ], + "task_type": "CAUSAL_LM", + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoint-2684/adapter_model.safetensors b/checkpoint-2684/adapter_model.safetensors 
new file mode 100644 index 0000000000000000000000000000000000000000..1eedebb10db9d0c63007d19fdab8e7125cae2b9e --- /dev/null +++ b/checkpoint-2684/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:601a59770b8ccda2a8d368743846f780b93395dc89ae3527412f1875abeb62cf +size 50899792 diff --git a/checkpoint-2684/optimizer.pt b/checkpoint-2684/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..f6f7b0bfc757cacb85979f01a229c9e3e9a098a2 --- /dev/null +++ b/checkpoint-2684/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:75512e8570baba4e53538856b39d2998e4ce7657bd812d520e99ca4e1ee1c205 +size 25871439 diff --git a/checkpoint-2684/rng_state_0.pth b/checkpoint-2684/rng_state_0.pth new file mode 100644 index 0000000000000000000000000000000000000000..3c365a13693633e230d1f289bd3daddddc258336 --- /dev/null +++ b/checkpoint-2684/rng_state_0.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ae1d35f5d839d2cfbea42506eada67018bf4b3512c922d753f96146530a4e825 +size 21687 diff --git a/checkpoint-2684/rng_state_1.pth b/checkpoint-2684/rng_state_1.pth new file mode 100644 index 0000000000000000000000000000000000000000..d4aae908adccd4253d9037c3b1a81197f2a58cb8 --- /dev/null +++ b/checkpoint-2684/rng_state_1.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:665197df97c86851000409227ef5e3ab909a7ef41724ad25ca7145cba3c11460 +size 21687 diff --git a/checkpoint-2684/rng_state_2.pth b/checkpoint-2684/rng_state_2.pth new file mode 100644 index 0000000000000000000000000000000000000000..a8448cf63fb7c01b985d46473cba5ab4bb39acf7 --- /dev/null +++ b/checkpoint-2684/rng_state_2.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5b4d57374813f84812879d22d988522bcb299b3f010c6c53d0295fa8e86ba40d +size 21687 diff --git a/checkpoint-2684/rng_state_3.pth b/checkpoint-2684/rng_state_3.pth new file mode 100644 index 
0000000000000000000000000000000000000000..49ef46e8730ff151bb3ef8a303457108c39edfef --- /dev/null +++ b/checkpoint-2684/rng_state_3.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2cdaa5ce5617a4008b1b2260d65967bcf1007d0813eddf627a0d24ceba3f2d67 +size 21687 diff --git a/checkpoint-2684/rng_state_4.pth b/checkpoint-2684/rng_state_4.pth new file mode 100644 index 0000000000000000000000000000000000000000..0c4a271169f274ab3ab8405e979e4345d8611870 --- /dev/null +++ b/checkpoint-2684/rng_state_4.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cf9c763e22ea9d8cb9c2d8d5104276ad313be2e6f7fa2ac71e58d0a571f63a44 +size 21687 diff --git a/checkpoint-2684/rng_state_5.pth b/checkpoint-2684/rng_state_5.pth new file mode 100644 index 0000000000000000000000000000000000000000..b24e4ad3d50f60181f120233800e0cfa27dc38dc --- /dev/null +++ b/checkpoint-2684/rng_state_5.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:84cbf1efdd770aab1540fce1cbc6f7b75f5d0f3ad830ab7b94e10e373f2bf3f3 +size 21687 diff --git a/checkpoint-2684/rng_state_6.pth b/checkpoint-2684/rng_state_6.pth new file mode 100644 index 0000000000000000000000000000000000000000..ba0dcf02f242f4509acd0d09e71e9d725744f302 --- /dev/null +++ b/checkpoint-2684/rng_state_6.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:432a7f6fef6d4fb6339b2c88b25ddbedcb9bef99e93faa80a540b9e155b305a6 +size 21687 diff --git a/checkpoint-2684/rng_state_7.pth b/checkpoint-2684/rng_state_7.pth new file mode 100644 index 0000000000000000000000000000000000000000..314caf4041f3edad02debd37046e346d8225a16e --- /dev/null +++ b/checkpoint-2684/rng_state_7.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a46c4edfc8716406b677deb25e6523e0e79598978634442aa1bace0ac0be403f +size 21687 diff --git a/checkpoint-2684/scheduler.pt b/checkpoint-2684/scheduler.pt new file mode 100644 index 
0000000000000000000000000000000000000000..e3a13a8e940c9305d45227b8a658106d469f760e --- /dev/null +++ b/checkpoint-2684/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c9b33536dba07d0886c93b1b512aebac211dbef9b4cd7b78bb076adf388863a8 +size 627 diff --git a/checkpoint-2684/trainer_state.json b/checkpoint-2684/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..40d69f134f110a4bf568edd09b4b9313e460b238 --- /dev/null +++ b/checkpoint-2684/trainer_state.json @@ -0,0 +1,16253 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 3.938897168405365, + "eval_steps": 168, + "global_step": 2684, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 1e-05, + "loss": 1.3745, + "step": 1 + }, + { + "epoch": 0.0, + "eval_loss": 1.6296857595443726, + "eval_runtime": 2.6662, + "eval_samples_per_second": 409.572, + "eval_steps_per_second": 25.88, + "step": 1 + }, + { + "epoch": 0.0, + "learning_rate": 2e-05, + "loss": 1.42, + "step": 2 + }, + { + "epoch": 0.0, + "learning_rate": 3e-05, + "loss": 1.3057, + "step": 3 + }, + { + "epoch": 0.01, + "learning_rate": 4e-05, + "loss": 1.2307, + "step": 4 + }, + { + "epoch": 0.01, + "learning_rate": 5e-05, + "loss": 1.289, + "step": 5 + }, + { + "epoch": 0.01, + "learning_rate": 6e-05, + "loss": 1.4111, + "step": 6 + }, + { + "epoch": 0.01, + "learning_rate": 7e-05, + "loss": 1.3089, + "step": 7 + }, + { + "epoch": 0.01, + "learning_rate": 8e-05, + "loss": 1.3204, + "step": 8 + }, + { + "epoch": 0.01, + "learning_rate": 9e-05, + "loss": 1.3575, + "step": 9 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001, + "loss": 1.3279, + "step": 10 + }, + { + "epoch": 0.02, + "learning_rate": 0.00011000000000000002, + "loss": 1.3149, + "step": 11 + }, + { + "epoch": 0.02, + "learning_rate": 0.00012, + "loss": 1.2578, + "step": 12 + }, + { + "epoch": 0.02, + 
"learning_rate": 0.00013000000000000002, + "loss": 1.2849, + "step": 13 + }, + { + "epoch": 0.02, + "learning_rate": 0.00014, + "loss": 1.2971, + "step": 14 + }, + { + "epoch": 0.02, + "learning_rate": 0.00015000000000000001, + "loss": 1.1473, + "step": 15 + }, + { + "epoch": 0.02, + "learning_rate": 0.00016, + "loss": 1.1943, + "step": 16 + }, + { + "epoch": 0.03, + "learning_rate": 0.00017, + "loss": 1.1877, + "step": 17 + }, + { + "epoch": 0.03, + "learning_rate": 0.00018, + "loss": 1.1984, + "step": 18 + }, + { + "epoch": 0.03, + "learning_rate": 0.00019, + "loss": 1.2647, + "step": 19 + }, + { + "epoch": 0.03, + "learning_rate": 0.0002, + "loss": 1.217, + "step": 20 + }, + { + "epoch": 0.03, + "learning_rate": 0.00019999993046535236, + "loss": 1.0274, + "step": 21 + }, + { + "epoch": 0.03, + "learning_rate": 0.00019999972186150606, + "loss": 1.2122, + "step": 22 + }, + { + "epoch": 0.03, + "learning_rate": 0.00019999937418875124, + "loss": 1.1868, + "step": 23 + }, + { + "epoch": 0.04, + "learning_rate": 0.00019999888744757143, + "loss": 1.2345, + "step": 24 + }, + { + "epoch": 0.04, + "learning_rate": 0.00019999826163864348, + "loss": 1.2127, + "step": 25 + }, + { + "epoch": 0.04, + "learning_rate": 0.00019999749676283775, + "loss": 1.2114, + "step": 26 + }, + { + "epoch": 0.04, + "learning_rate": 0.00019999659282121792, + "loss": 1.2224, + "step": 27 + }, + { + "epoch": 0.04, + "learning_rate": 0.0001999955498150411, + "loss": 1.1517, + "step": 28 + }, + { + "epoch": 0.04, + "learning_rate": 0.0001999943677457578, + "loss": 1.1631, + "step": 29 + }, + { + "epoch": 0.04, + "learning_rate": 0.0001999930466150119, + "loss": 1.0465, + "step": 30 + }, + { + "epoch": 0.05, + "learning_rate": 0.0001999915864246407, + "loss": 1.1847, + "step": 31 + }, + { + "epoch": 0.05, + "learning_rate": 0.0001999899871766749, + "loss": 1.1238, + "step": 32 + }, + { + "epoch": 0.05, + "learning_rate": 0.0001999882488733385, + "loss": 1.1491, + "step": 33 + }, + { + "epoch": 0.05, 
+ "learning_rate": 0.000199986371517049, + "loss": 1.276, + "step": 34 + }, + { + "epoch": 0.05, + "learning_rate": 0.0001999843551104172, + "loss": 1.0911, + "step": 35 + }, + { + "epoch": 0.05, + "learning_rate": 0.00019998219965624734, + "loss": 1.1276, + "step": 36 + }, + { + "epoch": 0.06, + "learning_rate": 0.00019997990515753693, + "loss": 1.0981, + "step": 37 + }, + { + "epoch": 0.06, + "learning_rate": 0.00019997747161747695, + "loss": 1.0901, + "step": 38 + }, + { + "epoch": 0.06, + "learning_rate": 0.0001999748990394517, + "loss": 1.096, + "step": 39 + }, + { + "epoch": 0.06, + "learning_rate": 0.00019997218742703887, + "loss": 1.122, + "step": 40 + }, + { + "epoch": 0.06, + "learning_rate": 0.00019996933678400946, + "loss": 1.1132, + "step": 41 + }, + { + "epoch": 0.06, + "learning_rate": 0.00019996634711432786, + "loss": 1.1498, + "step": 42 + }, + { + "epoch": 0.06, + "learning_rate": 0.00019996321842215173, + "loss": 1.0708, + "step": 43 + }, + { + "epoch": 0.07, + "learning_rate": 0.0001999599507118322, + "loss": 1.1154, + "step": 44 + }, + { + "epoch": 0.07, + "learning_rate": 0.00019995654398791355, + "loss": 1.2118, + "step": 45 + }, + { + "epoch": 0.07, + "learning_rate": 0.00019995299825513357, + "loss": 1.0919, + "step": 46 + }, + { + "epoch": 0.07, + "learning_rate": 0.00019994931351842327, + "loss": 1.1364, + "step": 47 + }, + { + "epoch": 0.07, + "learning_rate": 0.00019994548978290695, + "loss": 1.1442, + "step": 48 + }, + { + "epoch": 0.07, + "learning_rate": 0.0001999415270539023, + "loss": 1.1248, + "step": 49 + }, + { + "epoch": 0.07, + "learning_rate": 0.00019993742533692022, + "loss": 1.1366, + "step": 50 + }, + { + "epoch": 0.08, + "learning_rate": 0.00019993318463766495, + "loss": 1.1437, + "step": 51 + }, + { + "epoch": 0.08, + "learning_rate": 0.000199928804962034, + "loss": 1.1191, + "step": 52 + }, + { + "epoch": 0.08, + "learning_rate": 0.0001999242863161182, + "loss": 1.0786, + "step": 53 + }, + { + "epoch": 0.08, + 
"learning_rate": 0.00019991962870620153, + "loss": 1.1951, + "step": 54 + }, + { + "epoch": 0.08, + "learning_rate": 0.00019991483213876134, + "loss": 1.1321, + "step": 55 + }, + { + "epoch": 0.08, + "learning_rate": 0.00019990989662046818, + "loss": 1.0876, + "step": 56 + }, + { + "epoch": 0.08, + "learning_rate": 0.0001999048221581858, + "loss": 1.1794, + "step": 57 + }, + { + "epoch": 0.09, + "learning_rate": 0.00019989960875897126, + "loss": 1.1796, + "step": 58 + }, + { + "epoch": 0.09, + "learning_rate": 0.00019989425643007476, + "loss": 1.1165, + "step": 59 + }, + { + "epoch": 0.09, + "learning_rate": 0.0001998887651789398, + "loss": 1.1978, + "step": 60 + }, + { + "epoch": 0.09, + "learning_rate": 0.00019988313501320297, + "loss": 1.1693, + "step": 61 + }, + { + "epoch": 0.09, + "learning_rate": 0.00019987736594069414, + "loss": 1.1553, + "step": 62 + }, + { + "epoch": 0.09, + "learning_rate": 0.0001998714579694363, + "loss": 1.1959, + "step": 63 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019986541110764565, + "loss": 1.1945, + "step": 64 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019985922536373146, + "loss": 1.121, + "step": 65 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019985290074629627, + "loss": 1.122, + "step": 66 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019984643726413565, + "loss": 1.1435, + "step": 67 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019983983492623833, + "loss": 1.0413, + "step": 68 + }, + { + "epoch": 0.1, + "learning_rate": 0.0001998330937417861, + "loss": 1.078, + "step": 69 + }, + { + "epoch": 0.1, + "learning_rate": 0.0001998262137201539, + "loss": 1.0811, + "step": 70 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019981919487090972, + "loss": 1.1639, + "step": 71 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019981203720381463, + "loss": 1.164, + "step": 72 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019980474072882277, + "loss": 1.1006, + "step": 73 + }, + { + "epoch": 0.11, + 
"learning_rate": 0.00019979730545608126, + "loss": 1.1926, + "step": 74 + }, + { + "epoch": 0.11, + "learning_rate": 0.0001997897313959303, + "loss": 1.1129, + "step": 75 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019978201855890308, + "loss": 1.1367, + "step": 76 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019977416695572578, + "loss": 1.1495, + "step": 77 + }, + { + "epoch": 0.12, + "learning_rate": 0.0001997661765973176, + "loss": 1.1567, + "step": 78 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019975804749479062, + "loss": 1.2102, + "step": 79 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019974977965945, + "loss": 1.1175, + "step": 80 + }, + { + "epoch": 0.12, + "learning_rate": 0.0001997413731027937, + "loss": 1.1243, + "step": 81 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019973282783651263, + "loss": 1.1406, + "step": 82 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019972414387249072, + "loss": 1.09, + "step": 83 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019971532122280464, + "loss": 1.0115, + "step": 84 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019970635989972402, + "loss": 1.0328, + "step": 85 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019969725991571128, + "loss": 1.1226, + "step": 86 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019968802128342172, + "loss": 1.0747, + "step": 87 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019967864401570343, + "loss": 1.119, + "step": 88 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019966912812559732, + "loss": 1.1125, + "step": 89 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019965947362633708, + "loss": 1.0734, + "step": 90 + }, + { + "epoch": 0.14, + "learning_rate": 0.0001996496805313491, + "loss": 1.1798, + "step": 91 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019963974885425266, + "loss": 1.1461, + "step": 92 + }, + { + "epoch": 0.14, + "learning_rate": 0.0001996296786088596, + "loss": 1.0397, + "step": 93 + }, + { + "epoch": 0.14, + 
"learning_rate": 0.00019961946980917456, + "loss": 1.17, + "step": 94 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019960912246939485, + "loss": 1.0679, + "step": 95 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019959863660391045, + "loss": 1.0839, + "step": 96 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019958801222730394, + "loss": 1.0937, + "step": 97 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019957724935435063, + "loss": 1.1668, + "step": 98 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019956634800001832, + "loss": 1.0858, + "step": 99 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019955530817946748, + "loss": 1.0935, + "step": 100 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019954412990805107, + "loss": 1.1046, + "step": 101 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019953281320131468, + "loss": 1.1319, + "step": 102 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019952135807499633, + "loss": 1.1108, + "step": 103 + }, + { + "epoch": 0.15, + "learning_rate": 0.0001995097645450266, + "loss": 1.0485, + "step": 104 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019949803262752855, + "loss": 1.0862, + "step": 105 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019948616233881768, + "loss": 1.268, + "step": 106 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019947415369540189, + "loss": 1.0926, + "step": 107 + }, + { + "epoch": 0.16, + "learning_rate": 0.0001994620067139815, + "loss": 1.1427, + "step": 108 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019944972141144928, + "loss": 1.0754, + "step": 109 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019943729780489027, + "loss": 1.0044, + "step": 110 + }, + { + "epoch": 0.17, + "learning_rate": 0.0001994247359115819, + "loss": 1.1304, + "step": 111 + }, + { + "epoch": 0.17, + "learning_rate": 0.00019941203574899393, + "loss": 1.1683, + "step": 112 + }, + { + "epoch": 0.17, + "learning_rate": 0.00019939919733478838, + "loss": 1.1559, + "step": 113 + }, + { + "epoch": 
0.17, + "learning_rate": 0.00019938622068681953, + "loss": 1.1879, + "step": 114 + }, + { + "epoch": 0.17, + "learning_rate": 0.00019937310582313392, + "loss": 1.0613, + "step": 115 + }, + { + "epoch": 0.17, + "learning_rate": 0.0001993598527619703, + "loss": 1.1196, + "step": 116 + }, + { + "epoch": 0.17, + "learning_rate": 0.0001993464615217596, + "loss": 1.0762, + "step": 117 + }, + { + "epoch": 0.18, + "learning_rate": 0.00019933293212112495, + "loss": 1.1059, + "step": 118 + }, + { + "epoch": 0.18, + "learning_rate": 0.00019931926457888156, + "loss": 1.0831, + "step": 119 + }, + { + "epoch": 0.18, + "learning_rate": 0.00019930545891403678, + "loss": 1.0552, + "step": 120 + }, + { + "epoch": 0.18, + "learning_rate": 0.00019929151514579008, + "loss": 1.15, + "step": 121 + }, + { + "epoch": 0.18, + "learning_rate": 0.00019927743329353295, + "loss": 1.1038, + "step": 122 + }, + { + "epoch": 0.18, + "learning_rate": 0.0001992632133768489, + "loss": 1.067, + "step": 123 + }, + { + "epoch": 0.18, + "learning_rate": 0.0001992488554155135, + "loss": 1.1311, + "step": 124 + }, + { + "epoch": 0.19, + "learning_rate": 0.00019923435942949426, + "loss": 1.1402, + "step": 125 + }, + { + "epoch": 0.19, + "learning_rate": 0.00019921972543895066, + "loss": 1.0453, + "step": 126 + }, + { + "epoch": 0.19, + "learning_rate": 0.00019920495346423402, + "loss": 1.1567, + "step": 127 + }, + { + "epoch": 0.19, + "learning_rate": 0.00019919004352588767, + "loss": 1.137, + "step": 128 + }, + { + "epoch": 0.19, + "learning_rate": 0.0001991749956446468, + "loss": 0.9986, + "step": 129 + }, + { + "epoch": 0.19, + "learning_rate": 0.00019915980984143832, + "loss": 1.083, + "step": 130 + }, + { + "epoch": 0.2, + "learning_rate": 0.00019914448613738106, + "loss": 1.0619, + "step": 131 + }, + { + "epoch": 0.2, + "learning_rate": 0.00019912902455378556, + "loss": 1.1294, + "step": 132 + }, + { + "epoch": 0.2, + "learning_rate": 0.00019911342511215414, + "loss": 1.0965, + "step": 133 + }, + { + 
"epoch": 0.2, + "learning_rate": 0.00019909768783418086, + "loss": 1.0216, + "step": 134 + }, + { + "epoch": 0.2, + "learning_rate": 0.00019908181274175138, + "loss": 1.0081, + "step": 135 + }, + { + "epoch": 0.2, + "learning_rate": 0.0001990657998569432, + "loss": 1.0246, + "step": 136 + }, + { + "epoch": 0.2, + "learning_rate": 0.0001990496492020252, + "loss": 1.1249, + "step": 137 + }, + { + "epoch": 0.21, + "learning_rate": 0.00019903336079945804, + "loss": 1.0518, + "step": 138 + }, + { + "epoch": 0.21, + "learning_rate": 0.00019901693467189386, + "loss": 1.189, + "step": 139 + }, + { + "epoch": 0.21, + "learning_rate": 0.00019900037084217637, + "loss": 1.1475, + "step": 140 + }, + { + "epoch": 0.21, + "learning_rate": 0.0001989836693333408, + "loss": 1.2259, + "step": 141 + }, + { + "epoch": 0.21, + "learning_rate": 0.0001989668301686138, + "loss": 1.0399, + "step": 142 + }, + { + "epoch": 0.21, + "learning_rate": 0.0001989498533714135, + "loss": 1.128, + "step": 143 + }, + { + "epoch": 0.21, + "learning_rate": 0.00019893273896534936, + "loss": 1.014, + "step": 144 + }, + { + "epoch": 0.22, + "learning_rate": 0.0001989154869742223, + "loss": 1.1552, + "step": 145 + }, + { + "epoch": 0.22, + "learning_rate": 0.00019889809742202455, + "loss": 1.1159, + "step": 146 + }, + { + "epoch": 0.22, + "learning_rate": 0.0001988805703329396, + "loss": 1.0218, + "step": 147 + }, + { + "epoch": 0.22, + "learning_rate": 0.00019886290573134228, + "loss": 1.1723, + "step": 148 + }, + { + "epoch": 0.22, + "learning_rate": 0.0001988451036417986, + "loss": 1.2132, + "step": 149 + }, + { + "epoch": 0.22, + "learning_rate": 0.00019882716408906585, + "loss": 1.112, + "step": 150 + }, + { + "epoch": 0.23, + "learning_rate": 0.0001988090870980924, + "loss": 1.0856, + "step": 151 + }, + { + "epoch": 0.23, + "learning_rate": 0.0001987908726940178, + "loss": 1.0951, + "step": 152 + }, + { + "epoch": 0.23, + "learning_rate": 0.00019877252090217271, + "loss": 1.0218, + "step": 153 + }, + { 
+ "epoch": 0.23, + "learning_rate": 0.00019875403174807882, + "loss": 1.0552, + "step": 154 + }, + { + "epoch": 0.23, + "learning_rate": 0.00019873540525744887, + "loss": 1.1481, + "step": 155 + }, + { + "epoch": 0.23, + "learning_rate": 0.00019871664145618657, + "loss": 1.169, + "step": 156 + }, + { + "epoch": 0.23, + "learning_rate": 0.00019869774037038665, + "loss": 1.0802, + "step": 157 + }, + { + "epoch": 0.24, + "learning_rate": 0.0001986787020263347, + "loss": 1.0871, + "step": 158 + }, + { + "epoch": 0.24, + "learning_rate": 0.0001986595264505072, + "loss": 1.1022, + "step": 159 + }, + { + "epoch": 0.24, + "learning_rate": 0.00019864021366957147, + "loss": 1.0257, + "step": 160 + }, + { + "epoch": 0.24, + "learning_rate": 0.0001986207637103857, + "loss": 1.0986, + "step": 161 + }, + { + "epoch": 0.24, + "learning_rate": 0.00019860117659999878, + "loss": 1.0837, + "step": 162 + }, + { + "epoch": 0.24, + "learning_rate": 0.00019858145236565037, + "loss": 1.1895, + "step": 163 + }, + { + "epoch": 0.24, + "learning_rate": 0.00019856159103477086, + "loss": 1.052, + "step": 164 + }, + { + "epoch": 0.25, + "learning_rate": 0.00019854159263498123, + "loss": 1.1184, + "step": 165 + }, + { + "epoch": 0.25, + "learning_rate": 0.0001985214571940931, + "loss": 1.0895, + "step": 166 + }, + { + "epoch": 0.25, + "learning_rate": 0.00019850118474010872, + "loss": 1.0764, + "step": 167 + }, + { + "epoch": 0.25, + "learning_rate": 0.00019848077530122083, + "loss": 1.1387, + "step": 168 + }, + { + "epoch": 0.25, + "eval_loss": 1.084919810295105, + "eval_runtime": 2.6029, + "eval_samples_per_second": 419.538, + "eval_steps_per_second": 26.509, + "step": 168 + }, + { + "epoch": 0.25, + "learning_rate": 0.00019846022890581267, + "loss": 1.0826, + "step": 169 + }, + { + "epoch": 0.25, + "learning_rate": 0.000198439545582458, + "loss": 1.1366, + "step": 170 + }, + { + "epoch": 0.25, + "learning_rate": 0.000198418725359921, + "loss": 1.1349, + "step": 171 + }, + { + "epoch": 0.26, + 
"learning_rate": 0.00019839776826715614, + "loss": 1.0636, + "step": 172 + }, + { + "epoch": 0.26, + "learning_rate": 0.00019837667433330838, + "loss": 1.1216, + "step": 173 + }, + { + "epoch": 0.26, + "learning_rate": 0.0001983554435877128, + "loss": 1.1051, + "step": 174 + }, + { + "epoch": 0.26, + "learning_rate": 0.00019833407605989494, + "loss": 1.1558, + "step": 175 + }, + { + "epoch": 0.26, + "learning_rate": 0.00019831257177957044, + "loss": 1.0364, + "step": 176 + }, + { + "epoch": 0.26, + "learning_rate": 0.00019829093077664513, + "loss": 1.0665, + "step": 177 + }, + { + "epoch": 0.27, + "learning_rate": 0.00019826915308121504, + "loss": 1.1994, + "step": 178 + }, + { + "epoch": 0.27, + "learning_rate": 0.0001982472387235662, + "loss": 1.1434, + "step": 179 + }, + { + "epoch": 0.27, + "learning_rate": 0.0001982251877341748, + "loss": 1.081, + "step": 180 + }, + { + "epoch": 0.27, + "learning_rate": 0.000198203000143707, + "loss": 1.0653, + "step": 181 + }, + { + "epoch": 0.27, + "learning_rate": 0.0001981806759830189, + "loss": 1.0269, + "step": 182 + }, + { + "epoch": 0.27, + "learning_rate": 0.0001981582152831566, + "loss": 1.1167, + "step": 183 + }, + { + "epoch": 0.27, + "learning_rate": 0.00019813561807535598, + "loss": 1.0608, + "step": 184 + }, + { + "epoch": 0.28, + "learning_rate": 0.0001981128843910428, + "loss": 1.0989, + "step": 185 + }, + { + "epoch": 0.28, + "learning_rate": 0.0001980900142618327, + "loss": 1.1405, + "step": 186 + }, + { + "epoch": 0.28, + "learning_rate": 0.00019806700771953097, + "loss": 1.0359, + "step": 187 + }, + { + "epoch": 0.28, + "learning_rate": 0.0001980438647961327, + "loss": 1.1073, + "step": 188 + }, + { + "epoch": 0.28, + "learning_rate": 0.0001980205855238225, + "loss": 1.0338, + "step": 189 + }, + { + "epoch": 0.28, + "learning_rate": 0.00019799716993497475, + "loss": 1.1285, + "step": 190 + }, + { + "epoch": 0.28, + "learning_rate": 0.00019797361806215332, + "loss": 1.1277, + "step": 191 + }, + { + "epoch": 
0.29, + "learning_rate": 0.00019794992993811165, + "loss": 1.119, + "step": 192 + }, + { + "epoch": 0.29, + "learning_rate": 0.00019792610559579265, + "loss": 1.1224, + "step": 193 + }, + { + "epoch": 0.29, + "learning_rate": 0.00019790214506832868, + "loss": 1.1438, + "step": 194 + }, + { + "epoch": 0.29, + "learning_rate": 0.0001978780483890414, + "loss": 1.1462, + "step": 195 + }, + { + "epoch": 0.29, + "learning_rate": 0.00019785381559144196, + "loss": 1.042, + "step": 196 + }, + { + "epoch": 0.29, + "learning_rate": 0.00019782944670923076, + "loss": 1.1022, + "step": 197 + }, + { + "epoch": 0.3, + "learning_rate": 0.00019780494177629735, + "loss": 1.0564, + "step": 198 + }, + { + "epoch": 0.3, + "learning_rate": 0.00019778030082672068, + "loss": 1.0471, + "step": 199 + }, + { + "epoch": 0.3, + "learning_rate": 0.00019775552389476864, + "loss": 1.0636, + "step": 200 + }, + { + "epoch": 0.3, + "learning_rate": 0.0001977306110148984, + "loss": 1.0917, + "step": 201 + }, + { + "epoch": 0.3, + "learning_rate": 0.00019770556222175608, + "loss": 1.1965, + "step": 202 + }, + { + "epoch": 0.3, + "learning_rate": 0.00019768037755017685, + "loss": 1.073, + "step": 203 + }, + { + "epoch": 0.3, + "learning_rate": 0.00019765505703518496, + "loss": 1.0636, + "step": 204 + }, + { + "epoch": 0.31, + "learning_rate": 0.00019762960071199333, + "loss": 1.087, + "step": 205 + }, + { + "epoch": 0.31, + "learning_rate": 0.000197604008616004, + "loss": 1.0569, + "step": 206 + }, + { + "epoch": 0.31, + "learning_rate": 0.00019757828078280766, + "loss": 1.08, + "step": 207 + }, + { + "epoch": 0.31, + "learning_rate": 0.00019755241724818387, + "loss": 1.1536, + "step": 208 + }, + { + "epoch": 0.31, + "learning_rate": 0.00019752641804810084, + "loss": 1.1514, + "step": 209 + }, + { + "epoch": 0.31, + "learning_rate": 0.00019750028321871546, + "loss": 1.0691, + "step": 210 + }, + { + "epoch": 0.31, + "learning_rate": 0.00019747401279637325, + "loss": 1.1289, + "step": 211 + }, + { + 
"epoch": 0.32, + "learning_rate": 0.00019744760681760832, + "loss": 1.0834, + "step": 212 + }, + { + "epoch": 0.32, + "learning_rate": 0.00019742106531914328, + "loss": 1.0762, + "step": 213 + }, + { + "epoch": 0.32, + "learning_rate": 0.0001973943883378892, + "loss": 1.0913, + "step": 214 + }, + { + "epoch": 0.32, + "learning_rate": 0.00019736757591094558, + "loss": 1.132, + "step": 215 + }, + { + "epoch": 0.32, + "learning_rate": 0.00019734062807560027, + "loss": 1.0894, + "step": 216 + }, + { + "epoch": 0.32, + "learning_rate": 0.00019731354486932944, + "loss": 1.0327, + "step": 217 + }, + { + "epoch": 0.32, + "learning_rate": 0.00019728632632979746, + "loss": 1.112, + "step": 218 + }, + { + "epoch": 0.33, + "learning_rate": 0.00019725897249485704, + "loss": 1.0718, + "step": 219 + }, + { + "epoch": 0.33, + "learning_rate": 0.00019723148340254892, + "loss": 1.077, + "step": 220 + }, + { + "epoch": 0.33, + "learning_rate": 0.00019720385909110198, + "loss": 1.0335, + "step": 221 + }, + { + "epoch": 0.33, + "learning_rate": 0.00019717609959893318, + "loss": 1.0483, + "step": 222 + }, + { + "epoch": 0.33, + "learning_rate": 0.00019714820496464746, + "loss": 1.0901, + "step": 223 + }, + { + "epoch": 0.33, + "learning_rate": 0.00019712017522703764, + "loss": 0.9921, + "step": 224 + }, + { + "epoch": 0.34, + "learning_rate": 0.00019709201042508455, + "loss": 1.0829, + "step": 225 + }, + { + "epoch": 0.34, + "learning_rate": 0.0001970637105979567, + "loss": 1.0705, + "step": 226 + }, + { + "epoch": 0.34, + "learning_rate": 0.0001970352757850105, + "loss": 1.0481, + "step": 227 + }, + { + "epoch": 0.34, + "learning_rate": 0.00019700670602579008, + "loss": 0.9846, + "step": 228 + }, + { + "epoch": 0.34, + "learning_rate": 0.0001969780013600272, + "loss": 1.1492, + "step": 229 + }, + { + "epoch": 0.34, + "learning_rate": 0.00019694916182764113, + "loss": 1.1745, + "step": 230 + }, + { + "epoch": 0.34, + "learning_rate": 0.00019692018746873892, + "loss": 1.0451, + "step": 
231 + }, + { + "epoch": 0.35, + "learning_rate": 0.00019689107832361496, + "loss": 1.1217, + "step": 232 + }, + { + "epoch": 0.35, + "learning_rate": 0.00019686183443275116, + "loss": 1.0788, + "step": 233 + }, + { + "epoch": 0.35, + "learning_rate": 0.00019683245583681675, + "loss": 1.0703, + "step": 234 + }, + { + "epoch": 0.35, + "learning_rate": 0.00019680294257666837, + "loss": 1.1521, + "step": 235 + }, + { + "epoch": 0.35, + "learning_rate": 0.0001967732946933499, + "loss": 1.0659, + "step": 236 + }, + { + "epoch": 0.35, + "learning_rate": 0.00019674351222809242, + "loss": 1.0321, + "step": 237 + }, + { + "epoch": 0.35, + "learning_rate": 0.0001967135952223142, + "loss": 1.0555, + "step": 238 + }, + { + "epoch": 0.36, + "learning_rate": 0.00019668354371762066, + "loss": 1.0648, + "step": 239 + }, + { + "epoch": 0.36, + "learning_rate": 0.00019665335775580415, + "loss": 1.0723, + "step": 240 + }, + { + "epoch": 0.36, + "learning_rate": 0.0001966230373788441, + "loss": 1.0264, + "step": 241 + }, + { + "epoch": 0.36, + "learning_rate": 0.00019659258262890683, + "loss": 1.0331, + "step": 242 + }, + { + "epoch": 0.36, + "learning_rate": 0.00019656199354834558, + "loss": 1.1514, + "step": 243 + }, + { + "epoch": 0.36, + "learning_rate": 0.00019653127017970034, + "loss": 1.069, + "step": 244 + }, + { + "epoch": 0.37, + "learning_rate": 0.00019650041256569792, + "loss": 0.9623, + "step": 245 + }, + { + "epoch": 0.37, + "learning_rate": 0.00019646942074925172, + "loss": 1.0021, + "step": 246 + }, + { + "epoch": 0.37, + "learning_rate": 0.00019643829477346188, + "loss": 1.1131, + "step": 247 + }, + { + "epoch": 0.37, + "learning_rate": 0.0001964070346816151, + "loss": 1.1426, + "step": 248 + }, + { + "epoch": 0.37, + "learning_rate": 0.0001963756405171845, + "loss": 1.0761, + "step": 249 + }, + { + "epoch": 0.37, + "learning_rate": 0.00019634411232382978, + "loss": 1.1112, + "step": 250 + }, + { + "epoch": 0.37, + "learning_rate": 0.00019631245014539698, + "loss": 
1.081, + "step": 251 + }, + { + "epoch": 0.38, + "learning_rate": 0.00019628065402591845, + "loss": 1.1446, + "step": 252 + }, + { + "epoch": 0.38, + "learning_rate": 0.00019624872400961284, + "loss": 1.045, + "step": 253 + }, + { + "epoch": 0.38, + "learning_rate": 0.00019621666014088494, + "loss": 1.0337, + "step": 254 + }, + { + "epoch": 0.38, + "learning_rate": 0.00019618446246432583, + "loss": 1.1764, + "step": 255 + }, + { + "epoch": 0.38, + "learning_rate": 0.00019615213102471257, + "loss": 1.0323, + "step": 256 + }, + { + "epoch": 0.38, + "learning_rate": 0.00019611966586700823, + "loss": 1.0073, + "step": 257 + }, + { + "epoch": 0.38, + "learning_rate": 0.00019608706703636188, + "loss": 1.1615, + "step": 258 + }, + { + "epoch": 0.39, + "learning_rate": 0.00019605433457810855, + "loss": 1.1209, + "step": 259 + }, + { + "epoch": 0.39, + "learning_rate": 0.00019602146853776894, + "loss": 1.0721, + "step": 260 + }, + { + "epoch": 0.39, + "learning_rate": 0.0001959884689610497, + "loss": 1.0967, + "step": 261 + }, + { + "epoch": 0.39, + "learning_rate": 0.00019595533589384308, + "loss": 1.0284, + "step": 262 + }, + { + "epoch": 0.39, + "learning_rate": 0.00019592206938222703, + "loss": 1.0148, + "step": 263 + }, + { + "epoch": 0.39, + "learning_rate": 0.00019588866947246498, + "loss": 1.1434, + "step": 264 + }, + { + "epoch": 0.39, + "learning_rate": 0.00019585513621100603, + "loss": 1.1125, + "step": 265 + }, + { + "epoch": 0.4, + "learning_rate": 0.0001958214696444846, + "loss": 1.0812, + "step": 266 + }, + { + "epoch": 0.4, + "learning_rate": 0.00019578766981972058, + "loss": 1.0611, + "step": 267 + }, + { + "epoch": 0.4, + "learning_rate": 0.00019575373678371909, + "loss": 1.1029, + "step": 268 + }, + { + "epoch": 0.4, + "learning_rate": 0.00019571967058367064, + "loss": 1.0692, + "step": 269 + }, + { + "epoch": 0.4, + "learning_rate": 0.00019568547126695083, + "loss": 1.0581, + "step": 270 + }, + { + "epoch": 0.4, + "learning_rate": 0.00019565113888112036, 
+ "loss": 0.9841, + "step": 271 + }, + { + "epoch": 0.41, + "learning_rate": 0.00019561667347392508, + "loss": 1.0173, + "step": 272 + }, + { + "epoch": 0.41, + "learning_rate": 0.00019558207509329584, + "loss": 1.0805, + "step": 273 + }, + { + "epoch": 0.41, + "learning_rate": 0.00019554734378734824, + "loss": 1.088, + "step": 274 + }, + { + "epoch": 0.41, + "learning_rate": 0.00019551247960438296, + "loss": 1.0481, + "step": 275 + }, + { + "epoch": 0.41, + "learning_rate": 0.00019547748259288536, + "loss": 1.1747, + "step": 276 + }, + { + "epoch": 0.41, + "learning_rate": 0.0001954423528015255, + "loss": 1.0407, + "step": 277 + }, + { + "epoch": 0.41, + "learning_rate": 0.00019540709027915818, + "loss": 1.1412, + "step": 278 + }, + { + "epoch": 0.42, + "learning_rate": 0.0001953716950748227, + "loss": 1.075, + "step": 279 + }, + { + "epoch": 0.42, + "learning_rate": 0.00019533616723774294, + "loss": 0.9863, + "step": 280 + }, + { + "epoch": 0.42, + "learning_rate": 0.0001953005068173272, + "loss": 1.1426, + "step": 281 + }, + { + "epoch": 0.42, + "learning_rate": 0.0001952647138631682, + "loss": 1.0621, + "step": 282 + }, + { + "epoch": 0.42, + "learning_rate": 0.00019522878842504295, + "loss": 1.1007, + "step": 283 + }, + { + "epoch": 0.42, + "learning_rate": 0.00019519273055291266, + "loss": 1.0632, + "step": 284 + }, + { + "epoch": 0.42, + "learning_rate": 0.00019515654029692278, + "loss": 1.126, + "step": 285 + }, + { + "epoch": 0.43, + "learning_rate": 0.00019512021770740288, + "loss": 1.0946, + "step": 286 + }, + { + "epoch": 0.43, + "learning_rate": 0.0001950837628348665, + "loss": 1.0639, + "step": 287 + }, + { + "epoch": 0.43, + "learning_rate": 0.00019504717573001117, + "loss": 1.1432, + "step": 288 + }, + { + "epoch": 0.43, + "learning_rate": 0.00019501045644371832, + "loss": 1.0619, + "step": 289 + }, + { + "epoch": 0.43, + "learning_rate": 0.0001949736050270532, + "loss": 1.0597, + "step": 290 + }, + { + "epoch": 0.43, + "learning_rate": 
0.00019493662153126481, + "loss": 1.0743, + "step": 291 + }, + { + "epoch": 0.44, + "learning_rate": 0.0001948995060077859, + "loss": 1.1114, + "step": 292 + }, + { + "epoch": 0.44, + "learning_rate": 0.00019486225850823266, + "loss": 1.1435, + "step": 293 + }, + { + "epoch": 0.44, + "learning_rate": 0.000194824879084405, + "loss": 1.1396, + "step": 294 + }, + { + "epoch": 0.44, + "learning_rate": 0.00019478736778828624, + "loss": 1.1597, + "step": 295 + }, + { + "epoch": 0.44, + "learning_rate": 0.00019474972467204297, + "loss": 1.0976, + "step": 296 + }, + { + "epoch": 0.44, + "learning_rate": 0.00019471194978802533, + "loss": 1.0829, + "step": 297 + }, + { + "epoch": 0.44, + "learning_rate": 0.0001946740431887665, + "loss": 1.0437, + "step": 298 + }, + { + "epoch": 0.45, + "learning_rate": 0.00019463600492698296, + "loss": 1.0835, + "step": 299 + }, + { + "epoch": 0.45, + "learning_rate": 0.00019459783505557424, + "loss": 1.0558, + "step": 300 + }, + { + "epoch": 0.45, + "learning_rate": 0.0001945595336276229, + "loss": 1.0656, + "step": 301 + }, + { + "epoch": 0.45, + "learning_rate": 0.00019452110069639452, + "loss": 1.1487, + "step": 302 + }, + { + "epoch": 0.45, + "learning_rate": 0.00019448253631533744, + "loss": 1.1383, + "step": 303 + }, + { + "epoch": 0.45, + "learning_rate": 0.00019444384053808288, + "loss": 1.1582, + "step": 304 + }, + { + "epoch": 0.45, + "learning_rate": 0.00019440501341844483, + "loss": 0.9999, + "step": 305 + }, + { + "epoch": 0.46, + "learning_rate": 0.00019436605501041987, + "loss": 1.1317, + "step": 306 + }, + { + "epoch": 0.46, + "learning_rate": 0.00019432696536818717, + "loss": 1.0944, + "step": 307 + }, + { + "epoch": 0.46, + "learning_rate": 0.00019428774454610843, + "loss": 1.1624, + "step": 308 + }, + { + "epoch": 0.46, + "learning_rate": 0.00019424839259872778, + "loss": 1.1644, + "step": 309 + }, + { + "epoch": 0.46, + "learning_rate": 0.00019420890958077167, + "loss": 1.0486, + "step": 310 + }, + { + "epoch": 0.46, + 
"learning_rate": 0.00019416929554714888, + "loss": 1.0705, + "step": 311 + }, + { + "epoch": 0.46, + "learning_rate": 0.00019412955055295034, + "loss": 1.023, + "step": 312 + }, + { + "epoch": 0.47, + "learning_rate": 0.00019408967465344917, + "loss": 1.1144, + "step": 313 + }, + { + "epoch": 0.47, + "learning_rate": 0.00019404966790410047, + "loss": 1.0378, + "step": 314 + }, + { + "epoch": 0.47, + "learning_rate": 0.00019400953036054138, + "loss": 1.036, + "step": 315 + }, + { + "epoch": 0.47, + "learning_rate": 0.00019396926207859084, + "loss": 1.0735, + "step": 316 + }, + { + "epoch": 0.47, + "learning_rate": 0.00019392886311424973, + "loss": 1.0259, + "step": 317 + }, + { + "epoch": 0.47, + "learning_rate": 0.0001938883335237006, + "loss": 1.1603, + "step": 318 + }, + { + "epoch": 0.48, + "learning_rate": 0.0001938476733633076, + "loss": 1.1282, + "step": 319 + }, + { + "epoch": 0.48, + "learning_rate": 0.0001938068826896166, + "loss": 1.063, + "step": 320 + }, + { + "epoch": 0.48, + "learning_rate": 0.00019376596155935486, + "loss": 1.1176, + "step": 321 + }, + { + "epoch": 0.48, + "learning_rate": 0.00019372491002943112, + "loss": 1.1307, + "step": 322 + }, + { + "epoch": 0.48, + "learning_rate": 0.00019368372815693549, + "loss": 1.0412, + "step": 323 + }, + { + "epoch": 0.48, + "learning_rate": 0.00019364241599913924, + "loss": 1.1353, + "step": 324 + }, + { + "epoch": 0.48, + "learning_rate": 0.00019360097361349494, + "loss": 1.1293, + "step": 325 + }, + { + "epoch": 0.49, + "learning_rate": 0.0001935594010576362, + "loss": 1.0885, + "step": 326 + }, + { + "epoch": 0.49, + "learning_rate": 0.00019351769838937775, + "loss": 1.0944, + "step": 327 + }, + { + "epoch": 0.49, + "learning_rate": 0.00019347586566671512, + "loss": 1.1435, + "step": 328 + }, + { + "epoch": 0.49, + "learning_rate": 0.0001934339029478248, + "loss": 1.1217, + "step": 329 + }, + { + "epoch": 0.49, + "learning_rate": 0.00019339181029106404, + "loss": 1.1801, + "step": 330 + }, + { + 
"epoch": 0.49, + "learning_rate": 0.00019334958775497083, + "loss": 1.1846, + "step": 331 + }, + { + "epoch": 0.49, + "learning_rate": 0.00019330723539826375, + "loss": 1.0897, + "step": 332 + }, + { + "epoch": 0.5, + "learning_rate": 0.00019326475327984192, + "loss": 1.0643, + "step": 333 + }, + { + "epoch": 0.5, + "learning_rate": 0.00019322214145878487, + "loss": 1.0246, + "step": 334 + }, + { + "epoch": 0.5, + "learning_rate": 0.0001931793999943526, + "loss": 1.1108, + "step": 335 + }, + { + "epoch": 0.5, + "learning_rate": 0.00019313652894598543, + "loss": 1.0619, + "step": 336 + }, + { + "epoch": 0.5, + "eval_loss": 1.048388123512268, + "eval_runtime": 2.6045, + "eval_samples_per_second": 419.273, + "eval_steps_per_second": 26.493, + "step": 336 + }, + { + "epoch": 0.5, + "learning_rate": 0.00019309352837330372, + "loss": 1.0014, + "step": 337 + }, + { + "epoch": 0.5, + "learning_rate": 0.0001930503983361081, + "loss": 1.0786, + "step": 338 + }, + { + "epoch": 0.51, + "learning_rate": 0.00019300713889437926, + "loss": 1.014, + "step": 339 + }, + { + "epoch": 0.51, + "learning_rate": 0.00019296375010827773, + "loss": 1.1233, + "step": 340 + }, + { + "epoch": 0.51, + "learning_rate": 0.000192920232038144, + "loss": 1.1052, + "step": 341 + }, + { + "epoch": 0.51, + "learning_rate": 0.0001928765847444984, + "loss": 1.0138, + "step": 342 + }, + { + "epoch": 0.51, + "learning_rate": 0.00019283280828804081, + "loss": 1.1536, + "step": 343 + }, + { + "epoch": 0.51, + "learning_rate": 0.00019278890272965096, + "loss": 0.992, + "step": 344 + }, + { + "epoch": 0.51, + "learning_rate": 0.0001927448681303879, + "loss": 1.1165, + "step": 345 + }, + { + "epoch": 0.52, + "learning_rate": 0.0001927007045514903, + "loss": 1.0565, + "step": 346 + }, + { + "epoch": 0.52, + "learning_rate": 0.00019265641205437611, + "loss": 1.0664, + "step": 347 + }, + { + "epoch": 0.52, + "learning_rate": 0.0001926119907006426, + "loss": 1.0625, + "step": 348 + }, + { + "epoch": 0.52, + 
"learning_rate": 0.00019256744055206622, + "loss": 1.0393, + "step": 349 + }, + { + "epoch": 0.52, + "learning_rate": 0.0001925227616706026, + "loss": 1.125, + "step": 350 + }, + { + "epoch": 0.52, + "learning_rate": 0.00019247795411838627, + "loss": 1.0375, + "step": 351 + }, + { + "epoch": 0.52, + "learning_rate": 0.00019243301795773086, + "loss": 1.0648, + "step": 352 + }, + { + "epoch": 0.53, + "learning_rate": 0.0001923879532511287, + "loss": 1.0903, + "step": 353 + }, + { + "epoch": 0.53, + "learning_rate": 0.000192342760061251, + "loss": 1.1219, + "step": 354 + }, + { + "epoch": 0.53, + "learning_rate": 0.00019229743845094755, + "loss": 1.054, + "step": 355 + }, + { + "epoch": 0.53, + "learning_rate": 0.0001922519884832469, + "loss": 1.1206, + "step": 356 + }, + { + "epoch": 0.53, + "learning_rate": 0.00019220641022135588, + "loss": 1.1125, + "step": 357 + }, + { + "epoch": 0.53, + "learning_rate": 0.00019216070372865996, + "loss": 1.064, + "step": 358 + }, + { + "epoch": 0.54, + "learning_rate": 0.0001921148690687228, + "loss": 1.0843, + "step": 359 + }, + { + "epoch": 0.54, + "learning_rate": 0.00019206890630528634, + "loss": 1.1378, + "step": 360 + }, + { + "epoch": 0.54, + "learning_rate": 0.00019202281550227064, + "loss": 1.0399, + "step": 361 + }, + { + "epoch": 0.54, + "learning_rate": 0.0001919765967237739, + "loss": 1.1762, + "step": 362 + }, + { + "epoch": 0.54, + "learning_rate": 0.0001919302500340722, + "loss": 1.0538, + "step": 363 + }, + { + "epoch": 0.54, + "learning_rate": 0.00019188377549761963, + "loss": 1.0343, + "step": 364 + }, + { + "epoch": 0.54, + "learning_rate": 0.0001918371731790479, + "loss": 1.1027, + "step": 365 + }, + { + "epoch": 0.55, + "learning_rate": 0.00019179044314316664, + "loss": 1.036, + "step": 366 + }, + { + "epoch": 0.55, + "learning_rate": 0.00019174358545496288, + "loss": 1.041, + "step": 367 + }, + { + "epoch": 0.55, + "learning_rate": 0.00019169660017960137, + "loss": 1.0762, + "step": 368 + }, + { + "epoch": 
0.55, + "learning_rate": 0.00019164948738242409, + "loss": 1.0807, + "step": 369 + }, + { + "epoch": 0.55, + "learning_rate": 0.00019160224712895055, + "loss": 1.037, + "step": 370 + }, + { + "epoch": 0.55, + "learning_rate": 0.00019155487948487748, + "loss": 1.0625, + "step": 371 + }, + { + "epoch": 0.55, + "learning_rate": 0.0001915073845160786, + "loss": 1.062, + "step": 372 + }, + { + "epoch": 0.56, + "learning_rate": 0.00019145976228860496, + "loss": 1.1882, + "step": 373 + }, + { + "epoch": 0.56, + "learning_rate": 0.00019141201286868435, + "loss": 1.1338, + "step": 374 + }, + { + "epoch": 0.56, + "learning_rate": 0.00019136413632272163, + "loss": 1.0174, + "step": 375 + }, + { + "epoch": 0.56, + "learning_rate": 0.00019131613271729833, + "loss": 1.0585, + "step": 376 + }, + { + "epoch": 0.56, + "learning_rate": 0.00019126800211917276, + "loss": 1.0495, + "step": 377 + }, + { + "epoch": 0.56, + "learning_rate": 0.0001912197445952798, + "loss": 1.123, + "step": 378 + }, + { + "epoch": 0.56, + "learning_rate": 0.00019117136021273075, + "loss": 1.0517, + "step": 379 + }, + { + "epoch": 0.57, + "learning_rate": 0.0001911228490388136, + "loss": 1.0545, + "step": 380 + }, + { + "epoch": 0.57, + "learning_rate": 0.00019107421114099237, + "loss": 1.0302, + "step": 381 + }, + { + "epoch": 0.57, + "learning_rate": 0.00019102544658690748, + "loss": 1.0908, + "step": 382 + }, + { + "epoch": 0.57, + "learning_rate": 0.00019097655544437545, + "loss": 1.1425, + "step": 383 + }, + { + "epoch": 0.57, + "learning_rate": 0.00019092753778138886, + "loss": 1.0686, + "step": 384 + }, + { + "epoch": 0.57, + "learning_rate": 0.0001908783936661162, + "loss": 1.06, + "step": 385 + }, + { + "epoch": 0.58, + "learning_rate": 0.0001908291231669019, + "loss": 1.1296, + "step": 386 + }, + { + "epoch": 0.58, + "learning_rate": 0.00019077972635226604, + "loss": 1.1029, + "step": 387 + }, + { + "epoch": 0.58, + "learning_rate": 0.00019073020329090444, + "loss": 1.0469, + "step": 388 + }, + { 
+ "epoch": 0.58, + "learning_rate": 0.0001906805540516885, + "loss": 1.0427, + "step": 389 + }, + { + "epoch": 0.58, + "learning_rate": 0.000190630778703665, + "loss": 1.0075, + "step": 390 + }, + { + "epoch": 0.58, + "learning_rate": 0.00019058087731605624, + "loss": 1.1146, + "step": 391 + }, + { + "epoch": 0.58, + "learning_rate": 0.0001905308499582597, + "loss": 1.1161, + "step": 392 + }, + { + "epoch": 0.59, + "learning_rate": 0.00019048069669984802, + "loss": 1.1419, + "step": 393 + }, + { + "epoch": 0.59, + "learning_rate": 0.00019043041761056907, + "loss": 1.1586, + "step": 394 + }, + { + "epoch": 0.59, + "learning_rate": 0.00019038001276034557, + "loss": 1.0765, + "step": 395 + }, + { + "epoch": 0.59, + "learning_rate": 0.00019032948221927524, + "loss": 1.1225, + "step": 396 + }, + { + "epoch": 0.59, + "learning_rate": 0.0001902788260576305, + "loss": 1.0247, + "step": 397 + }, + { + "epoch": 0.59, + "learning_rate": 0.00019022804434585852, + "loss": 1.135, + "step": 398 + }, + { + "epoch": 0.59, + "learning_rate": 0.0001901771371545811, + "loss": 1.1122, + "step": 399 + }, + { + "epoch": 0.6, + "learning_rate": 0.00019012610455459446, + "loss": 1.075, + "step": 400 + }, + { + "epoch": 0.6, + "learning_rate": 0.00019007494661686935, + "loss": 1.1121, + "step": 401 + }, + { + "epoch": 0.6, + "learning_rate": 0.0001900236634125507, + "loss": 1.0531, + "step": 402 + }, + { + "epoch": 0.6, + "learning_rate": 0.00018997225501295772, + "loss": 1.0561, + "step": 403 + }, + { + "epoch": 0.6, + "learning_rate": 0.00018992072148958368, + "loss": 1.0803, + "step": 404 + }, + { + "epoch": 0.6, + "learning_rate": 0.00018986906291409595, + "loss": 1.0579, + "step": 405 + }, + { + "epoch": 0.61, + "learning_rate": 0.00018981727935833567, + "loss": 1.0614, + "step": 406 + }, + { + "epoch": 0.61, + "learning_rate": 0.0001897653708943179, + "loss": 0.9982, + "step": 407 + }, + { + "epoch": 0.61, + "learning_rate": 0.00018971333759423142, + "loss": 1.1498, + "step": 408 + }, 
+ { + "epoch": 0.61, + "learning_rate": 0.00018966117953043852, + "loss": 1.1165, + "step": 409 + }, + { + "epoch": 0.61, + "learning_rate": 0.00018960889677547505, + "loss": 1.1155, + "step": 410 + }, + { + "epoch": 0.61, + "learning_rate": 0.00018955648940205028, + "loss": 1.0017, + "step": 411 + }, + { + "epoch": 0.61, + "learning_rate": 0.00018950395748304678, + "loss": 1.0556, + "step": 412 + }, + { + "epoch": 0.62, + "learning_rate": 0.00018945130109152033, + "loss": 1.0248, + "step": 413 + }, + { + "epoch": 0.62, + "learning_rate": 0.00018939852030069981, + "loss": 1.0155, + "step": 414 + }, + { + "epoch": 0.62, + "learning_rate": 0.00018934561518398706, + "loss": 1.0248, + "step": 415 + }, + { + "epoch": 0.62, + "learning_rate": 0.00018929258581495685, + "loss": 0.9835, + "step": 416 + }, + { + "epoch": 0.62, + "learning_rate": 0.0001892394322673568, + "loss": 1.1602, + "step": 417 + }, + { + "epoch": 0.62, + "learning_rate": 0.0001891861546151071, + "loss": 1.021, + "step": 418 + }, + { + "epoch": 0.62, + "learning_rate": 0.00018913275293230069, + "loss": 1.0526, + "step": 419 + }, + { + "epoch": 0.63, + "learning_rate": 0.00018907922729320285, + "loss": 1.0585, + "step": 420 + }, + { + "epoch": 0.63, + "learning_rate": 0.00018902557777225135, + "loss": 1.0327, + "step": 421 + }, + { + "epoch": 0.63, + "learning_rate": 0.00018897180444405614, + "loss": 1.0448, + "step": 422 + }, + { + "epoch": 0.63, + "learning_rate": 0.0001889179073833995, + "loss": 1.0776, + "step": 423 + }, + { + "epoch": 0.63, + "learning_rate": 0.0001888638866652356, + "loss": 1.0748, + "step": 424 + }, + { + "epoch": 0.63, + "learning_rate": 0.0001888097423646907, + "loss": 1.0482, + "step": 425 + }, + { + "epoch": 0.63, + "learning_rate": 0.00018875547455706295, + "loss": 1.0394, + "step": 426 + }, + { + "epoch": 0.64, + "learning_rate": 0.00018870108331782217, + "loss": 1.0646, + "step": 427 + }, + { + "epoch": 0.64, + "learning_rate": 0.00018864656872260985, + "loss": 1.0338, + 
"step": 428 + }, + { + "epoch": 0.64, + "learning_rate": 0.00018859193084723913, + "loss": 0.9848, + "step": 429 + }, + { + "epoch": 0.64, + "learning_rate": 0.0001885371697676944, + "loss": 1.0587, + "step": 430 + }, + { + "epoch": 0.64, + "learning_rate": 0.0001884822855601316, + "loss": 1.0711, + "step": 431 + }, + { + "epoch": 0.64, + "learning_rate": 0.00018842727830087778, + "loss": 1.0964, + "step": 432 + }, + { + "epoch": 0.65, + "learning_rate": 0.00018837214806643115, + "loss": 1.0254, + "step": 433 + }, + { + "epoch": 0.65, + "learning_rate": 0.00018831689493346095, + "loss": 1.0748, + "step": 434 + }, + { + "epoch": 0.65, + "learning_rate": 0.00018826151897880728, + "loss": 1.0797, + "step": 435 + }, + { + "epoch": 0.65, + "learning_rate": 0.00018820602027948114, + "loss": 1.1068, + "step": 436 + }, + { + "epoch": 0.65, + "learning_rate": 0.00018815039891266418, + "loss": 1.081, + "step": 437 + }, + { + "epoch": 0.65, + "learning_rate": 0.0001880946549557086, + "loss": 1.0685, + "step": 438 + }, + { + "epoch": 0.65, + "learning_rate": 0.00018803878848613716, + "loss": 1.0916, + "step": 439 + }, + { + "epoch": 0.66, + "learning_rate": 0.00018798279958164295, + "loss": 1.115, + "step": 440 + }, + { + "epoch": 0.66, + "learning_rate": 0.00018792668832008936, + "loss": 1.0048, + "step": 441 + }, + { + "epoch": 0.66, + "learning_rate": 0.0001878704547795099, + "loss": 1.0386, + "step": 442 + }, + { + "epoch": 0.66, + "learning_rate": 0.00018781409903810821, + "loss": 1.0283, + "step": 443 + }, + { + "epoch": 0.66, + "learning_rate": 0.00018775762117425777, + "loss": 1.085, + "step": 444 + }, + { + "epoch": 0.66, + "learning_rate": 0.00018770102126650198, + "loss": 1.0582, + "step": 445 + }, + { + "epoch": 0.66, + "learning_rate": 0.00018764429939355392, + "loss": 1.0705, + "step": 446 + }, + { + "epoch": 0.67, + "learning_rate": 0.0001875874556342963, + "loss": 1.1426, + "step": 447 + }, + { + "epoch": 0.67, + "learning_rate": 0.00018753049006778132, + 
"loss": 1.0337, + "step": 448 + }, + { + "epoch": 0.67, + "learning_rate": 0.0001874734027732306, + "loss": 1.0993, + "step": 449 + }, + { + "epoch": 0.67, + "learning_rate": 0.00018741619383003507, + "loss": 1.0661, + "step": 450 + }, + { + "epoch": 0.67, + "learning_rate": 0.00018735886331775476, + "loss": 1.0564, + "step": 451 + }, + { + "epoch": 0.67, + "learning_rate": 0.00018730141131611882, + "loss": 1.0989, + "step": 452 + }, + { + "epoch": 0.68, + "learning_rate": 0.0001872438379050254, + "loss": 1.0984, + "step": 453 + }, + { + "epoch": 0.68, + "learning_rate": 0.00018718614316454133, + "loss": 1.1173, + "step": 454 + }, + { + "epoch": 0.68, + "learning_rate": 0.00018712832717490235, + "loss": 1.1005, + "step": 455 + }, + { + "epoch": 0.68, + "learning_rate": 0.00018707039001651277, + "loss": 1.0008, + "step": 456 + }, + { + "epoch": 0.68, + "learning_rate": 0.00018701233176994533, + "loss": 1.0701, + "step": 457 + }, + { + "epoch": 0.68, + "learning_rate": 0.00018695415251594123, + "loss": 1.0831, + "step": 458 + }, + { + "epoch": 0.68, + "learning_rate": 0.00018689585233541003, + "loss": 1.1165, + "step": 459 + }, + { + "epoch": 0.69, + "learning_rate": 0.00018683743130942928, + "loss": 1.0884, + "step": 460 + }, + { + "epoch": 0.69, + "learning_rate": 0.00018677888951924474, + "loss": 0.9882, + "step": 461 + }, + { + "epoch": 0.69, + "learning_rate": 0.00018672022704627002, + "loss": 1.086, + "step": 462 + }, + { + "epoch": 0.69, + "learning_rate": 0.00018666144397208668, + "loss": 1.0545, + "step": 463 + }, + { + "epoch": 0.69, + "learning_rate": 0.00018660254037844388, + "loss": 1.0274, + "step": 464 + }, + { + "epoch": 0.69, + "learning_rate": 0.0001865435163472584, + "loss": 1.0795, + "step": 465 + }, + { + "epoch": 0.69, + "learning_rate": 0.00018648437196061462, + "loss": 1.022, + "step": 466 + }, + { + "epoch": 0.7, + "learning_rate": 0.0001864251073007642, + "loss": 1.0717, + "step": 467 + }, + { + "epoch": 0.7, + "learning_rate": 
0.00018636572245012606, + "loss": 1.1501, + "step": 468 + }, + { + "epoch": 0.7, + "learning_rate": 0.0001863062174912863, + "loss": 1.1034, + "step": 469 + }, + { + "epoch": 0.7, + "learning_rate": 0.00018624659250699805, + "loss": 1.0784, + "step": 470 + }, + { + "epoch": 0.7, + "learning_rate": 0.00018618684758018136, + "loss": 1.1274, + "step": 471 + }, + { + "epoch": 0.7, + "learning_rate": 0.0001861269827939231, + "loss": 1.0643, + "step": 472 + }, + { + "epoch": 0.7, + "learning_rate": 0.00018606699823147676, + "loss": 1.1394, + "step": 473 + }, + { + "epoch": 0.71, + "learning_rate": 0.00018600689397626246, + "loss": 0.9665, + "step": 474 + }, + { + "epoch": 0.71, + "learning_rate": 0.00018594667011186678, + "loss": 1.058, + "step": 475 + }, + { + "epoch": 0.71, + "learning_rate": 0.00018588632672204264, + "loss": 1.0706, + "step": 476 + }, + { + "epoch": 0.71, + "learning_rate": 0.0001858258638907091, + "loss": 1.0414, + "step": 477 + }, + { + "epoch": 0.71, + "learning_rate": 0.00018576528170195146, + "loss": 1.1, + "step": 478 + }, + { + "epoch": 0.71, + "learning_rate": 0.00018570458024002093, + "loss": 1.1114, + "step": 479 + }, + { + "epoch": 0.72, + "learning_rate": 0.00018564375958933459, + "loss": 1.0596, + "step": 480 + }, + { + "epoch": 0.72, + "learning_rate": 0.0001855828198344753, + "loss": 1.0897, + "step": 481 + }, + { + "epoch": 0.72, + "learning_rate": 0.00018552176106019155, + "loss": 1.0316, + "step": 482 + }, + { + "epoch": 0.72, + "learning_rate": 0.00018546058335139733, + "loss": 1.0516, + "step": 483 + }, + { + "epoch": 0.72, + "learning_rate": 0.0001853992867931721, + "loss": 1.0477, + "step": 484 + }, + { + "epoch": 0.72, + "learning_rate": 0.00018533787147076048, + "loss": 1.0432, + "step": 485 + }, + { + "epoch": 0.72, + "learning_rate": 0.00018527633746957234, + "loss": 1.0568, + "step": 486 + }, + { + "epoch": 0.73, + "learning_rate": 0.00018521468487518264, + "loss": 1.114, + "step": 487 + }, + { + "epoch": 0.73, + 
"learning_rate": 0.00018515291377333112, + "loss": 1.0664, + "step": 488 + }, + { + "epoch": 0.73, + "learning_rate": 0.0001850910242499225, + "loss": 1.0162, + "step": 489 + }, + { + "epoch": 0.73, + "learning_rate": 0.0001850290163910261, + "loss": 1.0829, + "step": 490 + }, + { + "epoch": 0.73, + "learning_rate": 0.00018496689028287572, + "loss": 1.1078, + "step": 491 + }, + { + "epoch": 0.73, + "learning_rate": 0.0001849046460118698, + "loss": 1.0533, + "step": 492 + }, + { + "epoch": 0.73, + "learning_rate": 0.00018484228366457095, + "loss": 1.0923, + "step": 493 + }, + { + "epoch": 0.74, + "learning_rate": 0.00018477980332770607, + "loss": 1.0516, + "step": 494 + }, + { + "epoch": 0.74, + "learning_rate": 0.00018471720508816614, + "loss": 0.9826, + "step": 495 + }, + { + "epoch": 0.74, + "learning_rate": 0.00018465448903300606, + "loss": 1.1581, + "step": 496 + }, + { + "epoch": 0.74, + "learning_rate": 0.0001845916552494446, + "loss": 1.1268, + "step": 497 + }, + { + "epoch": 0.74, + "learning_rate": 0.00018452870382486432, + "loss": 1.0483, + "step": 498 + }, + { + "epoch": 0.74, + "learning_rate": 0.00018446563484681127, + "loss": 1.1792, + "step": 499 + }, + { + "epoch": 0.75, + "learning_rate": 0.00018440244840299506, + "loss": 1.0918, + "step": 500 + }, + { + "epoch": 0.75, + "learning_rate": 0.0001843391445812886, + "loss": 0.9691, + "step": 501 + }, + { + "epoch": 0.75, + "learning_rate": 0.00018427572346972805, + "loss": 1.0581, + "step": 502 + }, + { + "epoch": 0.75, + "learning_rate": 0.0001842121851565128, + "loss": 1.0072, + "step": 503 + }, + { + "epoch": 0.75, + "learning_rate": 0.00018414852973000503, + "loss": 0.9686, + "step": 504 + }, + { + "epoch": 0.75, + "eval_loss": 1.0276715755462646, + "eval_runtime": 2.6054, + "eval_samples_per_second": 419.124, + "eval_steps_per_second": 26.483, + "step": 504 + }, + { + "epoch": 0.75, + "learning_rate": 0.00018408475727872995, + "loss": 1.1221, + "step": 505 + }, + { + "epoch": 0.75, + 
"learning_rate": 0.00018402086789137546, + "loss": 1.087, + "step": 506 + }, + { + "epoch": 0.76, + "learning_rate": 0.00018395686165679202, + "loss": 1.0599, + "step": 507 + }, + { + "epoch": 0.76, + "learning_rate": 0.00018389273866399275, + "loss": 1.1844, + "step": 508 + }, + { + "epoch": 0.76, + "learning_rate": 0.00018382849900215294, + "loss": 1.046, + "step": 509 + }, + { + "epoch": 0.76, + "learning_rate": 0.00018376414276061032, + "loss": 0.9691, + "step": 510 + }, + { + "epoch": 0.76, + "learning_rate": 0.00018369967002886464, + "loss": 1.0996, + "step": 511 + }, + { + "epoch": 0.76, + "learning_rate": 0.0001836350808965776, + "loss": 1.083, + "step": 512 + }, + { + "epoch": 0.76, + "learning_rate": 0.00018357037545357297, + "loss": 1.0371, + "step": 513 + }, + { + "epoch": 0.77, + "learning_rate": 0.00018350555378983608, + "loss": 1.018, + "step": 514 + }, + { + "epoch": 0.77, + "learning_rate": 0.00018344061599551398, + "loss": 1.095, + "step": 515 + }, + { + "epoch": 0.77, + "learning_rate": 0.00018337556216091517, + "loss": 1.0871, + "step": 516 + }, + { + "epoch": 0.77, + "learning_rate": 0.0001833103923765096, + "loss": 1.0774, + "step": 517 + }, + { + "epoch": 0.77, + "learning_rate": 0.00018324510673292842, + "loss": 1.0337, + "step": 518 + }, + { + "epoch": 0.77, + "learning_rate": 0.0001831797053209639, + "loss": 1.0059, + "step": 519 + }, + { + "epoch": 0.77, + "learning_rate": 0.00018311418823156936, + "loss": 1.0744, + "step": 520 + }, + { + "epoch": 0.78, + "learning_rate": 0.00018304855555585894, + "loss": 0.9732, + "step": 521 + }, + { + "epoch": 0.78, + "learning_rate": 0.00018298280738510752, + "loss": 1.1176, + "step": 522 + }, + { + "epoch": 0.78, + "learning_rate": 0.00018291694381075056, + "loss": 1.1485, + "step": 523 + }, + { + "epoch": 0.78, + "learning_rate": 0.00018285096492438424, + "loss": 1.1044, + "step": 524 + }, + { + "epoch": 0.78, + "learning_rate": 0.00018278487081776476, + "loss": 0.9812, + "step": 525 + }, + { + 
"epoch": 0.78, + "learning_rate": 0.00018271866158280884, + "loss": 1.0966, + "step": 526 + }, + { + "epoch": 0.79, + "learning_rate": 0.0001826523373115931, + "loss": 1.2406, + "step": 527 + }, + { + "epoch": 0.79, + "learning_rate": 0.0001825858980963543, + "loss": 1.0727, + "step": 528 + }, + { + "epoch": 0.79, + "learning_rate": 0.000182519344029489, + "loss": 0.9966, + "step": 529 + }, + { + "epoch": 0.79, + "learning_rate": 0.00018245267520355346, + "loss": 1.081, + "step": 530 + }, + { + "epoch": 0.79, + "learning_rate": 0.00018238589171126353, + "loss": 1.1104, + "step": 531 + }, + { + "epoch": 0.79, + "learning_rate": 0.00018231899364549455, + "loss": 1.0535, + "step": 532 + }, + { + "epoch": 0.79, + "learning_rate": 0.00018225198109928114, + "loss": 1.0801, + "step": 533 + }, + { + "epoch": 0.8, + "learning_rate": 0.00018218485416581726, + "loss": 1.0726, + "step": 534 + }, + { + "epoch": 0.8, + "learning_rate": 0.00018211761293845585, + "loss": 1.0923, + "step": 535 + }, + { + "epoch": 0.8, + "learning_rate": 0.00018205025751070875, + "loss": 1.0551, + "step": 536 + }, + { + "epoch": 0.8, + "learning_rate": 0.00018198278797624675, + "loss": 1.0495, + "step": 537 + }, + { + "epoch": 0.8, + "learning_rate": 0.0001819152044288992, + "loss": 1.0589, + "step": 538 + }, + { + "epoch": 0.8, + "learning_rate": 0.00018184750696265408, + "loss": 1.0487, + "step": 539 + }, + { + "epoch": 0.8, + "learning_rate": 0.0001817796956716578, + "loss": 1.0491, + "step": 540 + }, + { + "epoch": 0.81, + "learning_rate": 0.000181711770650215, + "loss": 1.0981, + "step": 541 + }, + { + "epoch": 0.81, + "learning_rate": 0.00018164373199278856, + "loss": 1.1706, + "step": 542 + }, + { + "epoch": 0.81, + "learning_rate": 0.0001815755797939994, + "loss": 1.1024, + "step": 543 + }, + { + "epoch": 0.81, + "learning_rate": 0.00018150731414862622, + "loss": 1.0488, + "step": 544 + }, + { + "epoch": 0.81, + "learning_rate": 0.00018143893515160564, + "loss": 1.165, + "step": 545 + }, + { 
+ "epoch": 0.81, + "learning_rate": 0.00018137044289803181, + "loss": 1.0346, + "step": 546 + }, + { + "epoch": 0.82, + "learning_rate": 0.00018130183748315645, + "loss": 1.1179, + "step": 547 + }, + { + "epoch": 0.82, + "learning_rate": 0.0001812331190023886, + "loss": 1.0027, + "step": 548 + }, + { + "epoch": 0.82, + "learning_rate": 0.00018116428755129459, + "loss": 1.1106, + "step": 549 + }, + { + "epoch": 0.82, + "learning_rate": 0.00018109534322559783, + "loss": 1.0479, + "step": 550 + }, + { + "epoch": 0.82, + "learning_rate": 0.00018102628612117865, + "loss": 1.0046, + "step": 551 + }, + { + "epoch": 0.82, + "learning_rate": 0.0001809571163340744, + "loss": 0.9883, + "step": 552 + }, + { + "epoch": 0.82, + "learning_rate": 0.00018088783396047893, + "loss": 1.1018, + "step": 553 + }, + { + "epoch": 0.83, + "learning_rate": 0.00018081843909674276, + "loss": 1.1389, + "step": 554 + }, + { + "epoch": 0.83, + "learning_rate": 0.00018074893183937283, + "loss": 1.0751, + "step": 555 + }, + { + "epoch": 0.83, + "learning_rate": 0.00018067931228503246, + "loss": 1.1475, + "step": 556 + }, + { + "epoch": 0.83, + "learning_rate": 0.00018060958053054096, + "loss": 1.0829, + "step": 557 + }, + { + "epoch": 0.83, + "learning_rate": 0.00018053973667287387, + "loss": 1.0272, + "step": 558 + }, + { + "epoch": 0.83, + "learning_rate": 0.00018046978080916252, + "loss": 1.0668, + "step": 559 + }, + { + "epoch": 0.83, + "learning_rate": 0.00018039971303669407, + "loss": 1.0988, + "step": 560 + }, + { + "epoch": 0.84, + "learning_rate": 0.00018032953345291123, + "loss": 1.0339, + "step": 561 + }, + { + "epoch": 0.84, + "learning_rate": 0.0001802592421554123, + "loss": 1.0654, + "step": 562 + }, + { + "epoch": 0.84, + "learning_rate": 0.00018018883924195085, + "loss": 1.0157, + "step": 563 + }, + { + "epoch": 0.84, + "learning_rate": 0.00018011832481043576, + "loss": 1.0738, + "step": 564 + }, + { + "epoch": 0.84, + "learning_rate": 0.0001800476989589309, + "loss": 1.0742, + 
"step": 565 + }, + { + "epoch": 0.84, + "learning_rate": 0.0001799769617856552, + "loss": 0.9861, + "step": 566 + }, + { + "epoch": 0.85, + "learning_rate": 0.0001799061133889823, + "loss": 1.0788, + "step": 567 + }, + { + "epoch": 0.85, + "learning_rate": 0.00017983515386744061, + "loss": 1.0539, + "step": 568 + }, + { + "epoch": 0.85, + "learning_rate": 0.00017976408331971298, + "loss": 1.0875, + "step": 569 + }, + { + "epoch": 0.85, + "learning_rate": 0.0001796929018446368, + "loss": 1.0765, + "step": 570 + }, + { + "epoch": 0.85, + "learning_rate": 0.00017962160954120354, + "loss": 1.1336, + "step": 571 + }, + { + "epoch": 0.85, + "learning_rate": 0.000179550206508559, + "loss": 0.9674, + "step": 572 + }, + { + "epoch": 0.85, + "learning_rate": 0.00017947869284600282, + "loss": 1.0607, + "step": 573 + }, + { + "epoch": 0.86, + "learning_rate": 0.0001794070686529886, + "loss": 0.9959, + "step": 574 + }, + { + "epoch": 0.86, + "learning_rate": 0.00017933533402912354, + "loss": 1.038, + "step": 575 + }, + { + "epoch": 0.86, + "learning_rate": 0.0001792634890741685, + "loss": 1.1342, + "step": 576 + }, + { + "epoch": 0.86, + "learning_rate": 0.00017919153388803774, + "loss": 1.0941, + "step": 577 + }, + { + "epoch": 0.86, + "learning_rate": 0.00017911946857079888, + "loss": 1.1286, + "step": 578 + }, + { + "epoch": 0.86, + "learning_rate": 0.00017904729322267256, + "loss": 1.0354, + "step": 579 + }, + { + "epoch": 0.86, + "learning_rate": 0.0001789750079440326, + "loss": 1.1314, + "step": 580 + }, + { + "epoch": 0.87, + "learning_rate": 0.00017890261283540562, + "loss": 1.0365, + "step": 581 + }, + { + "epoch": 0.87, + "learning_rate": 0.00017883010799747099, + "loss": 1.091, + "step": 582 + }, + { + "epoch": 0.87, + "learning_rate": 0.00017875749353106062, + "loss": 0.9995, + "step": 583 + }, + { + "epoch": 0.87, + "learning_rate": 0.000178684769537159, + "loss": 1.0435, + "step": 584 + }, + { + "epoch": 0.87, + "learning_rate": 0.00017861193611690287, + "loss": 
1.0555, + "step": 585 + }, + { + "epoch": 0.87, + "learning_rate": 0.00017853899337158112, + "loss": 1.0637, + "step": 586 + }, + { + "epoch": 0.87, + "learning_rate": 0.00017846594140263474, + "loss": 1.064, + "step": 587 + }, + { + "epoch": 0.88, + "learning_rate": 0.00017839278031165658, + "loss": 0.9879, + "step": 588 + }, + { + "epoch": 0.88, + "learning_rate": 0.00017831951020039126, + "loss": 1.0846, + "step": 589 + }, + { + "epoch": 0.88, + "learning_rate": 0.000178246131170735, + "loss": 1.0373, + "step": 590 + }, + { + "epoch": 0.88, + "learning_rate": 0.00017817264332473546, + "loss": 1.0377, + "step": 591 + }, + { + "epoch": 0.88, + "learning_rate": 0.00017809904676459177, + "loss": 1.0932, + "step": 592 + }, + { + "epoch": 0.88, + "learning_rate": 0.00017802534159265404, + "loss": 1.085, + "step": 593 + }, + { + "epoch": 0.89, + "learning_rate": 0.0001779515279114236, + "loss": 1.0975, + "step": 594 + }, + { + "epoch": 0.89, + "learning_rate": 0.0001778776058235526, + "loss": 1.1283, + "step": 595 + }, + { + "epoch": 0.89, + "learning_rate": 0.00017780357543184397, + "loss": 1.0652, + "step": 596 + }, + { + "epoch": 0.89, + "learning_rate": 0.00017772943683925122, + "loss": 1.0336, + "step": 597 + }, + { + "epoch": 0.89, + "learning_rate": 0.00017765519014887842, + "loss": 0.9761, + "step": 598 + }, + { + "epoch": 0.89, + "learning_rate": 0.0001775808354639799, + "loss": 1.0688, + "step": 599 + }, + { + "epoch": 0.89, + "learning_rate": 0.00017750637288796016, + "loss": 1.1031, + "step": 600 + }, + { + "epoch": 0.9, + "learning_rate": 0.00017743180252437383, + "loss": 1.083, + "step": 601 + }, + { + "epoch": 0.9, + "learning_rate": 0.00017735712447692538, + "loss": 1.1612, + "step": 602 + }, + { + "epoch": 0.9, + "learning_rate": 0.00017728233884946903, + "loss": 1.1618, + "step": 603 + }, + { + "epoch": 0.9, + "learning_rate": 0.00017720744574600863, + "loss": 1.144, + "step": 604 + }, + { + "epoch": 0.9, + "learning_rate": 0.0001771324452706975, + 
"loss": 1.1174, + "step": 605 + }, + { + "epoch": 0.9, + "learning_rate": 0.00017705733752783825, + "loss": 0.9728, + "step": 606 + }, + { + "epoch": 0.9, + "learning_rate": 0.0001769821226218827, + "loss": 1.0599, + "step": 607 + }, + { + "epoch": 0.91, + "learning_rate": 0.0001769068006574317, + "loss": 1.0639, + "step": 608 + }, + { + "epoch": 0.91, + "learning_rate": 0.00017683137173923495, + "loss": 1.1278, + "step": 609 + }, + { + "epoch": 0.91, + "learning_rate": 0.00017675583597219095, + "loss": 0.9925, + "step": 610 + }, + { + "epoch": 0.91, + "learning_rate": 0.0001766801934613467, + "loss": 1.0457, + "step": 611 + }, + { + "epoch": 0.91, + "learning_rate": 0.0001766044443118978, + "loss": 1.0348, + "step": 612 + }, + { + "epoch": 0.91, + "learning_rate": 0.000176528588629188, + "loss": 1.022, + "step": 613 + }, + { + "epoch": 0.92, + "learning_rate": 0.00017645262651870926, + "loss": 1.0027, + "step": 614 + }, + { + "epoch": 0.92, + "learning_rate": 0.00017637655808610156, + "loss": 1.0491, + "step": 615 + }, + { + "epoch": 0.92, + "learning_rate": 0.00017630038343715275, + "loss": 1.0413, + "step": 616 + }, + { + "epoch": 0.92, + "learning_rate": 0.00017622410267779834, + "loss": 1.0358, + "step": 617 + }, + { + "epoch": 0.92, + "learning_rate": 0.00017614771591412148, + "loss": 1.1125, + "step": 618 + }, + { + "epoch": 0.92, + "learning_rate": 0.00017607122325235267, + "loss": 1.1185, + "step": 619 + }, + { + "epoch": 0.92, + "learning_rate": 0.00017599462479886974, + "loss": 1.0738, + "step": 620 + }, + { + "epoch": 0.93, + "learning_rate": 0.00017591792066019765, + "loss": 1.102, + "step": 621 + }, + { + "epoch": 0.93, + "learning_rate": 0.00017584111094300827, + "loss": 1.065, + "step": 622 + }, + { + "epoch": 0.93, + "learning_rate": 0.0001757641957541203, + "loss": 1.0514, + "step": 623 + }, + { + "epoch": 0.93, + "learning_rate": 0.0001756871752004992, + "loss": 1.0396, + "step": 624 + }, + { + "epoch": 0.93, + "learning_rate": 
0.00017561004938925688, + "loss": 1.1027, + "step": 625 + }, + { + "epoch": 0.93, + "learning_rate": 0.00017553281842765169, + "loss": 1.0223, + "step": 626 + }, + { + "epoch": 0.93, + "learning_rate": 0.00017545548242308816, + "loss": 1.1793, + "step": 627 + }, + { + "epoch": 0.94, + "learning_rate": 0.00017537804148311695, + "loss": 1.0642, + "step": 628 + }, + { + "epoch": 0.94, + "learning_rate": 0.00017530049571543464, + "loss": 1.0682, + "step": 629 + }, + { + "epoch": 0.94, + "learning_rate": 0.00017522284522788353, + "loss": 1.0476, + "step": 630 + }, + { + "epoch": 0.94, + "learning_rate": 0.00017514509012845164, + "loss": 1.1064, + "step": 631 + }, + { + "epoch": 0.94, + "learning_rate": 0.00017506723052527242, + "loss": 1.0258, + "step": 632 + }, + { + "epoch": 0.94, + "learning_rate": 0.00017498926652662476, + "loss": 1.1954, + "step": 633 + }, + { + "epoch": 0.94, + "learning_rate": 0.0001749111982409325, + "loss": 1.0637, + "step": 634 + }, + { + "epoch": 0.95, + "learning_rate": 0.00017483302577676475, + "loss": 0.9685, + "step": 635 + }, + { + "epoch": 0.95, + "learning_rate": 0.00017475474924283536, + "loss": 1.0465, + "step": 636 + }, + { + "epoch": 0.95, + "learning_rate": 0.000174676368748003, + "loss": 1.0161, + "step": 637 + }, + { + "epoch": 0.95, + "learning_rate": 0.00017459788440127083, + "loss": 1.0479, + "step": 638 + }, + { + "epoch": 0.95, + "learning_rate": 0.00017451929631178648, + "loss": 1.1166, + "step": 639 + }, + { + "epoch": 0.95, + "learning_rate": 0.0001744406045888419, + "loss": 1.0634, + "step": 640 + }, + { + "epoch": 0.96, + "learning_rate": 0.00017436180934187308, + "loss": 1.0826, + "step": 641 + }, + { + "epoch": 0.96, + "learning_rate": 0.00017428291068046, + "loss": 1.07, + "step": 642 + }, + { + "epoch": 0.96, + "learning_rate": 0.00017420390871432647, + "loss": 1.1167, + "step": 643 + }, + { + "epoch": 0.96, + "learning_rate": 0.00017412480355334005, + "loss": 1.0347, + "step": 644 + }, + { + "epoch": 0.96, + 
"learning_rate": 0.00017404559530751162, + "loss": 1.0393, + "step": 645 + }, + { + "epoch": 0.96, + "learning_rate": 0.00017396628408699555, + "loss": 1.1108, + "step": 646 + }, + { + "epoch": 0.96, + "learning_rate": 0.00017388687000208946, + "loss": 1.006, + "step": 647 + }, + { + "epoch": 0.97, + "learning_rate": 0.0001738073531632339, + "loss": 1.0932, + "step": 648 + }, + { + "epoch": 0.97, + "learning_rate": 0.0001737277336810124, + "loss": 1.0123, + "step": 649 + }, + { + "epoch": 0.97, + "learning_rate": 0.00017364801166615124, + "loss": 1.1273, + "step": 650 + }, + { + "epoch": 0.97, + "learning_rate": 0.0001735681872295192, + "loss": 0.9893, + "step": 651 + }, + { + "epoch": 0.97, + "learning_rate": 0.0001734882604821276, + "loss": 1.0699, + "step": 652 + }, + { + "epoch": 0.97, + "learning_rate": 0.00017340823153513002, + "loss": 1.0901, + "step": 653 + }, + { + "epoch": 0.97, + "learning_rate": 0.00017332810049982208, + "loss": 1.0212, + "step": 654 + }, + { + "epoch": 0.98, + "learning_rate": 0.00017324786748764155, + "loss": 0.9898, + "step": 655 + }, + { + "epoch": 0.98, + "learning_rate": 0.00017316753261016783, + "loss": 1.0899, + "step": 656 + }, + { + "epoch": 0.98, + "learning_rate": 0.00017308709597912213, + "loss": 1.085, + "step": 657 + }, + { + "epoch": 0.98, + "learning_rate": 0.00017300655770636708, + "loss": 1.091, + "step": 658 + }, + { + "epoch": 0.98, + "learning_rate": 0.00017292591790390665, + "loss": 1.0502, + "step": 659 + }, + { + "epoch": 0.98, + "learning_rate": 0.0001728451766838861, + "loss": 1.2131, + "step": 660 + }, + { + "epoch": 0.99, + "learning_rate": 0.00017276433415859167, + "loss": 1.1256, + "step": 661 + }, + { + "epoch": 0.99, + "learning_rate": 0.00017268339044045042, + "loss": 1.0577, + "step": 662 + }, + { + "epoch": 0.99, + "learning_rate": 0.00017260234564203032, + "loss": 1.0012, + "step": 663 + }, + { + "epoch": 0.99, + "learning_rate": 0.00017252119987603973, + "loss": 1.0611, + "step": 664 + }, + { + 
"epoch": 0.99, + "learning_rate": 0.00017243995325532755, + "loss": 1.1251, + "step": 665 + }, + { + "epoch": 0.99, + "learning_rate": 0.00017235860589288277, + "loss": 1.0959, + "step": 666 + }, + { + "epoch": 0.99, + "learning_rate": 0.0001722771579018347, + "loss": 1.1413, + "step": 667 + }, + { + "epoch": 1.0, + "learning_rate": 0.00017219560939545246, + "loss": 1.0728, + "step": 668 + }, + { + "epoch": 1.0, + "learning_rate": 0.00017211396048714498, + "loss": 1.0461, + "step": 669 + }, + { + "epoch": 1.0, + "learning_rate": 0.0001720322112904608, + "loss": 1.1084, + "step": 670 + }, + { + "epoch": 1.0, + "learning_rate": 0.00017195036191908797, + "loss": 1.1316, + "step": 671 + }, + { + "epoch": 1.0, + "learning_rate": 0.00017186841248685383, + "loss": 1.0816, + "step": 672 + }, + { + "epoch": 1.0, + "eval_loss": 1.0170178413391113, + "eval_runtime": 2.6119, + "eval_samples_per_second": 418.079, + "eval_steps_per_second": 26.417, + "step": 672 + }, + { + "epoch": 1.0, + "learning_rate": 0.0001717863631077249, + "loss": 1.0711, + "step": 673 + }, + { + "epoch": 1.0, + "learning_rate": 0.00017170421389580667, + "loss": 1.1245, + "step": 674 + }, + { + "epoch": 1.01, + "learning_rate": 0.00017162196496534342, + "loss": 1.0519, + "step": 675 + }, + { + "epoch": 1.01, + "learning_rate": 0.0001715396164307182, + "loss": 1.104, + "step": 676 + }, + { + "epoch": 1.01, + "learning_rate": 0.00017145716840645254, + "loss": 1.1193, + "step": 677 + }, + { + "epoch": 1.01, + "learning_rate": 0.00017137462100720631, + "loss": 1.1238, + "step": 678 + }, + { + "epoch": 1.01, + "learning_rate": 0.00017129197434777763, + "loss": 1.004, + "step": 679 + }, + { + "epoch": 1.01, + "learning_rate": 0.00017120922854310257, + "loss": 1.0426, + "step": 680 + }, + { + "epoch": 1.01, + "learning_rate": 0.00017112638370825515, + "loss": 1.0308, + "step": 681 + }, + { + "epoch": 1.02, + "learning_rate": 0.00017104343995844715, + "loss": 1.0892, + "step": 682 + }, + { + "epoch": 1.02, + 
"learning_rate": 0.00017096039740902784, + "loss": 1.0115, + "step": 683 + }, + { + "epoch": 1.02, + "learning_rate": 0.00017087725617548385, + "loss": 1.1011, + "step": 684 + }, + { + "epoch": 1.0, + "learning_rate": 0.00017079401637343914, + "loss": 0.9829, + "step": 685 + }, + { + "epoch": 1.0, + "learning_rate": 0.00017071067811865476, + "loss": 0.9738, + "step": 686 + }, + { + "epoch": 1.0, + "learning_rate": 0.0001706272415270286, + "loss": 1.0563, + "step": 687 + }, + { + "epoch": 1.01, + "learning_rate": 0.00017054370671459532, + "loss": 1.0153, + "step": 688 + }, + { + "epoch": 1.01, + "learning_rate": 0.0001704600737975262, + "loss": 1.0638, + "step": 689 + }, + { + "epoch": 1.01, + "learning_rate": 0.000170376342892129, + "loss": 1.0053, + "step": 690 + }, + { + "epoch": 1.01, + "learning_rate": 0.00017029251411484765, + "loss": 1.0178, + "step": 691 + }, + { + "epoch": 1.01, + "learning_rate": 0.00017020858758226229, + "loss": 1.0755, + "step": 692 + }, + { + "epoch": 1.01, + "learning_rate": 0.00017012456341108885, + "loss": 0.9365, + "step": 693 + }, + { + "epoch": 1.01, + "learning_rate": 0.00017004044171817925, + "loss": 1.0666, + "step": 694 + }, + { + "epoch": 1.02, + "learning_rate": 0.00016995622262052092, + "loss": 1.041, + "step": 695 + }, + { + "epoch": 1.02, + "learning_rate": 0.00016987190623523674, + "loss": 1.0387, + "step": 696 + }, + { + "epoch": 1.02, + "learning_rate": 0.00016978749267958495, + "loss": 0.9332, + "step": 697 + }, + { + "epoch": 1.02, + "learning_rate": 0.00016970298207095885, + "loss": 1.0737, + "step": 698 + }, + { + "epoch": 1.02, + "learning_rate": 0.00016961837452688676, + "loss": 0.992, + "step": 699 + }, + { + "epoch": 1.02, + "learning_rate": 0.00016953367016503182, + "loss": 0.9997, + "step": 700 + }, + { + "epoch": 1.03, + "learning_rate": 0.00016944886910319173, + "loss": 1.1054, + "step": 701 + }, + { + "epoch": 1.03, + "learning_rate": 0.00016936397145929878, + "loss": 0.9876, + "step": 702 + }, + { + 
"epoch": 1.03, + "learning_rate": 0.00016927897735141952, + "loss": 1.0158, + "step": 703 + }, + { + "epoch": 1.03, + "learning_rate": 0.00016919388689775464, + "loss": 0.9771, + "step": 704 + }, + { + "epoch": 1.03, + "learning_rate": 0.00016910870021663883, + "loss": 0.942, + "step": 705 + }, + { + "epoch": 1.03, + "learning_rate": 0.00016902341742654065, + "loss": 1.0217, + "step": 706 + }, + { + "epoch": 1.03, + "learning_rate": 0.00016893803864606222, + "loss": 1.0346, + "step": 707 + }, + { + "epoch": 1.04, + "learning_rate": 0.00016885256399393924, + "loss": 0.9891, + "step": 708 + }, + { + "epoch": 1.04, + "learning_rate": 0.00016876699358904068, + "loss": 0.9697, + "step": 709 + }, + { + "epoch": 1.04, + "learning_rate": 0.00016868132755036875, + "loss": 1.0062, + "step": 710 + }, + { + "epoch": 1.04, + "learning_rate": 0.00016859556599705856, + "loss": 0.9822, + "step": 711 + }, + { + "epoch": 1.04, + "learning_rate": 0.0001685097090483781, + "loss": 1.0921, + "step": 712 + }, + { + "epoch": 1.04, + "learning_rate": 0.00016842375682372805, + "loss": 1.0126, + "step": 713 + }, + { + "epoch": 1.04, + "learning_rate": 0.00016833770944264153, + "loss": 1.0043, + "step": 714 + }, + { + "epoch": 1.05, + "learning_rate": 0.00016825156702478407, + "loss": 0.952, + "step": 715 + }, + { + "epoch": 1.05, + "learning_rate": 0.00016816532968995328, + "loss": 1.0423, + "step": 716 + }, + { + "epoch": 1.05, + "learning_rate": 0.00016807899755807886, + "loss": 1.0465, + "step": 717 + }, + { + "epoch": 1.05, + "learning_rate": 0.00016799257074922224, + "loss": 0.9827, + "step": 718 + }, + { + "epoch": 1.05, + "learning_rate": 0.00016790604938357663, + "loss": 0.9798, + "step": 719 + }, + { + "epoch": 1.05, + "learning_rate": 0.00016781943358146664, + "loss": 1.0268, + "step": 720 + }, + { + "epoch": 1.06, + "learning_rate": 0.00016773272346334828, + "loss": 1.0007, + "step": 721 + }, + { + "epoch": 1.06, + "learning_rate": 0.0001676459191498087, + "loss": 0.9989, + 
"step": 722 + }, + { + "epoch": 1.06, + "learning_rate": 0.00016755902076156604, + "loss": 0.9374, + "step": 723 + }, + { + "epoch": 1.06, + "learning_rate": 0.00016747202841946928, + "loss": 1.0031, + "step": 724 + }, + { + "epoch": 1.06, + "learning_rate": 0.00016738494224449802, + "loss": 0.9751, + "step": 725 + }, + { + "epoch": 1.06, + "learning_rate": 0.00016729776235776246, + "loss": 1.1055, + "step": 726 + }, + { + "epoch": 1.06, + "learning_rate": 0.00016721048888050302, + "loss": 1.0527, + "step": 727 + }, + { + "epoch": 1.07, + "learning_rate": 0.0001671231219340903, + "loss": 1.0048, + "step": 728 + }, + { + "epoch": 1.07, + "learning_rate": 0.0001670356616400249, + "loss": 0.957, + "step": 729 + }, + { + "epoch": 1.07, + "learning_rate": 0.00016694810811993723, + "loss": 1.0598, + "step": 730 + }, + { + "epoch": 1.07, + "learning_rate": 0.00016686046149558736, + "loss": 1.02, + "step": 731 + }, + { + "epoch": 1.07, + "learning_rate": 0.00016677272188886483, + "loss": 0.9973, + "step": 732 + }, + { + "epoch": 1.07, + "learning_rate": 0.00016668488942178856, + "loss": 1.0685, + "step": 733 + }, + { + "epoch": 1.07, + "learning_rate": 0.00016659696421650645, + "loss": 0.9783, + "step": 734 + }, + { + "epoch": 1.08, + "learning_rate": 0.00016650894639529544, + "loss": 0.9767, + "step": 735 + }, + { + "epoch": 1.08, + "learning_rate": 0.00016642083608056141, + "loss": 1.0192, + "step": 736 + }, + { + "epoch": 1.08, + "learning_rate": 0.00016633263339483866, + "loss": 1.0121, + "step": 737 + }, + { + "epoch": 1.08, + "learning_rate": 0.00016624433846079012, + "loss": 0.9817, + "step": 738 + }, + { + "epoch": 1.08, + "learning_rate": 0.00016615595140120686, + "loss": 1.1145, + "step": 739 + }, + { + "epoch": 1.08, + "learning_rate": 0.00016606747233900815, + "loss": 0.9862, + "step": 740 + }, + { + "epoch": 1.08, + "learning_rate": 0.00016597890139724125, + "loss": 1.0606, + "step": 741 + }, + { + "epoch": 1.09, + "learning_rate": 0.0001658902386990811, + 
"loss": 1.0416, + "step": 742 + }, + { + "epoch": 1.09, + "learning_rate": 0.0001658014843678303, + "loss": 0.9971, + "step": 743 + }, + { + "epoch": 1.09, + "learning_rate": 0.00016571263852691888, + "loss": 1.0318, + "step": 744 + }, + { + "epoch": 1.09, + "learning_rate": 0.0001656237012999041, + "loss": 1.0633, + "step": 745 + }, + { + "epoch": 1.09, + "learning_rate": 0.0001655346728104704, + "loss": 1.0418, + "step": 746 + }, + { + "epoch": 1.09, + "learning_rate": 0.00016544555318242897, + "loss": 0.9308, + "step": 747 + }, + { + "epoch": 1.1, + "learning_rate": 0.00016535634253971794, + "loss": 1.1049, + "step": 748 + }, + { + "epoch": 1.1, + "learning_rate": 0.0001652670410064019, + "loss": 0.9377, + "step": 749 + }, + { + "epoch": 1.1, + "learning_rate": 0.00016517764870667182, + "loss": 0.9934, + "step": 750 + }, + { + "epoch": 1.1, + "learning_rate": 0.000165088165764845, + "loss": 1.0467, + "step": 751 + }, + { + "epoch": 1.1, + "learning_rate": 0.00016499859230536466, + "loss": 1.0172, + "step": 752 + }, + { + "epoch": 1.1, + "learning_rate": 0.0001649089284528001, + "loss": 0.9922, + "step": 753 + }, + { + "epoch": 1.1, + "learning_rate": 0.00016481917433184607, + "loss": 1.0373, + "step": 754 + }, + { + "epoch": 1.11, + "learning_rate": 0.0001647293300673231, + "loss": 1.0377, + "step": 755 + }, + { + "epoch": 1.11, + "learning_rate": 0.00016463939578417692, + "loss": 0.9991, + "step": 756 + }, + { + "epoch": 1.11, + "learning_rate": 0.00016454937160747854, + "loss": 1.0657, + "step": 757 + }, + { + "epoch": 1.11, + "learning_rate": 0.00016445925766242391, + "loss": 0.9954, + "step": 758 + }, + { + "epoch": 1.11, + "learning_rate": 0.0001643690540743339, + "loss": 1.018, + "step": 759 + }, + { + "epoch": 1.11, + "learning_rate": 0.00016427876096865394, + "loss": 1.01, + "step": 760 + }, + { + "epoch": 1.11, + "learning_rate": 0.0001641883784709541, + "loss": 0.9318, + "step": 761 + }, + { + "epoch": 1.12, + "learning_rate": 0.0001640979067069286, + 
"loss": 1.0174, + "step": 762 + }, + { + "epoch": 1.12, + "learning_rate": 0.00016400734580239594, + "loss": 1.0886, + "step": 763 + }, + { + "epoch": 1.12, + "learning_rate": 0.0001639166958832985, + "loss": 1.0316, + "step": 764 + }, + { + "epoch": 1.12, + "learning_rate": 0.0001638259570757025, + "loss": 1.0514, + "step": 765 + }, + { + "epoch": 1.12, + "learning_rate": 0.0001637351295057978, + "loss": 0.9914, + "step": 766 + }, + { + "epoch": 1.12, + "learning_rate": 0.00016364421329989755, + "loss": 1.0529, + "step": 767 + }, + { + "epoch": 1.13, + "learning_rate": 0.00016355320858443842, + "loss": 0.9689, + "step": 768 + }, + { + "epoch": 1.13, + "learning_rate": 0.00016346211548597995, + "loss": 1.0398, + "step": 769 + }, + { + "epoch": 1.13, + "learning_rate": 0.0001633709341312046, + "loss": 1.0127, + "step": 770 + }, + { + "epoch": 1.13, + "learning_rate": 0.00016327966464691778, + "loss": 1.1388, + "step": 771 + }, + { + "epoch": 1.13, + "learning_rate": 0.00016318830716004722, + "loss": 0.9659, + "step": 772 + }, + { + "epoch": 1.13, + "learning_rate": 0.00016309686179764317, + "loss": 0.9907, + "step": 773 + }, + { + "epoch": 1.13, + "learning_rate": 0.00016300532868687806, + "loss": 0.9168, + "step": 774 + }, + { + "epoch": 1.14, + "learning_rate": 0.0001629137079550463, + "loss": 1.06, + "step": 775 + }, + { + "epoch": 1.14, + "learning_rate": 0.00016282199972956425, + "loss": 0.9826, + "step": 776 + }, + { + "epoch": 1.14, + "learning_rate": 0.00016273020413796983, + "loss": 1.0496, + "step": 777 + }, + { + "epoch": 1.14, + "learning_rate": 0.0001626383213079226, + "loss": 1.0245, + "step": 778 + }, + { + "epoch": 1.14, + "learning_rate": 0.00016254635136720328, + "loss": 1.036, + "step": 779 + }, + { + "epoch": 1.14, + "learning_rate": 0.0001624542944437139, + "loss": 1.0283, + "step": 780 + }, + { + "epoch": 1.14, + "learning_rate": 0.00016236215066547734, + "loss": 1.0078, + "step": 781 + }, + { + "epoch": 1.15, + "learning_rate": 
0.00016226992016063723, + "loss": 0.9819, + "step": 782 + }, + { + "epoch": 1.15, + "learning_rate": 0.00016217760305745803, + "loss": 1.0687, + "step": 783 + }, + { + "epoch": 1.15, + "learning_rate": 0.0001620851994843244, + "loss": 1.0523, + "step": 784 + }, + { + "epoch": 1.15, + "learning_rate": 0.00016199270956974128, + "loss": 1.0279, + "step": 785 + }, + { + "epoch": 1.15, + "learning_rate": 0.00016190013344233388, + "loss": 1.0559, + "step": 786 + }, + { + "epoch": 1.15, + "learning_rate": 0.00016180747123084705, + "loss": 1.0844, + "step": 787 + }, + { + "epoch": 1.15, + "learning_rate": 0.00016171472306414554, + "loss": 1.0724, + "step": 788 + }, + { + "epoch": 1.16, + "learning_rate": 0.00016162188907121354, + "loss": 0.9696, + "step": 789 + }, + { + "epoch": 1.16, + "learning_rate": 0.00016152896938115464, + "loss": 0.9551, + "step": 790 + }, + { + "epoch": 1.16, + "learning_rate": 0.0001614359641231916, + "loss": 1.0032, + "step": 791 + }, + { + "epoch": 1.16, + "learning_rate": 0.0001613428734266662, + "loss": 1.1404, + "step": 792 + }, + { + "epoch": 1.16, + "learning_rate": 0.00016124969742103897, + "loss": 1.0329, + "step": 793 + }, + { + "epoch": 1.16, + "learning_rate": 0.00016115643623588915, + "loss": 1.039, + "step": 794 + }, + { + "epoch": 1.17, + "learning_rate": 0.0001610630900009144, + "loss": 1.0231, + "step": 795 + }, + { + "epoch": 1.17, + "learning_rate": 0.0001609696588459307, + "loss": 1.0659, + "step": 796 + }, + { + "epoch": 1.17, + "learning_rate": 0.00016087614290087208, + "loss": 1.0029, + "step": 797 + }, + { + "epoch": 1.17, + "learning_rate": 0.0001607825422957905, + "loss": 0.985, + "step": 798 + }, + { + "epoch": 1.17, + "learning_rate": 0.00016068885716085567, + "loss": 0.9392, + "step": 799 + }, + { + "epoch": 1.17, + "learning_rate": 0.00016059508762635482, + "loss": 1.006, + "step": 800 + }, + { + "epoch": 1.17, + "learning_rate": 0.00016050123382269264, + "loss": 1.0748, + "step": 801 + }, + { + "epoch": 1.18, + 
"learning_rate": 0.0001604072958803909, + "loss": 1.1378, + "step": 802 + }, + { + "epoch": 1.18, + "learning_rate": 0.00016031327393008845, + "loss": 1.058, + "step": 803 + }, + { + "epoch": 1.18, + "learning_rate": 0.00016021916810254097, + "loss": 0.9827, + "step": 804 + }, + { + "epoch": 1.18, + "learning_rate": 0.00016012497852862075, + "loss": 0.9572, + "step": 805 + }, + { + "epoch": 1.18, + "learning_rate": 0.00016003070533931657, + "loss": 1.0042, + "step": 806 + }, + { + "epoch": 1.18, + "learning_rate": 0.00015993634866573347, + "loss": 0.9521, + "step": 807 + }, + { + "epoch": 1.18, + "learning_rate": 0.0001598419086390927, + "loss": 0.9395, + "step": 808 + }, + { + "epoch": 1.19, + "learning_rate": 0.00015974738539073125, + "loss": 1.0902, + "step": 809 + }, + { + "epoch": 1.19, + "learning_rate": 0.00015965277905210195, + "loss": 1.0408, + "step": 810 + }, + { + "epoch": 1.19, + "learning_rate": 0.00015955808975477319, + "loss": 1.0436, + "step": 811 + }, + { + "epoch": 1.19, + "learning_rate": 0.00015946331763042867, + "loss": 1.0845, + "step": 812 + }, + { + "epoch": 1.19, + "learning_rate": 0.00015936846281086736, + "loss": 1.0752, + "step": 813 + }, + { + "epoch": 1.19, + "learning_rate": 0.00015927352542800317, + "loss": 1.0832, + "step": 814 + }, + { + "epoch": 1.2, + "learning_rate": 0.00015917850561386488, + "loss": 0.9901, + "step": 815 + }, + { + "epoch": 1.2, + "learning_rate": 0.00015908340350059583, + "loss": 1.0311, + "step": 816 + }, + { + "epoch": 1.2, + "learning_rate": 0.00015898821922045385, + "loss": 0.9858, + "step": 817 + }, + { + "epoch": 1.2, + "learning_rate": 0.0001588929529058111, + "loss": 0.9541, + "step": 818 + }, + { + "epoch": 1.2, + "learning_rate": 0.00015879760468915372, + "loss": 0.9516, + "step": 819 + }, + { + "epoch": 1.2, + "learning_rate": 0.00015870217470308188, + "loss": 1.0791, + "step": 820 + }, + { + "epoch": 1.2, + "learning_rate": 0.00015860666308030932, + "loss": 0.9099, + "step": 821 + }, + { + 
"epoch": 1.21, + "learning_rate": 0.00015851106995366337, + "loss": 1.0983, + "step": 822 + }, + { + "epoch": 1.21, + "learning_rate": 0.00015841539545608478, + "loss": 0.9951, + "step": 823 + }, + { + "epoch": 1.21, + "learning_rate": 0.00015831963972062733, + "loss": 0.9661, + "step": 824 + }, + { + "epoch": 1.21, + "learning_rate": 0.00015822380288045792, + "loss": 1.0111, + "step": 825 + }, + { + "epoch": 1.21, + "learning_rate": 0.0001581278850688561, + "loss": 1.0436, + "step": 826 + }, + { + "epoch": 1.21, + "learning_rate": 0.00015803188641921417, + "loss": 1.0916, + "step": 827 + }, + { + "epoch": 1.21, + "learning_rate": 0.0001579358070650367, + "loss": 1.0347, + "step": 828 + }, + { + "epoch": 1.22, + "learning_rate": 0.0001578396471399406, + "loss": 0.9978, + "step": 829 + }, + { + "epoch": 1.22, + "learning_rate": 0.0001577434067776548, + "loss": 1.0036, + "step": 830 + }, + { + "epoch": 1.22, + "learning_rate": 0.00015764708611202015, + "loss": 1.0387, + "step": 831 + }, + { + "epoch": 1.22, + "learning_rate": 0.00015755068527698902, + "loss": 1.0172, + "step": 832 + }, + { + "epoch": 1.22, + "learning_rate": 0.00015745420440662543, + "loss": 0.9723, + "step": 833 + }, + { + "epoch": 1.22, + "learning_rate": 0.0001573576436351046, + "loss": 0.9662, + "step": 834 + }, + { + "epoch": 1.23, + "learning_rate": 0.000157261003096713, + "loss": 0.9849, + "step": 835 + }, + { + "epoch": 1.23, + "learning_rate": 0.00015716428292584787, + "loss": 1.0198, + "step": 836 + }, + { + "epoch": 1.23, + "learning_rate": 0.00015706748325701732, + "loss": 0.9015, + "step": 837 + }, + { + "epoch": 1.23, + "learning_rate": 0.0001569706042248399, + "loss": 1.001, + "step": 838 + }, + { + "epoch": 1.23, + "learning_rate": 0.0001568736459640447, + "loss": 0.9681, + "step": 839 + }, + { + "epoch": 1.23, + "learning_rate": 0.00015677660860947078, + "loss": 1.0513, + "step": 840 + }, + { + "epoch": 1.23, + "eval_loss": 1.008791446685791, + "eval_runtime": 2.6133, + 
"eval_samples_per_second": 417.867, + "eval_steps_per_second": 26.404, + "step": 840 + }, + { + "epoch": 1.23, + "learning_rate": 0.0001566794922960674, + "loss": 0.9829, + "step": 841 + }, + { + "epoch": 1.24, + "learning_rate": 0.00015658229715889347, + "loss": 1.0362, + "step": 842 + }, + { + "epoch": 1.24, + "learning_rate": 0.00015648502333311757, + "loss": 0.9736, + "step": 843 + }, + { + "epoch": 1.24, + "learning_rate": 0.0001563876709540178, + "loss": 1.0457, + "step": 844 + }, + { + "epoch": 1.24, + "learning_rate": 0.00015629024015698136, + "loss": 0.9786, + "step": 845 + }, + { + "epoch": 1.24, + "learning_rate": 0.00015619273107750462, + "loss": 1.0595, + "step": 846 + }, + { + "epoch": 1.24, + "learning_rate": 0.0001560951438511927, + "loss": 1.0307, + "step": 847 + }, + { + "epoch": 1.24, + "learning_rate": 0.00015599747861375955, + "loss": 1.0386, + "step": 848 + }, + { + "epoch": 1.25, + "learning_rate": 0.00015589973550102747, + "loss": 0.9916, + "step": 849 + }, + { + "epoch": 1.25, + "learning_rate": 0.00015580191464892716, + "loss": 0.9652, + "step": 850 + }, + { + "epoch": 1.25, + "learning_rate": 0.00015570401619349736, + "loss": 0.9691, + "step": 851 + }, + { + "epoch": 1.25, + "learning_rate": 0.00015560604027088477, + "loss": 1.0006, + "step": 852 + }, + { + "epoch": 1.25, + "learning_rate": 0.00015550798701734385, + "loss": 1.0271, + "step": 853 + }, + { + "epoch": 1.25, + "learning_rate": 0.00015540985656923645, + "loss": 1.0591, + "step": 854 + }, + { + "epoch": 1.25, + "learning_rate": 0.00015531164906303207, + "loss": 0.967, + "step": 855 + }, + { + "epoch": 1.26, + "learning_rate": 0.00015521336463530705, + "loss": 1.0466, + "step": 856 + }, + { + "epoch": 1.26, + "learning_rate": 0.0001551150034227449, + "loss": 0.9953, + "step": 857 + }, + { + "epoch": 1.26, + "learning_rate": 0.0001550165655621359, + "loss": 0.9899, + "step": 858 + }, + { + "epoch": 1.26, + "learning_rate": 0.00015491805119037684, + "loss": 0.9742, + "step": 859 + 
}, + { + "epoch": 1.26, + "learning_rate": 0.00015481946044447099, + "loss": 0.9865, + "step": 860 + }, + { + "epoch": 1.26, + "learning_rate": 0.0001547207934615278, + "loss": 0.9384, + "step": 861 + }, + { + "epoch": 1.27, + "learning_rate": 0.00015462205037876275, + "loss": 1.0216, + "step": 862 + }, + { + "epoch": 1.27, + "learning_rate": 0.00015452323133349714, + "loss": 0.9467, + "step": 863 + }, + { + "epoch": 1.27, + "learning_rate": 0.0001544243364631579, + "loss": 1.0038, + "step": 864 + }, + { + "epoch": 1.27, + "learning_rate": 0.0001543253659052775, + "loss": 0.978, + "step": 865 + }, + { + "epoch": 1.27, + "learning_rate": 0.00015422631979749354, + "loss": 1.0434, + "step": 866 + }, + { + "epoch": 1.27, + "learning_rate": 0.00015412719827754873, + "loss": 1.0091, + "step": 867 + }, + { + "epoch": 1.27, + "learning_rate": 0.00015402800148329071, + "loss": 0.9598, + "step": 868 + }, + { + "epoch": 1.28, + "learning_rate": 0.00015392872955267175, + "loss": 0.9876, + "step": 869 + }, + { + "epoch": 1.28, + "learning_rate": 0.00015382938262374865, + "loss": 0.9559, + "step": 870 + }, + { + "epoch": 1.28, + "learning_rate": 0.0001537299608346824, + "loss": 0.9984, + "step": 871 + }, + { + "epoch": 1.28, + "learning_rate": 0.00015363046432373824, + "loss": 1.0171, + "step": 872 + }, + { + "epoch": 1.28, + "learning_rate": 0.0001535308932292853, + "loss": 1.0188, + "step": 873 + }, + { + "epoch": 1.28, + "learning_rate": 0.00015343124768979637, + "loss": 0.9613, + "step": 874 + }, + { + "epoch": 1.28, + "learning_rate": 0.00015333152784384777, + "loss": 0.9572, + "step": 875 + }, + { + "epoch": 1.29, + "learning_rate": 0.0001532317338301192, + "loss": 1.0093, + "step": 876 + }, + { + "epoch": 1.29, + "learning_rate": 0.00015313186578739353, + "loss": 0.9935, + "step": 877 + }, + { + "epoch": 1.29, + "learning_rate": 0.0001530319238545565, + "loss": 1.0371, + "step": 878 + }, + { + "epoch": 1.29, + "learning_rate": 0.00015293190817059667, + "loss": 1.0022, + 
"step": 879 + }, + { + "epoch": 1.29, + "learning_rate": 0.00015283181887460517, + "loss": 1.0033, + "step": 880 + }, + { + "epoch": 1.29, + "learning_rate": 0.00015273165610577542, + "loss": 0.9986, + "step": 881 + }, + { + "epoch": 1.3, + "learning_rate": 0.00015263142000340312, + "loss": 1.0495, + "step": 882 + }, + { + "epoch": 1.3, + "learning_rate": 0.0001525311107068859, + "loss": 1.017, + "step": 883 + }, + { + "epoch": 1.3, + "learning_rate": 0.00015243072835572318, + "loss": 0.9757, + "step": 884 + }, + { + "epoch": 1.3, + "learning_rate": 0.000152330273089516, + "loss": 1.0342, + "step": 885 + }, + { + "epoch": 1.3, + "learning_rate": 0.0001522297450479668, + "loss": 1.0059, + "step": 886 + }, + { + "epoch": 1.3, + "learning_rate": 0.00015212914437087922, + "loss": 0.9845, + "step": 887 + }, + { + "epoch": 1.3, + "learning_rate": 0.0001520284711981579, + "loss": 1.0365, + "step": 888 + }, + { + "epoch": 1.31, + "learning_rate": 0.0001519277256698083, + "loss": 0.9521, + "step": 889 + }, + { + "epoch": 1.31, + "learning_rate": 0.0001518269079259366, + "loss": 1.0867, + "step": 890 + }, + { + "epoch": 1.31, + "learning_rate": 0.00015172601810674915, + "loss": 1.0444, + "step": 891 + }, + { + "epoch": 1.31, + "learning_rate": 0.00015162505635255287, + "loss": 1.077, + "step": 892 + }, + { + "epoch": 1.31, + "learning_rate": 0.00015152402280375454, + "loss": 0.9883, + "step": 893 + }, + { + "epoch": 1.31, + "learning_rate": 0.0001514229176008607, + "loss": 0.9819, + "step": 894 + }, + { + "epoch": 1.31, + "learning_rate": 0.00015132174088447776, + "loss": 0.9912, + "step": 895 + }, + { + "epoch": 1.32, + "learning_rate": 0.00015122049279531143, + "loss": 0.9575, + "step": 896 + }, + { + "epoch": 1.32, + "learning_rate": 0.00015111917347416671, + "loss": 1.0356, + "step": 897 + }, + { + "epoch": 1.32, + "learning_rate": 0.00015101778306194765, + "loss": 0.9963, + "step": 898 + }, + { + "epoch": 1.32, + "learning_rate": 0.0001509163216996572, + "loss": 0.9728, 
+ "step": 899 + }, + { + "epoch": 1.32, + "learning_rate": 0.00015081478952839693, + "loss": 1.0402, + "step": 900 + }, + { + "epoch": 1.32, + "learning_rate": 0.00015071318668936695, + "loss": 1.0287, + "step": 901 + }, + { + "epoch": 1.32, + "learning_rate": 0.00015061151332386566, + "loss": 1.0505, + "step": 902 + }, + { + "epoch": 1.33, + "learning_rate": 0.00015050976957328938, + "loss": 0.9814, + "step": 903 + }, + { + "epoch": 1.33, + "learning_rate": 0.00015040795557913245, + "loss": 1.0083, + "step": 904 + }, + { + "epoch": 1.33, + "learning_rate": 0.00015030607148298696, + "loss": 1.0871, + "step": 905 + }, + { + "epoch": 1.33, + "learning_rate": 0.00015020411742654237, + "loss": 1.0943, + "step": 906 + }, + { + "epoch": 1.33, + "learning_rate": 0.0001501020935515854, + "loss": 1.0631, + "step": 907 + }, + { + "epoch": 1.33, + "learning_rate": 0.00015000000000000001, + "loss": 1.0615, + "step": 908 + }, + { + "epoch": 1.34, + "learning_rate": 0.00014989783691376696, + "loss": 0.8933, + "step": 909 + }, + { + "epoch": 1.34, + "learning_rate": 0.0001497956044349637, + "loss": 1.012, + "step": 910 + }, + { + "epoch": 1.34, + "learning_rate": 0.00014969330270576427, + "loss": 0.9215, + "step": 911 + }, + { + "epoch": 1.34, + "learning_rate": 0.00014959093186843895, + "loss": 0.9894, + "step": 912 + }, + { + "epoch": 1.34, + "learning_rate": 0.00014948849206535412, + "loss": 1.0053, + "step": 913 + }, + { + "epoch": 1.34, + "learning_rate": 0.00014938598343897214, + "loss": 1.0975, + "step": 914 + }, + { + "epoch": 1.34, + "learning_rate": 0.00014928340613185097, + "loss": 1.068, + "step": 915 + }, + { + "epoch": 1.35, + "learning_rate": 0.0001491807602866442, + "loss": 0.9838, + "step": 916 + }, + { + "epoch": 1.35, + "learning_rate": 0.00014907804604610063, + "loss": 1.1493, + "step": 917 + }, + { + "epoch": 1.35, + "learning_rate": 0.00014897526355306428, + "loss": 0.9491, + "step": 918 + }, + { + "epoch": 1.35, + "learning_rate": 0.000148872412950474, + 
"loss": 1.0252, + "step": 919 + }, + { + "epoch": 1.35, + "learning_rate": 0.00014876949438136347, + "loss": 0.9555, + "step": 920 + }, + { + "epoch": 1.35, + "learning_rate": 0.00014866650798886074, + "loss": 0.9831, + "step": 921 + }, + { + "epoch": 1.35, + "learning_rate": 0.0001485634539161883, + "loss": 1.0957, + "step": 922 + }, + { + "epoch": 1.36, + "learning_rate": 0.0001484603323066627, + "loss": 0.9606, + "step": 923 + }, + { + "epoch": 1.36, + "learning_rate": 0.00014835714330369446, + "loss": 1.0643, + "step": 924 + }, + { + "epoch": 1.36, + "learning_rate": 0.00014825388705078777, + "loss": 1.0219, + "step": 925 + }, + { + "epoch": 1.36, + "learning_rate": 0.00014815056369154038, + "loss": 1.1315, + "step": 926 + }, + { + "epoch": 1.36, + "learning_rate": 0.0001480471733696434, + "loss": 1.0406, + "step": 927 + }, + { + "epoch": 1.36, + "learning_rate": 0.000147943716228881, + "loss": 1.0284, + "step": 928 + }, + { + "epoch": 1.37, + "learning_rate": 0.00014784019241313026, + "loss": 1.035, + "step": 929 + }, + { + "epoch": 1.37, + "learning_rate": 0.00014773660206636105, + "loss": 1.0562, + "step": 930 + }, + { + "epoch": 1.37, + "learning_rate": 0.0001476329453326357, + "loss": 0.9813, + "step": 931 + }, + { + "epoch": 1.37, + "learning_rate": 0.000147529222356109, + "loss": 1.0865, + "step": 932 + }, + { + "epoch": 1.37, + "learning_rate": 0.0001474254332810277, + "loss": 1.0074, + "step": 933 + }, + { + "epoch": 1.37, + "learning_rate": 0.00014732157825173044, + "loss": 1.0855, + "step": 934 + }, + { + "epoch": 1.37, + "learning_rate": 0.00014721765741264786, + "loss": 0.9785, + "step": 935 + }, + { + "epoch": 1.38, + "learning_rate": 0.0001471136709083018, + "loss": 1.011, + "step": 936 + }, + { + "epoch": 1.38, + "learning_rate": 0.00014700961888330563, + "loss": 1.0484, + "step": 937 + }, + { + "epoch": 1.38, + "learning_rate": 0.0001469055014823637, + "loss": 1.0435, + "step": 938 + }, + { + "epoch": 1.38, + "learning_rate": 
0.00014680131885027141, + "loss": 1.0176, + "step": 939 + }, + { + "epoch": 1.38, + "learning_rate": 0.00014669707113191483, + "loss": 0.9542, + "step": 940 + }, + { + "epoch": 1.38, + "learning_rate": 0.00014659275847227042, + "loss": 0.9526, + "step": 941 + }, + { + "epoch": 1.38, + "learning_rate": 0.00014648838101640518, + "loss": 0.9681, + "step": 942 + }, + { + "epoch": 1.39, + "learning_rate": 0.00014638393890947603, + "loss": 0.9072, + "step": 943 + }, + { + "epoch": 1.39, + "learning_rate": 0.0001462794322967299, + "loss": 0.9939, + "step": 944 + }, + { + "epoch": 1.39, + "learning_rate": 0.00014617486132350343, + "loss": 1.018, + "step": 945 + }, + { + "epoch": 1.39, + "learning_rate": 0.0001460702261352226, + "loss": 0.8993, + "step": 946 + }, + { + "epoch": 1.39, + "learning_rate": 0.00014596552687740302, + "loss": 1.0134, + "step": 947 + }, + { + "epoch": 1.39, + "learning_rate": 0.00014586076369564908, + "loss": 0.947, + "step": 948 + }, + { + "epoch": 1.39, + "learning_rate": 0.00014575593673565426, + "loss": 1.0697, + "step": 949 + }, + { + "epoch": 1.4, + "learning_rate": 0.00014565104614320065, + "loss": 1.006, + "step": 950 + }, + { + "epoch": 1.4, + "learning_rate": 0.00014554609206415885, + "loss": 1.0262, + "step": 951 + }, + { + "epoch": 1.4, + "learning_rate": 0.00014544107464448775, + "loss": 0.9809, + "step": 952 + }, + { + "epoch": 1.4, + "learning_rate": 0.0001453359940302344, + "loss": 0.981, + "step": 953 + }, + { + "epoch": 1.4, + "learning_rate": 0.00014523085036753354, + "loss": 0.9925, + "step": 954 + }, + { + "epoch": 1.4, + "learning_rate": 0.00014512564380260787, + "loss": 1.0199, + "step": 955 + }, + { + "epoch": 1.41, + "learning_rate": 0.00014502037448176734, + "loss": 0.9715, + "step": 956 + }, + { + "epoch": 1.41, + "learning_rate": 0.00014491504255140927, + "loss": 1.0072, + "step": 957 + }, + { + "epoch": 1.41, + "learning_rate": 0.000144809648158018, + "loss": 1.0659, + "step": 958 + }, + { + "epoch": 1.41, + 
"learning_rate": 0.00014470419144816483, + "loss": 1.0538, + "step": 959 + }, + { + "epoch": 1.41, + "learning_rate": 0.0001445986725685076, + "loss": 1.0571, + "step": 960 + }, + { + "epoch": 1.41, + "learning_rate": 0.00014449309166579072, + "loss": 0.9701, + "step": 961 + }, + { + "epoch": 1.41, + "learning_rate": 0.00014438744888684482, + "loss": 0.9618, + "step": 962 + }, + { + "epoch": 1.42, + "learning_rate": 0.0001442817443785865, + "loss": 0.9179, + "step": 963 + }, + { + "epoch": 1.42, + "learning_rate": 0.00014417597828801832, + "loss": 1.0613, + "step": 964 + }, + { + "epoch": 1.42, + "learning_rate": 0.00014407015076222846, + "loss": 0.9558, + "step": 965 + }, + { + "epoch": 1.42, + "learning_rate": 0.00014396426194839042, + "loss": 0.9823, + "step": 966 + }, + { + "epoch": 1.42, + "learning_rate": 0.00014385831199376317, + "loss": 0.9968, + "step": 967 + }, + { + "epoch": 1.42, + "learning_rate": 0.00014375230104569044, + "loss": 0.9829, + "step": 968 + }, + { + "epoch": 1.42, + "learning_rate": 0.00014364622925160098, + "loss": 1.0552, + "step": 969 + }, + { + "epoch": 1.43, + "learning_rate": 0.00014354009675900803, + "loss": 0.993, + "step": 970 + }, + { + "epoch": 1.43, + "learning_rate": 0.00014343390371550935, + "loss": 1.0927, + "step": 971 + }, + { + "epoch": 1.43, + "learning_rate": 0.00014332765026878687, + "loss": 1.0387, + "step": 972 + }, + { + "epoch": 1.43, + "learning_rate": 0.00014322133656660647, + "loss": 0.9255, + "step": 973 + }, + { + "epoch": 1.43, + "learning_rate": 0.00014311496275681783, + "loss": 1.0093, + "step": 974 + }, + { + "epoch": 1.43, + "learning_rate": 0.00014300852898735435, + "loss": 1.0078, + "step": 975 + }, + { + "epoch": 1.44, + "learning_rate": 0.00014290203540623267, + "loss": 0.9161, + "step": 976 + }, + { + "epoch": 1.44, + "learning_rate": 0.00014279548216155266, + "loss": 1.03, + "step": 977 + }, + { + "epoch": 1.44, + "learning_rate": 0.00014268886940149714, + "loss": 1.0364, + "step": 978 + }, + { + 
"epoch": 1.44, + "learning_rate": 0.0001425821972743318, + "loss": 0.9768, + "step": 979 + }, + { + "epoch": 1.44, + "learning_rate": 0.0001424754659284048, + "loss": 1.1229, + "step": 980 + }, + { + "epoch": 1.44, + "learning_rate": 0.0001423686755121466, + "loss": 1.0362, + "step": 981 + }, + { + "epoch": 1.44, + "learning_rate": 0.00014226182617406996, + "loss": 0.9522, + "step": 982 + }, + { + "epoch": 1.45, + "learning_rate": 0.00014215491806276944, + "loss": 1.0479, + "step": 983 + }, + { + "epoch": 1.45, + "learning_rate": 0.00014204795132692144, + "loss": 1.0671, + "step": 984 + }, + { + "epoch": 1.45, + "learning_rate": 0.00014194092611528384, + "loss": 0.8983, + "step": 985 + }, + { + "epoch": 1.45, + "learning_rate": 0.00014183384257669581, + "loss": 1.004, + "step": 986 + }, + { + "epoch": 1.45, + "learning_rate": 0.00014172670086007774, + "loss": 1.0972, + "step": 987 + }, + { + "epoch": 1.45, + "learning_rate": 0.00014161950111443077, + "loss": 1.0198, + "step": 988 + }, + { + "epoch": 1.45, + "learning_rate": 0.00014151224348883692, + "loss": 1.0257, + "step": 989 + }, + { + "epoch": 1.46, + "learning_rate": 0.00014140492813245856, + "loss": 0.9717, + "step": 990 + }, + { + "epoch": 1.46, + "learning_rate": 0.0001412975551945384, + "loss": 0.9455, + "step": 991 + }, + { + "epoch": 1.46, + "learning_rate": 0.0001411901248243993, + "loss": 1.0372, + "step": 992 + }, + { + "epoch": 1.46, + "learning_rate": 0.0001410826371714438, + "loss": 0.9961, + "step": 993 + }, + { + "epoch": 1.46, + "learning_rate": 0.00014097509238515432, + "loss": 1.0599, + "step": 994 + }, + { + "epoch": 1.46, + "learning_rate": 0.00014086749061509258, + "loss": 1.0166, + "step": 995 + }, + { + "epoch": 1.46, + "learning_rate": 0.00014075983201089964, + "loss": 1.0254, + "step": 996 + }, + { + "epoch": 1.47, + "learning_rate": 0.00014065211672229555, + "loss": 0.9979, + "step": 997 + }, + { + "epoch": 1.47, + "learning_rate": 0.00014054434489907915, + "loss": 1.0365, + "step": 
998 + }, + { + "epoch": 1.47, + "learning_rate": 0.00014043651669112808, + "loss": 1.0075, + "step": 999 + }, + { + "epoch": 1.47, + "learning_rate": 0.00014032863224839814, + "loss": 0.9743, + "step": 1000 + }, + { + "epoch": 1.47, + "learning_rate": 0.00014022069172092352, + "loss": 1.0056, + "step": 1001 + }, + { + "epoch": 1.47, + "learning_rate": 0.00014011269525881636, + "loss": 0.9647, + "step": 1002 + }, + { + "epoch": 1.48, + "learning_rate": 0.00014000464301226656, + "loss": 1.0912, + "step": 1003 + }, + { + "epoch": 1.48, + "learning_rate": 0.00013989653513154165, + "loss": 0.8811, + "step": 1004 + }, + { + "epoch": 1.48, + "learning_rate": 0.00013978837176698646, + "loss": 1.0667, + "step": 1005 + }, + { + "epoch": 1.48, + "learning_rate": 0.000139680153069023, + "loss": 1.0096, + "step": 1006 + }, + { + "epoch": 1.48, + "learning_rate": 0.00013957187918815032, + "loss": 0.926, + "step": 1007 + }, + { + "epoch": 1.48, + "learning_rate": 0.0001394635502749441, + "loss": 1.0814, + "step": 1008 + }, + { + "epoch": 1.48, + "eval_loss": 1.0040607452392578, + "eval_runtime": 2.6168, + "eval_samples_per_second": 417.304, + "eval_steps_per_second": 26.368, + "step": 1008 + }, + { + "epoch": 1.48, + "learning_rate": 0.0001393551664800566, + "loss": 1.0941, + "step": 1009 + }, + { + "epoch": 1.49, + "learning_rate": 0.00013924672795421637, + "loss": 1.044, + "step": 1010 + }, + { + "epoch": 1.49, + "learning_rate": 0.00013913823484822815, + "loss": 1.049, + "step": 1011 + }, + { + "epoch": 1.49, + "learning_rate": 0.00013902968731297255, + "loss": 0.9891, + "step": 1012 + }, + { + "epoch": 1.49, + "learning_rate": 0.00013892108549940583, + "loss": 0.9663, + "step": 1013 + }, + { + "epoch": 1.49, + "learning_rate": 0.00013881242955855974, + "loss": 1.0298, + "step": 1014 + }, + { + "epoch": 1.49, + "learning_rate": 0.0001387037196415414, + "loss": 1.0083, + "step": 1015 + }, + { + "epoch": 1.49, + "learning_rate": 0.0001385949558995329, + "loss": 0.9182, + "step": 
1016 + }, + { + "epoch": 1.5, + "learning_rate": 0.00013848613848379114, + "loss": 1.013, + "step": 1017 + }, + { + "epoch": 1.5, + "learning_rate": 0.00013837726754564785, + "loss": 1.0022, + "step": 1018 + }, + { + "epoch": 1.5, + "learning_rate": 0.000138268343236509, + "loss": 0.9423, + "step": 1019 + }, + { + "epoch": 1.5, + "learning_rate": 0.00013815936570785487, + "loss": 1.058, + "step": 1020 + }, + { + "epoch": 1.5, + "learning_rate": 0.00013805033511123975, + "loss": 0.931, + "step": 1021 + }, + { + "epoch": 1.5, + "learning_rate": 0.00013794125159829172, + "loss": 1.0137, + "step": 1022 + }, + { + "epoch": 1.51, + "learning_rate": 0.00013783211532071246, + "loss": 1.0517, + "step": 1023 + }, + { + "epoch": 1.51, + "learning_rate": 0.000137722926430277, + "loss": 1.0259, + "step": 1024 + }, + { + "epoch": 1.51, + "learning_rate": 0.00013761368507883359, + "loss": 1.0263, + "step": 1025 + }, + { + "epoch": 1.51, + "learning_rate": 0.00013750439141830339, + "loss": 1.0286, + "step": 1026 + }, + { + "epoch": 1.51, + "learning_rate": 0.00013739504560068033, + "loss": 0.9749, + "step": 1027 + }, + { + "epoch": 1.51, + "learning_rate": 0.00013728564777803088, + "loss": 0.9317, + "step": 1028 + }, + { + "epoch": 1.51, + "learning_rate": 0.00013717619810249378, + "loss": 1.0653, + "step": 1029 + }, + { + "epoch": 1.52, + "learning_rate": 0.00013706669672627997, + "loss": 0.9623, + "step": 1030 + }, + { + "epoch": 1.52, + "learning_rate": 0.00013695714380167223, + "loss": 0.9911, + "step": 1031 + }, + { + "epoch": 1.52, + "learning_rate": 0.000136847539481025, + "loss": 0.9843, + "step": 1032 + }, + { + "epoch": 1.52, + "learning_rate": 0.0001367378839167643, + "loss": 0.981, + "step": 1033 + }, + { + "epoch": 1.52, + "learning_rate": 0.00013662817726138728, + "loss": 1.0651, + "step": 1034 + }, + { + "epoch": 1.52, + "learning_rate": 0.00013651841966746232, + "loss": 1.0602, + "step": 1035 + }, + { + "epoch": 1.52, + "learning_rate": 0.0001364086112876284, + 
"loss": 0.9524, + "step": 1036 + }, + { + "epoch": 1.53, + "learning_rate": 0.00013629875227459532, + "loss": 1.0264, + "step": 1037 + }, + { + "epoch": 1.53, + "learning_rate": 0.00013618884278114324, + "loss": 1.0691, + "step": 1038 + }, + { + "epoch": 1.53, + "learning_rate": 0.00013607888296012259, + "loss": 1.0527, + "step": 1039 + }, + { + "epoch": 1.53, + "learning_rate": 0.0001359688729644536, + "loss": 0.9629, + "step": 1040 + }, + { + "epoch": 1.53, + "learning_rate": 0.0001358588129471264, + "loss": 0.957, + "step": 1041 + }, + { + "epoch": 1.53, + "learning_rate": 0.00013574870306120077, + "loss": 1.0976, + "step": 1042 + }, + { + "epoch": 1.54, + "learning_rate": 0.00013563854345980569, + "loss": 0.9317, + "step": 1043 + }, + { + "epoch": 1.54, + "learning_rate": 0.00013552833429613938, + "loss": 1.0359, + "step": 1044 + }, + { + "epoch": 1.54, + "learning_rate": 0.0001354180757234689, + "loss": 1.0642, + "step": 1045 + }, + { + "epoch": 1.54, + "learning_rate": 0.0001353077678951301, + "loss": 1.0526, + "step": 1046 + }, + { + "epoch": 1.54, + "learning_rate": 0.00013519741096452726, + "loss": 1.0276, + "step": 1047 + }, + { + "epoch": 1.54, + "learning_rate": 0.00013508700508513307, + "loss": 1.0471, + "step": 1048 + }, + { + "epoch": 1.54, + "learning_rate": 0.0001349765504104881, + "loss": 1.0353, + "step": 1049 + }, + { + "epoch": 1.55, + "learning_rate": 0.00013486604709420102, + "loss": 1.0025, + "step": 1050 + }, + { + "epoch": 1.55, + "learning_rate": 0.00013475549528994786, + "loss": 0.9019, + "step": 1051 + }, + { + "epoch": 1.55, + "learning_rate": 0.00013464489515147238, + "loss": 1.0453, + "step": 1052 + }, + { + "epoch": 1.55, + "learning_rate": 0.00013453424683258528, + "loss": 1.0395, + "step": 1053 + }, + { + "epoch": 1.55, + "learning_rate": 0.0001344235504871645, + "loss": 0.8939, + "step": 1054 + }, + { + "epoch": 1.55, + "learning_rate": 0.00013431280626915467, + "loss": 0.9198, + "step": 1055 + }, + { + "epoch": 1.55, + 
"learning_rate": 0.00013420201433256689, + "loss": 1.0046, + "step": 1056 + }, + { + "epoch": 1.56, + "learning_rate": 0.0001340911748314788, + "loss": 0.9197, + "step": 1057 + }, + { + "epoch": 1.56, + "learning_rate": 0.00013398028792003413, + "loss": 0.9547, + "step": 1058 + }, + { + "epoch": 1.56, + "learning_rate": 0.00013386935375244246, + "loss": 0.968, + "step": 1059 + }, + { + "epoch": 1.56, + "learning_rate": 0.00013375837248297926, + "loss": 0.9611, + "step": 1060 + }, + { + "epoch": 1.56, + "learning_rate": 0.00013364734426598527, + "loss": 1.0125, + "step": 1061 + }, + { + "epoch": 1.56, + "learning_rate": 0.00013353626925586672, + "loss": 1.0179, + "step": 1062 + }, + { + "epoch": 1.56, + "learning_rate": 0.00013342514760709485, + "loss": 1.04, + "step": 1063 + }, + { + "epoch": 1.57, + "learning_rate": 0.00013331397947420576, + "loss": 0.9251, + "step": 1064 + }, + { + "epoch": 1.57, + "learning_rate": 0.00013320276501180015, + "loss": 1.0762, + "step": 1065 + }, + { + "epoch": 1.57, + "learning_rate": 0.00013309150437454322, + "loss": 1.0137, + "step": 1066 + }, + { + "epoch": 1.57, + "learning_rate": 0.00013298019771716435, + "loss": 0.9981, + "step": 1067 + }, + { + "epoch": 1.57, + "learning_rate": 0.0001328688451944569, + "loss": 1.003, + "step": 1068 + }, + { + "epoch": 1.57, + "learning_rate": 0.00013275744696127805, + "loss": 1.0307, + "step": 1069 + }, + { + "epoch": 1.58, + "learning_rate": 0.00013264600317254853, + "loss": 1.0257, + "step": 1070 + }, + { + "epoch": 1.58, + "learning_rate": 0.00013253451398325249, + "loss": 1.0426, + "step": 1071 + }, + { + "epoch": 1.58, + "learning_rate": 0.00013242297954843711, + "loss": 1.0167, + "step": 1072 + }, + { + "epoch": 1.58, + "learning_rate": 0.00013231140002321253, + "loss": 1.012, + "step": 1073 + }, + { + "epoch": 1.58, + "learning_rate": 0.00013219977556275163, + "loss": 1.0649, + "step": 1074 + }, + { + "epoch": 1.58, + "learning_rate": 0.00013208810632228977, + "loss": 0.9297, + "step": 
1075 + }, + { + "epoch": 1.58, + "learning_rate": 0.00013197639245712454, + "loss": 0.9772, + "step": 1076 + }, + { + "epoch": 1.59, + "learning_rate": 0.00013186463412261565, + "loss": 1.0194, + "step": 1077 + }, + { + "epoch": 1.59, + "learning_rate": 0.00013175283147418465, + "loss": 1.0596, + "step": 1078 + }, + { + "epoch": 1.59, + "learning_rate": 0.00013164098466731468, + "loss": 0.9938, + "step": 1079 + }, + { + "epoch": 1.59, + "learning_rate": 0.00013152909385755025, + "loss": 0.9405, + "step": 1080 + }, + { + "epoch": 1.59, + "learning_rate": 0.0001314171592004972, + "loss": 1.0175, + "step": 1081 + }, + { + "epoch": 1.59, + "learning_rate": 0.00013130518085182225, + "loss": 0.9994, + "step": 1082 + }, + { + "epoch": 1.59, + "learning_rate": 0.00013119315896725287, + "loss": 0.9524, + "step": 1083 + }, + { + "epoch": 1.6, + "learning_rate": 0.00013108109370257712, + "loss": 0.9112, + "step": 1084 + }, + { + "epoch": 1.6, + "learning_rate": 0.00013096898521364338, + "loss": 0.9339, + "step": 1085 + }, + { + "epoch": 1.6, + "learning_rate": 0.00013085683365636014, + "loss": 0.9718, + "step": 1086 + }, + { + "epoch": 1.6, + "learning_rate": 0.0001307446391866958, + "loss": 0.969, + "step": 1087 + }, + { + "epoch": 1.6, + "learning_rate": 0.00013063240196067836, + "loss": 1.0255, + "step": 1088 + }, + { + "epoch": 1.6, + "learning_rate": 0.00013052012213439536, + "loss": 1.0119, + "step": 1089 + }, + { + "epoch": 1.61, + "learning_rate": 0.00013040779986399362, + "loss": 1.0396, + "step": 1090 + }, + { + "epoch": 1.61, + "learning_rate": 0.00013029543530567884, + "loss": 1.0202, + "step": 1091 + }, + { + "epoch": 1.61, + "learning_rate": 0.0001301830286157157, + "loss": 0.9024, + "step": 1092 + }, + { + "epoch": 1.61, + "learning_rate": 0.00013007057995042732, + "loss": 1.0079, + "step": 1093 + }, + { + "epoch": 1.61, + "learning_rate": 0.0001299580894661953, + "loss": 0.9771, + "step": 1094 + }, + { + "epoch": 1.61, + "learning_rate": 0.0001298455573194594, 
+ "loss": 0.9942, + "step": 1095 + }, + { + "epoch": 1.61, + "learning_rate": 0.00012973298366671725, + "loss": 0.9879, + "step": 1096 + }, + { + "epoch": 1.62, + "learning_rate": 0.00012962036866452422, + "loss": 0.9365, + "step": 1097 + }, + { + "epoch": 1.62, + "learning_rate": 0.0001295077124694932, + "loss": 1.0128, + "step": 1098 + }, + { + "epoch": 1.62, + "learning_rate": 0.00012939501523829444, + "loss": 1.0707, + "step": 1099 + }, + { + "epoch": 1.62, + "learning_rate": 0.00012928227712765504, + "loss": 0.9769, + "step": 1100 + }, + { + "epoch": 1.62, + "learning_rate": 0.00012916949829435922, + "loss": 1.0208, + "step": 1101 + }, + { + "epoch": 1.62, + "learning_rate": 0.0001290566788952477, + "loss": 1.0376, + "step": 1102 + }, + { + "epoch": 1.62, + "learning_rate": 0.00012894381908721756, + "loss": 1.0588, + "step": 1103 + }, + { + "epoch": 1.63, + "learning_rate": 0.0001288309190272222, + "loss": 1.0217, + "step": 1104 + }, + { + "epoch": 1.63, + "learning_rate": 0.00012871797887227087, + "loss": 0.9684, + "step": 1105 + }, + { + "epoch": 1.63, + "learning_rate": 0.00012860499877942875, + "loss": 0.9753, + "step": 1106 + }, + { + "epoch": 1.63, + "learning_rate": 0.00012849197890581638, + "loss": 1.0094, + "step": 1107 + }, + { + "epoch": 1.63, + "learning_rate": 0.00012837891940860972, + "loss": 1.0346, + "step": 1108 + }, + { + "epoch": 1.63, + "learning_rate": 0.00012826582044503978, + "loss": 0.8741, + "step": 1109 + }, + { + "epoch": 1.63, + "learning_rate": 0.00012815268217239252, + "loss": 1.0223, + "step": 1110 + }, + { + "epoch": 1.64, + "learning_rate": 0.00012803950474800862, + "loss": 0.8748, + "step": 1111 + }, + { + "epoch": 1.64, + "learning_rate": 0.00012792628832928302, + "loss": 1.0296, + "step": 1112 + }, + { + "epoch": 1.64, + "learning_rate": 0.000127813033073665, + "loss": 0.9993, + "step": 1113 + }, + { + "epoch": 1.64, + "learning_rate": 0.00012769973913865794, + "loss": 1.0555, + "step": 1114 + }, + { + "epoch": 1.64, + 
"learning_rate": 0.00012758640668181882, + "loss": 1.0245, + "step": 1115 + }, + { + "epoch": 1.64, + "learning_rate": 0.0001274730358607583, + "loss": 0.9502, + "step": 1116 + }, + { + "epoch": 1.65, + "learning_rate": 0.00012735962683314042, + "loss": 1.0165, + "step": 1117 + }, + { + "epoch": 1.65, + "learning_rate": 0.0001272461797566823, + "loss": 1.0669, + "step": 1118 + }, + { + "epoch": 1.65, + "learning_rate": 0.000127132694789154, + "loss": 0.8676, + "step": 1119 + }, + { + "epoch": 1.65, + "learning_rate": 0.0001270191720883782, + "loss": 0.9432, + "step": 1120 + }, + { + "epoch": 1.65, + "learning_rate": 0.00012690561181223024, + "loss": 1.0614, + "step": 1121 + }, + { + "epoch": 1.65, + "learning_rate": 0.0001267920141186375, + "loss": 0.9924, + "step": 1122 + }, + { + "epoch": 1.65, + "learning_rate": 0.00012667837916557954, + "loss": 1.139, + "step": 1123 + }, + { + "epoch": 1.66, + "learning_rate": 0.00012656470711108764, + "loss": 1.0043, + "step": 1124 + }, + { + "epoch": 1.66, + "learning_rate": 0.00012645099811324476, + "loss": 1.0747, + "step": 1125 + }, + { + "epoch": 1.66, + "learning_rate": 0.0001263372523301852, + "loss": 0.9668, + "step": 1126 + }, + { + "epoch": 1.66, + "learning_rate": 0.00012622346992009447, + "loss": 0.9931, + "step": 1127 + }, + { + "epoch": 1.66, + "learning_rate": 0.00012610965104120885, + "loss": 0.9393, + "step": 1128 + }, + { + "epoch": 1.66, + "learning_rate": 0.00012599579585181552, + "loss": 0.9918, + "step": 1129 + }, + { + "epoch": 1.66, + "learning_rate": 0.00012588190451025207, + "loss": 1.0172, + "step": 1130 + }, + { + "epoch": 1.67, + "learning_rate": 0.00012576797717490644, + "loss": 1.0586, + "step": 1131 + }, + { + "epoch": 1.67, + "learning_rate": 0.00012565401400421651, + "loss": 1.0482, + "step": 1132 + }, + { + "epoch": 1.67, + "learning_rate": 0.00012554001515667008, + "loss": 1.0548, + "step": 1133 + }, + { + "epoch": 1.67, + "learning_rate": 0.00012542598079080456, + "loss": 1.0092, + "step": 
1134 + }, + { + "epoch": 1.67, + "learning_rate": 0.00012531191106520672, + "loss": 1.0162, + "step": 1135 + }, + { + "epoch": 1.67, + "learning_rate": 0.00012519780613851254, + "loss": 1.0387, + "step": 1136 + }, + { + "epoch": 1.68, + "learning_rate": 0.0001250836661694069, + "loss": 0.9607, + "step": 1137 + }, + { + "epoch": 1.68, + "learning_rate": 0.00012496949131662348, + "loss": 1.0025, + "step": 1138 + }, + { + "epoch": 1.68, + "learning_rate": 0.00012485528173894448, + "loss": 1.0014, + "step": 1139 + }, + { + "epoch": 1.68, + "learning_rate": 0.00012474103759520027, + "loss": 0.9838, + "step": 1140 + }, + { + "epoch": 1.68, + "learning_rate": 0.0001246267590442694, + "loss": 1.0384, + "step": 1141 + }, + { + "epoch": 1.68, + "learning_rate": 0.00012451244624507831, + "loss": 0.9958, + "step": 1142 + }, + { + "epoch": 1.68, + "learning_rate": 0.00012439809935660095, + "loss": 0.9927, + "step": 1143 + }, + { + "epoch": 1.69, + "learning_rate": 0.0001242837185378587, + "loss": 1.0082, + "step": 1144 + }, + { + "epoch": 1.69, + "learning_rate": 0.00012416930394792026, + "loss": 0.9729, + "step": 1145 + }, + { + "epoch": 1.69, + "learning_rate": 0.00012405485574590113, + "loss": 1.0464, + "step": 1146 + }, + { + "epoch": 1.69, + "learning_rate": 0.00012394037409096357, + "loss": 0.987, + "step": 1147 + }, + { + "epoch": 1.69, + "learning_rate": 0.0001238258591423165, + "loss": 0.9402, + "step": 1148 + }, + { + "epoch": 1.69, + "learning_rate": 0.00012371131105921504, + "loss": 1.0293, + "step": 1149 + }, + { + "epoch": 1.69, + "learning_rate": 0.00012359673000096033, + "loss": 0.9418, + "step": 1150 + }, + { + "epoch": 1.7, + "learning_rate": 0.0001234821161268995, + "loss": 0.964, + "step": 1151 + }, + { + "epoch": 1.7, + "learning_rate": 0.00012336746959642526, + "loss": 0.9982, + "step": 1152 + }, + { + "epoch": 1.7, + "learning_rate": 0.0001232527905689757, + "loss": 0.9364, + "step": 1153 + }, + { + "epoch": 1.7, + "learning_rate": 0.00012313807920403419, 
+ "loss": 0.9399, + "step": 1154 + }, + { + "epoch": 1.7, + "learning_rate": 0.0001230233356611289, + "loss": 1.015, + "step": 1155 + }, + { + "epoch": 1.7, + "learning_rate": 0.000122908560099833, + "loss": 1.0214, + "step": 1156 + }, + { + "epoch": 1.7, + "learning_rate": 0.00012279375267976398, + "loss": 1.0262, + "step": 1157 + }, + { + "epoch": 1.71, + "learning_rate": 0.00012267891356058377, + "loss": 1.0277, + "step": 1158 + }, + { + "epoch": 1.71, + "learning_rate": 0.00012256404290199825, + "loss": 1.0095, + "step": 1159 + }, + { + "epoch": 1.71, + "learning_rate": 0.00012244914086375724, + "loss": 1.0314, + "step": 1160 + }, + { + "epoch": 1.71, + "learning_rate": 0.00012233420760565428, + "loss": 0.8282, + "step": 1161 + }, + { + "epoch": 1.71, + "learning_rate": 0.00012221924328752616, + "loss": 0.9709, + "step": 1162 + }, + { + "epoch": 1.71, + "learning_rate": 0.00012210424806925301, + "loss": 0.941, + "step": 1163 + }, + { + "epoch": 1.72, + "learning_rate": 0.00012198922211075778, + "loss": 0.9716, + "step": 1164 + }, + { + "epoch": 1.72, + "learning_rate": 0.00012187416557200633, + "loss": 1.0125, + "step": 1165 + }, + { + "epoch": 1.72, + "learning_rate": 0.00012175907861300697, + "loss": 1.0159, + "step": 1166 + }, + { + "epoch": 1.72, + "learning_rate": 0.00012164396139381029, + "loss": 0.9306, + "step": 1167 + }, + { + "epoch": 1.72, + "learning_rate": 0.00012152881407450905, + "loss": 1.1056, + "step": 1168 + }, + { + "epoch": 1.72, + "learning_rate": 0.00012141363681523776, + "loss": 1.0113, + "step": 1169 + }, + { + "epoch": 1.72, + "learning_rate": 0.00012129842977617265, + "loss": 0.9983, + "step": 1170 + }, + { + "epoch": 1.73, + "learning_rate": 0.00012118319311753137, + "loss": 1.0076, + "step": 1171 + }, + { + "epoch": 1.73, + "learning_rate": 0.00012106792699957263, + "loss": 1.1181, + "step": 1172 + }, + { + "epoch": 1.73, + "learning_rate": 0.00012095263158259631, + "loss": 0.8759, + "step": 1173 + }, + { + "epoch": 1.73, + 
"learning_rate": 0.00012083730702694291, + "loss": 0.9855, + "step": 1174 + }, + { + "epoch": 1.73, + "learning_rate": 0.00012072195349299345, + "loss": 1.1361, + "step": 1175 + }, + { + "epoch": 1.73, + "learning_rate": 0.00012060657114116926, + "loss": 1.0275, + "step": 1176 + }, + { + "epoch": 1.73, + "eval_loss": 0.9928944110870361, + "eval_runtime": 2.6469, + "eval_samples_per_second": 412.56, + "eval_steps_per_second": 26.068, + "step": 1176 + }, + { + "epoch": 1.73, + "learning_rate": 0.0001204911601319318, + "loss": 1.0256, + "step": 1177 + }, + { + "epoch": 1.74, + "learning_rate": 0.00012037572062578238, + "loss": 0.9218, + "step": 1178 + }, + { + "epoch": 1.74, + "learning_rate": 0.00012026025278326187, + "loss": 1.0394, + "step": 1179 + }, + { + "epoch": 1.74, + "learning_rate": 0.00012014475676495052, + "loss": 1.0318, + "step": 1180 + }, + { + "epoch": 1.74, + "learning_rate": 0.00012002923273146794, + "loss": 1.0361, + "step": 1181 + }, + { + "epoch": 1.74, + "learning_rate": 0.00011991368084347252, + "loss": 1.0093, + "step": 1182 + }, + { + "epoch": 1.74, + "learning_rate": 0.00011979810126166151, + "loss": 0.9527, + "step": 1183 + }, + { + "epoch": 1.75, + "learning_rate": 0.00011968249414677055, + "loss": 1.0946, + "step": 1184 + }, + { + "epoch": 1.75, + "learning_rate": 0.00011956685965957368, + "loss": 1.0124, + "step": 1185 + }, + { + "epoch": 1.75, + "learning_rate": 0.000119451197960883, + "loss": 1.0074, + "step": 1186 + }, + { + "epoch": 1.75, + "learning_rate": 0.00011933550921154834, + "loss": 1.0315, + "step": 1187 + }, + { + "epoch": 1.75, + "learning_rate": 0.0001192197935724573, + "loss": 0.9915, + "step": 1188 + }, + { + "epoch": 1.75, + "learning_rate": 0.00011910405120453476, + "loss": 0.9823, + "step": 1189 + }, + { + "epoch": 1.75, + "learning_rate": 0.00011898828226874284, + "loss": 1.0294, + "step": 1190 + }, + { + "epoch": 1.76, + "learning_rate": 0.00011887248692608057, + "loss": 1.0176, + "step": 1191 + }, + { + "epoch": 
1.76, + "learning_rate": 0.00011875666533758372, + "loss": 1.0486, + "step": 1192 + }, + { + "epoch": 1.76, + "learning_rate": 0.00011864081766432456, + "loss": 1.0237, + "step": 1193 + }, + { + "epoch": 1.76, + "learning_rate": 0.00011852494406741165, + "loss": 1.0469, + "step": 1194 + }, + { + "epoch": 1.76, + "learning_rate": 0.00011840904470798955, + "loss": 0.9545, + "step": 1195 + }, + { + "epoch": 1.76, + "learning_rate": 0.00011829311974723867, + "loss": 0.9812, + "step": 1196 + }, + { + "epoch": 1.76, + "learning_rate": 0.00011817716934637509, + "loss": 1.0503, + "step": 1197 + }, + { + "epoch": 1.77, + "learning_rate": 0.0001180611936666502, + "loss": 1.0693, + "step": 1198 + }, + { + "epoch": 1.77, + "learning_rate": 0.00011794519286935055, + "loss": 0.9627, + "step": 1199 + }, + { + "epoch": 1.77, + "learning_rate": 0.00011782916711579759, + "loss": 0.9728, + "step": 1200 + }, + { + "epoch": 1.77, + "learning_rate": 0.0001177131165673476, + "loss": 1.13, + "step": 1201 + }, + { + "epoch": 1.77, + "learning_rate": 0.0001175970413853912, + "loss": 0.9756, + "step": 1202 + }, + { + "epoch": 1.77, + "learning_rate": 0.00011748094173135337, + "loss": 1.0069, + "step": 1203 + }, + { + "epoch": 1.77, + "learning_rate": 0.00011736481776669306, + "loss": 1.033, + "step": 1204 + }, + { + "epoch": 1.78, + "learning_rate": 0.00011724866965290302, + "loss": 0.9906, + "step": 1205 + }, + { + "epoch": 1.78, + "learning_rate": 0.00011713249755150965, + "loss": 1.1008, + "step": 1206 + }, + { + "epoch": 1.78, + "learning_rate": 0.00011701630162407266, + "loss": 0.9987, + "step": 1207 + }, + { + "epoch": 1.78, + "learning_rate": 0.00011690008203218493, + "loss": 1.0122, + "step": 1208 + }, + { + "epoch": 1.78, + "learning_rate": 0.0001167838389374722, + "loss": 1.0495, + "step": 1209 + }, + { + "epoch": 1.78, + "learning_rate": 0.0001166675725015929, + "loss": 0.9875, + "step": 1210 + }, + { + "epoch": 1.79, + "learning_rate": 0.00011655128288623802, + "loss": 1.0231, + 
"step": 1211 + }, + { + "epoch": 1.79, + "learning_rate": 0.00011643497025313061, + "loss": 0.9342, + "step": 1212 + }, + { + "epoch": 1.79, + "learning_rate": 0.00011631863476402594, + "loss": 1.1006, + "step": 1213 + }, + { + "epoch": 1.79, + "learning_rate": 0.00011620227658071087, + "loss": 0.9264, + "step": 1214 + }, + { + "epoch": 1.79, + "learning_rate": 0.00011608589586500391, + "loss": 1.1099, + "step": 1215 + }, + { + "epoch": 1.79, + "learning_rate": 0.00011596949277875495, + "loss": 1.0326, + "step": 1216 + }, + { + "epoch": 1.79, + "learning_rate": 0.0001158530674838449, + "loss": 0.9235, + "step": 1217 + }, + { + "epoch": 1.8, + "learning_rate": 0.00011573662014218564, + "loss": 1.0227, + "step": 1218 + }, + { + "epoch": 1.8, + "learning_rate": 0.00011562015091571963, + "loss": 1.0028, + "step": 1219 + }, + { + "epoch": 1.8, + "learning_rate": 0.00011550365996641979, + "loss": 1.0744, + "step": 1220 + }, + { + "epoch": 1.8, + "learning_rate": 0.00011538714745628931, + "loss": 0.9521, + "step": 1221 + }, + { + "epoch": 1.8, + "learning_rate": 0.00011527061354736129, + "loss": 1.0171, + "step": 1222 + }, + { + "epoch": 1.8, + "learning_rate": 0.00011515405840169861, + "loss": 1.0481, + "step": 1223 + }, + { + "epoch": 1.8, + "learning_rate": 0.00011503748218139369, + "loss": 1.0034, + "step": 1224 + }, + { + "epoch": 1.81, + "learning_rate": 0.00011492088504856826, + "loss": 1.1384, + "step": 1225 + }, + { + "epoch": 1.81, + "learning_rate": 0.00011480426716537315, + "loss": 1.0268, + "step": 1226 + }, + { + "epoch": 1.81, + "learning_rate": 0.00011468762869398802, + "loss": 1.003, + "step": 1227 + }, + { + "epoch": 1.81, + "learning_rate": 0.00011457096979662114, + "loss": 1.1087, + "step": 1228 + }, + { + "epoch": 1.81, + "learning_rate": 0.00011445429063550926, + "loss": 1.0809, + "step": 1229 + }, + { + "epoch": 1.81, + "learning_rate": 0.00011433759137291727, + "loss": 1.0054, + "step": 1230 + }, + { + "epoch": 1.82, + "learning_rate": 
0.00011422087217113795, + "loss": 0.9416, + "step": 1231 + }, + { + "epoch": 1.82, + "learning_rate": 0.00011410413319249194, + "loss": 1.0153, + "step": 1232 + }, + { + "epoch": 1.82, + "learning_rate": 0.00011398737459932728, + "loss": 1.0622, + "step": 1233 + }, + { + "epoch": 1.82, + "learning_rate": 0.00011387059655401932, + "loss": 1.0792, + "step": 1234 + }, + { + "epoch": 1.82, + "learning_rate": 0.00011375379921897051, + "loss": 0.9822, + "step": 1235 + }, + { + "epoch": 1.82, + "learning_rate": 0.00011363698275661001, + "loss": 0.9949, + "step": 1236 + }, + { + "epoch": 1.82, + "learning_rate": 0.00011352014732939369, + "loss": 0.9653, + "step": 1237 + }, + { + "epoch": 1.83, + "learning_rate": 0.00011340329309980377, + "loss": 1.0694, + "step": 1238 + }, + { + "epoch": 1.83, + "learning_rate": 0.00011328642023034857, + "loss": 0.9925, + "step": 1239 + }, + { + "epoch": 1.83, + "learning_rate": 0.00011316952888356237, + "loss": 0.9829, + "step": 1240 + }, + { + "epoch": 1.83, + "learning_rate": 0.00011305261922200519, + "loss": 0.9659, + "step": 1241 + }, + { + "epoch": 1.83, + "learning_rate": 0.00011293569140826239, + "loss": 1.109, + "step": 1242 + }, + { + "epoch": 1.83, + "learning_rate": 0.00011281874560494472, + "loss": 1.0614, + "step": 1243 + }, + { + "epoch": 1.83, + "learning_rate": 0.00011270178197468789, + "loss": 0.9013, + "step": 1244 + }, + { + "epoch": 1.84, + "learning_rate": 0.00011258480068015235, + "loss": 1.0049, + "step": 1245 + }, + { + "epoch": 1.84, + "learning_rate": 0.00011246780188402322, + "loss": 0.9746, + "step": 1246 + }, + { + "epoch": 1.84, + "learning_rate": 0.00011235078574900984, + "loss": 1.1433, + "step": 1247 + }, + { + "epoch": 1.84, + "learning_rate": 0.00011223375243784573, + "loss": 1.0196, + "step": 1248 + }, + { + "epoch": 1.84, + "learning_rate": 0.00011211670211328833, + "loss": 0.9859, + "step": 1249 + }, + { + "epoch": 1.84, + "learning_rate": 0.0001119996349381187, + "loss": 0.9037, + "step": 1250 + }, + 
{ + "epoch": 1.85, + "learning_rate": 0.0001118825510751413, + "loss": 1.0481, + "step": 1251 + }, + { + "epoch": 1.85, + "learning_rate": 0.00011176545068718385, + "loss": 1.0324, + "step": 1252 + }, + { + "epoch": 1.85, + "learning_rate": 0.00011164833393709706, + "loss": 1.0155, + "step": 1253 + }, + { + "epoch": 1.85, + "learning_rate": 0.00011153120098775434, + "loss": 0.967, + "step": 1254 + }, + { + "epoch": 1.85, + "learning_rate": 0.00011141405200205166, + "loss": 0.9766, + "step": 1255 + }, + { + "epoch": 1.85, + "learning_rate": 0.00011129688714290729, + "loss": 1.0075, + "step": 1256 + }, + { + "epoch": 1.85, + "learning_rate": 0.00011117970657326158, + "loss": 0.9472, + "step": 1257 + }, + { + "epoch": 1.86, + "learning_rate": 0.00011106251045607674, + "loss": 0.9949, + "step": 1258 + }, + { + "epoch": 1.86, + "learning_rate": 0.00011094529895433652, + "loss": 1.0302, + "step": 1259 + }, + { + "epoch": 1.86, + "learning_rate": 0.0001108280722310462, + "loss": 1.0538, + "step": 1260 + }, + { + "epoch": 1.86, + "learning_rate": 0.00011071083044923214, + "loss": 1.0025, + "step": 1261 + }, + { + "epoch": 1.86, + "learning_rate": 0.00011059357377194161, + "loss": 1.024, + "step": 1262 + }, + { + "epoch": 1.86, + "learning_rate": 0.00011047630236224271, + "loss": 0.9452, + "step": 1263 + }, + { + "epoch": 1.86, + "learning_rate": 0.00011035901638322392, + "loss": 1.0055, + "step": 1264 + }, + { + "epoch": 1.87, + "learning_rate": 0.00011024171599799409, + "loss": 0.9875, + "step": 1265 + }, + { + "epoch": 1.87, + "learning_rate": 0.00011012440136968196, + "loss": 0.9582, + "step": 1266 + }, + { + "epoch": 1.87, + "learning_rate": 0.00011000707266143617, + "loss": 0.9986, + "step": 1267 + }, + { + "epoch": 1.87, + "learning_rate": 0.00010988973003642499, + "loss": 1.0328, + "step": 1268 + }, + { + "epoch": 1.87, + "learning_rate": 0.0001097723736578359, + "loss": 1.0108, + "step": 1269 + }, + { + "epoch": 1.87, + "learning_rate": 0.00010965500368887567, + 
"loss": 0.9941, + "step": 1270 + }, + { + "epoch": 1.87, + "learning_rate": 0.00010953762029276982, + "loss": 1.0842, + "step": 1271 + }, + { + "epoch": 1.88, + "learning_rate": 0.00010942022363276264, + "loss": 0.994, + "step": 1272 + }, + { + "epoch": 1.88, + "learning_rate": 0.00010930281387211683, + "loss": 1.0151, + "step": 1273 + }, + { + "epoch": 1.88, + "learning_rate": 0.00010918539117411333, + "loss": 1.0172, + "step": 1274 + }, + { + "epoch": 1.88, + "learning_rate": 0.00010906795570205104, + "loss": 1.0698, + "step": 1275 + }, + { + "epoch": 1.88, + "learning_rate": 0.00010895050761924668, + "loss": 0.9835, + "step": 1276 + }, + { + "epoch": 1.88, + "learning_rate": 0.0001088330470890344, + "loss": 0.9461, + "step": 1277 + }, + { + "epoch": 1.89, + "learning_rate": 0.00010871557427476583, + "loss": 1.0394, + "step": 1278 + }, + { + "epoch": 1.89, + "learning_rate": 0.00010859808933980948, + "loss": 0.9639, + "step": 1279 + }, + { + "epoch": 1.89, + "learning_rate": 0.00010848059244755093, + "loss": 0.9863, + "step": 1280 + }, + { + "epoch": 1.89, + "learning_rate": 0.00010836308376139221, + "loss": 1.0728, + "step": 1281 + }, + { + "epoch": 1.89, + "learning_rate": 0.00010824556344475181, + "loss": 0.9989, + "step": 1282 + }, + { + "epoch": 1.89, + "learning_rate": 0.00010812803166106444, + "loss": 0.962, + "step": 1283 + }, + { + "epoch": 1.89, + "learning_rate": 0.00010801048857378071, + "loss": 0.8658, + "step": 1284 + }, + { + "epoch": 1.9, + "learning_rate": 0.00010789293434636698, + "loss": 1.0488, + "step": 1285 + }, + { + "epoch": 1.9, + "learning_rate": 0.00010777536914230508, + "loss": 1.0183, + "step": 1286 + }, + { + "epoch": 1.9, + "learning_rate": 0.00010765779312509208, + "loss": 0.9535, + "step": 1287 + }, + { + "epoch": 1.9, + "learning_rate": 0.00010754020645824017, + "loss": 0.9978, + "step": 1288 + }, + { + "epoch": 1.9, + "learning_rate": 0.00010742260930527625, + "loss": 0.8929, + "step": 1289 + }, + { + "epoch": 1.9, + 
"learning_rate": 0.0001073050018297419, + "loss": 0.9762, + "step": 1290 + }, + { + "epoch": 1.9, + "learning_rate": 0.00010718738419519297, + "loss": 1.0559, + "step": 1291 + }, + { + "epoch": 1.91, + "learning_rate": 0.00010706975656519946, + "loss": 1.0327, + "step": 1292 + }, + { + "epoch": 1.91, + "learning_rate": 0.00010695211910334537, + "loss": 1.0322, + "step": 1293 + }, + { + "epoch": 1.91, + "learning_rate": 0.00010683447197322817, + "loss": 1.0542, + "step": 1294 + }, + { + "epoch": 1.91, + "learning_rate": 0.00010671681533845899, + "loss": 1.0521, + "step": 1295 + }, + { + "epoch": 1.91, + "learning_rate": 0.00010659914936266206, + "loss": 0.9967, + "step": 1296 + }, + { + "epoch": 1.91, + "learning_rate": 0.00010648147420947461, + "loss": 1.0491, + "step": 1297 + }, + { + "epoch": 1.92, + "learning_rate": 0.00010636379004254664, + "loss": 0.9035, + "step": 1298 + }, + { + "epoch": 1.92, + "learning_rate": 0.00010624609702554069, + "loss": 1.0704, + "step": 1299 + }, + { + "epoch": 1.92, + "learning_rate": 0.00010612839532213164, + "loss": 0.9533, + "step": 1300 + }, + { + "epoch": 1.92, + "learning_rate": 0.00010601068509600642, + "loss": 1.0396, + "step": 1301 + }, + { + "epoch": 1.92, + "learning_rate": 0.00010589296651086376, + "loss": 0.9543, + "step": 1302 + }, + { + "epoch": 1.92, + "learning_rate": 0.0001057752397304141, + "loss": 1.0591, + "step": 1303 + }, + { + "epoch": 1.92, + "learning_rate": 0.00010565750491837925, + "loss": 1.1191, + "step": 1304 + }, + { + "epoch": 1.93, + "learning_rate": 0.00010553976223849218, + "loss": 0.916, + "step": 1305 + }, + { + "epoch": 1.93, + "learning_rate": 0.00010542201185449678, + "loss": 0.9732, + "step": 1306 + }, + { + "epoch": 1.93, + "learning_rate": 0.00010530425393014774, + "loss": 1.01, + "step": 1307 + }, + { + "epoch": 1.93, + "learning_rate": 0.00010518648862921012, + "loss": 0.9849, + "step": 1308 + }, + { + "epoch": 1.93, + "learning_rate": 0.0001050687161154593, + "loss": 1.0519, + "step": 
1309 + }, + { + "epoch": 1.93, + "learning_rate": 0.00010495093655268071, + "loss": 1.0539, + "step": 1310 + }, + { + "epoch": 1.93, + "learning_rate": 0.00010483315010466952, + "loss": 0.9922, + "step": 1311 + }, + { + "epoch": 1.94, + "learning_rate": 0.00010471535693523057, + "loss": 1.0048, + "step": 1312 + }, + { + "epoch": 1.94, + "learning_rate": 0.00010459755720817797, + "loss": 1.0576, + "step": 1313 + }, + { + "epoch": 1.94, + "learning_rate": 0.00010447975108733492, + "loss": 1.0268, + "step": 1314 + }, + { + "epoch": 1.94, + "learning_rate": 0.00010436193873653361, + "loss": 1.0566, + "step": 1315 + }, + { + "epoch": 1.94, + "learning_rate": 0.00010424412031961484, + "loss": 1.0294, + "step": 1316 + }, + { + "epoch": 1.94, + "learning_rate": 0.00010412629600042785, + "loss": 1.0808, + "step": 1317 + }, + { + "epoch": 1.94, + "learning_rate": 0.00010400846594283012, + "loss": 1.0487, + "step": 1318 + }, + { + "epoch": 1.95, + "learning_rate": 0.00010389063031068698, + "loss": 1.04, + "step": 1319 + }, + { + "epoch": 1.95, + "learning_rate": 0.00010377278926787173, + "loss": 1.033, + "step": 1320 + }, + { + "epoch": 1.95, + "learning_rate": 0.000103654942978265, + "loss": 0.9637, + "step": 1321 + }, + { + "epoch": 1.95, + "learning_rate": 0.00010353709160575489, + "loss": 0.9665, + "step": 1322 + }, + { + "epoch": 1.95, + "learning_rate": 0.00010341923531423634, + "loss": 1.0079, + "step": 1323 + }, + { + "epoch": 1.95, + "learning_rate": 0.00010330137426761135, + "loss": 0.9989, + "step": 1324 + }, + { + "epoch": 1.96, + "learning_rate": 0.00010318350862978848, + "loss": 1.0103, + "step": 1325 + }, + { + "epoch": 1.96, + "learning_rate": 0.00010306563856468253, + "loss": 0.9872, + "step": 1326 + }, + { + "epoch": 1.96, + "learning_rate": 0.00010294776423621464, + "loss": 0.9684, + "step": 1327 + }, + { + "epoch": 1.96, + "learning_rate": 0.00010282988580831183, + "loss": 0.9745, + "step": 1328 + }, + { + "epoch": 1.96, + "learning_rate": 
0.00010271200344490674, + "loss": 1.0621, + "step": 1329 + }, + { + "epoch": 1.96, + "learning_rate": 0.0001025941173099376, + "loss": 1.0639, + "step": 1330 + }, + { + "epoch": 1.96, + "learning_rate": 0.00010247622756734774, + "loss": 0.914, + "step": 1331 + }, + { + "epoch": 1.97, + "learning_rate": 0.00010235833438108571, + "loss": 1.0135, + "step": 1332 + }, + { + "epoch": 1.97, + "learning_rate": 0.00010224043791510465, + "loss": 1.0132, + "step": 1333 + }, + { + "epoch": 1.97, + "learning_rate": 0.00010212253833336237, + "loss": 0.9912, + "step": 1334 + }, + { + "epoch": 1.97, + "learning_rate": 0.00010200463579982098, + "loss": 0.9869, + "step": 1335 + }, + { + "epoch": 1.97, + "learning_rate": 0.0001018867304784467, + "loss": 0.9784, + "step": 1336 + }, + { + "epoch": 1.97, + "learning_rate": 0.00010176882253320967, + "loss": 0.9837, + "step": 1337 + }, + { + "epoch": 1.97, + "learning_rate": 0.0001016509121280836, + "loss": 1.039, + "step": 1338 + }, + { + "epoch": 1.98, + "learning_rate": 0.00010153299942704566, + "loss": 0.9984, + "step": 1339 + }, + { + "epoch": 1.98, + "learning_rate": 0.00010141508459407623, + "loss": 1.0526, + "step": 1340 + }, + { + "epoch": 1.98, + "learning_rate": 0.00010129716779315862, + "loss": 1.0581, + "step": 1341 + }, + { + "epoch": 1.98, + "learning_rate": 0.0001011792491882789, + "loss": 1.0607, + "step": 1342 + }, + { + "epoch": 1.98, + "learning_rate": 0.00010106132894342564, + "loss": 0.9734, + "step": 1343 + }, + { + "epoch": 1.98, + "learning_rate": 0.00010094340722258969, + "loss": 0.8872, + "step": 1344 + }, + { + "epoch": 1.98, + "eval_loss": 0.9883129000663757, + "eval_runtime": 2.6191, + "eval_samples_per_second": 416.937, + "eval_steps_per_second": 26.345, + "step": 1344 + }, + { + "epoch": 1.99, + "learning_rate": 0.00010082548418976399, + "loss": 1.0538, + "step": 1345 + }, + { + "epoch": 1.99, + "learning_rate": 0.00010070756000894321, + "loss": 0.9638, + "step": 1346 + }, + { + "epoch": 1.99, + 
"learning_rate": 0.00010058963484412372, + "loss": 1.0556, + "step": 1347 + }, + { + "epoch": 1.99, + "learning_rate": 0.00010047170885930324, + "loss": 1.0462, + "step": 1348 + }, + { + "epoch": 1.99, + "learning_rate": 0.00010035378221848053, + "loss": 0.9999, + "step": 1349 + }, + { + "epoch": 1.99, + "learning_rate": 0.00010023585508565538, + "loss": 0.9644, + "step": 1350 + }, + { + "epoch": 1.99, + "learning_rate": 0.00010011792762482826, + "loss": 1.0601, + "step": 1351 + }, + { + "epoch": 2.0, + "learning_rate": 0.0001, + "loss": 0.9745, + "step": 1352 + }, + { + "epoch": 2.0, + "learning_rate": 9.988207237517178e-05, + "loss": 1.0911, + "step": 1353 + }, + { + "epoch": 2.0, + "learning_rate": 9.976414491434463e-05, + "loss": 0.9607, + "step": 1354 + }, + { + "epoch": 2.0, + "learning_rate": 9.964621778151946e-05, + "loss": 1.0151, + "step": 1355 + }, + { + "epoch": 2.0, + "learning_rate": 9.95282911406968e-05, + "loss": 1.036, + "step": 1356 + }, + { + "epoch": 2.0, + "learning_rate": 9.94103651558763e-05, + "loss": 1.0518, + "step": 1357 + }, + { + "epoch": 2.0, + "learning_rate": 9.929243999105682e-05, + "loss": 0.9251, + "step": 1358 + }, + { + "epoch": 2.01, + "learning_rate": 9.917451581023607e-05, + "loss": 0.9899, + "step": 1359 + }, + { + "epoch": 2.01, + "learning_rate": 9.905659277741032e-05, + "loss": 0.9737, + "step": 1360 + }, + { + "epoch": 2.01, + "learning_rate": 9.89386710565744e-05, + "loss": 1.0575, + "step": 1361 + }, + { + "epoch": 2.01, + "learning_rate": 9.882075081172112e-05, + "loss": 1.0074, + "step": 1362 + }, + { + "epoch": 2.01, + "learning_rate": 9.870283220684142e-05, + "loss": 0.9959, + "step": 1363 + }, + { + "epoch": 2.01, + "learning_rate": 9.858491540592382e-05, + "loss": 1.039, + "step": 1364 + }, + { + "epoch": 2.01, + "learning_rate": 9.846700057295435e-05, + "loss": 1.0228, + "step": 1365 + }, + { + "epoch": 2.02, + "learning_rate": 9.834908787191642e-05, + "loss": 1.014, + "step": 1366 + }, + { + "epoch": 2.02, + 
"learning_rate": 9.823117746679034e-05, + "loss": 0.9919, + "step": 1367 + }, + { + "epoch": 2.02, + "learning_rate": 9.811326952155331e-05, + "loss": 1.0975, + "step": 1368 + }, + { + "epoch": 2.02, + "learning_rate": 9.799536420017906e-05, + "loss": 0.934, + "step": 1369 + }, + { + "epoch": 2.0, + "learning_rate": 9.787746166663764e-05, + "loss": 0.9068, + "step": 1370 + }, + { + "epoch": 2.0, + "learning_rate": 9.775956208489536e-05, + "loss": 0.8948, + "step": 1371 + }, + { + "epoch": 2.0, + "learning_rate": 9.764166561891432e-05, + "loss": 0.9547, + "step": 1372 + }, + { + "epoch": 2.01, + "learning_rate": 9.752377243265229e-05, + "loss": 0.933, + "step": 1373 + }, + { + "epoch": 2.01, + "learning_rate": 9.740588269006246e-05, + "loss": 0.9899, + "step": 1374 + }, + { + "epoch": 2.01, + "learning_rate": 9.728799655509327e-05, + "loss": 0.9059, + "step": 1375 + }, + { + "epoch": 2.01, + "learning_rate": 9.71701141916882e-05, + "loss": 0.9062, + "step": 1376 + }, + { + "epoch": 2.01, + "learning_rate": 9.705223576378539e-05, + "loss": 0.9101, + "step": 1377 + }, + { + "epoch": 2.01, + "learning_rate": 9.69343614353175e-05, + "loss": 0.9635, + "step": 1378 + }, + { + "epoch": 2.01, + "learning_rate": 9.681649137021158e-05, + "loss": 0.8867, + "step": 1379 + }, + { + "epoch": 2.02, + "learning_rate": 9.669862573238863e-05, + "loss": 0.968, + "step": 1380 + }, + { + "epoch": 2.02, + "learning_rate": 9.658076468576368e-05, + "loss": 0.9432, + "step": 1381 + }, + { + "epoch": 2.02, + "learning_rate": 9.646290839424515e-05, + "loss": 0.9186, + "step": 1382 + }, + { + "epoch": 2.02, + "learning_rate": 9.6345057021735e-05, + "loss": 1.038, + "step": 1383 + }, + { + "epoch": 2.02, + "learning_rate": 9.622721073212832e-05, + "loss": 0.9284, + "step": 1384 + }, + { + "epoch": 2.02, + "learning_rate": 9.610936968931303e-05, + "loss": 0.9076, + "step": 1385 + }, + { + "epoch": 2.03, + "learning_rate": 9.599153405716992e-05, + "loss": 0.9196, + "step": 1386 + }, + { + 
"epoch": 2.03, + "learning_rate": 9.587370399957217e-05, + "loss": 0.9589, + "step": 1387 + }, + { + "epoch": 2.03, + "learning_rate": 9.57558796803852e-05, + "loss": 0.9658, + "step": 1388 + }, + { + "epoch": 2.03, + "learning_rate": 9.563806126346642e-05, + "loss": 0.8864, + "step": 1389 + }, + { + "epoch": 2.03, + "learning_rate": 9.55202489126651e-05, + "loss": 1.0115, + "step": 1390 + }, + { + "epoch": 2.03, + "learning_rate": 9.540244279182205e-05, + "loss": 0.9058, + "step": 1391 + }, + { + "epoch": 2.03, + "learning_rate": 9.528464306476945e-05, + "loss": 0.8686, + "step": 1392 + }, + { + "epoch": 2.04, + "learning_rate": 9.516684989533051e-05, + "loss": 0.9784, + "step": 1393 + }, + { + "epoch": 2.04, + "learning_rate": 9.504906344731932e-05, + "loss": 1.0002, + "step": 1394 + }, + { + "epoch": 2.04, + "learning_rate": 9.49312838845407e-05, + "loss": 0.9873, + "step": 1395 + }, + { + "epoch": 2.04, + "learning_rate": 9.48135113707899e-05, + "loss": 0.9026, + "step": 1396 + }, + { + "epoch": 2.04, + "learning_rate": 9.46957460698523e-05, + "loss": 0.9592, + "step": 1397 + }, + { + "epoch": 2.04, + "learning_rate": 9.457798814550323e-05, + "loss": 0.8996, + "step": 1398 + }, + { + "epoch": 2.04, + "learning_rate": 9.446023776150787e-05, + "loss": 1.0221, + "step": 1399 + }, + { + "epoch": 2.05, + "learning_rate": 9.434249508162076e-05, + "loss": 0.9343, + "step": 1400 + }, + { + "epoch": 2.05, + "learning_rate": 9.422476026958593e-05, + "loss": 0.9033, + "step": 1401 + }, + { + "epoch": 2.05, + "learning_rate": 9.410703348913627e-05, + "loss": 0.9904, + "step": 1402 + }, + { + "epoch": 2.05, + "learning_rate": 9.398931490399363e-05, + "loss": 0.8898, + "step": 1403 + }, + { + "epoch": 2.05, + "learning_rate": 9.38716046778684e-05, + "loss": 0.926, + "step": 1404 + }, + { + "epoch": 2.05, + "learning_rate": 9.375390297445932e-05, + "loss": 0.9515, + "step": 1405 + }, + { + "epoch": 2.06, + "learning_rate": 9.363620995745337e-05, + "loss": 0.9963, + "step": 
1406 + }, + { + "epoch": 2.06, + "learning_rate": 9.351852579052543e-05, + "loss": 0.8657, + "step": 1407 + }, + { + "epoch": 2.06, + "learning_rate": 9.340085063733797e-05, + "loss": 0.9137, + "step": 1408 + }, + { + "epoch": 2.06, + "learning_rate": 9.328318466154102e-05, + "loss": 0.9354, + "step": 1409 + }, + { + "epoch": 2.06, + "learning_rate": 9.316552802677184e-05, + "loss": 0.8961, + "step": 1410 + }, + { + "epoch": 2.06, + "learning_rate": 9.304788089665466e-05, + "loss": 0.9796, + "step": 1411 + }, + { + "epoch": 2.06, + "learning_rate": 9.293024343480055e-05, + "loss": 0.8937, + "step": 1412 + }, + { + "epoch": 2.07, + "learning_rate": 9.281261580480705e-05, + "loss": 0.9589, + "step": 1413 + }, + { + "epoch": 2.07, + "learning_rate": 9.269499817025814e-05, + "loss": 1.025, + "step": 1414 + }, + { + "epoch": 2.07, + "learning_rate": 9.257739069472374e-05, + "loss": 0.9527, + "step": 1415 + }, + { + "epoch": 2.07, + "learning_rate": 9.245979354175985e-05, + "loss": 0.9236, + "step": 1416 + }, + { + "epoch": 2.07, + "learning_rate": 9.234220687490794e-05, + "loss": 0.8775, + "step": 1417 + }, + { + "epoch": 2.07, + "learning_rate": 9.222463085769494e-05, + "loss": 0.9171, + "step": 1418 + }, + { + "epoch": 2.07, + "learning_rate": 9.210706565363305e-05, + "loss": 0.8984, + "step": 1419 + }, + { + "epoch": 2.08, + "learning_rate": 9.198951142621929e-05, + "loss": 0.8724, + "step": 1420 + }, + { + "epoch": 2.08, + "learning_rate": 9.187196833893558e-05, + "loss": 0.918, + "step": 1421 + }, + { + "epoch": 2.08, + "learning_rate": 9.175443655524821e-05, + "loss": 0.9457, + "step": 1422 + }, + { + "epoch": 2.08, + "learning_rate": 9.163691623860784e-05, + "loss": 0.9508, + "step": 1423 + }, + { + "epoch": 2.08, + "learning_rate": 9.151940755244912e-05, + "loss": 0.8994, + "step": 1424 + }, + { + "epoch": 2.08, + "learning_rate": 9.14019106601905e-05, + "loss": 0.9118, + "step": 1425 + }, + { + "epoch": 2.08, + "learning_rate": 9.128442572523417e-05, + "loss": 
0.8908, + "step": 1426 + }, + { + "epoch": 2.09, + "learning_rate": 9.11669529109656e-05, + "loss": 0.9755, + "step": 1427 + }, + { + "epoch": 2.09, + "learning_rate": 9.104949238075336e-05, + "loss": 0.9706, + "step": 1428 + }, + { + "epoch": 2.09, + "learning_rate": 9.093204429794898e-05, + "loss": 0.9156, + "step": 1429 + }, + { + "epoch": 2.09, + "learning_rate": 9.081460882588667e-05, + "loss": 0.9797, + "step": 1430 + }, + { + "epoch": 2.09, + "learning_rate": 9.069718612788318e-05, + "loss": 0.9402, + "step": 1431 + }, + { + "epoch": 2.09, + "learning_rate": 9.057977636723739e-05, + "loss": 0.9339, + "step": 1432 + }, + { + "epoch": 2.1, + "learning_rate": 9.04623797072302e-05, + "loss": 0.9026, + "step": 1433 + }, + { + "epoch": 2.1, + "learning_rate": 9.034499631112437e-05, + "loss": 0.9424, + "step": 1434 + }, + { + "epoch": 2.1, + "learning_rate": 9.022762634216409e-05, + "loss": 0.948, + "step": 1435 + }, + { + "epoch": 2.1, + "learning_rate": 9.011026996357503e-05, + "loss": 0.9323, + "step": 1436 + }, + { + "epoch": 2.1, + "learning_rate": 8.999292733856385e-05, + "loss": 1.0037, + "step": 1437 + }, + { + "epoch": 2.1, + "learning_rate": 8.987559863031808e-05, + "loss": 0.9549, + "step": 1438 + }, + { + "epoch": 2.1, + "learning_rate": 8.975828400200592e-05, + "loss": 0.915, + "step": 1439 + }, + { + "epoch": 2.11, + "learning_rate": 8.964098361677607e-05, + "loss": 0.9074, + "step": 1440 + }, + { + "epoch": 2.11, + "learning_rate": 8.952369763775733e-05, + "loss": 0.843, + "step": 1441 + }, + { + "epoch": 2.11, + "learning_rate": 8.94064262280584e-05, + "loss": 0.8654, + "step": 1442 + }, + { + "epoch": 2.11, + "learning_rate": 8.928916955076792e-05, + "loss": 0.9349, + "step": 1443 + }, + { + "epoch": 2.11, + "learning_rate": 8.917192776895382e-05, + "loss": 0.9353, + "step": 1444 + }, + { + "epoch": 2.11, + "learning_rate": 8.90547010456635e-05, + "loss": 0.9137, + "step": 1445 + }, + { + "epoch": 2.11, + "learning_rate": 8.89374895439233e-05, + 
"loss": 0.9528, + "step": 1446 + }, + { + "epoch": 2.12, + "learning_rate": 8.882029342673844e-05, + "loss": 0.8906, + "step": 1447 + }, + { + "epoch": 2.12, + "learning_rate": 8.870311285709274e-05, + "loss": 0.9351, + "step": 1448 + }, + { + "epoch": 2.12, + "learning_rate": 8.858594799794835e-05, + "loss": 1.0043, + "step": 1449 + }, + { + "epoch": 2.12, + "learning_rate": 8.846879901224566e-05, + "loss": 0.8462, + "step": 1450 + }, + { + "epoch": 2.12, + "learning_rate": 8.835166606290295e-05, + "loss": 1.01, + "step": 1451 + }, + { + "epoch": 2.12, + "learning_rate": 8.823454931281616e-05, + "loss": 0.8858, + "step": 1452 + }, + { + "epoch": 2.13, + "learning_rate": 8.811744892485871e-05, + "loss": 0.9954, + "step": 1453 + }, + { + "epoch": 2.13, + "learning_rate": 8.800036506188129e-05, + "loss": 0.9609, + "step": 1454 + }, + { + "epoch": 2.13, + "learning_rate": 8.788329788671167e-05, + "loss": 0.8891, + "step": 1455 + }, + { + "epoch": 2.13, + "learning_rate": 8.776624756215429e-05, + "loss": 0.9669, + "step": 1456 + }, + { + "epoch": 2.13, + "learning_rate": 8.76492142509902e-05, + "loss": 0.981, + "step": 1457 + }, + { + "epoch": 2.13, + "learning_rate": 8.753219811597683e-05, + "loss": 0.9607, + "step": 1458 + }, + { + "epoch": 2.13, + "learning_rate": 8.741519931984766e-05, + "loss": 0.8752, + "step": 1459 + }, + { + "epoch": 2.14, + "learning_rate": 8.729821802531212e-05, + "loss": 0.8649, + "step": 1460 + }, + { + "epoch": 2.14, + "learning_rate": 8.71812543950553e-05, + "loss": 0.9491, + "step": 1461 + }, + { + "epoch": 2.14, + "learning_rate": 8.706430859173763e-05, + "loss": 0.9335, + "step": 1462 + }, + { + "epoch": 2.14, + "learning_rate": 8.694738077799488e-05, + "loss": 0.9752, + "step": 1463 + }, + { + "epoch": 2.14, + "learning_rate": 8.683047111643763e-05, + "loss": 0.987, + "step": 1464 + }, + { + "epoch": 2.14, + "learning_rate": 8.671357976965147e-05, + "loss": 0.9728, + "step": 1465 + }, + { + "epoch": 2.14, + "learning_rate": 
8.659670690019625e-05, + "loss": 0.8742, + "step": 1466 + }, + { + "epoch": 2.15, + "learning_rate": 8.647985267060635e-05, + "loss": 0.8658, + "step": 1467 + }, + { + "epoch": 2.15, + "learning_rate": 8.636301724339004e-05, + "loss": 1.0111, + "step": 1468 + }, + { + "epoch": 2.15, + "learning_rate": 8.624620078102951e-05, + "loss": 0.865, + "step": 1469 + }, + { + "epoch": 2.15, + "learning_rate": 8.612940344598067e-05, + "loss": 0.9406, + "step": 1470 + }, + { + "epoch": 2.15, + "learning_rate": 8.601262540067274e-05, + "loss": 0.9647, + "step": 1471 + }, + { + "epoch": 2.15, + "learning_rate": 8.58958668075081e-05, + "loss": 0.9346, + "step": 1472 + }, + { + "epoch": 2.15, + "learning_rate": 8.577912782886206e-05, + "loss": 0.9357, + "step": 1473 + }, + { + "epoch": 2.16, + "learning_rate": 8.566240862708274e-05, + "loss": 0.9226, + "step": 1474 + }, + { + "epoch": 2.16, + "learning_rate": 8.554570936449074e-05, + "loss": 0.8791, + "step": 1475 + }, + { + "epoch": 2.16, + "learning_rate": 8.542903020337887e-05, + "loss": 0.9111, + "step": 1476 + }, + { + "epoch": 2.16, + "learning_rate": 8.531237130601199e-05, + "loss": 0.928, + "step": 1477 + }, + { + "epoch": 2.16, + "learning_rate": 8.519573283462687e-05, + "loss": 0.9075, + "step": 1478 + }, + { + "epoch": 2.16, + "learning_rate": 8.507911495143173e-05, + "loss": 0.9911, + "step": 1479 + }, + { + "epoch": 2.17, + "learning_rate": 8.496251781860633e-05, + "loss": 0.9687, + "step": 1480 + }, + { + "epoch": 2.17, + "learning_rate": 8.484594159830141e-05, + "loss": 0.8915, + "step": 1481 + }, + { + "epoch": 2.17, + "learning_rate": 8.472938645263875e-05, + "loss": 0.9551, + "step": 1482 + }, + { + "epoch": 2.17, + "learning_rate": 8.461285254371073e-05, + "loss": 0.902, + "step": 1483 + }, + { + "epoch": 2.17, + "learning_rate": 8.449634003358022e-05, + "loss": 0.963, + "step": 1484 + }, + { + "epoch": 2.17, + "learning_rate": 8.437984908428041e-05, + "loss": 0.8784, + "step": 1485 + }, + { + "epoch": 2.17, + 
"learning_rate": 8.426337985781438e-05, + "loss": 0.9793, + "step": 1486 + }, + { + "epoch": 2.18, + "learning_rate": 8.414693251615512e-05, + "loss": 0.9435, + "step": 1487 + }, + { + "epoch": 2.18, + "learning_rate": 8.403050722124509e-05, + "loss": 0.9944, + "step": 1488 + }, + { + "epoch": 2.18, + "learning_rate": 8.39141041349961e-05, + "loss": 0.9912, + "step": 1489 + }, + { + "epoch": 2.18, + "learning_rate": 8.379772341928915e-05, + "loss": 0.8934, + "step": 1490 + }, + { + "epoch": 2.18, + "learning_rate": 8.36813652359741e-05, + "loss": 0.9868, + "step": 1491 + }, + { + "epoch": 2.18, + "learning_rate": 8.356502974686941e-05, + "loss": 0.8958, + "step": 1492 + }, + { + "epoch": 2.18, + "learning_rate": 8.344871711376203e-05, + "loss": 0.9075, + "step": 1493 + }, + { + "epoch": 2.19, + "learning_rate": 8.33324274984071e-05, + "loss": 0.954, + "step": 1494 + }, + { + "epoch": 2.19, + "learning_rate": 8.321616106252783e-05, + "loss": 0.9316, + "step": 1495 + }, + { + "epoch": 2.19, + "learning_rate": 8.309991796781511e-05, + "loss": 0.9949, + "step": 1496 + }, + { + "epoch": 2.19, + "learning_rate": 8.298369837592735e-05, + "loss": 1.0344, + "step": 1497 + }, + { + "epoch": 2.19, + "learning_rate": 8.286750244849038e-05, + "loss": 0.9034, + "step": 1498 + }, + { + "epoch": 2.19, + "learning_rate": 8.275133034709699e-05, + "loss": 0.9102, + "step": 1499 + }, + { + "epoch": 2.2, + "learning_rate": 8.263518223330697e-05, + "loss": 0.9013, + "step": 1500 + }, + { + "epoch": 2.2, + "learning_rate": 8.251905826864665e-05, + "loss": 1.0105, + "step": 1501 + }, + { + "epoch": 2.2, + "learning_rate": 8.240295861460881e-05, + "loss": 0.9215, + "step": 1502 + }, + { + "epoch": 2.2, + "learning_rate": 8.228688343265242e-05, + "loss": 0.8733, + "step": 1503 + }, + { + "epoch": 2.2, + "learning_rate": 8.217083288420241e-05, + "loss": 0.9214, + "step": 1504 + }, + { + "epoch": 2.2, + "learning_rate": 8.205480713064946e-05, + "loss": 0.9535, + "step": 1505 + }, + { + 
"epoch": 2.2, + "learning_rate": 8.193880633334982e-05, + "loss": 0.9155, + "step": 1506 + }, + { + "epoch": 2.21, + "learning_rate": 8.182283065362493e-05, + "loss": 0.9045, + "step": 1507 + }, + { + "epoch": 2.21, + "learning_rate": 8.170688025276134e-05, + "loss": 1.0051, + "step": 1508 + }, + { + "epoch": 2.21, + "learning_rate": 8.159095529201049e-05, + "loss": 0.9856, + "step": 1509 + }, + { + "epoch": 2.21, + "learning_rate": 8.147505593258837e-05, + "loss": 0.9189, + "step": 1510 + }, + { + "epoch": 2.21, + "learning_rate": 8.135918233567545e-05, + "loss": 0.9455, + "step": 1511 + }, + { + "epoch": 2.21, + "learning_rate": 8.124333466241632e-05, + "loss": 0.9351, + "step": 1512 + }, + { + "epoch": 2.21, + "eval_loss": 0.9985308051109314, + "eval_runtime": 2.6183, + "eval_samples_per_second": 417.062, + "eval_steps_per_second": 26.353, + "step": 1512 + }, + { + "epoch": 2.21, + "learning_rate": 8.112751307391946e-05, + "loss": 0.9408, + "step": 1513 + }, + { + "epoch": 2.22, + "learning_rate": 8.101171773125716e-05, + "loss": 0.8725, + "step": 1514 + }, + { + "epoch": 2.22, + "learning_rate": 8.089594879546525e-05, + "loss": 0.9437, + "step": 1515 + }, + { + "epoch": 2.22, + "learning_rate": 8.078020642754274e-05, + "loss": 0.904, + "step": 1516 + }, + { + "epoch": 2.22, + "learning_rate": 8.066449078845168e-05, + "loss": 0.93, + "step": 1517 + }, + { + "epoch": 2.22, + "learning_rate": 8.054880203911705e-05, + "loss": 1.0044, + "step": 1518 + }, + { + "epoch": 2.22, + "learning_rate": 8.043314034042631e-05, + "loss": 0.957, + "step": 1519 + }, + { + "epoch": 2.23, + "learning_rate": 8.031750585322947e-05, + "loss": 0.9136, + "step": 1520 + }, + { + "epoch": 2.23, + "learning_rate": 8.020189873833852e-05, + "loss": 0.9619, + "step": 1521 + }, + { + "epoch": 2.23, + "learning_rate": 8.00863191565275e-05, + "loss": 0.961, + "step": 1522 + }, + { + "epoch": 2.23, + "learning_rate": 7.99707672685321e-05, + "loss": 0.8426, + "step": 1523 + }, + { + "epoch": 2.23, 
+ "learning_rate": 7.985524323504948e-05, + "loss": 0.9201, + "step": 1524 + }, + { + "epoch": 2.23, + "learning_rate": 7.973974721673815e-05, + "loss": 0.8758, + "step": 1525 + }, + { + "epoch": 2.23, + "learning_rate": 7.962427937421763e-05, + "loss": 0.8926, + "step": 1526 + }, + { + "epoch": 2.24, + "learning_rate": 7.950883986806821e-05, + "loss": 0.9891, + "step": 1527 + }, + { + "epoch": 2.24, + "learning_rate": 7.939342885883076e-05, + "loss": 0.9544, + "step": 1528 + }, + { + "epoch": 2.24, + "learning_rate": 7.927804650700659e-05, + "loss": 0.9669, + "step": 1529 + }, + { + "epoch": 2.24, + "learning_rate": 7.916269297305712e-05, + "loss": 0.9712, + "step": 1530 + }, + { + "epoch": 2.24, + "learning_rate": 7.90473684174037e-05, + "loss": 0.952, + "step": 1531 + }, + { + "epoch": 2.24, + "learning_rate": 7.89320730004274e-05, + "loss": 0.9196, + "step": 1532 + }, + { + "epoch": 2.24, + "learning_rate": 7.881680688246869e-05, + "loss": 1.0468, + "step": 1533 + }, + { + "epoch": 2.25, + "learning_rate": 7.870157022382735e-05, + "loss": 0.9278, + "step": 1534 + }, + { + "epoch": 2.25, + "learning_rate": 7.858636318476226e-05, + "loss": 0.9395, + "step": 1535 + }, + { + "epoch": 2.25, + "learning_rate": 7.847118592549099e-05, + "loss": 0.9261, + "step": 1536 + }, + { + "epoch": 2.25, + "learning_rate": 7.835603860618972e-05, + "loss": 0.8694, + "step": 1537 + }, + { + "epoch": 2.25, + "learning_rate": 7.824092138699307e-05, + "loss": 1.0352, + "step": 1538 + }, + { + "epoch": 2.25, + "learning_rate": 7.812583442799368e-05, + "loss": 0.8773, + "step": 1539 + }, + { + "epoch": 2.25, + "learning_rate": 7.801077788924224e-05, + "loss": 0.8136, + "step": 1540 + }, + { + "epoch": 2.26, + "learning_rate": 7.789575193074704e-05, + "loss": 0.9875, + "step": 1541 + }, + { + "epoch": 2.26, + "learning_rate": 7.778075671247385e-05, + "loss": 0.9006, + "step": 1542 + }, + { + "epoch": 2.26, + "learning_rate": 7.766579239434575e-05, + "loss": 0.9503, + "step": 1543 + }, + { 
+ "epoch": 2.26, + "learning_rate": 7.755085913624274e-05, + "loss": 0.9282, + "step": 1544 + }, + { + "epoch": 2.26, + "learning_rate": 7.743595709800176e-05, + "loss": 0.9057, + "step": 1545 + }, + { + "epoch": 2.26, + "learning_rate": 7.732108643941625e-05, + "loss": 0.8577, + "step": 1546 + }, + { + "epoch": 2.27, + "learning_rate": 7.720624732023603e-05, + "loss": 1.0027, + "step": 1547 + }, + { + "epoch": 2.27, + "learning_rate": 7.709143990016702e-05, + "loss": 0.8771, + "step": 1548 + }, + { + "epoch": 2.27, + "learning_rate": 7.697666433887108e-05, + "loss": 0.9449, + "step": 1549 + }, + { + "epoch": 2.27, + "learning_rate": 7.686192079596586e-05, + "loss": 0.9362, + "step": 1550 + }, + { + "epoch": 2.27, + "learning_rate": 7.674720943102432e-05, + "loss": 0.9937, + "step": 1551 + }, + { + "epoch": 2.27, + "learning_rate": 7.663253040357476e-05, + "loss": 0.9218, + "step": 1552 + }, + { + "epoch": 2.27, + "learning_rate": 7.651788387310052e-05, + "loss": 0.9557, + "step": 1553 + }, + { + "epoch": 2.28, + "learning_rate": 7.640326999903967e-05, + "loss": 0.9602, + "step": 1554 + }, + { + "epoch": 2.28, + "learning_rate": 7.628868894078501e-05, + "loss": 0.9414, + "step": 1555 + }, + { + "epoch": 2.28, + "learning_rate": 7.617414085768351e-05, + "loss": 0.8552, + "step": 1556 + }, + { + "epoch": 2.28, + "learning_rate": 7.605962590903643e-05, + "loss": 0.9802, + "step": 1557 + }, + { + "epoch": 2.28, + "learning_rate": 7.594514425409893e-05, + "loss": 0.9204, + "step": 1558 + }, + { + "epoch": 2.28, + "learning_rate": 7.583069605207975e-05, + "loss": 0.9143, + "step": 1559 + }, + { + "epoch": 2.28, + "learning_rate": 7.571628146214129e-05, + "loss": 0.9383, + "step": 1560 + }, + { + "epoch": 2.29, + "learning_rate": 7.560190064339908e-05, + "loss": 0.9052, + "step": 1561 + }, + { + "epoch": 2.29, + "learning_rate": 7.548755375492172e-05, + "loss": 0.9063, + "step": 1562 + }, + { + "epoch": 2.29, + "learning_rate": 7.537324095573064e-05, + "loss": 0.9549, + 
"step": 1563 + }, + { + "epoch": 2.29, + "learning_rate": 7.525896240479976e-05, + "loss": 0.9499, + "step": 1564 + }, + { + "epoch": 2.29, + "learning_rate": 7.514471826105556e-05, + "loss": 0.8888, + "step": 1565 + }, + { + "epoch": 2.29, + "learning_rate": 7.503050868337653e-05, + "loss": 0.9178, + "step": 1566 + }, + { + "epoch": 2.3, + "learning_rate": 7.491633383059313e-05, + "loss": 0.9875, + "step": 1567 + }, + { + "epoch": 2.3, + "learning_rate": 7.48021938614875e-05, + "loss": 0.8715, + "step": 1568 + }, + { + "epoch": 2.3, + "learning_rate": 7.468808893479327e-05, + "loss": 0.9246, + "step": 1569 + }, + { + "epoch": 2.3, + "learning_rate": 7.457401920919544e-05, + "loss": 0.8635, + "step": 1570 + }, + { + "epoch": 2.3, + "learning_rate": 7.445998484332993e-05, + "loss": 0.95, + "step": 1571 + }, + { + "epoch": 2.3, + "learning_rate": 7.434598599578351e-05, + "loss": 0.9175, + "step": 1572 + }, + { + "epoch": 2.3, + "learning_rate": 7.42320228250936e-05, + "loss": 0.9439, + "step": 1573 + }, + { + "epoch": 2.31, + "learning_rate": 7.411809548974792e-05, + "loss": 0.9417, + "step": 1574 + }, + { + "epoch": 2.31, + "learning_rate": 7.400420414818451e-05, + "loss": 0.9001, + "step": 1575 + }, + { + "epoch": 2.31, + "learning_rate": 7.389034895879118e-05, + "loss": 0.8844, + "step": 1576 + }, + { + "epoch": 2.31, + "learning_rate": 7.37765300799056e-05, + "loss": 0.9377, + "step": 1577 + }, + { + "epoch": 2.31, + "learning_rate": 7.366274766981483e-05, + "loss": 0.9851, + "step": 1578 + }, + { + "epoch": 2.31, + "learning_rate": 7.354900188675525e-05, + "loss": 0.965, + "step": 1579 + }, + { + "epoch": 2.31, + "learning_rate": 7.343529288891239e-05, + "loss": 0.9737, + "step": 1580 + }, + { + "epoch": 2.32, + "learning_rate": 7.332162083442049e-05, + "loss": 0.9686, + "step": 1581 + }, + { + "epoch": 2.32, + "learning_rate": 7.320798588136253e-05, + "loss": 0.968, + "step": 1582 + }, + { + "epoch": 2.32, + "learning_rate": 7.309438818776981e-05, + "loss": 
0.8647, + "step": 1583 + }, + { + "epoch": 2.32, + "learning_rate": 7.29808279116218e-05, + "loss": 0.8572, + "step": 1584 + }, + { + "epoch": 2.32, + "learning_rate": 7.286730521084602e-05, + "loss": 0.9264, + "step": 1585 + }, + { + "epoch": 2.32, + "learning_rate": 7.275382024331772e-05, + "loss": 0.9601, + "step": 1586 + }, + { + "epoch": 2.32, + "learning_rate": 7.264037316685962e-05, + "loss": 0.9996, + "step": 1587 + }, + { + "epoch": 2.33, + "learning_rate": 7.252696413924174e-05, + "loss": 0.9598, + "step": 1588 + }, + { + "epoch": 2.33, + "learning_rate": 7.24135933181812e-05, + "loss": 0.8675, + "step": 1589 + }, + { + "epoch": 2.33, + "learning_rate": 7.23002608613421e-05, + "loss": 0.9298, + "step": 1590 + }, + { + "epoch": 2.33, + "learning_rate": 7.218696692633501e-05, + "loss": 1.0448, + "step": 1591 + }, + { + "epoch": 2.33, + "learning_rate": 7.2073711670717e-05, + "loss": 0.9525, + "step": 1592 + }, + { + "epoch": 2.33, + "learning_rate": 7.196049525199142e-05, + "loss": 0.9621, + "step": 1593 + }, + { + "epoch": 2.34, + "learning_rate": 7.184731782760746e-05, + "loss": 0.933, + "step": 1594 + }, + { + "epoch": 2.34, + "learning_rate": 7.173417955496024e-05, + "loss": 1.0498, + "step": 1595 + }, + { + "epoch": 2.34, + "learning_rate": 7.162108059139032e-05, + "loss": 0.9508, + "step": 1596 + }, + { + "epoch": 2.34, + "learning_rate": 7.150802109418366e-05, + "loss": 0.9854, + "step": 1597 + }, + { + "epoch": 2.34, + "learning_rate": 7.13950012205713e-05, + "loss": 0.971, + "step": 1598 + }, + { + "epoch": 2.34, + "learning_rate": 7.128202112772912e-05, + "loss": 0.9346, + "step": 1599 + }, + { + "epoch": 2.34, + "learning_rate": 7.116908097277781e-05, + "loss": 1.0434, + "step": 1600 + }, + { + "epoch": 2.35, + "learning_rate": 7.105618091278245e-05, + "loss": 0.9354, + "step": 1601 + }, + { + "epoch": 2.35, + "learning_rate": 7.094332110475234e-05, + "loss": 0.9009, + "step": 1602 + }, + { + "epoch": 2.35, + "learning_rate": 
7.083050170564077e-05, + "loss": 0.9379, + "step": 1603 + }, + { + "epoch": 2.35, + "learning_rate": 7.071772287234497e-05, + "loss": 0.9488, + "step": 1604 + }, + { + "epoch": 2.35, + "learning_rate": 7.06049847617056e-05, + "loss": 0.8778, + "step": 1605 + }, + { + "epoch": 2.35, + "learning_rate": 7.049228753050681e-05, + "loss": 0.8452, + "step": 1606 + }, + { + "epoch": 2.35, + "learning_rate": 7.037963133547583e-05, + "loss": 0.8394, + "step": 1607 + }, + { + "epoch": 2.36, + "learning_rate": 7.026701633328276e-05, + "loss": 0.8895, + "step": 1608 + }, + { + "epoch": 2.36, + "learning_rate": 7.015444268054059e-05, + "loss": 0.9667, + "step": 1609 + }, + { + "epoch": 2.36, + "learning_rate": 7.004191053380469e-05, + "loss": 0.9573, + "step": 1610 + }, + { + "epoch": 2.36, + "learning_rate": 6.992942004957271e-05, + "loss": 1.0102, + "step": 1611 + }, + { + "epoch": 2.36, + "learning_rate": 6.981697138428434e-05, + "loss": 0.9507, + "step": 1612 + }, + { + "epoch": 2.36, + "learning_rate": 6.970456469432117e-05, + "loss": 0.9597, + "step": 1613 + }, + { + "epoch": 2.37, + "learning_rate": 6.959220013600641e-05, + "loss": 0.8432, + "step": 1614 + }, + { + "epoch": 2.37, + "learning_rate": 6.947987786560466e-05, + "loss": 0.9855, + "step": 1615 + }, + { + "epoch": 2.37, + "learning_rate": 6.936759803932167e-05, + "loss": 0.922, + "step": 1616 + }, + { + "epoch": 2.37, + "learning_rate": 6.925536081330424e-05, + "loss": 0.9261, + "step": 1617 + }, + { + "epoch": 2.37, + "learning_rate": 6.914316634363984e-05, + "loss": 0.9046, + "step": 1618 + }, + { + "epoch": 2.37, + "learning_rate": 6.903101478635662e-05, + "loss": 0.93, + "step": 1619 + }, + { + "epoch": 2.37, + "learning_rate": 6.891890629742288e-05, + "loss": 0.8668, + "step": 1620 + }, + { + "epoch": 2.38, + "learning_rate": 6.880684103274715e-05, + "loss": 0.9852, + "step": 1621 + }, + { + "epoch": 2.38, + "learning_rate": 6.869481914817779e-05, + "loss": 0.976, + "step": 1622 + }, + { + "epoch": 2.38, + 
"learning_rate": 6.85828407995028e-05, + "loss": 0.9287, + "step": 1623 + }, + { + "epoch": 2.38, + "learning_rate": 6.847090614244977e-05, + "loss": 0.9192, + "step": 1624 + }, + { + "epoch": 2.38, + "learning_rate": 6.835901533268536e-05, + "loss": 0.9999, + "step": 1625 + }, + { + "epoch": 2.38, + "learning_rate": 6.824716852581539e-05, + "loss": 0.8986, + "step": 1626 + }, + { + "epoch": 2.38, + "learning_rate": 6.813536587738436e-05, + "loss": 0.9818, + "step": 1627 + }, + { + "epoch": 2.39, + "learning_rate": 6.802360754287547e-05, + "loss": 0.9658, + "step": 1628 + }, + { + "epoch": 2.39, + "learning_rate": 6.791189367771025e-05, + "loss": 0.8793, + "step": 1629 + }, + { + "epoch": 2.39, + "learning_rate": 6.780022443724839e-05, + "loss": 0.9363, + "step": 1630 + }, + { + "epoch": 2.39, + "learning_rate": 6.768859997678751e-05, + "loss": 0.9108, + "step": 1631 + }, + { + "epoch": 2.39, + "learning_rate": 6.757702045156292e-05, + "loss": 0.9561, + "step": 1632 + }, + { + "epoch": 2.39, + "learning_rate": 6.74654860167475e-05, + "loss": 0.8761, + "step": 1633 + }, + { + "epoch": 2.39, + "learning_rate": 6.735399682745145e-05, + "loss": 0.9307, + "step": 1634 + }, + { + "epoch": 2.4, + "learning_rate": 6.724255303872197e-05, + "loss": 0.8416, + "step": 1635 + }, + { + "epoch": 2.4, + "learning_rate": 6.713115480554313e-05, + "loss": 0.908, + "step": 1636 + }, + { + "epoch": 2.4, + "learning_rate": 6.701980228283568e-05, + "loss": 0.9147, + "step": 1637 + }, + { + "epoch": 2.4, + "learning_rate": 6.690849562545678e-05, + "loss": 0.9877, + "step": 1638 + }, + { + "epoch": 2.4, + "learning_rate": 6.679723498819986e-05, + "loss": 0.9294, + "step": 1639 + }, + { + "epoch": 2.4, + "learning_rate": 6.668602052579424e-05, + "loss": 0.929, + "step": 1640 + }, + { + "epoch": 2.41, + "learning_rate": 6.657485239290515e-05, + "loss": 0.8996, + "step": 1641 + }, + { + "epoch": 2.41, + "learning_rate": 6.64637307441333e-05, + "loss": 0.8994, + "step": 1642 + }, + { + 
"epoch": 2.41, + "learning_rate": 6.635265573401474e-05, + "loss": 0.9253, + "step": 1643 + }, + { + "epoch": 2.41, + "learning_rate": 6.624162751702076e-05, + "loss": 0.8867, + "step": 1644 + }, + { + "epoch": 2.41, + "learning_rate": 6.613064624755753e-05, + "loss": 0.987, + "step": 1645 + }, + { + "epoch": 2.41, + "learning_rate": 6.601971207996591e-05, + "loss": 0.9079, + "step": 1646 + }, + { + "epoch": 2.41, + "learning_rate": 6.590882516852122e-05, + "loss": 0.9696, + "step": 1647 + }, + { + "epoch": 2.42, + "learning_rate": 6.579798566743314e-05, + "loss": 0.964, + "step": 1648 + }, + { + "epoch": 2.42, + "learning_rate": 6.568719373084538e-05, + "loss": 0.9736, + "step": 1649 + }, + { + "epoch": 2.42, + "learning_rate": 6.557644951283551e-05, + "loss": 0.9806, + "step": 1650 + }, + { + "epoch": 2.42, + "learning_rate": 6.546575316741474e-05, + "loss": 1.0062, + "step": 1651 + }, + { + "epoch": 2.42, + "learning_rate": 6.535510484852767e-05, + "loss": 0.8794, + "step": 1652 + }, + { + "epoch": 2.42, + "learning_rate": 6.524450471005213e-05, + "loss": 0.8359, + "step": 1653 + }, + { + "epoch": 2.42, + "learning_rate": 6.513395290579901e-05, + "loss": 0.9442, + "step": 1654 + }, + { + "epoch": 2.43, + "learning_rate": 6.50234495895119e-05, + "loss": 0.8903, + "step": 1655 + }, + { + "epoch": 2.43, + "learning_rate": 6.491299491486695e-05, + "loss": 0.9625, + "step": 1656 + }, + { + "epoch": 2.43, + "learning_rate": 6.480258903547276e-05, + "loss": 0.929, + "step": 1657 + }, + { + "epoch": 2.43, + "learning_rate": 6.469223210486992e-05, + "loss": 0.9751, + "step": 1658 + }, + { + "epoch": 2.43, + "learning_rate": 6.458192427653112e-05, + "loss": 0.982, + "step": 1659 + }, + { + "epoch": 2.43, + "learning_rate": 6.447166570386063e-05, + "loss": 0.8681, + "step": 1660 + }, + { + "epoch": 2.44, + "learning_rate": 6.436145654019432e-05, + "loss": 0.9149, + "step": 1661 + }, + { + "epoch": 2.44, + "learning_rate": 6.425129693879925e-05, + "loss": 0.9411, + "step": 
1662 + }, + { + "epoch": 2.44, + "learning_rate": 6.414118705287359e-05, + "loss": 0.9072, + "step": 1663 + }, + { + "epoch": 2.44, + "learning_rate": 6.403112703554643e-05, + "loss": 0.9784, + "step": 1664 + }, + { + "epoch": 2.44, + "learning_rate": 6.392111703987744e-05, + "loss": 0.8754, + "step": 1665 + }, + { + "epoch": 2.44, + "learning_rate": 6.381115721885675e-05, + "loss": 1.0216, + "step": 1666 + }, + { + "epoch": 2.44, + "learning_rate": 6.370124772540469e-05, + "loss": 0.8493, + "step": 1667 + }, + { + "epoch": 2.45, + "learning_rate": 6.35913887123716e-05, + "loss": 0.9215, + "step": 1668 + }, + { + "epoch": 2.45, + "learning_rate": 6.348158033253773e-05, + "loss": 0.919, + "step": 1669 + }, + { + "epoch": 2.45, + "learning_rate": 6.337182273861273e-05, + "loss": 0.9128, + "step": 1670 + }, + { + "epoch": 2.45, + "learning_rate": 6.326211608323573e-05, + "loss": 0.9469, + "step": 1671 + }, + { + "epoch": 2.45, + "learning_rate": 6.315246051897503e-05, + "loss": 0.8582, + "step": 1672 + }, + { + "epoch": 2.45, + "learning_rate": 6.30428561983278e-05, + "loss": 0.8487, + "step": 1673 + }, + { + "epoch": 2.45, + "learning_rate": 6.293330327372005e-05, + "loss": 0.8687, + "step": 1674 + }, + { + "epoch": 2.46, + "learning_rate": 6.282380189750625e-05, + "loss": 0.9028, + "step": 1675 + }, + { + "epoch": 2.46, + "learning_rate": 6.271435222196916e-05, + "loss": 0.9105, + "step": 1676 + }, + { + "epoch": 2.46, + "learning_rate": 6.26049543993197e-05, + "loss": 0.9384, + "step": 1677 + }, + { + "epoch": 2.46, + "learning_rate": 6.249560858169661e-05, + "loss": 0.9658, + "step": 1678 + }, + { + "epoch": 2.46, + "learning_rate": 6.238631492116644e-05, + "loss": 0.9193, + "step": 1679 + }, + { + "epoch": 2.46, + "learning_rate": 6.227707356972301e-05, + "loss": 0.9077, + "step": 1680 + }, + { + "epoch": 2.46, + "eval_loss": 0.9968231916427612, + "eval_runtime": 2.6101, + "eval_samples_per_second": 418.377, + "eval_steps_per_second": 26.436, + "step": 1680 + }, 
+ { + "epoch": 2.46, + "learning_rate": 6.216788467928758e-05, + "loss": 0.9083, + "step": 1681 + }, + { + "epoch": 2.47, + "learning_rate": 6.205874840170833e-05, + "loss": 0.9397, + "step": 1682 + }, + { + "epoch": 2.47, + "learning_rate": 6.194966488876027e-05, + "loss": 0.9631, + "step": 1683 + }, + { + "epoch": 2.47, + "learning_rate": 6.184063429214515e-05, + "loss": 0.9214, + "step": 1684 + }, + { + "epoch": 2.47, + "learning_rate": 6.173165676349103e-05, + "loss": 0.9544, + "step": 1685 + }, + { + "epoch": 2.47, + "learning_rate": 6.162273245435219e-05, + "loss": 0.9303, + "step": 1686 + }, + { + "epoch": 2.47, + "learning_rate": 6.151386151620887e-05, + "loss": 0.8605, + "step": 1687 + }, + { + "epoch": 2.48, + "learning_rate": 6.140504410046712e-05, + "loss": 0.9125, + "step": 1688 + }, + { + "epoch": 2.48, + "learning_rate": 6.129628035845861e-05, + "loss": 0.9202, + "step": 1689 + }, + { + "epoch": 2.48, + "learning_rate": 6.118757044144025e-05, + "loss": 0.937, + "step": 1690 + }, + { + "epoch": 2.48, + "learning_rate": 6.107891450059419e-05, + "loss": 0.8941, + "step": 1691 + }, + { + "epoch": 2.48, + "learning_rate": 6.097031268702746e-05, + "loss": 0.9205, + "step": 1692 + }, + { + "epoch": 2.48, + "learning_rate": 6.086176515177182e-05, + "loss": 0.9164, + "step": 1693 + }, + { + "epoch": 2.48, + "learning_rate": 6.0753272045783625e-05, + "loss": 0.9261, + "step": 1694 + }, + { + "epoch": 2.49, + "learning_rate": 6.0644833519943425e-05, + "loss": 0.8642, + "step": 1695 + }, + { + "epoch": 2.49, + "learning_rate": 6.053644972505593e-05, + "loss": 0.9648, + "step": 1696 + }, + { + "epoch": 2.49, + "learning_rate": 6.042812081184972e-05, + "loss": 0.9213, + "step": 1697 + }, + { + "epoch": 2.49, + "learning_rate": 6.0319846930977e-05, + "loss": 0.9314, + "step": 1698 + }, + { + "epoch": 2.49, + "learning_rate": 6.021162823301358e-05, + "loss": 0.9055, + "step": 1699 + }, + { + "epoch": 2.49, + "learning_rate": 6.010346486845837e-05, + "loss": 0.9335, 
+ "step": 1700 + }, + { + "epoch": 2.49, + "learning_rate": 5.9995356987733466e-05, + "loss": 0.9365, + "step": 1701 + }, + { + "epoch": 2.5, + "learning_rate": 5.988730474118367e-05, + "loss": 0.9817, + "step": 1702 + }, + { + "epoch": 2.5, + "learning_rate": 5.977930827907649e-05, + "loss": 0.9145, + "step": 1703 + }, + { + "epoch": 2.5, + "learning_rate": 5.967136775160187e-05, + "loss": 0.9974, + "step": 1704 + }, + { + "epoch": 2.5, + "learning_rate": 5.956348330887196e-05, + "loss": 0.9577, + "step": 1705 + }, + { + "epoch": 2.5, + "learning_rate": 5.945565510092086e-05, + "loss": 0.8716, + "step": 1706 + }, + { + "epoch": 2.5, + "learning_rate": 5.93478832777045e-05, + "loss": 0.9541, + "step": 1707 + }, + { + "epoch": 2.51, + "learning_rate": 5.924016798910037e-05, + "loss": 0.9464, + "step": 1708 + }, + { + "epoch": 2.51, + "learning_rate": 5.913250938490744e-05, + "loss": 0.9774, + "step": 1709 + }, + { + "epoch": 2.51, + "learning_rate": 5.9024907614845716e-05, + "loss": 0.8654, + "step": 1710 + }, + { + "epoch": 2.51, + "learning_rate": 5.891736282855622e-05, + "loss": 0.8559, + "step": 1711 + }, + { + "epoch": 2.51, + "learning_rate": 5.880987517560075e-05, + "loss": 1.0032, + "step": 1712 + }, + { + "epoch": 2.51, + "learning_rate": 5.870244480546159e-05, + "loss": 0.9373, + "step": 1713 + }, + { + "epoch": 2.51, + "learning_rate": 5.859507186754146e-05, + "loss": 0.8892, + "step": 1714 + }, + { + "epoch": 2.52, + "learning_rate": 5.848775651116309e-05, + "loss": 0.8809, + "step": 1715 + }, + { + "epoch": 2.52, + "learning_rate": 5.838049888556925e-05, + "loss": 0.9596, + "step": 1716 + }, + { + "epoch": 2.52, + "learning_rate": 5.827329913992232e-05, + "loss": 0.95, + "step": 1717 + }, + { + "epoch": 2.52, + "learning_rate": 5.81661574233042e-05, + "loss": 0.9694, + "step": 1718 + }, + { + "epoch": 2.52, + "learning_rate": 5.80590738847162e-05, + "loss": 0.9333, + "step": 1719 + }, + { + "epoch": 2.52, + "learning_rate": 5.79520486730786e-05, + 
"loss": 0.943, + "step": 1720 + }, + { + "epoch": 2.52, + "learning_rate": 5.784508193723057e-05, + "loss": 0.906, + "step": 1721 + }, + { + "epoch": 2.53, + "learning_rate": 5.773817382593008e-05, + "loss": 0.9575, + "step": 1722 + }, + { + "epoch": 2.53, + "learning_rate": 5.763132448785339e-05, + "loss": 0.8638, + "step": 1723 + }, + { + "epoch": 2.53, + "learning_rate": 5.752453407159522e-05, + "loss": 0.9667, + "step": 1724 + }, + { + "epoch": 2.53, + "learning_rate": 5.741780272566821e-05, + "loss": 1.0119, + "step": 1725 + }, + { + "epoch": 2.53, + "learning_rate": 5.7311130598502885e-05, + "loss": 0.8563, + "step": 1726 + }, + { + "epoch": 2.53, + "learning_rate": 5.7204517838447405e-05, + "loss": 0.9852, + "step": 1727 + }, + { + "epoch": 2.54, + "learning_rate": 5.7097964593767375e-05, + "loss": 0.8659, + "step": 1728 + }, + { + "epoch": 2.54, + "learning_rate": 5.699147101264566e-05, + "loss": 0.9579, + "step": 1729 + }, + { + "epoch": 2.54, + "learning_rate": 5.688503724318217e-05, + "loss": 0.9226, + "step": 1730 + }, + { + "epoch": 2.54, + "learning_rate": 5.6778663433393574e-05, + "loss": 0.9771, + "step": 1731 + }, + { + "epoch": 2.54, + "learning_rate": 5.667234973121317e-05, + "loss": 0.8307, + "step": 1732 + }, + { + "epoch": 2.54, + "learning_rate": 5.6566096284490635e-05, + "loss": 0.9012, + "step": 1733 + }, + { + "epoch": 2.54, + "learning_rate": 5.645990324099197e-05, + "loss": 0.8566, + "step": 1734 + }, + { + "epoch": 2.55, + "learning_rate": 5.635377074839907e-05, + "loss": 0.941, + "step": 1735 + }, + { + "epoch": 2.55, + "learning_rate": 5.624769895430961e-05, + "loss": 0.8925, + "step": 1736 + }, + { + "epoch": 2.55, + "learning_rate": 5.614168800623687e-05, + "loss": 0.9387, + "step": 1737 + }, + { + "epoch": 2.55, + "learning_rate": 5.6035738051609555e-05, + "loss": 0.9025, + "step": 1738 + }, + { + "epoch": 2.55, + "learning_rate": 5.5929849237771556e-05, + "loss": 0.9394, + "step": 1739 + }, + { + "epoch": 2.55, + "learning_rate": 
5.5824021711981686e-05, + "loss": 0.9743, + "step": 1740 + }, + { + "epoch": 2.55, + "learning_rate": 5.5718255621413526e-05, + "loss": 0.9604, + "step": 1741 + }, + { + "epoch": 2.56, + "learning_rate": 5.561255111315524e-05, + "loss": 0.9223, + "step": 1742 + }, + { + "epoch": 2.56, + "learning_rate": 5.550690833420928e-05, + "loss": 0.8748, + "step": 1743 + }, + { + "epoch": 2.56, + "learning_rate": 5.540132743149242e-05, + "loss": 1.0009, + "step": 1744 + }, + { + "epoch": 2.56, + "learning_rate": 5.5295808551835184e-05, + "loss": 0.9533, + "step": 1745 + }, + { + "epoch": 2.56, + "learning_rate": 5.5190351841982014e-05, + "loss": 0.9026, + "step": 1746 + }, + { + "epoch": 2.56, + "learning_rate": 5.508495744859077e-05, + "loss": 0.9514, + "step": 1747 + }, + { + "epoch": 2.56, + "learning_rate": 5.497962551823266e-05, + "loss": 0.9328, + "step": 1748 + }, + { + "epoch": 2.57, + "learning_rate": 5.487435619739214e-05, + "loss": 0.9765, + "step": 1749 + }, + { + "epoch": 2.57, + "learning_rate": 5.476914963246647e-05, + "loss": 0.8417, + "step": 1750 + }, + { + "epoch": 2.57, + "learning_rate": 5.4664005969765674e-05, + "loss": 0.93, + "step": 1751 + }, + { + "epoch": 2.57, + "learning_rate": 5.4558925355512256e-05, + "loss": 0.8757, + "step": 1752 + }, + { + "epoch": 2.57, + "learning_rate": 5.445390793584115e-05, + "loss": 0.9529, + "step": 1753 + }, + { + "epoch": 2.57, + "learning_rate": 5.434895385679937e-05, + "loss": 0.8961, + "step": 1754 + }, + { + "epoch": 2.58, + "learning_rate": 5.4244063264345745e-05, + "loss": 0.844, + "step": 1755 + }, + { + "epoch": 2.58, + "learning_rate": 5.4139236304350935e-05, + "loss": 0.9071, + "step": 1756 + }, + { + "epoch": 2.58, + "learning_rate": 5.403447312259702e-05, + "loss": 0.8767, + "step": 1757 + }, + { + "epoch": 2.58, + "learning_rate": 5.392977386477738e-05, + "loss": 0.9043, + "step": 1758 + }, + { + "epoch": 2.58, + "learning_rate": 5.382513867649663e-05, + "loss": 1.0033, + "step": 1759 + }, + { + "epoch": 
2.58, + "learning_rate": 5.372056770327013e-05, + "loss": 0.9496, + "step": 1760 + }, + { + "epoch": 2.58, + "learning_rate": 5.361606109052397e-05, + "loss": 0.9717, + "step": 1761 + }, + { + "epoch": 2.59, + "learning_rate": 5.3511618983594845e-05, + "loss": 0.9332, + "step": 1762 + }, + { + "epoch": 2.59, + "learning_rate": 5.340724152772956e-05, + "loss": 0.8972, + "step": 1763 + }, + { + "epoch": 2.59, + "learning_rate": 5.33029288680852e-05, + "loss": 1.0025, + "step": 1764 + }, + { + "epoch": 2.59, + "learning_rate": 5.31986811497286e-05, + "loss": 0.9639, + "step": 1765 + }, + { + "epoch": 2.59, + "learning_rate": 5.309449851763633e-05, + "loss": 0.9181, + "step": 1766 + }, + { + "epoch": 2.59, + "learning_rate": 5.299038111669444e-05, + "loss": 0.9608, + "step": 1767 + }, + { + "epoch": 2.59, + "learning_rate": 5.288632909169823e-05, + "loss": 0.9961, + "step": 1768 + }, + { + "epoch": 2.6, + "learning_rate": 5.2782342587352154e-05, + "loss": 0.9709, + "step": 1769 + }, + { + "epoch": 2.6, + "learning_rate": 5.267842174826955e-05, + "loss": 1.0208, + "step": 1770 + }, + { + "epoch": 2.6, + "learning_rate": 5.2574566718972364e-05, + "loss": 0.8965, + "step": 1771 + }, + { + "epoch": 2.6, + "learning_rate": 5.247077764389099e-05, + "loss": 0.9577, + "step": 1772 + }, + { + "epoch": 2.6, + "learning_rate": 5.236705466736428e-05, + "loss": 0.9513, + "step": 1773 + }, + { + "epoch": 2.6, + "learning_rate": 5.226339793363898e-05, + "loss": 0.995, + "step": 1774 + }, + { + "epoch": 2.61, + "learning_rate": 5.215980758686978e-05, + "loss": 0.8956, + "step": 1775 + }, + { + "epoch": 2.61, + "learning_rate": 5.205628377111902e-05, + "loss": 0.9213, + "step": 1776 + }, + { + "epoch": 2.61, + "learning_rate": 5.195282663035661e-05, + "loss": 0.9413, + "step": 1777 + }, + { + "epoch": 2.61, + "learning_rate": 5.18494363084596e-05, + "loss": 0.9791, + "step": 1778 + }, + { + "epoch": 2.61, + "learning_rate": 5.174611294921224e-05, + "loss": 0.9651, + "step": 1779 + }, + 
{ + "epoch": 2.61, + "learning_rate": 5.1642856696305575e-05, + "loss": 0.9441, + "step": 1780 + }, + { + "epoch": 2.61, + "learning_rate": 5.1539667693337335e-05, + "loss": 0.9206, + "step": 1781 + }, + { + "epoch": 2.62, + "learning_rate": 5.143654608381172e-05, + "loss": 0.8989, + "step": 1782 + }, + { + "epoch": 2.62, + "learning_rate": 5.133349201113929e-05, + "loss": 0.9114, + "step": 1783 + }, + { + "epoch": 2.62, + "learning_rate": 5.123050561863657e-05, + "loss": 0.8995, + "step": 1784 + }, + { + "epoch": 2.62, + "learning_rate": 5.112758704952598e-05, + "loss": 0.952, + "step": 1785 + }, + { + "epoch": 2.62, + "learning_rate": 5.1024736446935754e-05, + "loss": 0.8444, + "step": 1786 + }, + { + "epoch": 2.62, + "learning_rate": 5.092195395389937e-05, + "loss": 0.9613, + "step": 1787 + }, + { + "epoch": 2.62, + "learning_rate": 5.081923971335582e-05, + "loss": 0.9385, + "step": 1788 + }, + { + "epoch": 2.63, + "learning_rate": 5.071659386814907e-05, + "loss": 0.8304, + "step": 1789 + }, + { + "epoch": 2.63, + "learning_rate": 5.061401656102791e-05, + "loss": 0.9589, + "step": 1790 + }, + { + "epoch": 2.63, + "learning_rate": 5.051150793464592e-05, + "loss": 0.9628, + "step": 1791 + }, + { + "epoch": 2.63, + "learning_rate": 5.0409068131561067e-05, + "loss": 0.9054, + "step": 1792 + }, + { + "epoch": 2.63, + "learning_rate": 5.0306697294235714e-05, + "loss": 0.8631, + "step": 1793 + }, + { + "epoch": 2.63, + "learning_rate": 5.020439556503629e-05, + "loss": 0.8949, + "step": 1794 + }, + { + "epoch": 2.63, + "learning_rate": 5.0102163086233065e-05, + "loss": 1.0095, + "step": 1795 + }, + { + "epoch": 2.64, + "learning_rate": 5.000000000000002e-05, + "loss": 1.0535, + "step": 1796 + }, + { + "epoch": 2.64, + "learning_rate": 4.98979064484146e-05, + "loss": 0.9593, + "step": 1797 + }, + { + "epoch": 2.64, + "learning_rate": 4.979588257345766e-05, + "loss": 0.9763, + "step": 1798 + }, + { + "epoch": 2.64, + "learning_rate": 4.969392851701305e-05, + "loss": 
0.9788, + "step": 1799 + }, + { + "epoch": 2.64, + "learning_rate": 4.959204442086753e-05, + "loss": 0.9516, + "step": 1800 + }, + { + "epoch": 2.64, + "learning_rate": 4.949023042671066e-05, + "loss": 0.9403, + "step": 1801 + }, + { + "epoch": 2.65, + "learning_rate": 4.938848667613436e-05, + "loss": 0.9843, + "step": 1802 + }, + { + "epoch": 2.65, + "learning_rate": 4.928681331063304e-05, + "loss": 1.0064, + "step": 1803 + }, + { + "epoch": 2.65, + "learning_rate": 4.918521047160308e-05, + "loss": 0.9603, + "step": 1804 + }, + { + "epoch": 2.65, + "learning_rate": 4.908367830034284e-05, + "loss": 0.8546, + "step": 1805 + }, + { + "epoch": 2.65, + "learning_rate": 4.8982216938052394e-05, + "loss": 0.9444, + "step": 1806 + }, + { + "epoch": 2.65, + "learning_rate": 4.888082652583331e-05, + "loss": 0.9918, + "step": 1807 + }, + { + "epoch": 2.65, + "learning_rate": 4.877950720468859e-05, + "loss": 0.8489, + "step": 1808 + }, + { + "epoch": 2.66, + "learning_rate": 4.8678259115522215e-05, + "loss": 1.0195, + "step": 1809 + }, + { + "epoch": 2.66, + "learning_rate": 4.8577082399139296e-05, + "loss": 0.9962, + "step": 1810 + }, + { + "epoch": 2.66, + "learning_rate": 4.8475977196245504e-05, + "loss": 0.9836, + "step": 1811 + }, + { + "epoch": 2.66, + "learning_rate": 4.837494364744711e-05, + "loss": 0.8597, + "step": 1812 + }, + { + "epoch": 2.66, + "learning_rate": 4.827398189325085e-05, + "loss": 1.002, + "step": 1813 + }, + { + "epoch": 2.66, + "learning_rate": 4.817309207406346e-05, + "loss": 0.9228, + "step": 1814 + }, + { + "epoch": 2.66, + "learning_rate": 4.8072274330191725e-05, + "loss": 0.9954, + "step": 1815 + }, + { + "epoch": 2.67, + "learning_rate": 4.7971528801842116e-05, + "loss": 0.9124, + "step": 1816 + }, + { + "epoch": 2.67, + "learning_rate": 4.787085562912076e-05, + "loss": 0.925, + "step": 1817 + }, + { + "epoch": 2.67, + "learning_rate": 4.777025495203319e-05, + "loss": 0.9245, + "step": 1818 + }, + { + "epoch": 2.67, + "learning_rate": 
4.7669726910484e-05, + "loss": 0.8668, + "step": 1819 + }, + { + "epoch": 2.67, + "learning_rate": 4.756927164427685e-05, + "loss": 1.0186, + "step": 1820 + }, + { + "epoch": 2.67, + "learning_rate": 4.746888929311415e-05, + "loss": 0.95, + "step": 1821 + }, + { + "epoch": 2.68, + "learning_rate": 4.7368579996596904e-05, + "loss": 0.9373, + "step": 1822 + }, + { + "epoch": 2.68, + "learning_rate": 4.726834389422461e-05, + "loss": 0.9235, + "step": 1823 + }, + { + "epoch": 2.68, + "learning_rate": 4.716818112539485e-05, + "loss": 0.8682, + "step": 1824 + }, + { + "epoch": 2.68, + "learning_rate": 4.706809182940334e-05, + "loss": 0.9166, + "step": 1825 + }, + { + "epoch": 2.68, + "learning_rate": 4.6968076145443515e-05, + "loss": 0.9526, + "step": 1826 + }, + { + "epoch": 2.68, + "learning_rate": 4.686813421260646e-05, + "loss": 0.8961, + "step": 1827 + }, + { + "epoch": 2.68, + "learning_rate": 4.6768266169880804e-05, + "loss": 0.9258, + "step": 1828 + }, + { + "epoch": 2.69, + "learning_rate": 4.666847215615226e-05, + "loss": 0.9526, + "step": 1829 + }, + { + "epoch": 2.69, + "learning_rate": 4.656875231020368e-05, + "loss": 0.9606, + "step": 1830 + }, + { + "epoch": 2.69, + "learning_rate": 4.6469106770714745e-05, + "loss": 0.9352, + "step": 1831 + }, + { + "epoch": 2.69, + "learning_rate": 4.636953567626177e-05, + "loss": 0.9786, + "step": 1832 + }, + { + "epoch": 2.69, + "learning_rate": 4.6270039165317605e-05, + "loss": 0.8797, + "step": 1833 + }, + { + "epoch": 2.69, + "learning_rate": 4.617061737625139e-05, + "loss": 0.9609, + "step": 1834 + }, + { + "epoch": 2.69, + "learning_rate": 4.6071270447328276e-05, + "loss": 0.8972, + "step": 1835 + }, + { + "epoch": 2.7, + "learning_rate": 4.597199851670932e-05, + "loss": 0.9233, + "step": 1836 + }, + { + "epoch": 2.7, + "learning_rate": 4.587280172245129e-05, + "loss": 0.9455, + "step": 1837 + }, + { + "epoch": 2.7, + "learning_rate": 4.57736802025065e-05, + "loss": 0.8729, + "step": 1838 + }, + { + "epoch": 2.7, + 
"learning_rate": 4.567463409472255e-05, + "loss": 0.8894, + "step": 1839 + }, + { + "epoch": 2.7, + "learning_rate": 4.557566353684209e-05, + "loss": 0.9784, + "step": 1840 + }, + { + "epoch": 2.7, + "learning_rate": 4.547676866650289e-05, + "loss": 0.9586, + "step": 1841 + }, + { + "epoch": 2.7, + "learning_rate": 4.537794962123726e-05, + "loss": 0.978, + "step": 1842 + }, + { + "epoch": 2.71, + "learning_rate": 4.527920653847221e-05, + "loss": 0.9431, + "step": 1843 + }, + { + "epoch": 2.71, + "learning_rate": 4.518053955552903e-05, + "loss": 0.8657, + "step": 1844 + }, + { + "epoch": 2.71, + "learning_rate": 4.50819488096232e-05, + "loss": 0.7781, + "step": 1845 + }, + { + "epoch": 2.71, + "learning_rate": 4.498343443786416e-05, + "loss": 0.9086, + "step": 1846 + }, + { + "epoch": 2.71, + "learning_rate": 4.488499657725511e-05, + "loss": 0.897, + "step": 1847 + }, + { + "epoch": 2.71, + "learning_rate": 4.478663536469296e-05, + "loss": 0.9494, + "step": 1848 + }, + { + "epoch": 2.71, + "eval_loss": 0.9907075762748718, + "eval_runtime": 2.6095, + "eval_samples_per_second": 418.473, + "eval_steps_per_second": 26.442, + "step": 1848 + }, + { + "epoch": 2.72, + "learning_rate": 4.468835093696796e-05, + "loss": 0.9201, + "step": 1849 + }, + { + "epoch": 2.72, + "learning_rate": 4.4590143430763555e-05, + "loss": 0.989, + "step": 1850 + }, + { + "epoch": 2.72, + "learning_rate": 4.449201298265622e-05, + "loss": 0.8615, + "step": 1851 + }, + { + "epoch": 2.72, + "learning_rate": 4.4393959729115244e-05, + "loss": 0.9388, + "step": 1852 + }, + { + "epoch": 2.72, + "learning_rate": 4.429598380650266e-05, + "loss": 0.9179, + "step": 1853 + }, + { + "epoch": 2.72, + "learning_rate": 4.419808535107287e-05, + "loss": 0.9471, + "step": 1854 + }, + { + "epoch": 2.72, + "learning_rate": 4.4100264498972564e-05, + "loss": 0.9584, + "step": 1855 + }, + { + "epoch": 2.73, + "learning_rate": 4.4002521386240466e-05, + "loss": 0.9077, + "step": 1856 + }, + { + "epoch": 2.73, + 
"learning_rate": 4.3904856148807284e-05, + "loss": 0.876, + "step": 1857 + }, + { + "epoch": 2.73, + "learning_rate": 4.3807268922495406e-05, + "loss": 0.9785, + "step": 1858 + }, + { + "epoch": 2.73, + "learning_rate": 4.370975984301866e-05, + "loss": 0.8762, + "step": 1859 + }, + { + "epoch": 2.73, + "learning_rate": 4.3612329045982236e-05, + "loss": 0.8797, + "step": 1860 + }, + { + "epoch": 2.73, + "learning_rate": 4.351497666688246e-05, + "loss": 1.05, + "step": 1861 + }, + { + "epoch": 2.73, + "learning_rate": 4.341770284110656e-05, + "loss": 0.9844, + "step": 1862 + }, + { + "epoch": 2.74, + "learning_rate": 4.332050770393263e-05, + "loss": 0.9814, + "step": 1863 + }, + { + "epoch": 2.74, + "learning_rate": 4.322339139052921e-05, + "loss": 0.7994, + "step": 1864 + }, + { + "epoch": 2.74, + "learning_rate": 4.312635403595532e-05, + "loss": 0.8213, + "step": 1865 + }, + { + "epoch": 2.74, + "learning_rate": 4.3029395775160106e-05, + "loss": 0.9036, + "step": 1866 + }, + { + "epoch": 2.74, + "learning_rate": 4.293251674298269e-05, + "loss": 0.9382, + "step": 1867 + }, + { + "epoch": 2.74, + "learning_rate": 4.283571707415214e-05, + "loss": 0.9933, + "step": 1868 + }, + { + "epoch": 2.75, + "learning_rate": 4.273899690328702e-05, + "loss": 0.897, + "step": 1869 + }, + { + "epoch": 2.75, + "learning_rate": 4.264235636489542e-05, + "loss": 0.9293, + "step": 1870 + }, + { + "epoch": 2.75, + "learning_rate": 4.2545795593374594e-05, + "loss": 0.9417, + "step": 1871 + }, + { + "epoch": 2.75, + "learning_rate": 4.244931472301098e-05, + "loss": 0.8857, + "step": 1872 + }, + { + "epoch": 2.75, + "learning_rate": 4.235291388797986e-05, + "loss": 0.8865, + "step": 1873 + }, + { + "epoch": 2.75, + "learning_rate": 4.2256593222345185e-05, + "loss": 0.8899, + "step": 1874 + }, + { + "epoch": 2.75, + "learning_rate": 4.216035286005942e-05, + "loss": 0.9032, + "step": 1875 + }, + { + "epoch": 2.76, + "learning_rate": 4.206419293496333e-05, + "loss": 0.9283, + "step": 1876 + }, 
+ { + "epoch": 2.76, + "learning_rate": 4.196811358078585e-05, + "loss": 0.852, + "step": 1877 + }, + { + "epoch": 2.76, + "learning_rate": 4.18721149311439e-05, + "loss": 0.9111, + "step": 1878 + }, + { + "epoch": 2.76, + "learning_rate": 4.177619711954211e-05, + "loss": 0.9561, + "step": 1879 + }, + { + "epoch": 2.76, + "learning_rate": 4.168036027937267e-05, + "loss": 0.8661, + "step": 1880 + }, + { + "epoch": 2.76, + "learning_rate": 4.1584604543915254e-05, + "loss": 0.9675, + "step": 1881 + }, + { + "epoch": 2.76, + "learning_rate": 4.148893004633663e-05, + "loss": 0.9231, + "step": 1882 + }, + { + "epoch": 2.77, + "learning_rate": 4.139333691969071e-05, + "loss": 0.8708, + "step": 1883 + }, + { + "epoch": 2.77, + "learning_rate": 4.129782529691815e-05, + "loss": 0.9466, + "step": 1884 + }, + { + "epoch": 2.77, + "learning_rate": 4.1202395310846296e-05, + "loss": 0.8492, + "step": 1885 + }, + { + "epoch": 2.77, + "learning_rate": 4.1107047094188946e-05, + "loss": 0.8113, + "step": 1886 + }, + { + "epoch": 2.77, + "learning_rate": 4.101178077954617e-05, + "loss": 0.8231, + "step": 1887 + }, + { + "epoch": 2.77, + "learning_rate": 4.091659649940419e-05, + "loss": 1.025, + "step": 1888 + }, + { + "epoch": 2.77, + "learning_rate": 4.082149438613514e-05, + "loss": 0.9734, + "step": 1889 + }, + { + "epoch": 2.78, + "learning_rate": 4.072647457199684e-05, + "loss": 0.9829, + "step": 1890 + }, + { + "epoch": 2.78, + "learning_rate": 4.063153718913267e-05, + "loss": 0.9899, + "step": 1891 + }, + { + "epoch": 2.78, + "learning_rate": 4.053668236957134e-05, + "loss": 0.9136, + "step": 1892 + }, + { + "epoch": 2.78, + "learning_rate": 4.044191024522686e-05, + "loss": 0.955, + "step": 1893 + }, + { + "epoch": 2.78, + "learning_rate": 4.034722094789809e-05, + "loss": 0.9592, + "step": 1894 + }, + { + "epoch": 2.78, + "learning_rate": 4.0252614609268766e-05, + "loss": 0.9513, + "step": 1895 + }, + { + "epoch": 2.79, + "learning_rate": 4.015809136090732e-05, + "loss": 0.8921, 
+ "step": 1896 + }, + { + "epoch": 2.79, + "learning_rate": 4.0063651334266496e-05, + "loss": 0.9401, + "step": 1897 + }, + { + "epoch": 2.79, + "learning_rate": 3.996929466068344e-05, + "loss": 0.9658, + "step": 1898 + }, + { + "epoch": 2.79, + "learning_rate": 3.987502147137928e-05, + "loss": 1.0133, + "step": 1899 + }, + { + "epoch": 2.79, + "learning_rate": 3.978083189745907e-05, + "loss": 0.8799, + "step": 1900 + }, + { + "epoch": 2.79, + "learning_rate": 3.96867260699116e-05, + "loss": 0.9336, + "step": 1901 + }, + { + "epoch": 2.79, + "learning_rate": 3.9592704119609125e-05, + "loss": 0.9089, + "step": 1902 + }, + { + "epoch": 2.8, + "learning_rate": 3.9498766177307403e-05, + "loss": 0.9544, + "step": 1903 + }, + { + "epoch": 2.8, + "learning_rate": 3.9404912373645185e-05, + "loss": 0.877, + "step": 1904 + }, + { + "epoch": 2.8, + "learning_rate": 3.9311142839144365e-05, + "loss": 0.9766, + "step": 1905 + }, + { + "epoch": 2.8, + "learning_rate": 3.9217457704209536e-05, + "loss": 0.9153, + "step": 1906 + }, + { + "epoch": 2.8, + "learning_rate": 3.9123857099127936e-05, + "loss": 0.8915, + "step": 1907 + }, + { + "epoch": 2.8, + "learning_rate": 3.903034115406931e-05, + "loss": 0.8892, + "step": 1908 + }, + { + "epoch": 2.8, + "learning_rate": 3.893690999908562e-05, + "loss": 0.9365, + "step": 1909 + }, + { + "epoch": 2.81, + "learning_rate": 3.884356376411089e-05, + "loss": 0.9644, + "step": 1910 + }, + { + "epoch": 2.81, + "learning_rate": 3.875030257896105e-05, + "loss": 0.8724, + "step": 1911 + }, + { + "epoch": 2.81, + "learning_rate": 3.8657126573333804e-05, + "loss": 0.9454, + "step": 1912 + }, + { + "epoch": 2.81, + "learning_rate": 3.85640358768084e-05, + "loss": 0.9356, + "step": 1913 + }, + { + "epoch": 2.81, + "learning_rate": 3.8471030618845375e-05, + "loss": 0.9073, + "step": 1914 + }, + { + "epoch": 2.81, + "learning_rate": 3.837811092878649e-05, + "loss": 0.9627, + "step": 1915 + }, + { + "epoch": 2.82, + "learning_rate": 
3.828527693585451e-05, + "loss": 0.9601, + "step": 1916 + }, + { + "epoch": 2.82, + "learning_rate": 3.819252876915297e-05, + "loss": 0.9001, + "step": 1917 + }, + { + "epoch": 2.82, + "learning_rate": 3.809986655766616e-05, + "loss": 0.9256, + "step": 1918 + }, + { + "epoch": 2.82, + "learning_rate": 3.800729043025871e-05, + "loss": 0.9052, + "step": 1919 + }, + { + "epoch": 2.82, + "learning_rate": 3.791480051567564e-05, + "loss": 1.0201, + "step": 1920 + }, + { + "epoch": 2.82, + "learning_rate": 3.7822396942542005e-05, + "loss": 0.9417, + "step": 1921 + }, + { + "epoch": 2.82, + "learning_rate": 3.7730079839362755e-05, + "loss": 0.8914, + "step": 1922 + }, + { + "epoch": 2.83, + "learning_rate": 3.76378493345227e-05, + "loss": 0.9132, + "step": 1923 + }, + { + "epoch": 2.83, + "learning_rate": 3.7545705556286126e-05, + "loss": 0.8873, + "step": 1924 + }, + { + "epoch": 2.83, + "learning_rate": 3.745364863279675e-05, + "loss": 0.9416, + "step": 1925 + }, + { + "epoch": 2.83, + "learning_rate": 3.7361678692077416e-05, + "loss": 0.9625, + "step": 1926 + }, + { + "epoch": 2.83, + "learning_rate": 3.726979586203019e-05, + "loss": 0.9371, + "step": 1927 + }, + { + "epoch": 2.83, + "learning_rate": 3.717800027043576e-05, + "loss": 0.9022, + "step": 1928 + }, + { + "epoch": 2.83, + "learning_rate": 3.708629204495371e-05, + "loss": 0.9038, + "step": 1929 + }, + { + "epoch": 2.84, + "learning_rate": 3.699467131312197e-05, + "loss": 0.9053, + "step": 1930 + }, + { + "epoch": 2.84, + "learning_rate": 3.6903138202356855e-05, + "loss": 1.0199, + "step": 1931 + }, + { + "epoch": 2.84, + "learning_rate": 3.681169283995279e-05, + "loss": 0.9524, + "step": 1932 + }, + { + "epoch": 2.84, + "learning_rate": 3.6720335353082246e-05, + "loss": 0.8613, + "step": 1933 + }, + { + "epoch": 2.84, + "learning_rate": 3.662906586879542e-05, + "loss": 0.9004, + "step": 1934 + }, + { + "epoch": 2.84, + "learning_rate": 3.653788451402009e-05, + "loss": 0.9132, + "step": 1935 + }, + { + "epoch": 
2.85, + "learning_rate": 3.6446791415561574e-05, + "loss": 0.866, + "step": 1936 + }, + { + "epoch": 2.85, + "learning_rate": 3.635578670010242e-05, + "loss": 0.975, + "step": 1937 + }, + { + "epoch": 2.85, + "learning_rate": 3.626487049420223e-05, + "loss": 0.9129, + "step": 1938 + }, + { + "epoch": 2.85, + "learning_rate": 3.61740429242975e-05, + "loss": 0.9127, + "step": 1939 + }, + { + "epoch": 2.85, + "learning_rate": 3.6083304116701535e-05, + "loss": 0.9235, + "step": 1940 + }, + { + "epoch": 2.85, + "learning_rate": 3.599265419760408e-05, + "loss": 0.9644, + "step": 1941 + }, + { + "epoch": 2.85, + "learning_rate": 3.5902093293071425e-05, + "loss": 0.9893, + "step": 1942 + }, + { + "epoch": 2.86, + "learning_rate": 3.581162152904592e-05, + "loss": 0.967, + "step": 1943 + }, + { + "epoch": 2.86, + "learning_rate": 3.5721239031346066e-05, + "loss": 0.8631, + "step": 1944 + }, + { + "epoch": 2.86, + "learning_rate": 3.5630945925666134e-05, + "loss": 0.9066, + "step": 1945 + }, + { + "epoch": 2.86, + "learning_rate": 3.554074233757608e-05, + "loss": 1.0079, + "step": 1946 + }, + { + "epoch": 2.86, + "learning_rate": 3.545062839252147e-05, + "loss": 0.8998, + "step": 1947 + }, + { + "epoch": 2.86, + "learning_rate": 3.536060421582309e-05, + "loss": 0.9351, + "step": 1948 + }, + { + "epoch": 2.86, + "learning_rate": 3.5270669932676926e-05, + "loss": 1.0081, + "step": 1949 + }, + { + "epoch": 2.87, + "learning_rate": 3.518082566815396e-05, + "loss": 0.9089, + "step": 1950 + }, + { + "epoch": 2.87, + "learning_rate": 3.509107154719994e-05, + "loss": 0.9823, + "step": 1951 + }, + { + "epoch": 2.87, + "learning_rate": 3.500140769463533e-05, + "loss": 0.9625, + "step": 1952 + }, + { + "epoch": 2.87, + "learning_rate": 3.491183423515503e-05, + "loss": 0.8762, + "step": 1953 + }, + { + "epoch": 2.87, + "learning_rate": 3.48223512933282e-05, + "loss": 0.9355, + "step": 1954 + }, + { + "epoch": 2.87, + "learning_rate": 3.4732958993598154e-05, + "loss": 0.9384, + "step": 
1955 + }, + { + "epoch": 2.87, + "learning_rate": 3.464365746028208e-05, + "loss": 0.9222, + "step": 1956 + }, + { + "epoch": 2.88, + "learning_rate": 3.455444681757105e-05, + "loss": 0.9472, + "step": 1957 + }, + { + "epoch": 2.88, + "learning_rate": 3.4465327189529664e-05, + "loss": 0.9375, + "step": 1958 + }, + { + "epoch": 2.88, + "learning_rate": 3.437629870009591e-05, + "loss": 0.8795, + "step": 1959 + }, + { + "epoch": 2.88, + "learning_rate": 3.428736147308115e-05, + "loss": 0.8276, + "step": 1960 + }, + { + "epoch": 2.88, + "learning_rate": 3.41985156321697e-05, + "loss": 1.0247, + "step": 1961 + }, + { + "epoch": 2.88, + "learning_rate": 3.410976130091892e-05, + "loss": 0.9808, + "step": 1962 + }, + { + "epoch": 2.89, + "learning_rate": 3.402109860275877e-05, + "loss": 0.9254, + "step": 1963 + }, + { + "epoch": 2.89, + "learning_rate": 3.393252766099187e-05, + "loss": 0.8803, + "step": 1964 + }, + { + "epoch": 2.89, + "learning_rate": 3.38440485987932e-05, + "loss": 0.9022, + "step": 1965 + }, + { + "epoch": 2.89, + "learning_rate": 3.375566153920992e-05, + "loss": 0.8599, + "step": 1966 + }, + { + "epoch": 2.89, + "learning_rate": 3.366736660516132e-05, + "loss": 0.9316, + "step": 1967 + }, + { + "epoch": 2.89, + "learning_rate": 3.3579163919438595e-05, + "loss": 0.993, + "step": 1968 + }, + { + "epoch": 2.89, + "learning_rate": 3.349105360470456e-05, + "loss": 0.984, + "step": 1969 + }, + { + "epoch": 2.9, + "learning_rate": 3.3403035783493605e-05, + "loss": 0.9208, + "step": 1970 + }, + { + "epoch": 2.9, + "learning_rate": 3.331511057821146e-05, + "loss": 0.9139, + "step": 1971 + }, + { + "epoch": 2.9, + "learning_rate": 3.322727811113516e-05, + "loss": 0.9367, + "step": 1972 + }, + { + "epoch": 2.9, + "learning_rate": 3.313953850441266e-05, + "loss": 0.9278, + "step": 1973 + }, + { + "epoch": 2.9, + "learning_rate": 3.305189188006281e-05, + "loss": 0.9714, + "step": 1974 + }, + { + "epoch": 2.9, + "learning_rate": 3.2964338359975134e-05, + "loss": 
0.8818, + "step": 1975 + }, + { + "epoch": 2.9, + "learning_rate": 3.287687806590971e-05, + "loss": 1.0362, + "step": 1976 + }, + { + "epoch": 2.91, + "learning_rate": 3.2789511119496994e-05, + "loss": 0.9065, + "step": 1977 + }, + { + "epoch": 2.91, + "learning_rate": 3.270223764223755e-05, + "loss": 0.9571, + "step": 1978 + }, + { + "epoch": 2.91, + "learning_rate": 3.2615057755502e-05, + "loss": 0.9128, + "step": 1979 + }, + { + "epoch": 2.91, + "learning_rate": 3.252797158053077e-05, + "loss": 0.8249, + "step": 1980 + }, + { + "epoch": 2.91, + "learning_rate": 3.244097923843398e-05, + "loss": 1.012, + "step": 1981 + }, + { + "epoch": 2.91, + "learning_rate": 3.2354080850191324e-05, + "loss": 0.854, + "step": 1982 + }, + { + "epoch": 2.92, + "learning_rate": 3.226727653665171e-05, + "loss": 0.8708, + "step": 1983 + }, + { + "epoch": 2.92, + "learning_rate": 3.218056641853337e-05, + "loss": 0.8986, + "step": 1984 + }, + { + "epoch": 2.92, + "learning_rate": 3.2093950616423394e-05, + "loss": 0.9326, + "step": 1985 + }, + { + "epoch": 2.92, + "learning_rate": 3.200742925077775e-05, + "loss": 0.941, + "step": 1986 + }, + { + "epoch": 2.92, + "learning_rate": 3.192100244192116e-05, + "loss": 1.0446, + "step": 1987 + }, + { + "epoch": 2.92, + "learning_rate": 3.1834670310046734e-05, + "loss": 0.971, + "step": 1988 + }, + { + "epoch": 2.92, + "learning_rate": 3.174843297521596e-05, + "loss": 0.9548, + "step": 1989 + }, + { + "epoch": 2.93, + "learning_rate": 3.166229055735848e-05, + "loss": 0.8474, + "step": 1990 + }, + { + "epoch": 2.93, + "learning_rate": 3.157624317627195e-05, + "loss": 0.8806, + "step": 1991 + }, + { + "epoch": 2.93, + "learning_rate": 3.1490290951621904e-05, + "loss": 0.9161, + "step": 1992 + }, + { + "epoch": 2.93, + "learning_rate": 3.140443400294146e-05, + "loss": 0.9025, + "step": 1993 + }, + { + "epoch": 2.93, + "learning_rate": 3.1318672449631284e-05, + "loss": 0.9566, + "step": 1994 + }, + { + "epoch": 2.93, + "learning_rate": 
3.123300641095935e-05, + "loss": 0.8958, + "step": 1995 + }, + { + "epoch": 2.93, + "learning_rate": 3.114743600606078e-05, + "loss": 0.9001, + "step": 1996 + }, + { + "epoch": 2.94, + "learning_rate": 3.106196135393782e-05, + "loss": 0.993, + "step": 1997 + }, + { + "epoch": 2.94, + "learning_rate": 3.09765825734594e-05, + "loss": 0.932, + "step": 1998 + }, + { + "epoch": 2.94, + "learning_rate": 3.089129978336118e-05, + "loss": 0.9425, + "step": 1999 + }, + { + "epoch": 2.94, + "learning_rate": 3.080611310224539e-05, + "loss": 0.8835, + "step": 2000 + }, + { + "epoch": 2.94, + "learning_rate": 3.0721022648580486e-05, + "loss": 0.9171, + "step": 2001 + }, + { + "epoch": 2.94, + "learning_rate": 3.063602854070123e-05, + "loss": 0.8977, + "step": 2002 + }, + { + "epoch": 2.94, + "learning_rate": 3.055113089680829e-05, + "loss": 1.0178, + "step": 2003 + }, + { + "epoch": 2.95, + "learning_rate": 3.0466329834968233e-05, + "loss": 0.906, + "step": 2004 + }, + { + "epoch": 2.95, + "learning_rate": 3.0381625473113284e-05, + "loss": 0.9512, + "step": 2005 + }, + { + "epoch": 2.95, + "learning_rate": 3.029701792904117e-05, + "loss": 0.9653, + "step": 2006 + }, + { + "epoch": 2.95, + "learning_rate": 3.0212507320415052e-05, + "loss": 0.9445, + "step": 2007 + }, + { + "epoch": 2.95, + "learning_rate": 3.0128093764763254e-05, + "loss": 0.9406, + "step": 2008 + }, + { + "epoch": 2.95, + "learning_rate": 3.0043777379479098e-05, + "loss": 0.9888, + "step": 2009 + }, + { + "epoch": 2.96, + "learning_rate": 2.9959558281820766e-05, + "loss": 0.8496, + "step": 2010 + }, + { + "epoch": 2.96, + "learning_rate": 2.9875436588911153e-05, + "loss": 0.9583, + "step": 2011 + }, + { + "epoch": 2.96, + "learning_rate": 2.979141241773775e-05, + "loss": 0.9488, + "step": 2012 + }, + { + "epoch": 2.96, + "learning_rate": 2.9707485885152363e-05, + "loss": 0.9339, + "step": 2013 + }, + { + "epoch": 2.96, + "learning_rate": 2.9623657107870996e-05, + "loss": 0.998, + "step": 2014 + }, + { + "epoch": 
2.96, + "learning_rate": 2.953992620247379e-05, + "loss": 0.9076, + "step": 2015 + }, + { + "epoch": 2.96, + "learning_rate": 2.945629328540468e-05, + "loss": 0.9596, + "step": 2016 + }, + { + "epoch": 2.96, + "eval_loss": 0.9915859699249268, + "eval_runtime": 2.6122, + "eval_samples_per_second": 418.031, + "eval_steps_per_second": 26.414, + "step": 2016 + }, + { + "epoch": 2.97, + "learning_rate": 2.9372758472971417e-05, + "loss": 0.9104, + "step": 2017 + }, + { + "epoch": 2.97, + "learning_rate": 2.9289321881345254e-05, + "loss": 1.0079, + "step": 2018 + }, + { + "epoch": 2.97, + "learning_rate": 2.9205983626560874e-05, + "loss": 0.9927, + "step": 2019 + }, + { + "epoch": 2.97, + "learning_rate": 2.9122743824516195e-05, + "loss": 0.9863, + "step": 2020 + }, + { + "epoch": 2.97, + "learning_rate": 2.9039602590972205e-05, + "loss": 0.8651, + "step": 2021 + }, + { + "epoch": 2.97, + "learning_rate": 2.8956560041552882e-05, + "loss": 0.9449, + "step": 2022 + }, + { + "epoch": 2.97, + "learning_rate": 2.8873616291744843e-05, + "loss": 0.9486, + "step": 2023 + }, + { + "epoch": 2.98, + "learning_rate": 2.879077145689746e-05, + "loss": 0.9713, + "step": 2024 + }, + { + "epoch": 2.98, + "learning_rate": 2.8708025652222415e-05, + "loss": 0.8496, + "step": 2025 + }, + { + "epoch": 2.98, + "learning_rate": 2.8625378992793696e-05, + "loss": 0.9048, + "step": 2026 + }, + { + "epoch": 2.98, + "learning_rate": 2.854283159354748e-05, + "loss": 0.9396, + "step": 2027 + }, + { + "epoch": 2.98, + "learning_rate": 2.8460383569281824e-05, + "loss": 0.9327, + "step": 2028 + }, + { + "epoch": 2.98, + "learning_rate": 2.8378035034656625e-05, + "loss": 1.0179, + "step": 2029 + }, + { + "epoch": 2.99, + "learning_rate": 2.8295786104193366e-05, + "loss": 1.0093, + "step": 2030 + }, + { + "epoch": 2.99, + "learning_rate": 2.8213636892275087e-05, + "loss": 1.0036, + "step": 2031 + }, + { + "epoch": 2.99, + "learning_rate": 2.8131587513146164e-05, + "loss": 0.9032, + "step": 2032 + }, + { + 
"epoch": 2.99, + "learning_rate": 2.8049638080912044e-05, + "loss": 0.9796, + "step": 2033 + }, + { + "epoch": 2.99, + "learning_rate": 2.7967788709539233e-05, + "loss": 0.9627, + "step": 2034 + }, + { + "epoch": 2.99, + "learning_rate": 2.7886039512855065e-05, + "loss": 0.8332, + "step": 2035 + }, + { + "epoch": 2.99, + "learning_rate": 2.7804390604547557e-05, + "loss": 0.9075, + "step": 2036 + }, + { + "epoch": 3.0, + "learning_rate": 2.7722842098165325e-05, + "loss": 0.8997, + "step": 2037 + }, + { + "epoch": 3.0, + "learning_rate": 2.7641394107117226e-05, + "loss": 1.0088, + "step": 2038 + }, + { + "epoch": 3.0, + "learning_rate": 2.7560046744672495e-05, + "loss": 0.9538, + "step": 2039 + }, + { + "epoch": 3.0, + "learning_rate": 2.747880012396028e-05, + "loss": 0.9061, + "step": 2040 + }, + { + "epoch": 3.0, + "learning_rate": 2.739765435796967e-05, + "loss": 0.9441, + "step": 2041 + }, + { + "epoch": 3.0, + "learning_rate": 2.7316609559549565e-05, + "loss": 0.9793, + "step": 2042 + }, + { + "epoch": 3.0, + "learning_rate": 2.7235665841408366e-05, + "loss": 0.9078, + "step": 2043 + }, + { + "epoch": 3.01, + "learning_rate": 2.7154823316113932e-05, + "loss": 0.9274, + "step": 2044 + }, + { + "epoch": 3.01, + "learning_rate": 2.707408209609339e-05, + "loss": 0.9687, + "step": 2045 + }, + { + "epoch": 3.01, + "learning_rate": 2.6993442293632963e-05, + "loss": 0.985, + "step": 2046 + }, + { + "epoch": 3.01, + "learning_rate": 2.691290402087787e-05, + "loss": 0.9312, + "step": 2047 + }, + { + "epoch": 3.01, + "learning_rate": 2.6832467389832173e-05, + "loss": 0.8891, + "step": 2048 + }, + { + "epoch": 3.01, + "learning_rate": 2.6752132512358475e-05, + "loss": 0.8605, + "step": 2049 + }, + { + "epoch": 3.01, + "learning_rate": 2.667189950017793e-05, + "loss": 0.9902, + "step": 2050 + }, + { + "epoch": 3.02, + "learning_rate": 2.6591768464870016e-05, + "loss": 0.9078, + "step": 2051 + }, + { + "epoch": 3.02, + "learning_rate": 2.6511739517872426e-05, + "loss": 0.956, 
+ "step": 2052 + }, + { + "epoch": 3.02, + "learning_rate": 2.6431812770480836e-05, + "loss": 0.8834, + "step": 2053 + }, + { + "epoch": 3.02, + "learning_rate": 2.6351988333848788e-05, + "loss": 0.8702, + "step": 2054 + }, + { + "epoch": 3.0, + "learning_rate": 2.6272266318987603e-05, + "loss": 0.9522, + "step": 2055 + }, + { + "epoch": 3.0, + "learning_rate": 2.6192646836766088e-05, + "loss": 0.9338, + "step": 2056 + }, + { + "epoch": 3.0, + "learning_rate": 2.6113129997910547e-05, + "loss": 0.8851, + "step": 2057 + }, + { + "epoch": 3.01, + "learning_rate": 2.6033715913004463e-05, + "loss": 0.7981, + "step": 2058 + }, + { + "epoch": 3.01, + "learning_rate": 2.5954404692488433e-05, + "loss": 0.799, + "step": 2059 + }, + { + "epoch": 3.01, + "learning_rate": 2.587519644666001e-05, + "loss": 0.8267, + "step": 2060 + }, + { + "epoch": 3.01, + "learning_rate": 2.579609128567353e-05, + "loss": 0.846, + "step": 2061 + }, + { + "epoch": 3.01, + "learning_rate": 2.571708931954e-05, + "loss": 0.8217, + "step": 2062 + }, + { + "epoch": 3.01, + "learning_rate": 2.5638190658126938e-05, + "loss": 0.905, + "step": 2063 + }, + { + "epoch": 3.01, + "learning_rate": 2.5559395411158115e-05, + "loss": 0.8105, + "step": 2064 + }, + { + "epoch": 3.02, + "learning_rate": 2.5480703688213524e-05, + "loss": 0.8824, + "step": 2065 + }, + { + "epoch": 3.02, + "learning_rate": 2.5402115598729182e-05, + "loss": 0.9279, + "step": 2066 + }, + { + "epoch": 3.02, + "learning_rate": 2.5323631251997026e-05, + "loss": 0.9431, + "step": 2067 + }, + { + "epoch": 3.02, + "learning_rate": 2.5245250757164663e-05, + "loss": 0.8822, + "step": 2068 + }, + { + "epoch": 3.02, + "learning_rate": 2.5166974223235296e-05, + "loss": 0.8182, + "step": 2069 + }, + { + "epoch": 3.02, + "learning_rate": 2.5088801759067528e-05, + "loss": 0.9177, + "step": 2070 + }, + { + "epoch": 3.03, + "learning_rate": 2.501073347337526e-05, + "loss": 0.8085, + "step": 2071 + }, + { + "epoch": 3.03, + "learning_rate": 
2.493276947472756e-05, + "loss": 0.9009, + "step": 2072 + }, + { + "epoch": 3.03, + "learning_rate": 2.485490987154837e-05, + "loss": 0.9113, + "step": 2073 + }, + { + "epoch": 3.03, + "learning_rate": 2.4777154772116496e-05, + "loss": 0.8268, + "step": 2074 + }, + { + "epoch": 3.03, + "learning_rate": 2.4699504284565412e-05, + "loss": 0.9038, + "step": 2075 + }, + { + "epoch": 3.03, + "learning_rate": 2.462195851688306e-05, + "loss": 0.9268, + "step": 2076 + }, + { + "epoch": 3.03, + "learning_rate": 2.4544517576911862e-05, + "loss": 0.7817, + "step": 2077 + }, + { + "epoch": 3.04, + "learning_rate": 2.4467181572348318e-05, + "loss": 0.9028, + "step": 2078 + }, + { + "epoch": 3.04, + "learning_rate": 2.438995061074314e-05, + "loss": 0.8584, + "step": 2079 + }, + { + "epoch": 3.04, + "learning_rate": 2.4312824799500844e-05, + "loss": 0.9867, + "step": 2080 + }, + { + "epoch": 3.04, + "learning_rate": 2.4235804245879723e-05, + "loss": 0.9181, + "step": 2081 + }, + { + "epoch": 3.04, + "learning_rate": 2.4158889056991775e-05, + "loss": 0.8883, + "step": 2082 + }, + { + "epoch": 3.04, + "learning_rate": 2.4082079339802378e-05, + "loss": 0.9067, + "step": 2083 + }, + { + "epoch": 3.04, + "learning_rate": 2.4005375201130274e-05, + "loss": 0.8907, + "step": 2084 + }, + { + "epoch": 3.05, + "learning_rate": 2.392877674764734e-05, + "loss": 0.9214, + "step": 2085 + }, + { + "epoch": 3.05, + "learning_rate": 2.3852284085878517e-05, + "loss": 0.8258, + "step": 2086 + }, + { + "epoch": 3.05, + "learning_rate": 2.377589732220167e-05, + "loss": 0.8416, + "step": 2087 + }, + { + "epoch": 3.05, + "learning_rate": 2.369961656284727e-05, + "loss": 0.8781, + "step": 2088 + }, + { + "epoch": 3.05, + "learning_rate": 2.362344191389846e-05, + "loss": 0.8864, + "step": 2089 + }, + { + "epoch": 3.05, + "learning_rate": 2.354737348129077e-05, + "loss": 0.9058, + "step": 2090 + }, + { + "epoch": 3.06, + "learning_rate": 2.3471411370812014e-05, + "loss": 0.87, + "step": 2091 + }, + { + 
"epoch": 3.06, + "learning_rate": 2.339555568810221e-05, + "loss": 0.966, + "step": 2092 + }, + { + "epoch": 3.06, + "learning_rate": 2.33198065386533e-05, + "loss": 0.9634, + "step": 2093 + }, + { + "epoch": 3.06, + "learning_rate": 2.324416402780907e-05, + "loss": 0.9022, + "step": 2094 + }, + { + "epoch": 3.06, + "learning_rate": 2.316862826076507e-05, + "loss": 0.875, + "step": 2095 + }, + { + "epoch": 3.06, + "learning_rate": 2.3093199342568318e-05, + "loss": 0.9033, + "step": 2096 + }, + { + "epoch": 3.06, + "learning_rate": 2.3017877378117326e-05, + "loss": 0.9128, + "step": 2097 + }, + { + "epoch": 3.07, + "learning_rate": 2.294266247216178e-05, + "loss": 0.8851, + "step": 2098 + }, + { + "epoch": 3.07, + "learning_rate": 2.2867554729302542e-05, + "loss": 0.8791, + "step": 2099 + }, + { + "epoch": 3.07, + "learning_rate": 2.2792554253991415e-05, + "loss": 0.978, + "step": 2100 + }, + { + "epoch": 3.07, + "learning_rate": 2.271766115053099e-05, + "loss": 0.8423, + "step": 2101 + }, + { + "epoch": 3.07, + "learning_rate": 2.2642875523074613e-05, + "loss": 0.9984, + "step": 2102 + }, + { + "epoch": 3.07, + "learning_rate": 2.2568197475626175e-05, + "loss": 0.8152, + "step": 2103 + }, + { + "epoch": 3.07, + "learning_rate": 2.249362711203985e-05, + "loss": 0.9389, + "step": 2104 + }, + { + "epoch": 3.08, + "learning_rate": 2.241916453602011e-05, + "loss": 0.9301, + "step": 2105 + }, + { + "epoch": 3.08, + "learning_rate": 2.2344809851121583e-05, + "loss": 0.8169, + "step": 2106 + }, + { + "epoch": 3.08, + "learning_rate": 2.2270563160748793e-05, + "loss": 0.9564, + "step": 2107 + }, + { + "epoch": 3.08, + "learning_rate": 2.2196424568156073e-05, + "loss": 0.9204, + "step": 2108 + }, + { + "epoch": 3.08, + "learning_rate": 2.2122394176447416e-05, + "loss": 0.9038, + "step": 2109 + }, + { + "epoch": 3.08, + "learning_rate": 2.204847208857642e-05, + "loss": 0.8507, + "step": 2110 + }, + { + "epoch": 3.08, + "learning_rate": 2.197465840734596e-05, + "loss": 0.8936, 
+ "step": 2111 + }, + { + "epoch": 3.09, + "learning_rate": 2.190095323540825e-05, + "loss": 0.9433, + "step": 2112 + }, + { + "epoch": 3.09, + "learning_rate": 2.1827356675264542e-05, + "loss": 0.899, + "step": 2113 + }, + { + "epoch": 3.09, + "learning_rate": 2.1753868829265046e-05, + "loss": 0.7804, + "step": 2114 + }, + { + "epoch": 3.09, + "learning_rate": 2.1680489799608762e-05, + "loss": 0.8703, + "step": 2115 + }, + { + "epoch": 3.09, + "learning_rate": 2.160721968834344e-05, + "loss": 0.8523, + "step": 2116 + }, + { + "epoch": 3.09, + "learning_rate": 2.153405859736528e-05, + "loss": 0.9138, + "step": 2117 + }, + { + "epoch": 3.1, + "learning_rate": 2.146100662841889e-05, + "loss": 0.8954, + "step": 2118 + }, + { + "epoch": 3.1, + "learning_rate": 2.1388063883097152e-05, + "loss": 0.8585, + "step": 2119 + }, + { + "epoch": 3.1, + "learning_rate": 2.1315230462840985e-05, + "loss": 0.9287, + "step": 2120 + }, + { + "epoch": 3.1, + "learning_rate": 2.1242506468939383e-05, + "loss": 0.8709, + "step": 2121 + }, + { + "epoch": 3.1, + "learning_rate": 2.1169892002529047e-05, + "loss": 0.9009, + "step": 2122 + }, + { + "epoch": 3.1, + "learning_rate": 2.1097387164594406e-05, + "loss": 0.9042, + "step": 2123 + }, + { + "epoch": 3.1, + "learning_rate": 2.102499205596743e-05, + "loss": 0.8884, + "step": 2124 + }, + { + "epoch": 3.11, + "learning_rate": 2.095270677732746e-05, + "loss": 0.965, + "step": 2125 + }, + { + "epoch": 3.11, + "learning_rate": 2.0880531429201145e-05, + "loss": 0.8886, + "step": 2126 + }, + { + "epoch": 3.11, + "learning_rate": 2.0808466111962264e-05, + "loss": 0.8881, + "step": 2127 + }, + { + "epoch": 3.11, + "learning_rate": 2.0736510925831532e-05, + "loss": 0.8371, + "step": 2128 + }, + { + "epoch": 3.11, + "learning_rate": 2.0664665970876496e-05, + "loss": 0.9097, + "step": 2129 + }, + { + "epoch": 3.11, + "learning_rate": 2.0592931347011423e-05, + "loss": 0.9366, + "step": 2130 + }, + { + "epoch": 3.11, + "learning_rate": 
2.0521307153997182e-05, + "loss": 0.8485, + "step": 2131 + }, + { + "epoch": 3.12, + "learning_rate": 2.0449793491441028e-05, + "loss": 0.8103, + "step": 2132 + }, + { + "epoch": 3.12, + "learning_rate": 2.037839045879646e-05, + "loss": 0.9271, + "step": 2133 + }, + { + "epoch": 3.12, + "learning_rate": 2.0307098155363236e-05, + "loss": 0.8905, + "step": 2134 + }, + { + "epoch": 3.12, + "learning_rate": 2.0235916680287015e-05, + "loss": 0.8801, + "step": 2135 + }, + { + "epoch": 3.12, + "learning_rate": 2.0164846132559402e-05, + "loss": 0.8613, + "step": 2136 + }, + { + "epoch": 3.12, + "learning_rate": 2.009388661101771e-05, + "loss": 0.8611, + "step": 2137 + }, + { + "epoch": 3.13, + "learning_rate": 2.0023038214344823e-05, + "loss": 0.9265, + "step": 2138 + }, + { + "epoch": 3.13, + "learning_rate": 1.9952301041069122e-05, + "loss": 0.8643, + "step": 2139 + }, + { + "epoch": 3.13, + "learning_rate": 1.9881675189564254e-05, + "loss": 0.8263, + "step": 2140 + }, + { + "epoch": 3.13, + "learning_rate": 1.9811160758049163e-05, + "loss": 0.8731, + "step": 2141 + }, + { + "epoch": 3.13, + "learning_rate": 1.974075784458771e-05, + "loss": 0.8522, + "step": 2142 + }, + { + "epoch": 3.13, + "learning_rate": 1.9670466547088773e-05, + "loss": 0.848, + "step": 2143 + }, + { + "epoch": 3.13, + "learning_rate": 1.9600286963305957e-05, + "loss": 0.9803, + "step": 2144 + }, + { + "epoch": 3.14, + "learning_rate": 1.9530219190837485e-05, + "loss": 0.8974, + "step": 2145 + }, + { + "epoch": 3.14, + "learning_rate": 1.946026332712615e-05, + "loss": 0.8892, + "step": 2146 + }, + { + "epoch": 3.14, + "learning_rate": 1.9390419469459065e-05, + "loss": 0.8785, + "step": 2147 + }, + { + "epoch": 3.14, + "learning_rate": 1.93206877149676e-05, + "loss": 0.9063, + "step": 2148 + }, + { + "epoch": 3.14, + "learning_rate": 1.9251068160627173e-05, + "loss": 0.8974, + "step": 2149 + }, + { + "epoch": 3.14, + "learning_rate": 1.9181560903257232e-05, + "loss": 0.8693, + "step": 2150 + }, + { + 
"epoch": 3.14, + "learning_rate": 1.9112166039521073e-05, + "loss": 0.8972, + "step": 2151 + }, + { + "epoch": 3.15, + "learning_rate": 1.9042883665925604e-05, + "loss": 0.9402, + "step": 2152 + }, + { + "epoch": 3.15, + "learning_rate": 1.897371387882134e-05, + "loss": 0.8868, + "step": 2153 + }, + { + "epoch": 3.15, + "learning_rate": 1.8904656774402208e-05, + "loss": 0.9122, + "step": 2154 + }, + { + "epoch": 3.15, + "learning_rate": 1.8835712448705424e-05, + "loss": 0.8585, + "step": 2155 + }, + { + "epoch": 3.15, + "learning_rate": 1.8766880997611424e-05, + "loss": 0.8969, + "step": 2156 + }, + { + "epoch": 3.15, + "learning_rate": 1.8698162516843552e-05, + "loss": 0.8787, + "step": 2157 + }, + { + "epoch": 3.15, + "learning_rate": 1.86295571019682e-05, + "loss": 0.8505, + "step": 2158 + }, + { + "epoch": 3.16, + "learning_rate": 1.8561064848394382e-05, + "loss": 0.869, + "step": 2159 + }, + { + "epoch": 3.16, + "learning_rate": 1.849268585137377e-05, + "loss": 0.8601, + "step": 2160 + }, + { + "epoch": 3.16, + "learning_rate": 1.8424420206000616e-05, + "loss": 0.8734, + "step": 2161 + }, + { + "epoch": 3.16, + "learning_rate": 1.835626800721144e-05, + "loss": 0.9073, + "step": 2162 + }, + { + "epoch": 3.16, + "learning_rate": 1.8288229349785025e-05, + "loss": 0.8833, + "step": 2163 + }, + { + "epoch": 3.16, + "learning_rate": 1.8220304328342252e-05, + "loss": 0.8833, + "step": 2164 + }, + { + "epoch": 3.17, + "learning_rate": 1.815249303734594e-05, + "loss": 0.7653, + "step": 2165 + }, + { + "epoch": 3.17, + "learning_rate": 1.808479557110081e-05, + "loss": 0.9249, + "step": 2166 + }, + { + "epoch": 3.17, + "learning_rate": 1.801721202375327e-05, + "loss": 0.9382, + "step": 2167 + }, + { + "epoch": 3.17, + "learning_rate": 1.7949742489291255e-05, + "loss": 0.8937, + "step": 2168 + }, + { + "epoch": 3.17, + "learning_rate": 1.7882387061544182e-05, + "loss": 0.9076, + "step": 2169 + }, + { + "epoch": 3.17, + "learning_rate": 1.7815145834182734e-05, + "loss": 
0.8711, + "step": 2170 + }, + { + "epoch": 3.17, + "learning_rate": 1.7748018900718854e-05, + "loss": 0.8599, + "step": 2171 + }, + { + "epoch": 3.18, + "learning_rate": 1.7681006354505493e-05, + "loss": 0.9874, + "step": 2172 + }, + { + "epoch": 3.18, + "learning_rate": 1.7614108288736486e-05, + "loss": 0.8984, + "step": 2173 + }, + { + "epoch": 3.18, + "learning_rate": 1.754732479644655e-05, + "loss": 0.8941, + "step": 2174 + }, + { + "epoch": 3.18, + "learning_rate": 1.7480655970510985e-05, + "loss": 0.9727, + "step": 2175 + }, + { + "epoch": 3.18, + "learning_rate": 1.7414101903645684e-05, + "loss": 0.8093, + "step": 2176 + }, + { + "epoch": 3.18, + "learning_rate": 1.7347662688406908e-05, + "loss": 0.8109, + "step": 2177 + }, + { + "epoch": 3.18, + "learning_rate": 1.7281338417191205e-05, + "loss": 0.9079, + "step": 2178 + }, + { + "epoch": 3.19, + "learning_rate": 1.721512918223527e-05, + "loss": 0.9269, + "step": 2179 + }, + { + "epoch": 3.19, + "learning_rate": 1.7149035075615794e-05, + "loss": 0.8874, + "step": 2180 + }, + { + "epoch": 3.19, + "learning_rate": 1.7083056189249412e-05, + "loss": 0.8941, + "step": 2181 + }, + { + "epoch": 3.19, + "learning_rate": 1.7017192614892508e-05, + "loss": 0.8195, + "step": 2182 + }, + { + "epoch": 3.19, + "learning_rate": 1.6951444444141084e-05, + "loss": 0.9034, + "step": 2183 + }, + { + "epoch": 3.19, + "learning_rate": 1.688581176843066e-05, + "loss": 0.8771, + "step": 2184 + }, + { + "epoch": 3.19, + "eval_loss": 1.001231074333191, + "eval_runtime": 2.6132, + "eval_samples_per_second": 417.878, + "eval_steps_per_second": 26.404, + "step": 2184 + }, + { + "epoch": 3.2, + "learning_rate": 1.6820294679036087e-05, + "loss": 0.9257, + "step": 2185 + }, + { + "epoch": 3.2, + "learning_rate": 1.675489326707159e-05, + "loss": 0.8702, + "step": 2186 + }, + { + "epoch": 3.2, + "learning_rate": 1.668960762349042e-05, + "loss": 0.8393, + "step": 2187 + }, + { + "epoch": 3.2, + "learning_rate": 1.6624437839084862e-05, + 
"loss": 0.891, + "step": 2188 + }, + { + "epoch": 3.2, + "learning_rate": 1.6559384004486055e-05, + "loss": 0.8837, + "step": 2189 + }, + { + "epoch": 3.2, + "learning_rate": 1.6494446210163926e-05, + "loss": 0.8943, + "step": 2190 + }, + { + "epoch": 3.2, + "learning_rate": 1.6429624546427034e-05, + "loss": 0.9046, + "step": 2191 + }, + { + "epoch": 3.21, + "learning_rate": 1.6364919103422393e-05, + "loss": 0.7765, + "step": 2192 + }, + { + "epoch": 3.21, + "learning_rate": 1.6300329971135397e-05, + "loss": 0.8723, + "step": 2193 + }, + { + "epoch": 3.21, + "learning_rate": 1.6235857239389696e-05, + "loss": 0.8941, + "step": 2194 + }, + { + "epoch": 3.21, + "learning_rate": 1.6171500997847054e-05, + "loss": 0.8349, + "step": 2195 + }, + { + "epoch": 3.21, + "learning_rate": 1.6107261336007285e-05, + "loss": 0.7731, + "step": 2196 + }, + { + "epoch": 3.21, + "learning_rate": 1.6043138343207975e-05, + "loss": 0.936, + "step": 2197 + }, + { + "epoch": 3.21, + "learning_rate": 1.5979132108624574e-05, + "loss": 0.8672, + "step": 2198 + }, + { + "epoch": 3.22, + "learning_rate": 1.5915242721270074e-05, + "loss": 0.8693, + "step": 2199 + }, + { + "epoch": 3.22, + "learning_rate": 1.5851470269994984e-05, + "loss": 0.8971, + "step": 2200 + }, + { + "epoch": 3.22, + "learning_rate": 1.5787814843487226e-05, + "loss": 0.9144, + "step": 2201 + }, + { + "epoch": 3.22, + "learning_rate": 1.5724276530271965e-05, + "loss": 0.9183, + "step": 2202 + }, + { + "epoch": 3.22, + "learning_rate": 1.566085541871145e-05, + "loss": 0.8262, + "step": 2203 + }, + { + "epoch": 3.22, + "learning_rate": 1.5597551597004966e-05, + "loss": 0.8459, + "step": 2204 + }, + { + "epoch": 3.23, + "learning_rate": 1.5534365153188723e-05, + "loss": 0.9236, + "step": 2205 + }, + { + "epoch": 3.23, + "learning_rate": 1.5471296175135674e-05, + "loss": 0.9021, + "step": 2206 + }, + { + "epoch": 3.23, + "learning_rate": 1.5408344750555383e-05, + "loss": 0.8881, + "step": 2207 + }, + { + "epoch": 3.23, + 
"learning_rate": 1.534551096699396e-05, + "loss": 0.8462, + "step": 2208 + }, + { + "epoch": 3.23, + "learning_rate": 1.5282794911833887e-05, + "loss": 0.9467, + "step": 2209 + }, + { + "epoch": 3.23, + "learning_rate": 1.5220196672293929e-05, + "loss": 0.8647, + "step": 2210 + }, + { + "epoch": 3.23, + "learning_rate": 1.5157716335429061e-05, + "loss": 0.9548, + "step": 2211 + }, + { + "epoch": 3.24, + "learning_rate": 1.5095353988130235e-05, + "loss": 0.858, + "step": 2212 + }, + { + "epoch": 3.24, + "learning_rate": 1.5033109717124283e-05, + "loss": 0.883, + "step": 2213 + }, + { + "epoch": 3.24, + "learning_rate": 1.4970983608973942e-05, + "loss": 0.9509, + "step": 2214 + }, + { + "epoch": 3.24, + "learning_rate": 1.4908975750077491e-05, + "loss": 0.8861, + "step": 2215 + }, + { + "epoch": 3.24, + "learning_rate": 1.4847086226668872e-05, + "loss": 0.8819, + "step": 2216 + }, + { + "epoch": 3.24, + "learning_rate": 1.478531512481739e-05, + "loss": 0.8183, + "step": 2217 + }, + { + "epoch": 3.24, + "learning_rate": 1.4723662530427673e-05, + "loss": 0.8612, + "step": 2218 + }, + { + "epoch": 3.25, + "learning_rate": 1.4662128529239572e-05, + "loss": 0.8367, + "step": 2219 + }, + { + "epoch": 3.25, + "learning_rate": 1.4600713206827932e-05, + "loss": 0.9016, + "step": 2220 + }, + { + "epoch": 3.25, + "learning_rate": 1.4539416648602656e-05, + "loss": 0.85, + "step": 2221 + }, + { + "epoch": 3.25, + "learning_rate": 1.4478238939808453e-05, + "loss": 0.8941, + "step": 2222 + }, + { + "epoch": 3.25, + "learning_rate": 1.4417180165524714e-05, + "loss": 0.8326, + "step": 2223 + }, + { + "epoch": 3.25, + "learning_rate": 1.4356240410665433e-05, + "loss": 0.8711, + "step": 2224 + }, + { + "epoch": 3.25, + "learning_rate": 1.429541975997908e-05, + "loss": 0.8943, + "step": 2225 + }, + { + "epoch": 3.26, + "learning_rate": 1.4234718298048555e-05, + "loss": 0.9355, + "step": 2226 + }, + { + "epoch": 3.26, + "learning_rate": 1.4174136109290925e-05, + "loss": 0.9106, + "step": 
2227 + }, + { + "epoch": 3.26, + "learning_rate": 1.4113673277957395e-05, + "loss": 0.9101, + "step": 2228 + }, + { + "epoch": 3.26, + "learning_rate": 1.4053329888133238e-05, + "loss": 0.9257, + "step": 2229 + }, + { + "epoch": 3.26, + "learning_rate": 1.3993106023737546e-05, + "loss": 0.9317, + "step": 2230 + }, + { + "epoch": 3.26, + "learning_rate": 1.3933001768523269e-05, + "loss": 0.8653, + "step": 2231 + }, + { + "epoch": 3.27, + "learning_rate": 1.3873017206076933e-05, + "loss": 0.8959, + "step": 2232 + }, + { + "epoch": 3.27, + "learning_rate": 1.3813152419818654e-05, + "loss": 1.0166, + "step": 2233 + }, + { + "epoch": 3.27, + "learning_rate": 1.3753407493001968e-05, + "loss": 0.8364, + "step": 2234 + }, + { + "epoch": 3.27, + "learning_rate": 1.3693782508713704e-05, + "loss": 0.9402, + "step": 2235 + }, + { + "epoch": 3.27, + "learning_rate": 1.3634277549873953e-05, + "loss": 0.9119, + "step": 2236 + }, + { + "epoch": 3.27, + "learning_rate": 1.3574892699235797e-05, + "loss": 0.9017, + "step": 2237 + }, + { + "epoch": 3.27, + "learning_rate": 1.3515628039385365e-05, + "loss": 0.9208, + "step": 2238 + }, + { + "epoch": 3.28, + "learning_rate": 1.3456483652741591e-05, + "loss": 0.9155, + "step": 2239 + }, + { + "epoch": 3.28, + "learning_rate": 1.339745962155613e-05, + "loss": 0.915, + "step": 2240 + }, + { + "epoch": 3.28, + "learning_rate": 1.333855602791333e-05, + "loss": 0.8611, + "step": 2241 + }, + { + "epoch": 3.28, + "learning_rate": 1.3279772953729986e-05, + "loss": 0.8849, + "step": 2242 + }, + { + "epoch": 3.28, + "learning_rate": 1.3221110480755305e-05, + "loss": 0.8616, + "step": 2243 + }, + { + "epoch": 3.28, + "learning_rate": 1.3162568690570743e-05, + "loss": 0.8999, + "step": 2244 + }, + { + "epoch": 3.28, + "learning_rate": 1.3104147664589973e-05, + "loss": 0.9156, + "step": 2245 + }, + { + "epoch": 3.29, + "learning_rate": 1.3045847484058748e-05, + "loss": 0.9132, + "step": 2246 + }, + { + "epoch": 3.29, + "learning_rate": 
1.2987668230054684e-05, + "loss": 0.862, + "step": 2247 + }, + { + "epoch": 3.29, + "learning_rate": 1.2929609983487257e-05, + "loss": 0.8309, + "step": 2248 + }, + { + "epoch": 3.29, + "learning_rate": 1.287167282509767e-05, + "loss": 0.814, + "step": 2249 + }, + { + "epoch": 3.29, + "learning_rate": 1.2813856835458682e-05, + "loss": 0.8193, + "step": 2250 + }, + { + "epoch": 3.29, + "learning_rate": 1.2756162094974644e-05, + "loss": 0.8668, + "step": 2251 + }, + { + "epoch": 3.3, + "learning_rate": 1.2698588683881186e-05, + "loss": 0.8173, + "step": 2252 + }, + { + "epoch": 3.3, + "learning_rate": 1.2641136682245257e-05, + "loss": 0.8263, + "step": 2253 + }, + { + "epoch": 3.3, + "learning_rate": 1.2583806169964961e-05, + "loss": 0.865, + "step": 2254 + }, + { + "epoch": 3.3, + "learning_rate": 1.252659722676941e-05, + "loss": 0.8536, + "step": 2255 + }, + { + "epoch": 3.3, + "learning_rate": 1.2469509932218703e-05, + "loss": 0.8811, + "step": 2256 + }, + { + "epoch": 3.3, + "learning_rate": 1.2412544365703738e-05, + "loss": 0.8152, + "step": 2257 + }, + { + "epoch": 3.3, + "learning_rate": 1.235570060644612e-05, + "loss": 0.8511, + "step": 2258 + }, + { + "epoch": 3.31, + "learning_rate": 1.2298978733498035e-05, + "loss": 0.9096, + "step": 2259 + }, + { + "epoch": 3.31, + "learning_rate": 1.224237882574224e-05, + "loss": 0.8595, + "step": 2260 + }, + { + "epoch": 3.31, + "learning_rate": 1.2185900961891794e-05, + "loss": 0.9332, + "step": 2261 + }, + { + "epoch": 3.31, + "learning_rate": 1.2129545220490102e-05, + "loss": 0.9013, + "step": 2262 + }, + { + "epoch": 3.31, + "learning_rate": 1.207331167991066e-05, + "loss": 0.9224, + "step": 2263 + }, + { + "epoch": 3.31, + "learning_rate": 1.2017200418357078e-05, + "loss": 0.9001, + "step": 2264 + }, + { + "epoch": 3.31, + "learning_rate": 1.1961211513862858e-05, + "loss": 0.8989, + "step": 2265 + }, + { + "epoch": 3.32, + "learning_rate": 1.1905345044291426e-05, + "loss": 0.8607, + "step": 2266 + }, + { + "epoch": 
3.32, + "learning_rate": 1.184960108733586e-05, + "loss": 0.872, + "step": 2267 + }, + { + "epoch": 3.32, + "learning_rate": 1.1793979720518866e-05, + "loss": 0.848, + "step": 2268 + }, + { + "epoch": 3.32, + "learning_rate": 1.1738481021192704e-05, + "loss": 0.875, + "step": 2269 + }, + { + "epoch": 3.32, + "learning_rate": 1.1683105066539068e-05, + "loss": 0.8266, + "step": 2270 + }, + { + "epoch": 3.32, + "learning_rate": 1.1627851933568856e-05, + "loss": 0.9271, + "step": 2271 + }, + { + "epoch": 3.32, + "learning_rate": 1.1572721699122236e-05, + "loss": 0.9028, + "step": 2272 + }, + { + "epoch": 3.33, + "learning_rate": 1.151771443986842e-05, + "loss": 0.8882, + "step": 2273 + }, + { + "epoch": 3.33, + "learning_rate": 1.14628302323056e-05, + "loss": 0.9062, + "step": 2274 + }, + { + "epoch": 3.33, + "learning_rate": 1.1408069152760903e-05, + "loss": 0.8219, + "step": 2275 + }, + { + "epoch": 3.33, + "learning_rate": 1.1353431277390126e-05, + "loss": 0.8061, + "step": 2276 + }, + { + "epoch": 3.33, + "learning_rate": 1.129891668217783e-05, + "loss": 0.8517, + "step": 2277 + }, + { + "epoch": 3.33, + "learning_rate": 1.1244525442937049e-05, + "loss": 0.8283, + "step": 2278 + }, + { + "epoch": 3.34, + "learning_rate": 1.1190257635309275e-05, + "loss": 0.9096, + "step": 2279 + }, + { + "epoch": 3.34, + "learning_rate": 1.113611333476442e-05, + "loss": 0.9243, + "step": 2280 + }, + { + "epoch": 3.34, + "learning_rate": 1.1082092616600538e-05, + "loss": 0.9085, + "step": 2281 + }, + { + "epoch": 3.34, + "learning_rate": 1.1028195555943877e-05, + "loss": 0.8516, + "step": 2282 + }, + { + "epoch": 3.34, + "learning_rate": 1.0974422227748704e-05, + "loss": 0.8565, + "step": 2283 + }, + { + "epoch": 3.34, + "learning_rate": 1.0920772706797167e-05, + "loss": 0.8599, + "step": 2284 + }, + { + "epoch": 3.34, + "learning_rate": 1.0867247067699315e-05, + "loss": 0.9778, + "step": 2285 + }, + { + "epoch": 3.35, + "learning_rate": 1.0813845384892896e-05, + "loss": 0.886, + 
"step": 2286 + }, + { + "epoch": 3.35, + "learning_rate": 1.0760567732643223e-05, + "loss": 0.9232, + "step": 2287 + }, + { + "epoch": 3.35, + "learning_rate": 1.0707414185043163e-05, + "loss": 0.7584, + "step": 2288 + }, + { + "epoch": 3.35, + "learning_rate": 1.0654384816012953e-05, + "loss": 0.8672, + "step": 2289 + }, + { + "epoch": 3.35, + "learning_rate": 1.0601479699300209e-05, + "loss": 0.8809, + "step": 2290 + }, + { + "epoch": 3.35, + "learning_rate": 1.054869890847967e-05, + "loss": 0.8126, + "step": 2291 + }, + { + "epoch": 3.35, + "learning_rate": 1.0496042516953209e-05, + "loss": 0.9004, + "step": 2292 + }, + { + "epoch": 3.36, + "learning_rate": 1.0443510597949724e-05, + "loss": 0.8827, + "step": 2293 + }, + { + "epoch": 3.36, + "learning_rate": 1.0391103224524956e-05, + "loss": 0.8887, + "step": 2294 + }, + { + "epoch": 3.36, + "learning_rate": 1.0338820469561494e-05, + "loss": 0.8627, + "step": 2295 + }, + { + "epoch": 3.36, + "learning_rate": 1.02866624057686e-05, + "loss": 0.8772, + "step": 2296 + }, + { + "epoch": 3.36, + "learning_rate": 1.0234629105682103e-05, + "loss": 0.9564, + "step": 2297 + }, + { + "epoch": 3.36, + "learning_rate": 1.0182720641664356e-05, + "loss": 0.885, + "step": 2298 + }, + { + "epoch": 3.37, + "learning_rate": 1.013093708590408e-05, + "loss": 0.8678, + "step": 2299 + }, + { + "epoch": 3.37, + "learning_rate": 1.0079278510416313e-05, + "loss": 0.9017, + "step": 2300 + }, + { + "epoch": 3.37, + "learning_rate": 1.0027744987042299e-05, + "loss": 0.9267, + "step": 2301 + }, + { + "epoch": 3.37, + "learning_rate": 9.976336587449309e-06, + "loss": 0.8748, + "step": 2302 + }, + { + "epoch": 3.37, + "learning_rate": 9.925053383130667e-06, + "loss": 0.8395, + "step": 2303 + }, + { + "epoch": 3.37, + "learning_rate": 9.873895445405523e-06, + "loss": 0.9574, + "step": 2304 + }, + { + "epoch": 3.37, + "learning_rate": 9.822862845418912e-06, + "loss": 0.8472, + "step": 2305 + }, + { + "epoch": 3.38, + "learning_rate": 
9.771955654141496e-06, + "loss": 0.904, + "step": 2306 + }, + { + "epoch": 3.38, + "learning_rate": 9.721173942369521e-06, + "loss": 0.7936, + "step": 2307 + }, + { + "epoch": 3.38, + "learning_rate": 9.670517780724775e-06, + "loss": 0.7711, + "step": 2308 + }, + { + "epoch": 3.38, + "learning_rate": 9.619987239654405e-06, + "loss": 0.818, + "step": 2309 + }, + { + "epoch": 3.38, + "learning_rate": 9.56958238943093e-06, + "loss": 0.8908, + "step": 2310 + }, + { + "epoch": 3.38, + "learning_rate": 9.519303300151983e-06, + "loss": 0.9463, + "step": 2311 + }, + { + "epoch": 3.38, + "learning_rate": 9.469150041740338e-06, + "loss": 0.8929, + "step": 2312 + }, + { + "epoch": 3.39, + "learning_rate": 9.419122683943793e-06, + "loss": 0.8494, + "step": 2313 + }, + { + "epoch": 3.39, + "learning_rate": 9.369221296335006e-06, + "loss": 0.862, + "step": 2314 + }, + { + "epoch": 3.39, + "learning_rate": 9.319445948311534e-06, + "loss": 0.8727, + "step": 2315 + }, + { + "epoch": 3.39, + "learning_rate": 9.269796709095558e-06, + "loss": 0.8559, + "step": 2316 + }, + { + "epoch": 3.39, + "learning_rate": 9.220273647733969e-06, + "loss": 0.8936, + "step": 2317 + }, + { + "epoch": 3.39, + "learning_rate": 9.170876833098118e-06, + "loss": 0.8938, + "step": 2318 + }, + { + "epoch": 3.39, + "learning_rate": 9.121606333883792e-06, + "loss": 0.8864, + "step": 2319 + }, + { + "epoch": 3.4, + "learning_rate": 9.072462218611155e-06, + "loss": 0.865, + "step": 2320 + }, + { + "epoch": 3.4, + "learning_rate": 9.023444555624571e-06, + "loss": 0.9247, + "step": 2321 + }, + { + "epoch": 3.4, + "learning_rate": 8.974553413092556e-06, + "loss": 0.93, + "step": 2322 + }, + { + "epoch": 3.4, + "learning_rate": 8.925788859007656e-06, + "loss": 0.9428, + "step": 2323 + }, + { + "epoch": 3.4, + "learning_rate": 8.87715096118642e-06, + "loss": 0.8987, + "step": 2324 + }, + { + "epoch": 3.4, + "learning_rate": 8.828639787269244e-06, + "loss": 0.8987, + "step": 2325 + }, + { + "epoch": 3.41, + 
"learning_rate": 8.78025540472025e-06, + "loss": 0.8056, + "step": 2326 + }, + { + "epoch": 3.41, + "learning_rate": 8.731997880827258e-06, + "loss": 0.9573, + "step": 2327 + }, + { + "epoch": 3.41, + "learning_rate": 8.683867282701686e-06, + "loss": 0.8489, + "step": 2328 + }, + { + "epoch": 3.41, + "learning_rate": 8.635863677278378e-06, + "loss": 0.7916, + "step": 2329 + }, + { + "epoch": 3.41, + "learning_rate": 8.587987131315656e-06, + "loss": 0.8372, + "step": 2330 + }, + { + "epoch": 3.41, + "learning_rate": 8.540237711395072e-06, + "loss": 0.903, + "step": 2331 + }, + { + "epoch": 3.41, + "learning_rate": 8.492615483921395e-06, + "loss": 0.9401, + "step": 2332 + }, + { + "epoch": 3.42, + "learning_rate": 8.445120515122551e-06, + "loss": 0.8648, + "step": 2333 + }, + { + "epoch": 3.42, + "learning_rate": 8.397752871049436e-06, + "loss": 0.9071, + "step": 2334 + }, + { + "epoch": 3.42, + "learning_rate": 8.350512617575912e-06, + "loss": 0.9137, + "step": 2335 + }, + { + "epoch": 3.42, + "learning_rate": 8.303399820398672e-06, + "loss": 0.8674, + "step": 2336 + }, + { + "epoch": 3.42, + "learning_rate": 8.256414545037127e-06, + "loss": 0.8954, + "step": 2337 + }, + { + "epoch": 3.42, + "learning_rate": 8.2095568568334e-06, + "loss": 0.8513, + "step": 2338 + }, + { + "epoch": 3.42, + "learning_rate": 8.162826820952097e-06, + "loss": 0.877, + "step": 2339 + }, + { + "epoch": 3.43, + "learning_rate": 8.116224502380387e-06, + "loss": 0.8524, + "step": 2340 + }, + { + "epoch": 3.43, + "learning_rate": 8.069749965927808e-06, + "loss": 0.8995, + "step": 2341 + }, + { + "epoch": 3.43, + "learning_rate": 8.023403276226126e-06, + "loss": 0.8799, + "step": 2342 + }, + { + "epoch": 3.43, + "learning_rate": 7.977184497729384e-06, + "loss": 0.8406, + "step": 2343 + }, + { + "epoch": 3.43, + "learning_rate": 7.931093694713687e-06, + "loss": 0.8906, + "step": 2344 + }, + { + "epoch": 3.43, + "learning_rate": 7.885130931277218e-06, + "loss": 0.8706, + "step": 2345 + }, + { + 
"epoch": 3.44, + "learning_rate": 7.839296271340058e-06, + "loss": 0.8608, + "step": 2346 + }, + { + "epoch": 3.44, + "learning_rate": 7.793589778644116e-06, + "loss": 0.9749, + "step": 2347 + }, + { + "epoch": 3.44, + "learning_rate": 7.74801151675314e-06, + "loss": 0.8547, + "step": 2348 + }, + { + "epoch": 3.44, + "learning_rate": 7.702561549052445e-06, + "loss": 0.8711, + "step": 2349 + }, + { + "epoch": 3.44, + "learning_rate": 7.65723993874904e-06, + "loss": 0.8758, + "step": 2350 + }, + { + "epoch": 3.44, + "learning_rate": 7.612046748871327e-06, + "loss": 0.8865, + "step": 2351 + }, + { + "epoch": 3.44, + "learning_rate": 7.566982042269177e-06, + "loss": 0.8912, + "step": 2352 + }, + { + "epoch": 3.44, + "eval_loss": 1.004106879234314, + "eval_runtime": 2.6237, + "eval_samples_per_second": 416.206, + "eval_steps_per_second": 26.299, + "step": 2352 + }, + { + "epoch": 3.45, + "learning_rate": 7.522045881613737e-06, + "loss": 0.932, + "step": 2353 + }, + { + "epoch": 3.45, + "learning_rate": 7.477238329397418e-06, + "loss": 0.9275, + "step": 2354 + }, + { + "epoch": 3.45, + "learning_rate": 7.432559447933785e-06, + "loss": 0.8213, + "step": 2355 + }, + { + "epoch": 3.45, + "learning_rate": 7.3880092993574125e-06, + "loss": 0.9243, + "step": 2356 + }, + { + "epoch": 3.45, + "learning_rate": 7.3435879456239085e-06, + "loss": 0.894, + "step": 2357 + }, + { + "epoch": 3.45, + "learning_rate": 7.299295448509724e-06, + "loss": 0.8915, + "step": 2358 + }, + { + "epoch": 3.45, + "learning_rate": 7.255131869612108e-06, + "loss": 0.8819, + "step": 2359 + }, + { + "epoch": 3.46, + "learning_rate": 7.211097270349066e-06, + "loss": 0.9231, + "step": 2360 + }, + { + "epoch": 3.46, + "learning_rate": 7.167191711959198e-06, + "loss": 0.8868, + "step": 2361 + }, + { + "epoch": 3.46, + "learning_rate": 7.123415255501653e-06, + "loss": 0.9077, + "step": 2362 + }, + { + "epoch": 3.46, + "learning_rate": 7.0797679618560095e-06, + "loss": 0.8692, + "step": 2363 + }, + { + "epoch": 
3.46, + "learning_rate": 7.03624989172228e-06, + "loss": 0.8744, + "step": 2364 + }, + { + "epoch": 3.46, + "learning_rate": 6.992861105620752e-06, + "loss": 0.8121, + "step": 2365 + }, + { + "epoch": 3.46, + "learning_rate": 6.949601663891891e-06, + "loss": 0.8156, + "step": 2366 + }, + { + "epoch": 3.47, + "learning_rate": 6.906471626696287e-06, + "loss": 0.8274, + "step": 2367 + }, + { + "epoch": 3.47, + "learning_rate": 6.863471054014592e-06, + "loss": 0.9709, + "step": 2368 + }, + { + "epoch": 3.47, + "learning_rate": 6.820600005647382e-06, + "loss": 0.9252, + "step": 2369 + }, + { + "epoch": 3.47, + "learning_rate": 6.777858541215143e-06, + "loss": 0.9281, + "step": 2370 + }, + { + "epoch": 3.47, + "learning_rate": 6.735246720158117e-06, + "loss": 0.9156, + "step": 2371 + }, + { + "epoch": 3.47, + "learning_rate": 6.692764601736268e-06, + "loss": 0.855, + "step": 2372 + }, + { + "epoch": 3.48, + "learning_rate": 6.6504122450291804e-06, + "loss": 0.8655, + "step": 2373 + }, + { + "epoch": 3.48, + "learning_rate": 6.608189708935964e-06, + "loss": 0.8569, + "step": 2374 + }, + { + "epoch": 3.48, + "learning_rate": 6.566097052175213e-06, + "loss": 0.8501, + "step": 2375 + }, + { + "epoch": 3.48, + "learning_rate": 6.524134333284904e-06, + "loss": 0.9269, + "step": 2376 + }, + { + "epoch": 3.48, + "learning_rate": 6.4823016106222654e-06, + "loss": 0.9151, + "step": 2377 + }, + { + "epoch": 3.48, + "learning_rate": 6.440598942363796e-06, + "loss": 0.8744, + "step": 2378 + }, + { + "epoch": 3.48, + "learning_rate": 6.3990263865050695e-06, + "loss": 0.9239, + "step": 2379 + }, + { + "epoch": 3.49, + "learning_rate": 6.357584000860761e-06, + "loss": 0.9291, + "step": 2380 + }, + { + "epoch": 3.49, + "learning_rate": 6.316271843064536e-06, + "loss": 0.9394, + "step": 2381 + }, + { + "epoch": 3.49, + "learning_rate": 6.275089970568882e-06, + "loss": 0.8364, + "step": 2382 + }, + { + "epoch": 3.49, + "learning_rate": 6.234038440645163e-06, + "loss": 0.9144, + "step": 
2383 + }, + { + "epoch": 3.49, + "learning_rate": 6.1931173103834115e-06, + "loss": 0.8727, + "step": 2384 + }, + { + "epoch": 3.49, + "learning_rate": 6.152326636692418e-06, + "loss": 0.8763, + "step": 2385 + }, + { + "epoch": 3.49, + "learning_rate": 6.111666476299438e-06, + "loss": 0.8333, + "step": 2386 + }, + { + "epoch": 3.5, + "learning_rate": 6.071136885750272e-06, + "loss": 0.8994, + "step": 2387 + }, + { + "epoch": 3.5, + "learning_rate": 6.030737921409169e-06, + "loss": 0.9433, + "step": 2388 + }, + { + "epoch": 3.5, + "learning_rate": 5.9904696394586405e-06, + "loss": 0.9362, + "step": 2389 + }, + { + "epoch": 3.5, + "learning_rate": 5.950332095899547e-06, + "loss": 0.948, + "step": 2390 + }, + { + "epoch": 3.5, + "learning_rate": 5.9103253465508605e-06, + "loss": 0.8242, + "step": 2391 + }, + { + "epoch": 3.5, + "learning_rate": 5.870449447049686e-06, + "loss": 0.918, + "step": 2392 + }, + { + "epoch": 3.51, + "learning_rate": 5.830704452851166e-06, + "loss": 0.8843, + "step": 2393 + }, + { + "epoch": 3.51, + "learning_rate": 5.791090419228351e-06, + "loss": 0.8805, + "step": 2394 + }, + { + "epoch": 3.51, + "learning_rate": 5.751607401272241e-06, + "loss": 0.9051, + "step": 2395 + }, + { + "epoch": 3.51, + "learning_rate": 5.71225545389158e-06, + "loss": 0.9524, + "step": 2396 + }, + { + "epoch": 3.51, + "learning_rate": 5.6730346318128455e-06, + "loss": 1.0244, + "step": 2397 + }, + { + "epoch": 3.51, + "learning_rate": 5.633944989580153e-06, + "loss": 0.8581, + "step": 2398 + }, + { + "epoch": 3.51, + "learning_rate": 5.594986581555173e-06, + "loss": 0.8754, + "step": 2399 + }, + { + "epoch": 3.52, + "learning_rate": 5.556159461917121e-06, + "loss": 0.9386, + "step": 2400 + }, + { + "epoch": 3.52, + "learning_rate": 5.517463684662582e-06, + "loss": 0.8832, + "step": 2401 + }, + { + "epoch": 3.52, + "learning_rate": 5.4788993036055115e-06, + "loss": 0.8422, + "step": 2402 + }, + { + "epoch": 3.52, + "learning_rate": 5.440466372377095e-06, + "loss": 
0.8945, + "step": 2403 + }, + { + "epoch": 3.52, + "learning_rate": 5.402164944425758e-06, + "loss": 0.8111, + "step": 2404 + }, + { + "epoch": 3.52, + "learning_rate": 5.363995073017047e-06, + "loss": 0.8646, + "step": 2405 + }, + { + "epoch": 3.52, + "learning_rate": 5.325956811233512e-06, + "loss": 0.849, + "step": 2406 + }, + { + "epoch": 3.53, + "learning_rate": 5.288050211974694e-06, + "loss": 0.8714, + "step": 2407 + }, + { + "epoch": 3.53, + "learning_rate": 5.250275327957032e-06, + "loss": 0.8775, + "step": 2408 + }, + { + "epoch": 3.53, + "learning_rate": 5.212632211713797e-06, + "loss": 0.8481, + "step": 2409 + }, + { + "epoch": 3.53, + "learning_rate": 5.1751209155949995e-06, + "loss": 0.9742, + "step": 2410 + }, + { + "epoch": 3.53, + "learning_rate": 5.137741491767345e-06, + "loss": 0.8831, + "step": 2411 + }, + { + "epoch": 3.53, + "learning_rate": 5.100493992214128e-06, + "loss": 0.8925, + "step": 2412 + }, + { + "epoch": 3.54, + "learning_rate": 5.0633784687351915e-06, + "loss": 0.9058, + "step": 2413 + }, + { + "epoch": 3.54, + "learning_rate": 5.026394972946813e-06, + "loss": 0.9003, + "step": 2414 + }, + { + "epoch": 3.54, + "learning_rate": 4.989543556281695e-06, + "loss": 0.8197, + "step": 2415 + }, + { + "epoch": 3.54, + "learning_rate": 4.9528242699888535e-06, + "loss": 0.8675, + "step": 2416 + }, + { + "epoch": 3.54, + "learning_rate": 4.916237165133519e-06, + "loss": 0.9537, + "step": 2417 + }, + { + "epoch": 3.54, + "learning_rate": 4.8797822925971235e-06, + "loss": 0.9662, + "step": 2418 + }, + { + "epoch": 3.54, + "learning_rate": 4.843459703077202e-06, + "loss": 0.9332, + "step": 2419 + }, + { + "epoch": 3.55, + "learning_rate": 4.807269447087348e-06, + "loss": 0.828, + "step": 2420 + }, + { + "epoch": 3.55, + "learning_rate": 4.7712115749570685e-06, + "loss": 0.8799, + "step": 2421 + }, + { + "epoch": 3.55, + "learning_rate": 4.735286136831807e-06, + "loss": 0.9163, + "step": 2422 + }, + { + "epoch": 3.55, + "learning_rate": 
4.69949318267281e-06, + "loss": 0.8946, + "step": 2423 + }, + { + "epoch": 3.55, + "learning_rate": 4.66383276225707e-06, + "loss": 0.9073, + "step": 2424 + }, + { + "epoch": 3.55, + "learning_rate": 4.628304925177318e-06, + "loss": 0.8648, + "step": 2425 + }, + { + "epoch": 3.55, + "learning_rate": 4.5929097208418425e-06, + "loss": 0.8846, + "step": 2426 + }, + { + "epoch": 3.56, + "learning_rate": 4.557647198474491e-06, + "loss": 0.9423, + "step": 2427 + }, + { + "epoch": 3.56, + "learning_rate": 4.5225174071146455e-06, + "loss": 0.8671, + "step": 2428 + }, + { + "epoch": 3.56, + "learning_rate": 4.487520395617029e-06, + "loss": 0.9262, + "step": 2429 + }, + { + "epoch": 3.56, + "learning_rate": 4.452656212651751e-06, + "loss": 0.8335, + "step": 2430 + }, + { + "epoch": 3.56, + "learning_rate": 4.417924906704185e-06, + "loss": 0.8801, + "step": 2431 + }, + { + "epoch": 3.56, + "learning_rate": 4.383326526074916e-06, + "loss": 0.895, + "step": 2432 + }, + { + "epoch": 3.56, + "learning_rate": 4.3488611188796545e-06, + "loss": 0.9591, + "step": 2433 + }, + { + "epoch": 3.57, + "learning_rate": 4.314528733049206e-06, + "loss": 0.8998, + "step": 2434 + }, + { + "epoch": 3.57, + "learning_rate": 4.280329416329365e-06, + "loss": 0.9133, + "step": 2435 + }, + { + "epoch": 3.57, + "learning_rate": 4.24626321628091e-06, + "loss": 0.8861, + "step": 2436 + }, + { + "epoch": 3.57, + "learning_rate": 4.212330180279456e-06, + "loss": 0.8638, + "step": 2437 + }, + { + "epoch": 3.57, + "learning_rate": 4.1785303555154085e-06, + "loss": 0.8281, + "step": 2438 + }, + { + "epoch": 3.57, + "learning_rate": 4.144863788993991e-06, + "loss": 0.9246, + "step": 2439 + }, + { + "epoch": 3.58, + "learning_rate": 4.111330527535029e-06, + "loss": 0.9092, + "step": 2440 + }, + { + "epoch": 3.58, + "learning_rate": 4.077930617773007e-06, + "loss": 0.8681, + "step": 2441 + }, + { + "epoch": 3.58, + "learning_rate": 4.044664106156915e-06, + "loss": 0.8603, + "step": 2442 + }, + { + "epoch": 
3.58, + "learning_rate": 4.0115310389503004e-06, + "loss": 0.8667, + "step": 2443 + }, + { + "epoch": 3.58, + "learning_rate": 3.9785314622310495e-06, + "loss": 0.921, + "step": 2444 + }, + { + "epoch": 3.58, + "learning_rate": 3.945665421891465e-06, + "loss": 0.8891, + "step": 2445 + }, + { + "epoch": 3.58, + "learning_rate": 3.912932963638116e-06, + "loss": 0.8857, + "step": 2446 + }, + { + "epoch": 3.59, + "learning_rate": 3.880334132991792e-06, + "loss": 0.8933, + "step": 2447 + }, + { + "epoch": 3.59, + "learning_rate": 3.84786897528745e-06, + "loss": 0.961, + "step": 2448 + }, + { + "epoch": 3.59, + "learning_rate": 3.815537535674174e-06, + "loss": 0.9356, + "step": 2449 + }, + { + "epoch": 3.59, + "learning_rate": 3.783339859115065e-06, + "loss": 0.907, + "step": 2450 + }, + { + "epoch": 3.59, + "learning_rate": 3.751275990387193e-06, + "loss": 0.7907, + "step": 2451 + }, + { + "epoch": 3.59, + "learning_rate": 3.7193459740815674e-06, + "loss": 0.832, + "step": 2452 + }, + { + "epoch": 3.59, + "learning_rate": 3.687549854603023e-06, + "loss": 0.8745, + "step": 2453 + }, + { + "epoch": 3.6, + "learning_rate": 3.655887676170222e-06, + "loss": 0.8743, + "step": 2454 + }, + { + "epoch": 3.6, + "learning_rate": 3.624359482815509e-06, + "loss": 0.8262, + "step": 2455 + }, + { + "epoch": 3.6, + "learning_rate": 3.592965318384944e-06, + "loss": 0.8567, + "step": 2456 + }, + { + "epoch": 3.6, + "learning_rate": 3.561705226538148e-06, + "loss": 0.8733, + "step": 2457 + }, + { + "epoch": 3.6, + "learning_rate": 3.5305792507483117e-06, + "loss": 0.9398, + "step": 2458 + }, + { + "epoch": 3.6, + "learning_rate": 3.4995874343021094e-06, + "loss": 0.8461, + "step": 2459 + }, + { + "epoch": 3.61, + "learning_rate": 3.4687298202996655e-06, + "loss": 0.8773, + "step": 2460 + }, + { + "epoch": 3.61, + "learning_rate": 3.4380064516544297e-06, + "loss": 0.9523, + "step": 2461 + }, + { + "epoch": 3.61, + "learning_rate": 3.40741737109318e-06, + "loss": 0.8672, + "step": 2462 + }, 
+ { + "epoch": 3.61, + "learning_rate": 3.3769626211559102e-06, + "loss": 0.8343, + "step": 2463 + }, + { + "epoch": 3.61, + "learning_rate": 3.3466422441958634e-06, + "loss": 0.8847, + "step": 2464 + }, + { + "epoch": 3.61, + "learning_rate": 3.3164562823793653e-06, + "loss": 0.8854, + "step": 2465 + }, + { + "epoch": 3.61, + "learning_rate": 3.286404777685792e-06, + "loss": 0.8703, + "step": 2466 + }, + { + "epoch": 3.62, + "learning_rate": 3.25648777190759e-06, + "loss": 0.8572, + "step": 2467 + }, + { + "epoch": 3.62, + "learning_rate": 3.226705306650113e-06, + "loss": 0.8701, + "step": 2468 + }, + { + "epoch": 3.62, + "learning_rate": 3.1970574233316397e-06, + "loss": 0.8247, + "step": 2469 + }, + { + "epoch": 3.62, + "learning_rate": 3.167544163183256e-06, + "loss": 0.9345, + "step": 2470 + }, + { + "epoch": 3.62, + "learning_rate": 3.138165567248863e-06, + "loss": 0.8971, + "step": 2471 + }, + { + "epoch": 3.62, + "learning_rate": 3.1089216763850572e-06, + "loss": 0.9541, + "step": 2472 + }, + { + "epoch": 3.62, + "learning_rate": 3.079812531261095e-06, + "loss": 0.8111, + "step": 2473 + }, + { + "epoch": 3.63, + "learning_rate": 3.050838172358883e-06, + "loss": 0.8347, + "step": 2474 + }, + { + "epoch": 3.63, + "learning_rate": 3.021998639972845e-06, + "loss": 0.8189, + "step": 2475 + }, + { + "epoch": 3.63, + "learning_rate": 2.9932939742099208e-06, + "loss": 0.8757, + "step": 2476 + }, + { + "epoch": 3.63, + "learning_rate": 2.9647242149895006e-06, + "loss": 0.9051, + "step": 2477 + }, + { + "epoch": 3.63, + "learning_rate": 2.936289402043313e-06, + "loss": 0.9259, + "step": 2478 + }, + { + "epoch": 3.63, + "learning_rate": 2.9079895749154927e-06, + "loss": 0.8777, + "step": 2479 + }, + { + "epoch": 3.63, + "learning_rate": 2.8798247729623806e-06, + "loss": 0.8496, + "step": 2480 + }, + { + "epoch": 3.64, + "learning_rate": 2.851795035352578e-06, + "loss": 0.8094, + "step": 2481 + }, + { + "epoch": 3.64, + "learning_rate": 2.8239004010668367e-06, + 
"loss": 0.8504, + "step": 2482 + }, + { + "epoch": 3.64, + "learning_rate": 2.796140908898026e-06, + "loss": 0.8756, + "step": 2483 + }, + { + "epoch": 3.64, + "learning_rate": 2.7685165974510986e-06, + "loss": 0.9191, + "step": 2484 + }, + { + "epoch": 3.64, + "learning_rate": 2.741027505142979e-06, + "loss": 0.8877, + "step": 2485 + }, + { + "epoch": 3.64, + "learning_rate": 2.7136736702025433e-06, + "loss": 0.8375, + "step": 2486 + }, + { + "epoch": 3.65, + "learning_rate": 2.6864551306705842e-06, + "loss": 0.9617, + "step": 2487 + }, + { + "epoch": 3.65, + "learning_rate": 2.659371924399734e-06, + "loss": 0.8413, + "step": 2488 + }, + { + "epoch": 3.65, + "learning_rate": 2.6324240890544193e-06, + "loss": 0.9178, + "step": 2489 + }, + { + "epoch": 3.65, + "learning_rate": 2.605611662110785e-06, + "loss": 0.8993, + "step": 2490 + }, + { + "epoch": 3.65, + "learning_rate": 2.578934680856715e-06, + "loss": 0.8596, + "step": 2491 + }, + { + "epoch": 3.65, + "learning_rate": 2.552393182391677e-06, + "loss": 0.908, + "step": 2492 + }, + { + "epoch": 3.65, + "learning_rate": 2.5259872036267564e-06, + "loss": 0.8652, + "step": 2493 + }, + { + "epoch": 3.66, + "learning_rate": 2.499716781284556e-06, + "loss": 0.8642, + "step": 2494 + }, + { + "epoch": 3.66, + "learning_rate": 2.473581951899184e-06, + "loss": 0.8744, + "step": 2495 + }, + { + "epoch": 3.66, + "learning_rate": 2.4475827518161356e-06, + "loss": 0.8064, + "step": 2496 + }, + { + "epoch": 3.66, + "learning_rate": 2.421719217192331e-06, + "loss": 0.7801, + "step": 2497 + }, + { + "epoch": 3.66, + "learning_rate": 2.395991383995999e-06, + "loss": 0.9131, + "step": 2498 + }, + { + "epoch": 3.66, + "learning_rate": 2.3703992880066638e-06, + "loss": 0.8389, + "step": 2499 + }, + { + "epoch": 3.66, + "learning_rate": 2.3449429648150665e-06, + "loss": 0.8062, + "step": 2500 + }, + { + "epoch": 3.67, + "learning_rate": 2.3196224498231443e-06, + "loss": 0.9015, + "step": 2501 + }, + { + "epoch": 3.67, + 
"learning_rate": 2.294437778243963e-06, + "loss": 0.9179, + "step": 2502 + }, + { + "epoch": 3.67, + "learning_rate": 2.2693889851016393e-06, + "loss": 0.9013, + "step": 2503 + }, + { + "epoch": 3.67, + "learning_rate": 2.2444761052313856e-06, + "loss": 0.8867, + "step": 2504 + }, + { + "epoch": 3.67, + "learning_rate": 2.219699173279355e-06, + "loss": 0.8618, + "step": 2505 + }, + { + "epoch": 3.67, + "learning_rate": 2.1950582237026394e-06, + "loss": 0.8839, + "step": 2506 + }, + { + "epoch": 3.68, + "learning_rate": 2.1705532907692615e-06, + "loss": 0.9381, + "step": 2507 + }, + { + "epoch": 3.68, + "learning_rate": 2.1461844085580385e-06, + "loss": 0.793, + "step": 2508 + }, + { + "epoch": 3.68, + "learning_rate": 2.1219516109586056e-06, + "loss": 0.8423, + "step": 2509 + }, + { + "epoch": 3.68, + "learning_rate": 2.0978549316713612e-06, + "loss": 0.8737, + "step": 2510 + }, + { + "epoch": 3.68, + "learning_rate": 2.073894404207366e-06, + "loss": 0.8675, + "step": 2511 + }, + { + "epoch": 3.68, + "learning_rate": 2.050070061888365e-06, + "loss": 0.8494, + "step": 2512 + }, + { + "epoch": 3.68, + "learning_rate": 2.0263819378466884e-06, + "loss": 0.9556, + "step": 2513 + }, + { + "epoch": 3.69, + "learning_rate": 2.002830065025263e-06, + "loss": 0.8991, + "step": 2514 + }, + { + "epoch": 3.69, + "learning_rate": 1.9794144761775212e-06, + "loss": 0.8551, + "step": 2515 + }, + { + "epoch": 3.69, + "learning_rate": 1.9561352038673263e-06, + "loss": 0.8836, + "step": 2516 + }, + { + "epoch": 3.69, + "learning_rate": 1.9329922804690258e-06, + "loss": 0.9173, + "step": 2517 + }, + { + "epoch": 3.69, + "learning_rate": 1.909985738167308e-06, + "loss": 0.8696, + "step": 2518 + }, + { + "epoch": 3.69, + "learning_rate": 1.8871156089572018e-06, + "loss": 0.8177, + "step": 2519 + }, + { + "epoch": 3.69, + "learning_rate": 1.8643819246440542e-06, + "loss": 0.7828, + "step": 2520 + }, + { + "epoch": 3.69, + "eval_loss": 1.0041353702545166, + "eval_runtime": 2.6123, + 
"eval_samples_per_second": 418.024, + "eval_steps_per_second": 26.414, + "step": 2520 + }, + { + "epoch": 3.7, + "learning_rate": 1.841784716843431e-06, + "loss": 0.8021, + "step": 2521 + }, + { + "epoch": 3.7, + "learning_rate": 1.8193240169810943e-06, + "loss": 0.878, + "step": 2522 + }, + { + "epoch": 3.7, + "learning_rate": 1.7969998562929913e-06, + "loss": 0.8315, + "step": 2523 + }, + { + "epoch": 3.7, + "learning_rate": 1.7748122658251876e-06, + "loss": 0.9345, + "step": 2524 + }, + { + "epoch": 3.7, + "learning_rate": 1.7527612764338008e-06, + "loss": 0.9141, + "step": 2525 + }, + { + "epoch": 3.7, + "learning_rate": 1.7308469187849896e-06, + "loss": 0.9472, + "step": 2526 + }, + { + "epoch": 3.7, + "learning_rate": 1.7090692233548866e-06, + "loss": 0.7864, + "step": 2527 + }, + { + "epoch": 3.71, + "learning_rate": 1.6874282204295766e-06, + "loss": 0.794, + "step": 2528 + }, + { + "epoch": 3.71, + "learning_rate": 1.665923940105074e-06, + "loss": 0.9277, + "step": 2529 + }, + { + "epoch": 3.71, + "learning_rate": 1.6445564122872015e-06, + "loss": 0.8783, + "step": 2530 + }, + { + "epoch": 3.71, + "learning_rate": 1.623325666691644e-06, + "loss": 0.9033, + "step": 2531 + }, + { + "epoch": 3.71, + "learning_rate": 1.6022317328438506e-06, + "loss": 0.9331, + "step": 2532 + }, + { + "epoch": 3.71, + "learning_rate": 1.5812746400790001e-06, + "loss": 0.814, + "step": 2533 + }, + { + "epoch": 3.72, + "learning_rate": 1.56045441754199e-06, + "loss": 0.936, + "step": 2534 + }, + { + "epoch": 3.72, + "learning_rate": 1.5397710941873366e-06, + "loss": 0.8678, + "step": 2535 + }, + { + "epoch": 3.72, + "learning_rate": 1.5192246987791981e-06, + "loss": 0.8692, + "step": 2536 + }, + { + "epoch": 3.72, + "learning_rate": 1.4988152598913064e-06, + "loss": 0.9103, + "step": 2537 + }, + { + "epoch": 3.72, + "learning_rate": 1.478542805906913e-06, + "loss": 0.8532, + "step": 2538 + }, + { + "epoch": 3.72, + "learning_rate": 1.4584073650187878e-06, + "loss": 0.8255, + 
"step": 2539 + }, + { + "epoch": 3.72, + "learning_rate": 1.4384089652291543e-06, + "loss": 0.8267, + "step": 2540 + }, + { + "epoch": 3.73, + "learning_rate": 1.4185476343496317e-06, + "loss": 0.8717, + "step": 2541 + }, + { + "epoch": 3.73, + "learning_rate": 1.3988234000012367e-06, + "loss": 0.9405, + "step": 2542 + }, + { + "epoch": 3.73, + "learning_rate": 1.3792362896143162e-06, + "loss": 0.8314, + "step": 2543 + }, + { + "epoch": 3.73, + "learning_rate": 1.3597863304285475e-06, + "loss": 0.8815, + "step": 2544 + }, + { + "epoch": 3.73, + "learning_rate": 1.3404735494928266e-06, + "loss": 0.8933, + "step": 2545 + }, + { + "epoch": 3.73, + "learning_rate": 1.3212979736653142e-06, + "loss": 0.8637, + "step": 2546 + }, + { + "epoch": 3.73, + "learning_rate": 1.302259629613356e-06, + "loss": 0.981, + "step": 2547 + }, + { + "epoch": 3.74, + "learning_rate": 1.2833585438134287e-06, + "loss": 0.9305, + "step": 2548 + }, + { + "epoch": 3.74, + "learning_rate": 1.2645947425511395e-06, + "loss": 0.9079, + "step": 2549 + }, + { + "epoch": 3.74, + "learning_rate": 1.2459682519211923e-06, + "loss": 0.8601, + "step": 2550 + }, + { + "epoch": 3.74, + "learning_rate": 1.2274790978272998e-06, + "loss": 0.8278, + "step": 2551 + }, + { + "epoch": 3.74, + "learning_rate": 1.209127305982205e-06, + "loss": 0.8512, + "step": 2552 + }, + { + "epoch": 3.74, + "learning_rate": 1.1909129019076036e-06, + "loss": 0.9256, + "step": 2553 + }, + { + "epoch": 3.75, + "learning_rate": 1.1728359109341446e-06, + "loss": 0.8918, + "step": 2554 + }, + { + "epoch": 3.75, + "learning_rate": 1.1548963582013961e-06, + "loss": 0.8318, + "step": 2555 + }, + { + "epoch": 3.75, + "learning_rate": 1.1370942686577347e-06, + "loss": 0.8787, + "step": 2556 + }, + { + "epoch": 3.75, + "learning_rate": 1.1194296670604233e-06, + "loss": 0.9061, + "step": 2557 + }, + { + "epoch": 3.75, + "learning_rate": 1.1019025779754666e-06, + "loss": 0.8601, + "step": 2558 + }, + { + "epoch": 3.75, + "learning_rate": 
1.0845130257777114e-06, + "loss": 0.8982, + "step": 2559 + }, + { + "epoch": 3.75, + "learning_rate": 1.0672610346506463e-06, + "loss": 0.8479, + "step": 2560 + }, + { + "epoch": 3.76, + "learning_rate": 1.0501466285865126e-06, + "loss": 0.8491, + "step": 2561 + }, + { + "epoch": 3.76, + "learning_rate": 1.0331698313861937e-06, + "loss": 0.9286, + "step": 2562 + }, + { + "epoch": 3.76, + "learning_rate": 1.016330666659182e-06, + "loss": 0.7757, + "step": 2563 + }, + { + "epoch": 3.76, + "learning_rate": 9.996291578236228e-07, + "loss": 0.8271, + "step": 2564 + }, + { + "epoch": 3.76, + "learning_rate": 9.830653281061474e-07, + "loss": 0.8242, + "step": 2565 + }, + { + "epoch": 3.76, + "learning_rate": 9.666392005419856e-07, + "loss": 0.8585, + "step": 2566 + }, + { + "epoch": 3.76, + "learning_rate": 9.503507979748305e-07, + "loss": 0.8939, + "step": 2567 + }, + { + "epoch": 3.77, + "learning_rate": 9.342001430568292e-07, + "loss": 0.9021, + "step": 2568 + }, + { + "epoch": 3.77, + "learning_rate": 9.18187258248604e-07, + "loss": 0.8612, + "step": 2569 + }, + { + "epoch": 3.77, + "learning_rate": 9.023121658191636e-07, + "loss": 0.9391, + "step": 2570 + }, + { + "epoch": 3.77, + "learning_rate": 8.865748878458702e-07, + "loss": 0.9055, + "step": 2571 + }, + { + "epoch": 3.77, + "learning_rate": 8.709754462144615e-07, + "loss": 0.974, + "step": 2572 + }, + { + "epoch": 3.77, + "learning_rate": 8.555138626189618e-07, + "loss": 0.9572, + "step": 2573 + }, + { + "epoch": 3.77, + "learning_rate": 8.401901585616823e-07, + "loss": 0.8094, + "step": 2574 + }, + { + "epoch": 3.78, + "learning_rate": 8.250043553532094e-07, + "loss": 0.8996, + "step": 2575 + }, + { + "epoch": 3.78, + "learning_rate": 8.099564741123166e-07, + "loss": 0.8258, + "step": 2576 + }, + { + "epoch": 3.78, + "learning_rate": 7.950465357659864e-07, + "loss": 0.868, + "step": 2577 + }, + { + "epoch": 3.78, + "learning_rate": 7.802745610493656e-07, + "loss": 0.8502, + "step": 2578 + }, + { + "epoch": 
3.78, + "learning_rate": 7.656405705057435e-07, + "loss": 0.8995, + "step": 2579 + }, + { + "epoch": 3.78, + "learning_rate": 7.511445844864962e-07, + "loss": 0.9136, + "step": 2580 + }, + { + "epoch": 3.79, + "learning_rate": 7.367866231511089e-07, + "loss": 0.8804, + "step": 2581 + }, + { + "epoch": 3.79, + "learning_rate": 7.22566706467076e-07, + "loss": 0.8206, + "step": 2582 + }, + { + "epoch": 3.79, + "learning_rate": 7.084848542099342e-07, + "loss": 0.9475, + "step": 2583 + }, + { + "epoch": 3.79, + "learning_rate": 6.945410859632295e-07, + "loss": 0.8562, + "step": 2584 + }, + { + "epoch": 3.79, + "learning_rate": 6.807354211184613e-07, + "loss": 0.9127, + "step": 2585 + }, + { + "epoch": 3.79, + "learning_rate": 6.670678788750717e-07, + "loss": 0.9582, + "step": 2586 + }, + { + "epoch": 3.79, + "learning_rate": 6.535384782404009e-07, + "loss": 0.9698, + "step": 2587 + }, + { + "epoch": 3.8, + "learning_rate": 6.401472380297091e-07, + "loss": 0.8934, + "step": 2588 + }, + { + "epoch": 3.8, + "learning_rate": 6.268941768660886e-07, + "loss": 0.8583, + "step": 2589 + }, + { + "epoch": 3.8, + "learning_rate": 6.137793131804737e-07, + "loss": 0.7896, + "step": 2590 + }, + { + "epoch": 3.8, + "learning_rate": 6.008026652116305e-07, + "loss": 0.9075, + "step": 2591 + }, + { + "epoch": 3.8, + "learning_rate": 5.87964251006079e-07, + "loss": 0.8205, + "step": 2592 + }, + { + "epoch": 3.8, + "learning_rate": 5.752640884181037e-07, + "loss": 0.8261, + "step": 2593 + }, + { + "epoch": 3.8, + "learning_rate": 5.627021951097545e-07, + "loss": 0.8202, + "step": 2594 + }, + { + "epoch": 3.81, + "learning_rate": 5.502785885507456e-07, + "loss": 0.8793, + "step": 2595 + }, + { + "epoch": 3.81, + "learning_rate": 5.379932860185122e-07, + "loss": 0.8675, + "step": 2596 + }, + { + "epoch": 3.81, + "learning_rate": 5.258463045981432e-07, + "loss": 0.8722, + "step": 2597 + }, + { + "epoch": 3.81, + "learning_rate": 5.13837661182337e-07, + "loss": 0.8858, + "step": 2598 + }, + { 
+ "epoch": 3.81, + "learning_rate": 5.019673724714458e-07, + "loss": 0.9106, + "step": 2599 + }, + { + "epoch": 3.81, + "learning_rate": 4.902354549733978e-07, + "loss": 0.8488, + "step": 2600 + }, + { + "epoch": 3.82, + "learning_rate": 4.786419250036866e-07, + "loss": 0.879, + "step": 2601 + }, + { + "epoch": 3.82, + "learning_rate": 4.6718679868533734e-07, + "loss": 0.9044, + "step": 2602 + }, + { + "epoch": 3.82, + "learning_rate": 4.5587009194894004e-07, + "loss": 0.8939, + "step": 2603 + }, + { + "epoch": 3.82, + "learning_rate": 4.44691820532539e-07, + "loss": 0.8272, + "step": 2604 + }, + { + "epoch": 3.82, + "learning_rate": 4.336519999816879e-07, + "loss": 0.8755, + "step": 2605 + }, + { + "epoch": 3.82, + "learning_rate": 4.227506456493835e-07, + "loss": 0.9187, + "step": 2606 + }, + { + "epoch": 3.82, + "learning_rate": 4.1198777269605413e-07, + "loss": 0.9211, + "step": 2607 + }, + { + "epoch": 3.83, + "learning_rate": 4.0136339608957127e-07, + "loss": 0.9451, + "step": 2608 + }, + { + "epoch": 3.83, + "learning_rate": 3.908775306051604e-07, + "loss": 0.8035, + "step": 2609 + }, + { + "epoch": 3.83, + "learning_rate": 3.805301908254455e-07, + "loss": 0.8989, + "step": 2610 + }, + { + "epoch": 3.83, + "learning_rate": 3.7032139114041574e-07, + "loss": 0.8378, + "step": 2611 + }, + { + "epoch": 3.83, + "learning_rate": 3.6025114574734785e-07, + "loss": 0.8598, + "step": 2612 + }, + { + "epoch": 3.83, + "learning_rate": 3.5031946865088373e-07, + "loss": 0.8903, + "step": 2613 + }, + { + "epoch": 3.83, + "learning_rate": 3.405263736629416e-07, + "loss": 0.8726, + "step": 2614 + }, + { + "epoch": 3.84, + "learning_rate": 3.3087187440268287e-07, + "loss": 0.9027, + "step": 2615 + }, + { + "epoch": 3.84, + "learning_rate": 3.2135598429657853e-07, + "loss": 0.8502, + "step": 2616 + }, + { + "epoch": 3.84, + "learning_rate": 3.1197871657828725e-07, + "loss": 0.9659, + "step": 2617 + }, + { + "epoch": 3.84, + "learning_rate": 3.027400842887218e-07, + "loss": 
0.865, + "step": 2618 + }, + { + "epoch": 3.84, + "learning_rate": 2.9364010027599364e-07, + "loss": 0.8869, + "step": 2619 + }, + { + "epoch": 3.84, + "learning_rate": 2.846787771953574e-07, + "loss": 0.9345, + "step": 2620 + }, + { + "epoch": 3.85, + "learning_rate": 2.758561275092886e-07, + "loss": 0.9017, + "step": 2621 + }, + { + "epoch": 3.85, + "learning_rate": 2.6717216348737253e-07, + "loss": 0.8029, + "step": 2622 + }, + { + "epoch": 3.85, + "learning_rate": 2.586268972063377e-07, + "loss": 0.8922, + "step": 2623 + }, + { + "epoch": 3.85, + "learning_rate": 2.5022034055003364e-07, + "loss": 0.8617, + "step": 2624 + }, + { + "epoch": 3.85, + "learning_rate": 2.419525052093863e-07, + "loss": 0.8696, + "step": 2625 + }, + { + "epoch": 3.85, + "learning_rate": 2.3382340268242042e-07, + "loss": 0.9427, + "step": 2626 + }, + { + "epoch": 3.85, + "learning_rate": 2.2583304427421515e-07, + "loss": 0.9191, + "step": 2627 + }, + { + "epoch": 3.86, + "learning_rate": 2.179814410969261e-07, + "loss": 0.8317, + "step": 2628 + }, + { + "epoch": 3.86, + "learning_rate": 2.1026860406970772e-07, + "loss": 0.8796, + "step": 2629 + }, + { + "epoch": 3.86, + "learning_rate": 2.0269454391874666e-07, + "loss": 0.8337, + "step": 2630 + }, + { + "epoch": 3.86, + "learning_rate": 1.952592711772505e-07, + "loss": 0.793, + "step": 2631 + }, + { + "epoch": 3.86, + "learning_rate": 1.8796279618537028e-07, + "loss": 0.8609, + "step": 2632 + }, + { + "epoch": 3.86, + "learning_rate": 1.8080512909028903e-07, + "loss": 0.8442, + "step": 2633 + }, + { + "epoch": 3.86, + "learning_rate": 1.7378627984612207e-07, + "loss": 0.8088, + "step": 2634 + }, + { + "epoch": 3.87, + "learning_rate": 1.669062582139169e-07, + "loss": 0.8417, + "step": 2635 + }, + { + "epoch": 3.87, + "learning_rate": 1.6016507376169777e-07, + "loss": 0.8682, + "step": 2636 + }, + { + "epoch": 3.87, + "learning_rate": 1.5356273586435432e-07, + "loss": 0.8889, + "step": 2637 + }, + { + "epoch": 3.87, + "learning_rate": 
1.470992537037197e-07, + "loss": 0.898, + "step": 2638 + }, + { + "epoch": 3.87, + "learning_rate": 1.4077463626852582e-07, + "loss": 0.9413, + "step": 2639 + }, + { + "epoch": 3.87, + "learning_rate": 1.3458889235435922e-07, + "loss": 0.8557, + "step": 2640 + }, + { + "epoch": 3.87, + "learning_rate": 1.2854203056369417e-07, + "loss": 0.8772, + "step": 2641 + }, + { + "epoch": 3.88, + "learning_rate": 1.2263405930585948e-07, + "loss": 0.9191, + "step": 2642 + }, + { + "epoch": 3.88, + "learning_rate": 1.1686498679702728e-07, + "loss": 0.8603, + "step": 2643 + }, + { + "epoch": 3.88, + "learning_rate": 1.1123482106021322e-07, + "loss": 0.813, + "step": 2644 + }, + { + "epoch": 3.88, + "learning_rate": 1.0574356992525403e-07, + "loss": 0.8411, + "step": 2645 + }, + { + "epoch": 3.88, + "learning_rate": 1.003912410287744e-07, + "loss": 0.8772, + "step": 2646 + }, + { + "epoch": 3.88, + "learning_rate": 9.517784181422019e-08, + "loss": 0.9134, + "step": 2647 + }, + { + "epoch": 3.89, + "learning_rate": 9.010337953185843e-08, + "loss": 0.8586, + "step": 2648 + }, + { + "epoch": 3.89, + "learning_rate": 8.516786123867748e-08, + "loss": 0.8817, + "step": 2649 + }, + { + "epoch": 3.89, + "learning_rate": 8.037129379847574e-08, + "loss": 0.8549, + "step": 2650 + }, + { + "epoch": 3.89, + "learning_rate": 7.571368388181732e-08, + "loss": 0.8673, + "step": 2651 + }, + { + "epoch": 3.89, + "learning_rate": 7.119503796599868e-08, + "loss": 0.8912, + "step": 2652 + }, + { + "epoch": 3.89, + "learning_rate": 6.68153623350598e-08, + "loss": 0.9305, + "step": 2653 + }, + { + "epoch": 3.89, + "learning_rate": 6.25746630798063e-08, + "loss": 0.9182, + "step": 2654 + }, + { + "epoch": 3.9, + "learning_rate": 5.847294609773179e-08, + "loss": 0.8962, + "step": 2655 + }, + { + "epoch": 3.9, + "learning_rate": 5.451021709307336e-08, + "loss": 0.9156, + "step": 2656 + }, + { + "epoch": 3.9, + "learning_rate": 5.068648157675604e-08, + "loss": 0.8457, + "step": 2657 + }, + { + "epoch": 3.9, 
+ "learning_rate": 4.700174486644837e-08, + "loss": 0.9179, + "step": 2658 + }, + { + "epoch": 3.9, + "learning_rate": 4.3456012086462436e-08, + "loss": 0.8552, + "step": 2659 + }, + { + "epoch": 3.9, + "learning_rate": 4.0049288167842705e-08, + "loss": 0.9144, + "step": 2660 + }, + { + "epoch": 3.9, + "learning_rate": 3.6781577848277185e-08, + "loss": 0.8938, + "step": 2661 + }, + { + "epoch": 3.91, + "learning_rate": 3.365288567216407e-08, + "loss": 0.8687, + "step": 2662 + }, + { + "epoch": 3.91, + "learning_rate": 3.0663215990534014e-08, + "loss": 0.9071, + "step": 2663 + }, + { + "epoch": 3.91, + "learning_rate": 2.7812572961127824e-08, + "loss": 0.7512, + "step": 2664 + }, + { + "epoch": 3.91, + "learning_rate": 2.510096054829658e-08, + "loss": 0.889, + "step": 2665 + }, + { + "epoch": 3.91, + "learning_rate": 2.2528382523057113e-08, + "loss": 0.8895, + "step": 2666 + }, + { + "epoch": 3.91, + "learning_rate": 2.0094842463092012e-08, + "loss": 0.92, + "step": 2667 + }, + { + "epoch": 3.92, + "learning_rate": 1.7800343752683023e-08, + "loss": 0.9548, + "step": 2668 + }, + { + "epoch": 3.92, + "learning_rate": 1.564488958279986e-08, + "loss": 0.8678, + "step": 2669 + }, + { + "epoch": 3.92, + "learning_rate": 1.3628482951000277e-08, + "loss": 0.8236, + "step": 2670 + }, + { + "epoch": 3.92, + "learning_rate": 1.1751126661496692e-08, + "loss": 0.8774, + "step": 2671 + }, + { + "epoch": 3.92, + "learning_rate": 1.0012823325111776e-08, + "loss": 0.9323, + "step": 2672 + }, + { + "epoch": 3.92, + "learning_rate": 8.413575359289549e-09, + "loss": 0.8945, + "step": 2673 + }, + { + "epoch": 3.92, + "learning_rate": 6.953384988095391e-09, + "loss": 0.8771, + "step": 2674 + }, + { + "epoch": 3.93, + "learning_rate": 5.6322542422049266e-09, + "loss": 0.8803, + "step": 2675 + }, + { + "epoch": 3.93, + "learning_rate": 4.4501849589040355e-09, + "loss": 0.8569, + "step": 2676 + }, + { + "epoch": 3.93, + "learning_rate": 3.407178782088849e-09, + "loss": 0.8887, + "step": 
2677 + }, + { + "epoch": 3.93, + "learning_rate": 2.5032371622546457e-09, + "loss": 0.8661, + "step": 2678 + }, + { + "epoch": 3.93, + "learning_rate": 1.7383613565291612e-09, + "loss": 0.937, + "step": 2679 + }, + { + "epoch": 3.93, + "learning_rate": 1.1125524285948707e-09, + "loss": 0.8214, + "step": 2680 + }, + { + "epoch": 3.93, + "learning_rate": 6.258112487667056e-10, + "loss": 0.9334, + "step": 2681 + }, + { + "epoch": 3.94, + "learning_rate": 2.781384939476439e-10, + "loss": 0.8318, + "step": 2682 + }, + { + "epoch": 3.94, + "learning_rate": 6.953464766201734e-11, + "loss": 0.8687, + "step": 2683 + }, + { + "epoch": 3.94, + "learning_rate": 0.0, + "loss": 0.9612, + "step": 2684 + } + ], + "logging_steps": 1, + "max_steps": 2684, + "num_input_tokens_seen": 0, + "num_train_epochs": 4, + "save_steps": 671, + "total_flos": 8.33944645175083e+17, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-2684/training_args.bin b/checkpoint-2684/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..439fe237329d4c6dab9a083d1f0b3c5d2e07ff34 --- /dev/null +++ b/checkpoint-2684/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c9f074b3cd0fbc5cecae753dfd6c83754f9e22c6bc7af03db47b3beb5a1a41c9 +size 4923 diff --git a/checkpoint-671/README.md b/checkpoint-671/README.md new file mode 100644 index 0000000000000000000000000000000000000000..c40158a9bf29b5b6a4b1c7d97250d59a2f05ed92 --- /dev/null +++ b/checkpoint-671/README.md @@ -0,0 +1,204 @@ +--- +library_name: peft +base_model: openlm-research/open_llama_3b_v2 +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- 
**License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). 
+ +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] + + +### Framework versions + +- PEFT 0.8.2 \ No newline at end of file diff --git a/checkpoint-671/adapter_config.json b/checkpoint-671/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..a75a5db14cc030f9130cc346972670dcccc55fe2 --- /dev/null +++ b/checkpoint-671/adapter_config.json @@ -0,0 +1,32 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "openlm-research/open_llama_3b_v2", + "bias": "none", + "fan_in_fan_out": null, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 16, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "gate_proj", + "q_proj", + "down_proj", + "up_proj", + "k_proj", + "o_proj", + "v_proj" + ], + "task_type": "CAUSAL_LM", + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoint-671/adapter_model.safetensors b/checkpoint-671/adapter_model.safetensors new 
file mode 100644 index 0000000000000000000000000000000000000000..b3ce54d6ea0dac09dd4b643741c57cb02f37113d --- /dev/null +++ b/checkpoint-671/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:268c795230d77e0188bc13d06c17c1b38f65bd149f0d20f7de385ae49c2f4ad2 +size 50899792 diff --git a/checkpoint-671/optimizer.pt b/checkpoint-671/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..017821170fd803cdcb40edf2e6f31145afc43ebc --- /dev/null +++ b/checkpoint-671/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2ec5c16376ee9f665d2e5de631e50630df492e50c67e3fdc60231f0451086b74 +size 25871439 diff --git a/checkpoint-671/rng_state_0.pth b/checkpoint-671/rng_state_0.pth new file mode 100644 index 0000000000000000000000000000000000000000..c6764da930900576a91ed9455fa2b456a4fea7a1 --- /dev/null +++ b/checkpoint-671/rng_state_0.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b1e4d2347abaaa6fe6b08023a8a54f311906b950b507620c9f7c0cdcd401de08 +size 21687 diff --git a/checkpoint-671/rng_state_1.pth b/checkpoint-671/rng_state_1.pth new file mode 100644 index 0000000000000000000000000000000000000000..cc7f63d8801e95405d3831d31458a8c3b30d5883 --- /dev/null +++ b/checkpoint-671/rng_state_1.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:544079fab3b826e5dce9acc489d25154737a6bd660854a8028717ee26008f9a4 +size 21687 diff --git a/checkpoint-671/rng_state_2.pth b/checkpoint-671/rng_state_2.pth new file mode 100644 index 0000000000000000000000000000000000000000..d02bbe44ed25a9d50b133a38e92c29471ca3cd87 --- /dev/null +++ b/checkpoint-671/rng_state_2.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c165f6dc87a2991e51c9fc3a9157aefb5d4261bf28ce1e75c20c63d3e414e258 +size 21687 diff --git a/checkpoint-671/rng_state_3.pth b/checkpoint-671/rng_state_3.pth new file mode 100644 index 
0000000000000000000000000000000000000000..810b6aab1c9d8879a54b88bcbe991e134590ddd5 --- /dev/null +++ b/checkpoint-671/rng_state_3.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6c6e30cce5397f5441746ae7478e25224462a248a491c7f3ffcfe4b23d801aad +size 21687 diff --git a/checkpoint-671/rng_state_4.pth b/checkpoint-671/rng_state_4.pth new file mode 100644 index 0000000000000000000000000000000000000000..db9829a1a113e4489334b5913d98650638f15ee7 --- /dev/null +++ b/checkpoint-671/rng_state_4.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:15a14488cd681a89214a37f90536a5b0b408ba70ff9fc45df20f80fd5c4b3ccb +size 21687 diff --git a/checkpoint-671/rng_state_5.pth b/checkpoint-671/rng_state_5.pth new file mode 100644 index 0000000000000000000000000000000000000000..77c66e9efe7ae75827ff9ebf67c030db3ef8f60d --- /dev/null +++ b/checkpoint-671/rng_state_5.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0aa86a7f9ba14c1ced39bd14662f173af3b7617fe740644b7824d65bd873bc3d +size 21687 diff --git a/checkpoint-671/rng_state_6.pth b/checkpoint-671/rng_state_6.pth new file mode 100644 index 0000000000000000000000000000000000000000..06c8a13b28f06dfaf15964b60d8c92b5a47e8e02 --- /dev/null +++ b/checkpoint-671/rng_state_6.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:82d896a8b7ed957a6863ebc3e5262a9c3b6bf895ff7cdcce511ff4b907395897 +size 21687 diff --git a/checkpoint-671/rng_state_7.pth b/checkpoint-671/rng_state_7.pth new file mode 100644 index 0000000000000000000000000000000000000000..e86cbd4568f3cf067e339ca56d98997a10d8f7d4 --- /dev/null +++ b/checkpoint-671/rng_state_7.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7cb2cc47e311f59ccba651e8282feb7e8ff6560547e27052498506bcc93bdd44 +size 21687 diff --git a/checkpoint-671/scheduler.pt b/checkpoint-671/scheduler.pt new file mode 100644 index 
0000000000000000000000000000000000000000..043c494ab636fe456f233813632bba229cc37fc9 --- /dev/null +++ b/checkpoint-671/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e90ce6649713daef503c0a8b492fc254373f8609344fafc3bdb955f8f27b2fad +size 627 diff --git a/checkpoint-671/trainer_state.json b/checkpoint-671/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..9bbaefeddb408ae6445b9118a2c36edd0f44260c --- /dev/null +++ b/checkpoint-671/trainer_state.json @@ -0,0 +1,4079 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.0, + "eval_steps": 168, + "global_step": 671, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 1e-05, + "loss": 1.3745, + "step": 1 + }, + { + "epoch": 0.0, + "eval_loss": 1.6296857595443726, + "eval_runtime": 2.6662, + "eval_samples_per_second": 409.572, + "eval_steps_per_second": 25.88, + "step": 1 + }, + { + "epoch": 0.0, + "learning_rate": 2e-05, + "loss": 1.42, + "step": 2 + }, + { + "epoch": 0.0, + "learning_rate": 3e-05, + "loss": 1.3057, + "step": 3 + }, + { + "epoch": 0.01, + "learning_rate": 4e-05, + "loss": 1.2307, + "step": 4 + }, + { + "epoch": 0.01, + "learning_rate": 5e-05, + "loss": 1.289, + "step": 5 + }, + { + "epoch": 0.01, + "learning_rate": 6e-05, + "loss": 1.4111, + "step": 6 + }, + { + "epoch": 0.01, + "learning_rate": 7e-05, + "loss": 1.3089, + "step": 7 + }, + { + "epoch": 0.01, + "learning_rate": 8e-05, + "loss": 1.3204, + "step": 8 + }, + { + "epoch": 0.01, + "learning_rate": 9e-05, + "loss": 1.3575, + "step": 9 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001, + "loss": 1.3279, + "step": 10 + }, + { + "epoch": 0.02, + "learning_rate": 0.00011000000000000002, + "loss": 1.3149, + "step": 11 + }, + { + "epoch": 0.02, + "learning_rate": 0.00012, + "loss": 1.2578, + "step": 12 + }, + { + "epoch": 0.02, + "learning_rate": 
0.00013000000000000002, + "loss": 1.2849, + "step": 13 + }, + { + "epoch": 0.02, + "learning_rate": 0.00014, + "loss": 1.2971, + "step": 14 + }, + { + "epoch": 0.02, + "learning_rate": 0.00015000000000000001, + "loss": 1.1473, + "step": 15 + }, + { + "epoch": 0.02, + "learning_rate": 0.00016, + "loss": 1.1943, + "step": 16 + }, + { + "epoch": 0.03, + "learning_rate": 0.00017, + "loss": 1.1877, + "step": 17 + }, + { + "epoch": 0.03, + "learning_rate": 0.00018, + "loss": 1.1984, + "step": 18 + }, + { + "epoch": 0.03, + "learning_rate": 0.00019, + "loss": 1.2647, + "step": 19 + }, + { + "epoch": 0.03, + "learning_rate": 0.0002, + "loss": 1.217, + "step": 20 + }, + { + "epoch": 0.03, + "learning_rate": 0.00019999993046535236, + "loss": 1.0274, + "step": 21 + }, + { + "epoch": 0.03, + "learning_rate": 0.00019999972186150606, + "loss": 1.2122, + "step": 22 + }, + { + "epoch": 0.03, + "learning_rate": 0.00019999937418875124, + "loss": 1.1868, + "step": 23 + }, + { + "epoch": 0.04, + "learning_rate": 0.00019999888744757143, + "loss": 1.2345, + "step": 24 + }, + { + "epoch": 0.04, + "learning_rate": 0.00019999826163864348, + "loss": 1.2127, + "step": 25 + }, + { + "epoch": 0.04, + "learning_rate": 0.00019999749676283775, + "loss": 1.2114, + "step": 26 + }, + { + "epoch": 0.04, + "learning_rate": 0.00019999659282121792, + "loss": 1.2224, + "step": 27 + }, + { + "epoch": 0.04, + "learning_rate": 0.0001999955498150411, + "loss": 1.1517, + "step": 28 + }, + { + "epoch": 0.04, + "learning_rate": 0.0001999943677457578, + "loss": 1.1631, + "step": 29 + }, + { + "epoch": 0.04, + "learning_rate": 0.0001999930466150119, + "loss": 1.0465, + "step": 30 + }, + { + "epoch": 0.05, + "learning_rate": 0.0001999915864246407, + "loss": 1.1847, + "step": 31 + }, + { + "epoch": 0.05, + "learning_rate": 0.0001999899871766749, + "loss": 1.1238, + "step": 32 + }, + { + "epoch": 0.05, + "learning_rate": 0.0001999882488733385, + "loss": 1.1491, + "step": 33 + }, + { + "epoch": 0.05, + 
"learning_rate": 0.000199986371517049, + "loss": 1.276, + "step": 34 + }, + { + "epoch": 0.05, + "learning_rate": 0.0001999843551104172, + "loss": 1.0911, + "step": 35 + }, + { + "epoch": 0.05, + "learning_rate": 0.00019998219965624734, + "loss": 1.1276, + "step": 36 + }, + { + "epoch": 0.06, + "learning_rate": 0.00019997990515753693, + "loss": 1.0981, + "step": 37 + }, + { + "epoch": 0.06, + "learning_rate": 0.00019997747161747695, + "loss": 1.0901, + "step": 38 + }, + { + "epoch": 0.06, + "learning_rate": 0.0001999748990394517, + "loss": 1.096, + "step": 39 + }, + { + "epoch": 0.06, + "learning_rate": 0.00019997218742703887, + "loss": 1.122, + "step": 40 + }, + { + "epoch": 0.06, + "learning_rate": 0.00019996933678400946, + "loss": 1.1132, + "step": 41 + }, + { + "epoch": 0.06, + "learning_rate": 0.00019996634711432786, + "loss": 1.1498, + "step": 42 + }, + { + "epoch": 0.06, + "learning_rate": 0.00019996321842215173, + "loss": 1.0708, + "step": 43 + }, + { + "epoch": 0.07, + "learning_rate": 0.0001999599507118322, + "loss": 1.1154, + "step": 44 + }, + { + "epoch": 0.07, + "learning_rate": 0.00019995654398791355, + "loss": 1.2118, + "step": 45 + }, + { + "epoch": 0.07, + "learning_rate": 0.00019995299825513357, + "loss": 1.0919, + "step": 46 + }, + { + "epoch": 0.07, + "learning_rate": 0.00019994931351842327, + "loss": 1.1364, + "step": 47 + }, + { + "epoch": 0.07, + "learning_rate": 0.00019994548978290695, + "loss": 1.1442, + "step": 48 + }, + { + "epoch": 0.07, + "learning_rate": 0.0001999415270539023, + "loss": 1.1248, + "step": 49 + }, + { + "epoch": 0.07, + "learning_rate": 0.00019993742533692022, + "loss": 1.1366, + "step": 50 + }, + { + "epoch": 0.08, + "learning_rate": 0.00019993318463766495, + "loss": 1.1437, + "step": 51 + }, + { + "epoch": 0.08, + "learning_rate": 0.000199928804962034, + "loss": 1.1191, + "step": 52 + }, + { + "epoch": 0.08, + "learning_rate": 0.0001999242863161182, + "loss": 1.0786, + "step": 53 + }, + { + "epoch": 0.08, + 
"learning_rate": 0.00019991962870620153, + "loss": 1.1951, + "step": 54 + }, + { + "epoch": 0.08, + "learning_rate": 0.00019991483213876134, + "loss": 1.1321, + "step": 55 + }, + { + "epoch": 0.08, + "learning_rate": 0.00019990989662046818, + "loss": 1.0876, + "step": 56 + }, + { + "epoch": 0.08, + "learning_rate": 0.0001999048221581858, + "loss": 1.1794, + "step": 57 + }, + { + "epoch": 0.09, + "learning_rate": 0.00019989960875897126, + "loss": 1.1796, + "step": 58 + }, + { + "epoch": 0.09, + "learning_rate": 0.00019989425643007476, + "loss": 1.1165, + "step": 59 + }, + { + "epoch": 0.09, + "learning_rate": 0.0001998887651789398, + "loss": 1.1978, + "step": 60 + }, + { + "epoch": 0.09, + "learning_rate": 0.00019988313501320297, + "loss": 1.1693, + "step": 61 + }, + { + "epoch": 0.09, + "learning_rate": 0.00019987736594069414, + "loss": 1.1553, + "step": 62 + }, + { + "epoch": 0.09, + "learning_rate": 0.0001998714579694363, + "loss": 1.1959, + "step": 63 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019986541110764565, + "loss": 1.1945, + "step": 64 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019985922536373146, + "loss": 1.121, + "step": 65 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019985290074629627, + "loss": 1.122, + "step": 66 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019984643726413565, + "loss": 1.1435, + "step": 67 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019983983492623833, + "loss": 1.0413, + "step": 68 + }, + { + "epoch": 0.1, + "learning_rate": 0.0001998330937417861, + "loss": 1.078, + "step": 69 + }, + { + "epoch": 0.1, + "learning_rate": 0.0001998262137201539, + "loss": 1.0811, + "step": 70 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019981919487090972, + "loss": 1.1639, + "step": 71 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019981203720381463, + "loss": 1.164, + "step": 72 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019980474072882277, + "loss": 1.1006, + "step": 73 + }, + { + "epoch": 0.11, + 
"learning_rate": 0.00019979730545608126, + "loss": 1.1926, + "step": 74 + }, + { + "epoch": 0.11, + "learning_rate": 0.0001997897313959303, + "loss": 1.1129, + "step": 75 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019978201855890308, + "loss": 1.1367, + "step": 76 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019977416695572578, + "loss": 1.1495, + "step": 77 + }, + { + "epoch": 0.12, + "learning_rate": 0.0001997661765973176, + "loss": 1.1567, + "step": 78 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019975804749479062, + "loss": 1.2102, + "step": 79 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019974977965945, + "loss": 1.1175, + "step": 80 + }, + { + "epoch": 0.12, + "learning_rate": 0.0001997413731027937, + "loss": 1.1243, + "step": 81 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019973282783651263, + "loss": 1.1406, + "step": 82 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019972414387249072, + "loss": 1.09, + "step": 83 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019971532122280464, + "loss": 1.0115, + "step": 84 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019970635989972402, + "loss": 1.0328, + "step": 85 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019969725991571128, + "loss": 1.1226, + "step": 86 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019968802128342172, + "loss": 1.0747, + "step": 87 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019967864401570343, + "loss": 1.119, + "step": 88 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019966912812559732, + "loss": 1.1125, + "step": 89 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019965947362633708, + "loss": 1.0734, + "step": 90 + }, + { + "epoch": 0.14, + "learning_rate": 0.0001996496805313491, + "loss": 1.1798, + "step": 91 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019963974885425266, + "loss": 1.1461, + "step": 92 + }, + { + "epoch": 0.14, + "learning_rate": 0.0001996296786088596, + "loss": 1.0397, + "step": 93 + }, + { + "epoch": 0.14, + 
"learning_rate": 0.00019961946980917456, + "loss": 1.17, + "step": 94 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019960912246939485, + "loss": 1.0679, + "step": 95 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019959863660391045, + "loss": 1.0839, + "step": 96 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019958801222730394, + "loss": 1.0937, + "step": 97 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019957724935435063, + "loss": 1.1668, + "step": 98 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019956634800001832, + "loss": 1.0858, + "step": 99 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019955530817946748, + "loss": 1.0935, + "step": 100 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019954412990805107, + "loss": 1.1046, + "step": 101 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019953281320131468, + "loss": 1.1319, + "step": 102 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019952135807499633, + "loss": 1.1108, + "step": 103 + }, + { + "epoch": 0.15, + "learning_rate": 0.0001995097645450266, + "loss": 1.0485, + "step": 104 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019949803262752855, + "loss": 1.0862, + "step": 105 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019948616233881768, + "loss": 1.268, + "step": 106 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019947415369540189, + "loss": 1.0926, + "step": 107 + }, + { + "epoch": 0.16, + "learning_rate": 0.0001994620067139815, + "loss": 1.1427, + "step": 108 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019944972141144928, + "loss": 1.0754, + "step": 109 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019943729780489027, + "loss": 1.0044, + "step": 110 + }, + { + "epoch": 0.17, + "learning_rate": 0.0001994247359115819, + "loss": 1.1304, + "step": 111 + }, + { + "epoch": 0.17, + "learning_rate": 0.00019941203574899393, + "loss": 1.1683, + "step": 112 + }, + { + "epoch": 0.17, + "learning_rate": 0.00019939919733478838, + "loss": 1.1559, + "step": 113 + }, + { + "epoch": 
0.17, + "learning_rate": 0.00019938622068681953, + "loss": 1.1879, + "step": 114 + }, + { + "epoch": 0.17, + "learning_rate": 0.00019937310582313392, + "loss": 1.0613, + "step": 115 + }, + { + "epoch": 0.17, + "learning_rate": 0.0001993598527619703, + "loss": 1.1196, + "step": 116 + }, + { + "epoch": 0.17, + "learning_rate": 0.0001993464615217596, + "loss": 1.0762, + "step": 117 + }, + { + "epoch": 0.18, + "learning_rate": 0.00019933293212112495, + "loss": 1.1059, + "step": 118 + }, + { + "epoch": 0.18, + "learning_rate": 0.00019931926457888156, + "loss": 1.0831, + "step": 119 + }, + { + "epoch": 0.18, + "learning_rate": 0.00019930545891403678, + "loss": 1.0552, + "step": 120 + }, + { + "epoch": 0.18, + "learning_rate": 0.00019929151514579008, + "loss": 1.15, + "step": 121 + }, + { + "epoch": 0.18, + "learning_rate": 0.00019927743329353295, + "loss": 1.1038, + "step": 122 + }, + { + "epoch": 0.18, + "learning_rate": 0.0001992632133768489, + "loss": 1.067, + "step": 123 + }, + { + "epoch": 0.18, + "learning_rate": 0.0001992488554155135, + "loss": 1.1311, + "step": 124 + }, + { + "epoch": 0.19, + "learning_rate": 0.00019923435942949426, + "loss": 1.1402, + "step": 125 + }, + { + "epoch": 0.19, + "learning_rate": 0.00019921972543895066, + "loss": 1.0453, + "step": 126 + }, + { + "epoch": 0.19, + "learning_rate": 0.00019920495346423402, + "loss": 1.1567, + "step": 127 + }, + { + "epoch": 0.19, + "learning_rate": 0.00019919004352588767, + "loss": 1.137, + "step": 128 + }, + { + "epoch": 0.19, + "learning_rate": 0.0001991749956446468, + "loss": 0.9986, + "step": 129 + }, + { + "epoch": 0.19, + "learning_rate": 0.00019915980984143832, + "loss": 1.083, + "step": 130 + }, + { + "epoch": 0.2, + "learning_rate": 0.00019914448613738106, + "loss": 1.0619, + "step": 131 + }, + { + "epoch": 0.2, + "learning_rate": 0.00019912902455378556, + "loss": 1.1294, + "step": 132 + }, + { + "epoch": 0.2, + "learning_rate": 0.00019911342511215414, + "loss": 1.0965, + "step": 133 + }, + { + 
"epoch": 0.2, + "learning_rate": 0.00019909768783418086, + "loss": 1.0216, + "step": 134 + }, + { + "epoch": 0.2, + "learning_rate": 0.00019908181274175138, + "loss": 1.0081, + "step": 135 + }, + { + "epoch": 0.2, + "learning_rate": 0.0001990657998569432, + "loss": 1.0246, + "step": 136 + }, + { + "epoch": 0.2, + "learning_rate": 0.0001990496492020252, + "loss": 1.1249, + "step": 137 + }, + { + "epoch": 0.21, + "learning_rate": 0.00019903336079945804, + "loss": 1.0518, + "step": 138 + }, + { + "epoch": 0.21, + "learning_rate": 0.00019901693467189386, + "loss": 1.189, + "step": 139 + }, + { + "epoch": 0.21, + "learning_rate": 0.00019900037084217637, + "loss": 1.1475, + "step": 140 + }, + { + "epoch": 0.21, + "learning_rate": 0.0001989836693333408, + "loss": 1.2259, + "step": 141 + }, + { + "epoch": 0.21, + "learning_rate": 0.0001989668301686138, + "loss": 1.0399, + "step": 142 + }, + { + "epoch": 0.21, + "learning_rate": 0.0001989498533714135, + "loss": 1.128, + "step": 143 + }, + { + "epoch": 0.21, + "learning_rate": 0.00019893273896534936, + "loss": 1.014, + "step": 144 + }, + { + "epoch": 0.22, + "learning_rate": 0.0001989154869742223, + "loss": 1.1552, + "step": 145 + }, + { + "epoch": 0.22, + "learning_rate": 0.00019889809742202455, + "loss": 1.1159, + "step": 146 + }, + { + "epoch": 0.22, + "learning_rate": 0.0001988805703329396, + "loss": 1.0218, + "step": 147 + }, + { + "epoch": 0.22, + "learning_rate": 0.00019886290573134228, + "loss": 1.1723, + "step": 148 + }, + { + "epoch": 0.22, + "learning_rate": 0.0001988451036417986, + "loss": 1.2132, + "step": 149 + }, + { + "epoch": 0.22, + "learning_rate": 0.00019882716408906585, + "loss": 1.112, + "step": 150 + }, + { + "epoch": 0.23, + "learning_rate": 0.0001988090870980924, + "loss": 1.0856, + "step": 151 + }, + { + "epoch": 0.23, + "learning_rate": 0.0001987908726940178, + "loss": 1.0951, + "step": 152 + }, + { + "epoch": 0.23, + "learning_rate": 0.00019877252090217271, + "loss": 1.0218, + "step": 153 + }, + { 
+ "epoch": 0.23, + "learning_rate": 0.00019875403174807882, + "loss": 1.0552, + "step": 154 + }, + { + "epoch": 0.23, + "learning_rate": 0.00019873540525744887, + "loss": 1.1481, + "step": 155 + }, + { + "epoch": 0.23, + "learning_rate": 0.00019871664145618657, + "loss": 1.169, + "step": 156 + }, + { + "epoch": 0.23, + "learning_rate": 0.00019869774037038665, + "loss": 1.0802, + "step": 157 + }, + { + "epoch": 0.24, + "learning_rate": 0.0001986787020263347, + "loss": 1.0871, + "step": 158 + }, + { + "epoch": 0.24, + "learning_rate": 0.0001986595264505072, + "loss": 1.1022, + "step": 159 + }, + { + "epoch": 0.24, + "learning_rate": 0.00019864021366957147, + "loss": 1.0257, + "step": 160 + }, + { + "epoch": 0.24, + "learning_rate": 0.0001986207637103857, + "loss": 1.0986, + "step": 161 + }, + { + "epoch": 0.24, + "learning_rate": 0.00019860117659999878, + "loss": 1.0837, + "step": 162 + }, + { + "epoch": 0.24, + "learning_rate": 0.00019858145236565037, + "loss": 1.1895, + "step": 163 + }, + { + "epoch": 0.24, + "learning_rate": 0.00019856159103477086, + "loss": 1.052, + "step": 164 + }, + { + "epoch": 0.25, + "learning_rate": 0.00019854159263498123, + "loss": 1.1184, + "step": 165 + }, + { + "epoch": 0.25, + "learning_rate": 0.0001985214571940931, + "loss": 1.0895, + "step": 166 + }, + { + "epoch": 0.25, + "learning_rate": 0.00019850118474010872, + "loss": 1.0764, + "step": 167 + }, + { + "epoch": 0.25, + "learning_rate": 0.00019848077530122083, + "loss": 1.1387, + "step": 168 + }, + { + "epoch": 0.25, + "eval_loss": 1.084919810295105, + "eval_runtime": 2.6029, + "eval_samples_per_second": 419.538, + "eval_steps_per_second": 26.509, + "step": 168 + }, + { + "epoch": 0.25, + "learning_rate": 0.00019846022890581267, + "loss": 1.0826, + "step": 169 + }, + { + "epoch": 0.25, + "learning_rate": 0.000198439545582458, + "loss": 1.1366, + "step": 170 + }, + { + "epoch": 0.25, + "learning_rate": 0.000198418725359921, + "loss": 1.1349, + "step": 171 + }, + { + "epoch": 0.26, + 
"learning_rate": 0.00019839776826715614, + "loss": 1.0636, + "step": 172 + }, + { + "epoch": 0.26, + "learning_rate": 0.00019837667433330838, + "loss": 1.1216, + "step": 173 + }, + { + "epoch": 0.26, + "learning_rate": 0.0001983554435877128, + "loss": 1.1051, + "step": 174 + }, + { + "epoch": 0.26, + "learning_rate": 0.00019833407605989494, + "loss": 1.1558, + "step": 175 + }, + { + "epoch": 0.26, + "learning_rate": 0.00019831257177957044, + "loss": 1.0364, + "step": 176 + }, + { + "epoch": 0.26, + "learning_rate": 0.00019829093077664513, + "loss": 1.0665, + "step": 177 + }, + { + "epoch": 0.27, + "learning_rate": 0.00019826915308121504, + "loss": 1.1994, + "step": 178 + }, + { + "epoch": 0.27, + "learning_rate": 0.0001982472387235662, + "loss": 1.1434, + "step": 179 + }, + { + "epoch": 0.27, + "learning_rate": 0.0001982251877341748, + "loss": 1.081, + "step": 180 + }, + { + "epoch": 0.27, + "learning_rate": 0.000198203000143707, + "loss": 1.0653, + "step": 181 + }, + { + "epoch": 0.27, + "learning_rate": 0.0001981806759830189, + "loss": 1.0269, + "step": 182 + }, + { + "epoch": 0.27, + "learning_rate": 0.0001981582152831566, + "loss": 1.1167, + "step": 183 + }, + { + "epoch": 0.27, + "learning_rate": 0.00019813561807535598, + "loss": 1.0608, + "step": 184 + }, + { + "epoch": 0.28, + "learning_rate": 0.0001981128843910428, + "loss": 1.0989, + "step": 185 + }, + { + "epoch": 0.28, + "learning_rate": 0.0001980900142618327, + "loss": 1.1405, + "step": 186 + }, + { + "epoch": 0.28, + "learning_rate": 0.00019806700771953097, + "loss": 1.0359, + "step": 187 + }, + { + "epoch": 0.28, + "learning_rate": 0.0001980438647961327, + "loss": 1.1073, + "step": 188 + }, + { + "epoch": 0.28, + "learning_rate": 0.0001980205855238225, + "loss": 1.0338, + "step": 189 + }, + { + "epoch": 0.28, + "learning_rate": 0.00019799716993497475, + "loss": 1.1285, + "step": 190 + }, + { + "epoch": 0.28, + "learning_rate": 0.00019797361806215332, + "loss": 1.1277, + "step": 191 + }, + { + "epoch": 
0.29, + "learning_rate": 0.00019794992993811165, + "loss": 1.119, + "step": 192 + }, + { + "epoch": 0.29, + "learning_rate": 0.00019792610559579265, + "loss": 1.1224, + "step": 193 + }, + { + "epoch": 0.29, + "learning_rate": 0.00019790214506832868, + "loss": 1.1438, + "step": 194 + }, + { + "epoch": 0.29, + "learning_rate": 0.0001978780483890414, + "loss": 1.1462, + "step": 195 + }, + { + "epoch": 0.29, + "learning_rate": 0.00019785381559144196, + "loss": 1.042, + "step": 196 + }, + { + "epoch": 0.29, + "learning_rate": 0.00019782944670923076, + "loss": 1.1022, + "step": 197 + }, + { + "epoch": 0.3, + "learning_rate": 0.00019780494177629735, + "loss": 1.0564, + "step": 198 + }, + { + "epoch": 0.3, + "learning_rate": 0.00019778030082672068, + "loss": 1.0471, + "step": 199 + }, + { + "epoch": 0.3, + "learning_rate": 0.00019775552389476864, + "loss": 1.0636, + "step": 200 + }, + { + "epoch": 0.3, + "learning_rate": 0.0001977306110148984, + "loss": 1.0917, + "step": 201 + }, + { + "epoch": 0.3, + "learning_rate": 0.00019770556222175608, + "loss": 1.1965, + "step": 202 + }, + { + "epoch": 0.3, + "learning_rate": 0.00019768037755017685, + "loss": 1.073, + "step": 203 + }, + { + "epoch": 0.3, + "learning_rate": 0.00019765505703518496, + "loss": 1.0636, + "step": 204 + }, + { + "epoch": 0.31, + "learning_rate": 0.00019762960071199333, + "loss": 1.087, + "step": 205 + }, + { + "epoch": 0.31, + "learning_rate": 0.000197604008616004, + "loss": 1.0569, + "step": 206 + }, + { + "epoch": 0.31, + "learning_rate": 0.00019757828078280766, + "loss": 1.08, + "step": 207 + }, + { + "epoch": 0.31, + "learning_rate": 0.00019755241724818387, + "loss": 1.1536, + "step": 208 + }, + { + "epoch": 0.31, + "learning_rate": 0.00019752641804810084, + "loss": 1.1514, + "step": 209 + }, + { + "epoch": 0.31, + "learning_rate": 0.00019750028321871546, + "loss": 1.0691, + "step": 210 + }, + { + "epoch": 0.31, + "learning_rate": 0.00019747401279637325, + "loss": 1.1289, + "step": 211 + }, + { + 
"epoch": 0.32, + "learning_rate": 0.00019744760681760832, + "loss": 1.0834, + "step": 212 + }, + { + "epoch": 0.32, + "learning_rate": 0.00019742106531914328, + "loss": 1.0762, + "step": 213 + }, + { + "epoch": 0.32, + "learning_rate": 0.0001973943883378892, + "loss": 1.0913, + "step": 214 + }, + { + "epoch": 0.32, + "learning_rate": 0.00019736757591094558, + "loss": 1.132, + "step": 215 + }, + { + "epoch": 0.32, + "learning_rate": 0.00019734062807560027, + "loss": 1.0894, + "step": 216 + }, + { + "epoch": 0.32, + "learning_rate": 0.00019731354486932944, + "loss": 1.0327, + "step": 217 + }, + { + "epoch": 0.32, + "learning_rate": 0.00019728632632979746, + "loss": 1.112, + "step": 218 + }, + { + "epoch": 0.33, + "learning_rate": 0.00019725897249485704, + "loss": 1.0718, + "step": 219 + }, + { + "epoch": 0.33, + "learning_rate": 0.00019723148340254892, + "loss": 1.077, + "step": 220 + }, + { + "epoch": 0.33, + "learning_rate": 0.00019720385909110198, + "loss": 1.0335, + "step": 221 + }, + { + "epoch": 0.33, + "learning_rate": 0.00019717609959893318, + "loss": 1.0483, + "step": 222 + }, + { + "epoch": 0.33, + "learning_rate": 0.00019714820496464746, + "loss": 1.0901, + "step": 223 + }, + { + "epoch": 0.33, + "learning_rate": 0.00019712017522703764, + "loss": 0.9921, + "step": 224 + }, + { + "epoch": 0.34, + "learning_rate": 0.00019709201042508455, + "loss": 1.0829, + "step": 225 + }, + { + "epoch": 0.34, + "learning_rate": 0.0001970637105979567, + "loss": 1.0705, + "step": 226 + }, + { + "epoch": 0.34, + "learning_rate": 0.0001970352757850105, + "loss": 1.0481, + "step": 227 + }, + { + "epoch": 0.34, + "learning_rate": 0.00019700670602579008, + "loss": 0.9846, + "step": 228 + }, + { + "epoch": 0.34, + "learning_rate": 0.0001969780013600272, + "loss": 1.1492, + "step": 229 + }, + { + "epoch": 0.34, + "learning_rate": 0.00019694916182764113, + "loss": 1.1745, + "step": 230 + }, + { + "epoch": 0.34, + "learning_rate": 0.00019692018746873892, + "loss": 1.0451, + "step": 
231 + }, + { + "epoch": 0.35, + "learning_rate": 0.00019689107832361496, + "loss": 1.1217, + "step": 232 + }, + { + "epoch": 0.35, + "learning_rate": 0.00019686183443275116, + "loss": 1.0788, + "step": 233 + }, + { + "epoch": 0.35, + "learning_rate": 0.00019683245583681675, + "loss": 1.0703, + "step": 234 + }, + { + "epoch": 0.35, + "learning_rate": 0.00019680294257666837, + "loss": 1.1521, + "step": 235 + }, + { + "epoch": 0.35, + "learning_rate": 0.0001967732946933499, + "loss": 1.0659, + "step": 236 + }, + { + "epoch": 0.35, + "learning_rate": 0.00019674351222809242, + "loss": 1.0321, + "step": 237 + }, + { + "epoch": 0.35, + "learning_rate": 0.0001967135952223142, + "loss": 1.0555, + "step": 238 + }, + { + "epoch": 0.36, + "learning_rate": 0.00019668354371762066, + "loss": 1.0648, + "step": 239 + }, + { + "epoch": 0.36, + "learning_rate": 0.00019665335775580415, + "loss": 1.0723, + "step": 240 + }, + { + "epoch": 0.36, + "learning_rate": 0.0001966230373788441, + "loss": 1.0264, + "step": 241 + }, + { + "epoch": 0.36, + "learning_rate": 0.00019659258262890683, + "loss": 1.0331, + "step": 242 + }, + { + "epoch": 0.36, + "learning_rate": 0.00019656199354834558, + "loss": 1.1514, + "step": 243 + }, + { + "epoch": 0.36, + "learning_rate": 0.00019653127017970034, + "loss": 1.069, + "step": 244 + }, + { + "epoch": 0.37, + "learning_rate": 0.00019650041256569792, + "loss": 0.9623, + "step": 245 + }, + { + "epoch": 0.37, + "learning_rate": 0.00019646942074925172, + "loss": 1.0021, + "step": 246 + }, + { + "epoch": 0.37, + "learning_rate": 0.00019643829477346188, + "loss": 1.1131, + "step": 247 + }, + { + "epoch": 0.37, + "learning_rate": 0.0001964070346816151, + "loss": 1.1426, + "step": 248 + }, + { + "epoch": 0.37, + "learning_rate": 0.0001963756405171845, + "loss": 1.0761, + "step": 249 + }, + { + "epoch": 0.37, + "learning_rate": 0.00019634411232382978, + "loss": 1.1112, + "step": 250 + }, + { + "epoch": 0.37, + "learning_rate": 0.00019631245014539698, + "loss": 
1.081, + "step": 251 + }, + { + "epoch": 0.38, + "learning_rate": 0.00019628065402591845, + "loss": 1.1446, + "step": 252 + }, + { + "epoch": 0.38, + "learning_rate": 0.00019624872400961284, + "loss": 1.045, + "step": 253 + }, + { + "epoch": 0.38, + "learning_rate": 0.00019621666014088494, + "loss": 1.0337, + "step": 254 + }, + { + "epoch": 0.38, + "learning_rate": 0.00019618446246432583, + "loss": 1.1764, + "step": 255 + }, + { + "epoch": 0.38, + "learning_rate": 0.00019615213102471257, + "loss": 1.0323, + "step": 256 + }, + { + "epoch": 0.38, + "learning_rate": 0.00019611966586700823, + "loss": 1.0073, + "step": 257 + }, + { + "epoch": 0.38, + "learning_rate": 0.00019608706703636188, + "loss": 1.1615, + "step": 258 + }, + { + "epoch": 0.39, + "learning_rate": 0.00019605433457810855, + "loss": 1.1209, + "step": 259 + }, + { + "epoch": 0.39, + "learning_rate": 0.00019602146853776894, + "loss": 1.0721, + "step": 260 + }, + { + "epoch": 0.39, + "learning_rate": 0.0001959884689610497, + "loss": 1.0967, + "step": 261 + }, + { + "epoch": 0.39, + "learning_rate": 0.00019595533589384308, + "loss": 1.0284, + "step": 262 + }, + { + "epoch": 0.39, + "learning_rate": 0.00019592206938222703, + "loss": 1.0148, + "step": 263 + }, + { + "epoch": 0.39, + "learning_rate": 0.00019588866947246498, + "loss": 1.1434, + "step": 264 + }, + { + "epoch": 0.39, + "learning_rate": 0.00019585513621100603, + "loss": 1.1125, + "step": 265 + }, + { + "epoch": 0.4, + "learning_rate": 0.0001958214696444846, + "loss": 1.0812, + "step": 266 + }, + { + "epoch": 0.4, + "learning_rate": 0.00019578766981972058, + "loss": 1.0611, + "step": 267 + }, + { + "epoch": 0.4, + "learning_rate": 0.00019575373678371909, + "loss": 1.1029, + "step": 268 + }, + { + "epoch": 0.4, + "learning_rate": 0.00019571967058367064, + "loss": 1.0692, + "step": 269 + }, + { + "epoch": 0.4, + "learning_rate": 0.00019568547126695083, + "loss": 1.0581, + "step": 270 + }, + { + "epoch": 0.4, + "learning_rate": 0.00019565113888112036, 
+ "loss": 0.9841, + "step": 271 + }, + { + "epoch": 0.41, + "learning_rate": 0.00019561667347392508, + "loss": 1.0173, + "step": 272 + }, + { + "epoch": 0.41, + "learning_rate": 0.00019558207509329584, + "loss": 1.0805, + "step": 273 + }, + { + "epoch": 0.41, + "learning_rate": 0.00019554734378734824, + "loss": 1.088, + "step": 274 + }, + { + "epoch": 0.41, + "learning_rate": 0.00019551247960438296, + "loss": 1.0481, + "step": 275 + }, + { + "epoch": 0.41, + "learning_rate": 0.00019547748259288536, + "loss": 1.1747, + "step": 276 + }, + { + "epoch": 0.41, + "learning_rate": 0.0001954423528015255, + "loss": 1.0407, + "step": 277 + }, + { + "epoch": 0.41, + "learning_rate": 0.00019540709027915818, + "loss": 1.1412, + "step": 278 + }, + { + "epoch": 0.42, + "learning_rate": 0.0001953716950748227, + "loss": 1.075, + "step": 279 + }, + { + "epoch": 0.42, + "learning_rate": 0.00019533616723774294, + "loss": 0.9863, + "step": 280 + }, + { + "epoch": 0.42, + "learning_rate": 0.0001953005068173272, + "loss": 1.1426, + "step": 281 + }, + { + "epoch": 0.42, + "learning_rate": 0.0001952647138631682, + "loss": 1.0621, + "step": 282 + }, + { + "epoch": 0.42, + "learning_rate": 0.00019522878842504295, + "loss": 1.1007, + "step": 283 + }, + { + "epoch": 0.42, + "learning_rate": 0.00019519273055291266, + "loss": 1.0632, + "step": 284 + }, + { + "epoch": 0.42, + "learning_rate": 0.00019515654029692278, + "loss": 1.126, + "step": 285 + }, + { + "epoch": 0.43, + "learning_rate": 0.00019512021770740288, + "loss": 1.0946, + "step": 286 + }, + { + "epoch": 0.43, + "learning_rate": 0.0001950837628348665, + "loss": 1.0639, + "step": 287 + }, + { + "epoch": 0.43, + "learning_rate": 0.00019504717573001117, + "loss": 1.1432, + "step": 288 + }, + { + "epoch": 0.43, + "learning_rate": 0.00019501045644371832, + "loss": 1.0619, + "step": 289 + }, + { + "epoch": 0.43, + "learning_rate": 0.0001949736050270532, + "loss": 1.0597, + "step": 290 + }, + { + "epoch": 0.43, + "learning_rate": 
0.00019493662153126481, + "loss": 1.0743, + "step": 291 + }, + { + "epoch": 0.44, + "learning_rate": 0.0001948995060077859, + "loss": 1.1114, + "step": 292 + }, + { + "epoch": 0.44, + "learning_rate": 0.00019486225850823266, + "loss": 1.1435, + "step": 293 + }, + { + "epoch": 0.44, + "learning_rate": 0.000194824879084405, + "loss": 1.1396, + "step": 294 + }, + { + "epoch": 0.44, + "learning_rate": 0.00019478736778828624, + "loss": 1.1597, + "step": 295 + }, + { + "epoch": 0.44, + "learning_rate": 0.00019474972467204297, + "loss": 1.0976, + "step": 296 + }, + { + "epoch": 0.44, + "learning_rate": 0.00019471194978802533, + "loss": 1.0829, + "step": 297 + }, + { + "epoch": 0.44, + "learning_rate": 0.0001946740431887665, + "loss": 1.0437, + "step": 298 + }, + { + "epoch": 0.45, + "learning_rate": 0.00019463600492698296, + "loss": 1.0835, + "step": 299 + }, + { + "epoch": 0.45, + "learning_rate": 0.00019459783505557424, + "loss": 1.0558, + "step": 300 + }, + { + "epoch": 0.45, + "learning_rate": 0.0001945595336276229, + "loss": 1.0656, + "step": 301 + }, + { + "epoch": 0.45, + "learning_rate": 0.00019452110069639452, + "loss": 1.1487, + "step": 302 + }, + { + "epoch": 0.45, + "learning_rate": 0.00019448253631533744, + "loss": 1.1383, + "step": 303 + }, + { + "epoch": 0.45, + "learning_rate": 0.00019444384053808288, + "loss": 1.1582, + "step": 304 + }, + { + "epoch": 0.45, + "learning_rate": 0.00019440501341844483, + "loss": 0.9999, + "step": 305 + }, + { + "epoch": 0.46, + "learning_rate": 0.00019436605501041987, + "loss": 1.1317, + "step": 306 + }, + { + "epoch": 0.46, + "learning_rate": 0.00019432696536818717, + "loss": 1.0944, + "step": 307 + }, + { + "epoch": 0.46, + "learning_rate": 0.00019428774454610843, + "loss": 1.1624, + "step": 308 + }, + { + "epoch": 0.46, + "learning_rate": 0.00019424839259872778, + "loss": 1.1644, + "step": 309 + }, + { + "epoch": 0.46, + "learning_rate": 0.00019420890958077167, + "loss": 1.0486, + "step": 310 + }, + { + "epoch": 0.46, + 
"learning_rate": 0.00019416929554714888, + "loss": 1.0705, + "step": 311 + }, + { + "epoch": 0.46, + "learning_rate": 0.00019412955055295034, + "loss": 1.023, + "step": 312 + }, + { + "epoch": 0.47, + "learning_rate": 0.00019408967465344917, + "loss": 1.1144, + "step": 313 + }, + { + "epoch": 0.47, + "learning_rate": 0.00019404966790410047, + "loss": 1.0378, + "step": 314 + }, + { + "epoch": 0.47, + "learning_rate": 0.00019400953036054138, + "loss": 1.036, + "step": 315 + }, + { + "epoch": 0.47, + "learning_rate": 0.00019396926207859084, + "loss": 1.0735, + "step": 316 + }, + { + "epoch": 0.47, + "learning_rate": 0.00019392886311424973, + "loss": 1.0259, + "step": 317 + }, + { + "epoch": 0.47, + "learning_rate": 0.0001938883335237006, + "loss": 1.1603, + "step": 318 + }, + { + "epoch": 0.48, + "learning_rate": 0.0001938476733633076, + "loss": 1.1282, + "step": 319 + }, + { + "epoch": 0.48, + "learning_rate": 0.0001938068826896166, + "loss": 1.063, + "step": 320 + }, + { + "epoch": 0.48, + "learning_rate": 0.00019376596155935486, + "loss": 1.1176, + "step": 321 + }, + { + "epoch": 0.48, + "learning_rate": 0.00019372491002943112, + "loss": 1.1307, + "step": 322 + }, + { + "epoch": 0.48, + "learning_rate": 0.00019368372815693549, + "loss": 1.0412, + "step": 323 + }, + { + "epoch": 0.48, + "learning_rate": 0.00019364241599913924, + "loss": 1.1353, + "step": 324 + }, + { + "epoch": 0.48, + "learning_rate": 0.00019360097361349494, + "loss": 1.1293, + "step": 325 + }, + { + "epoch": 0.49, + "learning_rate": 0.0001935594010576362, + "loss": 1.0885, + "step": 326 + }, + { + "epoch": 0.49, + "learning_rate": 0.00019351769838937775, + "loss": 1.0944, + "step": 327 + }, + { + "epoch": 0.49, + "learning_rate": 0.00019347586566671512, + "loss": 1.1435, + "step": 328 + }, + { + "epoch": 0.49, + "learning_rate": 0.0001934339029478248, + "loss": 1.1217, + "step": 329 + }, + { + "epoch": 0.49, + "learning_rate": 0.00019339181029106404, + "loss": 1.1801, + "step": 330 + }, + { + 
"epoch": 0.49, + "learning_rate": 0.00019334958775497083, + "loss": 1.1846, + "step": 331 + }, + { + "epoch": 0.49, + "learning_rate": 0.00019330723539826375, + "loss": 1.0897, + "step": 332 + }, + { + "epoch": 0.5, + "learning_rate": 0.00019326475327984192, + "loss": 1.0643, + "step": 333 + }, + { + "epoch": 0.5, + "learning_rate": 0.00019322214145878487, + "loss": 1.0246, + "step": 334 + }, + { + "epoch": 0.5, + "learning_rate": 0.0001931793999943526, + "loss": 1.1108, + "step": 335 + }, + { + "epoch": 0.5, + "learning_rate": 0.00019313652894598543, + "loss": 1.0619, + "step": 336 + }, + { + "epoch": 0.5, + "eval_loss": 1.048388123512268, + "eval_runtime": 2.6045, + "eval_samples_per_second": 419.273, + "eval_steps_per_second": 26.493, + "step": 336 + }, + { + "epoch": 0.5, + "learning_rate": 0.00019309352837330372, + "loss": 1.0014, + "step": 337 + }, + { + "epoch": 0.5, + "learning_rate": 0.0001930503983361081, + "loss": 1.0786, + "step": 338 + }, + { + "epoch": 0.51, + "learning_rate": 0.00019300713889437926, + "loss": 1.014, + "step": 339 + }, + { + "epoch": 0.51, + "learning_rate": 0.00019296375010827773, + "loss": 1.1233, + "step": 340 + }, + { + "epoch": 0.51, + "learning_rate": 0.000192920232038144, + "loss": 1.1052, + "step": 341 + }, + { + "epoch": 0.51, + "learning_rate": 0.0001928765847444984, + "loss": 1.0138, + "step": 342 + }, + { + "epoch": 0.51, + "learning_rate": 0.00019283280828804081, + "loss": 1.1536, + "step": 343 + }, + { + "epoch": 0.51, + "learning_rate": 0.00019278890272965096, + "loss": 0.992, + "step": 344 + }, + { + "epoch": 0.51, + "learning_rate": 0.0001927448681303879, + "loss": 1.1165, + "step": 345 + }, + { + "epoch": 0.52, + "learning_rate": 0.0001927007045514903, + "loss": 1.0565, + "step": 346 + }, + { + "epoch": 0.52, + "learning_rate": 0.00019265641205437611, + "loss": 1.0664, + "step": 347 + }, + { + "epoch": 0.52, + "learning_rate": 0.0001926119907006426, + "loss": 1.0625, + "step": 348 + }, + { + "epoch": 0.52, + 
"learning_rate": 0.00019256744055206622, + "loss": 1.0393, + "step": 349 + }, + { + "epoch": 0.52, + "learning_rate": 0.0001925227616706026, + "loss": 1.125, + "step": 350 + }, + { + "epoch": 0.52, + "learning_rate": 0.00019247795411838627, + "loss": 1.0375, + "step": 351 + }, + { + "epoch": 0.52, + "learning_rate": 0.00019243301795773086, + "loss": 1.0648, + "step": 352 + }, + { + "epoch": 0.53, + "learning_rate": 0.0001923879532511287, + "loss": 1.0903, + "step": 353 + }, + { + "epoch": 0.53, + "learning_rate": 0.000192342760061251, + "loss": 1.1219, + "step": 354 + }, + { + "epoch": 0.53, + "learning_rate": 0.00019229743845094755, + "loss": 1.054, + "step": 355 + }, + { + "epoch": 0.53, + "learning_rate": 0.0001922519884832469, + "loss": 1.1206, + "step": 356 + }, + { + "epoch": 0.53, + "learning_rate": 0.00019220641022135588, + "loss": 1.1125, + "step": 357 + }, + { + "epoch": 0.53, + "learning_rate": 0.00019216070372865996, + "loss": 1.064, + "step": 358 + }, + { + "epoch": 0.54, + "learning_rate": 0.0001921148690687228, + "loss": 1.0843, + "step": 359 + }, + { + "epoch": 0.54, + "learning_rate": 0.00019206890630528634, + "loss": 1.1378, + "step": 360 + }, + { + "epoch": 0.54, + "learning_rate": 0.00019202281550227064, + "loss": 1.0399, + "step": 361 + }, + { + "epoch": 0.54, + "learning_rate": 0.0001919765967237739, + "loss": 1.1762, + "step": 362 + }, + { + "epoch": 0.54, + "learning_rate": 0.0001919302500340722, + "loss": 1.0538, + "step": 363 + }, + { + "epoch": 0.54, + "learning_rate": 0.00019188377549761963, + "loss": 1.0343, + "step": 364 + }, + { + "epoch": 0.54, + "learning_rate": 0.0001918371731790479, + "loss": 1.1027, + "step": 365 + }, + { + "epoch": 0.55, + "learning_rate": 0.00019179044314316664, + "loss": 1.036, + "step": 366 + }, + { + "epoch": 0.55, + "learning_rate": 0.00019174358545496288, + "loss": 1.041, + "step": 367 + }, + { + "epoch": 0.55, + "learning_rate": 0.00019169660017960137, + "loss": 1.0762, + "step": 368 + }, + { + "epoch": 
0.55, + "learning_rate": 0.00019164948738242409, + "loss": 1.0807, + "step": 369 + }, + { + "epoch": 0.55, + "learning_rate": 0.00019160224712895055, + "loss": 1.037, + "step": 370 + }, + { + "epoch": 0.55, + "learning_rate": 0.00019155487948487748, + "loss": 1.0625, + "step": 371 + }, + { + "epoch": 0.55, + "learning_rate": 0.0001915073845160786, + "loss": 1.062, + "step": 372 + }, + { + "epoch": 0.56, + "learning_rate": 0.00019145976228860496, + "loss": 1.1882, + "step": 373 + }, + { + "epoch": 0.56, + "learning_rate": 0.00019141201286868435, + "loss": 1.1338, + "step": 374 + }, + { + "epoch": 0.56, + "learning_rate": 0.00019136413632272163, + "loss": 1.0174, + "step": 375 + }, + { + "epoch": 0.56, + "learning_rate": 0.00019131613271729833, + "loss": 1.0585, + "step": 376 + }, + { + "epoch": 0.56, + "learning_rate": 0.00019126800211917276, + "loss": 1.0495, + "step": 377 + }, + { + "epoch": 0.56, + "learning_rate": 0.0001912197445952798, + "loss": 1.123, + "step": 378 + }, + { + "epoch": 0.56, + "learning_rate": 0.00019117136021273075, + "loss": 1.0517, + "step": 379 + }, + { + "epoch": 0.57, + "learning_rate": 0.0001911228490388136, + "loss": 1.0545, + "step": 380 + }, + { + "epoch": 0.57, + "learning_rate": 0.00019107421114099237, + "loss": 1.0302, + "step": 381 + }, + { + "epoch": 0.57, + "learning_rate": 0.00019102544658690748, + "loss": 1.0908, + "step": 382 + }, + { + "epoch": 0.57, + "learning_rate": 0.00019097655544437545, + "loss": 1.1425, + "step": 383 + }, + { + "epoch": 0.57, + "learning_rate": 0.00019092753778138886, + "loss": 1.0686, + "step": 384 + }, + { + "epoch": 0.57, + "learning_rate": 0.0001908783936661162, + "loss": 1.06, + "step": 385 + }, + { + "epoch": 0.58, + "learning_rate": 0.0001908291231669019, + "loss": 1.1296, + "step": 386 + }, + { + "epoch": 0.58, + "learning_rate": 0.00019077972635226604, + "loss": 1.1029, + "step": 387 + }, + { + "epoch": 0.58, + "learning_rate": 0.00019073020329090444, + "loss": 1.0469, + "step": 388 + }, + { 
+ "epoch": 0.58, + "learning_rate": 0.0001906805540516885, + "loss": 1.0427, + "step": 389 + }, + { + "epoch": 0.58, + "learning_rate": 0.000190630778703665, + "loss": 1.0075, + "step": 390 + }, + { + "epoch": 0.58, + "learning_rate": 0.00019058087731605624, + "loss": 1.1146, + "step": 391 + }, + { + "epoch": 0.58, + "learning_rate": 0.0001905308499582597, + "loss": 1.1161, + "step": 392 + }, + { + "epoch": 0.59, + "learning_rate": 0.00019048069669984802, + "loss": 1.1419, + "step": 393 + }, + { + "epoch": 0.59, + "learning_rate": 0.00019043041761056907, + "loss": 1.1586, + "step": 394 + }, + { + "epoch": 0.59, + "learning_rate": 0.00019038001276034557, + "loss": 1.0765, + "step": 395 + }, + { + "epoch": 0.59, + "learning_rate": 0.00019032948221927524, + "loss": 1.1225, + "step": 396 + }, + { + "epoch": 0.59, + "learning_rate": 0.0001902788260576305, + "loss": 1.0247, + "step": 397 + }, + { + "epoch": 0.59, + "learning_rate": 0.00019022804434585852, + "loss": 1.135, + "step": 398 + }, + { + "epoch": 0.59, + "learning_rate": 0.0001901771371545811, + "loss": 1.1122, + "step": 399 + }, + { + "epoch": 0.6, + "learning_rate": 0.00019012610455459446, + "loss": 1.075, + "step": 400 + }, + { + "epoch": 0.6, + "learning_rate": 0.00019007494661686935, + "loss": 1.1121, + "step": 401 + }, + { + "epoch": 0.6, + "learning_rate": 0.0001900236634125507, + "loss": 1.0531, + "step": 402 + }, + { + "epoch": 0.6, + "learning_rate": 0.00018997225501295772, + "loss": 1.0561, + "step": 403 + }, + { + "epoch": 0.6, + "learning_rate": 0.00018992072148958368, + "loss": 1.0803, + "step": 404 + }, + { + "epoch": 0.6, + "learning_rate": 0.00018986906291409595, + "loss": 1.0579, + "step": 405 + }, + { + "epoch": 0.61, + "learning_rate": 0.00018981727935833567, + "loss": 1.0614, + "step": 406 + }, + { + "epoch": 0.61, + "learning_rate": 0.0001897653708943179, + "loss": 0.9982, + "step": 407 + }, + { + "epoch": 0.61, + "learning_rate": 0.00018971333759423142, + "loss": 1.1498, + "step": 408 + }, 
+ { + "epoch": 0.61, + "learning_rate": 0.00018966117953043852, + "loss": 1.1165, + "step": 409 + }, + { + "epoch": 0.61, + "learning_rate": 0.00018960889677547505, + "loss": 1.1155, + "step": 410 + }, + { + "epoch": 0.61, + "learning_rate": 0.00018955648940205028, + "loss": 1.0017, + "step": 411 + }, + { + "epoch": 0.61, + "learning_rate": 0.00018950395748304678, + "loss": 1.0556, + "step": 412 + }, + { + "epoch": 0.62, + "learning_rate": 0.00018945130109152033, + "loss": 1.0248, + "step": 413 + }, + { + "epoch": 0.62, + "learning_rate": 0.00018939852030069981, + "loss": 1.0155, + "step": 414 + }, + { + "epoch": 0.62, + "learning_rate": 0.00018934561518398706, + "loss": 1.0248, + "step": 415 + }, + { + "epoch": 0.62, + "learning_rate": 0.00018929258581495685, + "loss": 0.9835, + "step": 416 + }, + { + "epoch": 0.62, + "learning_rate": 0.0001892394322673568, + "loss": 1.1602, + "step": 417 + }, + { + "epoch": 0.62, + "learning_rate": 0.0001891861546151071, + "loss": 1.021, + "step": 418 + }, + { + "epoch": 0.62, + "learning_rate": 0.00018913275293230069, + "loss": 1.0526, + "step": 419 + }, + { + "epoch": 0.63, + "learning_rate": 0.00018907922729320285, + "loss": 1.0585, + "step": 420 + }, + { + "epoch": 0.63, + "learning_rate": 0.00018902557777225135, + "loss": 1.0327, + "step": 421 + }, + { + "epoch": 0.63, + "learning_rate": 0.00018897180444405614, + "loss": 1.0448, + "step": 422 + }, + { + "epoch": 0.63, + "learning_rate": 0.0001889179073833995, + "loss": 1.0776, + "step": 423 + }, + { + "epoch": 0.63, + "learning_rate": 0.0001888638866652356, + "loss": 1.0748, + "step": 424 + }, + { + "epoch": 0.63, + "learning_rate": 0.0001888097423646907, + "loss": 1.0482, + "step": 425 + }, + { + "epoch": 0.63, + "learning_rate": 0.00018875547455706295, + "loss": 1.0394, + "step": 426 + }, + { + "epoch": 0.64, + "learning_rate": 0.00018870108331782217, + "loss": 1.0646, + "step": 427 + }, + { + "epoch": 0.64, + "learning_rate": 0.00018864656872260985, + "loss": 1.0338, + 
"step": 428 + }, + { + "epoch": 0.64, + "learning_rate": 0.00018859193084723913, + "loss": 0.9848, + "step": 429 + }, + { + "epoch": 0.64, + "learning_rate": 0.0001885371697676944, + "loss": 1.0587, + "step": 430 + }, + { + "epoch": 0.64, + "learning_rate": 0.0001884822855601316, + "loss": 1.0711, + "step": 431 + }, + { + "epoch": 0.64, + "learning_rate": 0.00018842727830087778, + "loss": 1.0964, + "step": 432 + }, + { + "epoch": 0.65, + "learning_rate": 0.00018837214806643115, + "loss": 1.0254, + "step": 433 + }, + { + "epoch": 0.65, + "learning_rate": 0.00018831689493346095, + "loss": 1.0748, + "step": 434 + }, + { + "epoch": 0.65, + "learning_rate": 0.00018826151897880728, + "loss": 1.0797, + "step": 435 + }, + { + "epoch": 0.65, + "learning_rate": 0.00018820602027948114, + "loss": 1.1068, + "step": 436 + }, + { + "epoch": 0.65, + "learning_rate": 0.00018815039891266418, + "loss": 1.081, + "step": 437 + }, + { + "epoch": 0.65, + "learning_rate": 0.0001880946549557086, + "loss": 1.0685, + "step": 438 + }, + { + "epoch": 0.65, + "learning_rate": 0.00018803878848613716, + "loss": 1.0916, + "step": 439 + }, + { + "epoch": 0.66, + "learning_rate": 0.00018798279958164295, + "loss": 1.115, + "step": 440 + }, + { + "epoch": 0.66, + "learning_rate": 0.00018792668832008936, + "loss": 1.0048, + "step": 441 + }, + { + "epoch": 0.66, + "learning_rate": 0.0001878704547795099, + "loss": 1.0386, + "step": 442 + }, + { + "epoch": 0.66, + "learning_rate": 0.00018781409903810821, + "loss": 1.0283, + "step": 443 + }, + { + "epoch": 0.66, + "learning_rate": 0.00018775762117425777, + "loss": 1.085, + "step": 444 + }, + { + "epoch": 0.66, + "learning_rate": 0.00018770102126650198, + "loss": 1.0582, + "step": 445 + }, + { + "epoch": 0.66, + "learning_rate": 0.00018764429939355392, + "loss": 1.0705, + "step": 446 + }, + { + "epoch": 0.67, + "learning_rate": 0.0001875874556342963, + "loss": 1.1426, + "step": 447 + }, + { + "epoch": 0.67, + "learning_rate": 0.00018753049006778132, + 
"loss": 1.0337, + "step": 448 + }, + { + "epoch": 0.67, + "learning_rate": 0.0001874734027732306, + "loss": 1.0993, + "step": 449 + }, + { + "epoch": 0.67, + "learning_rate": 0.00018741619383003507, + "loss": 1.0661, + "step": 450 + }, + { + "epoch": 0.67, + "learning_rate": 0.00018735886331775476, + "loss": 1.0564, + "step": 451 + }, + { + "epoch": 0.67, + "learning_rate": 0.00018730141131611882, + "loss": 1.0989, + "step": 452 + }, + { + "epoch": 0.68, + "learning_rate": 0.0001872438379050254, + "loss": 1.0984, + "step": 453 + }, + { + "epoch": 0.68, + "learning_rate": 0.00018718614316454133, + "loss": 1.1173, + "step": 454 + }, + { + "epoch": 0.68, + "learning_rate": 0.00018712832717490235, + "loss": 1.1005, + "step": 455 + }, + { + "epoch": 0.68, + "learning_rate": 0.00018707039001651277, + "loss": 1.0008, + "step": 456 + }, + { + "epoch": 0.68, + "learning_rate": 0.00018701233176994533, + "loss": 1.0701, + "step": 457 + }, + { + "epoch": 0.68, + "learning_rate": 0.00018695415251594123, + "loss": 1.0831, + "step": 458 + }, + { + "epoch": 0.68, + "learning_rate": 0.00018689585233541003, + "loss": 1.1165, + "step": 459 + }, + { + "epoch": 0.69, + "learning_rate": 0.00018683743130942928, + "loss": 1.0884, + "step": 460 + }, + { + "epoch": 0.69, + "learning_rate": 0.00018677888951924474, + "loss": 0.9882, + "step": 461 + }, + { + "epoch": 0.69, + "learning_rate": 0.00018672022704627002, + "loss": 1.086, + "step": 462 + }, + { + "epoch": 0.69, + "learning_rate": 0.00018666144397208668, + "loss": 1.0545, + "step": 463 + }, + { + "epoch": 0.69, + "learning_rate": 0.00018660254037844388, + "loss": 1.0274, + "step": 464 + }, + { + "epoch": 0.69, + "learning_rate": 0.0001865435163472584, + "loss": 1.0795, + "step": 465 + }, + { + "epoch": 0.69, + "learning_rate": 0.00018648437196061462, + "loss": 1.022, + "step": 466 + }, + { + "epoch": 0.7, + "learning_rate": 0.0001864251073007642, + "loss": 1.0717, + "step": 467 + }, + { + "epoch": 0.7, + "learning_rate": 
0.00018636572245012606, + "loss": 1.1501, + "step": 468 + }, + { + "epoch": 0.7, + "learning_rate": 0.0001863062174912863, + "loss": 1.1034, + "step": 469 + }, + { + "epoch": 0.7, + "learning_rate": 0.00018624659250699805, + "loss": 1.0784, + "step": 470 + }, + { + "epoch": 0.7, + "learning_rate": 0.00018618684758018136, + "loss": 1.1274, + "step": 471 + }, + { + "epoch": 0.7, + "learning_rate": 0.0001861269827939231, + "loss": 1.0643, + "step": 472 + }, + { + "epoch": 0.7, + "learning_rate": 0.00018606699823147676, + "loss": 1.1394, + "step": 473 + }, + { + "epoch": 0.71, + "learning_rate": 0.00018600689397626246, + "loss": 0.9665, + "step": 474 + }, + { + "epoch": 0.71, + "learning_rate": 0.00018594667011186678, + "loss": 1.058, + "step": 475 + }, + { + "epoch": 0.71, + "learning_rate": 0.00018588632672204264, + "loss": 1.0706, + "step": 476 + }, + { + "epoch": 0.71, + "learning_rate": 0.0001858258638907091, + "loss": 1.0414, + "step": 477 + }, + { + "epoch": 0.71, + "learning_rate": 0.00018576528170195146, + "loss": 1.1, + "step": 478 + }, + { + "epoch": 0.71, + "learning_rate": 0.00018570458024002093, + "loss": 1.1114, + "step": 479 + }, + { + "epoch": 0.72, + "learning_rate": 0.00018564375958933459, + "loss": 1.0596, + "step": 480 + }, + { + "epoch": 0.72, + "learning_rate": 0.0001855828198344753, + "loss": 1.0897, + "step": 481 + }, + { + "epoch": 0.72, + "learning_rate": 0.00018552176106019155, + "loss": 1.0316, + "step": 482 + }, + { + "epoch": 0.72, + "learning_rate": 0.00018546058335139733, + "loss": 1.0516, + "step": 483 + }, + { + "epoch": 0.72, + "learning_rate": 0.0001853992867931721, + "loss": 1.0477, + "step": 484 + }, + { + "epoch": 0.72, + "learning_rate": 0.00018533787147076048, + "loss": 1.0432, + "step": 485 + }, + { + "epoch": 0.72, + "learning_rate": 0.00018527633746957234, + "loss": 1.0568, + "step": 486 + }, + { + "epoch": 0.73, + "learning_rate": 0.00018521468487518264, + "loss": 1.114, + "step": 487 + }, + { + "epoch": 0.73, + 
"learning_rate": 0.00018515291377333112, + "loss": 1.0664, + "step": 488 + }, + { + "epoch": 0.73, + "learning_rate": 0.0001850910242499225, + "loss": 1.0162, + "step": 489 + }, + { + "epoch": 0.73, + "learning_rate": 0.0001850290163910261, + "loss": 1.0829, + "step": 490 + }, + { + "epoch": 0.73, + "learning_rate": 0.00018496689028287572, + "loss": 1.1078, + "step": 491 + }, + { + "epoch": 0.73, + "learning_rate": 0.0001849046460118698, + "loss": 1.0533, + "step": 492 + }, + { + "epoch": 0.73, + "learning_rate": 0.00018484228366457095, + "loss": 1.0923, + "step": 493 + }, + { + "epoch": 0.74, + "learning_rate": 0.00018477980332770607, + "loss": 1.0516, + "step": 494 + }, + { + "epoch": 0.74, + "learning_rate": 0.00018471720508816614, + "loss": 0.9826, + "step": 495 + }, + { + "epoch": 0.74, + "learning_rate": 0.00018465448903300606, + "loss": 1.1581, + "step": 496 + }, + { + "epoch": 0.74, + "learning_rate": 0.0001845916552494446, + "loss": 1.1268, + "step": 497 + }, + { + "epoch": 0.74, + "learning_rate": 0.00018452870382486432, + "loss": 1.0483, + "step": 498 + }, + { + "epoch": 0.74, + "learning_rate": 0.00018446563484681127, + "loss": 1.1792, + "step": 499 + }, + { + "epoch": 0.75, + "learning_rate": 0.00018440244840299506, + "loss": 1.0918, + "step": 500 + }, + { + "epoch": 0.75, + "learning_rate": 0.0001843391445812886, + "loss": 0.9691, + "step": 501 + }, + { + "epoch": 0.75, + "learning_rate": 0.00018427572346972805, + "loss": 1.0581, + "step": 502 + }, + { + "epoch": 0.75, + "learning_rate": 0.0001842121851565128, + "loss": 1.0072, + "step": 503 + }, + { + "epoch": 0.75, + "learning_rate": 0.00018414852973000503, + "loss": 0.9686, + "step": 504 + }, + { + "epoch": 0.75, + "eval_loss": 1.0276715755462646, + "eval_runtime": 2.6054, + "eval_samples_per_second": 419.124, + "eval_steps_per_second": 26.483, + "step": 504 + }, + { + "epoch": 0.75, + "learning_rate": 0.00018408475727872995, + "loss": 1.1221, + "step": 505 + }, + { + "epoch": 0.75, + 
"learning_rate": 0.00018402086789137546, + "loss": 1.087, + "step": 506 + }, + { + "epoch": 0.76, + "learning_rate": 0.00018395686165679202, + "loss": 1.0599, + "step": 507 + }, + { + "epoch": 0.76, + "learning_rate": 0.00018389273866399275, + "loss": 1.1844, + "step": 508 + }, + { + "epoch": 0.76, + "learning_rate": 0.00018382849900215294, + "loss": 1.046, + "step": 509 + }, + { + "epoch": 0.76, + "learning_rate": 0.00018376414276061032, + "loss": 0.9691, + "step": 510 + }, + { + "epoch": 0.76, + "learning_rate": 0.00018369967002886464, + "loss": 1.0996, + "step": 511 + }, + { + "epoch": 0.76, + "learning_rate": 0.0001836350808965776, + "loss": 1.083, + "step": 512 + }, + { + "epoch": 0.76, + "learning_rate": 0.00018357037545357297, + "loss": 1.0371, + "step": 513 + }, + { + "epoch": 0.77, + "learning_rate": 0.00018350555378983608, + "loss": 1.018, + "step": 514 + }, + { + "epoch": 0.77, + "learning_rate": 0.00018344061599551398, + "loss": 1.095, + "step": 515 + }, + { + "epoch": 0.77, + "learning_rate": 0.00018337556216091517, + "loss": 1.0871, + "step": 516 + }, + { + "epoch": 0.77, + "learning_rate": 0.0001833103923765096, + "loss": 1.0774, + "step": 517 + }, + { + "epoch": 0.77, + "learning_rate": 0.00018324510673292842, + "loss": 1.0337, + "step": 518 + }, + { + "epoch": 0.77, + "learning_rate": 0.0001831797053209639, + "loss": 1.0059, + "step": 519 + }, + { + "epoch": 0.77, + "learning_rate": 0.00018311418823156936, + "loss": 1.0744, + "step": 520 + }, + { + "epoch": 0.78, + "learning_rate": 0.00018304855555585894, + "loss": 0.9732, + "step": 521 + }, + { + "epoch": 0.78, + "learning_rate": 0.00018298280738510752, + "loss": 1.1176, + "step": 522 + }, + { + "epoch": 0.78, + "learning_rate": 0.00018291694381075056, + "loss": 1.1485, + "step": 523 + }, + { + "epoch": 0.78, + "learning_rate": 0.00018285096492438424, + "loss": 1.1044, + "step": 524 + }, + { + "epoch": 0.78, + "learning_rate": 0.00018278487081776476, + "loss": 0.9812, + "step": 525 + }, + { + 
"epoch": 0.78, + "learning_rate": 0.00018271866158280884, + "loss": 1.0966, + "step": 526 + }, + { + "epoch": 0.79, + "learning_rate": 0.0001826523373115931, + "loss": 1.2406, + "step": 527 + }, + { + "epoch": 0.79, + "learning_rate": 0.0001825858980963543, + "loss": 1.0727, + "step": 528 + }, + { + "epoch": 0.79, + "learning_rate": 0.000182519344029489, + "loss": 0.9966, + "step": 529 + }, + { + "epoch": 0.79, + "learning_rate": 0.00018245267520355346, + "loss": 1.081, + "step": 530 + }, + { + "epoch": 0.79, + "learning_rate": 0.00018238589171126353, + "loss": 1.1104, + "step": 531 + }, + { + "epoch": 0.79, + "learning_rate": 0.00018231899364549455, + "loss": 1.0535, + "step": 532 + }, + { + "epoch": 0.79, + "learning_rate": 0.00018225198109928114, + "loss": 1.0801, + "step": 533 + }, + { + "epoch": 0.8, + "learning_rate": 0.00018218485416581726, + "loss": 1.0726, + "step": 534 + }, + { + "epoch": 0.8, + "learning_rate": 0.00018211761293845585, + "loss": 1.0923, + "step": 535 + }, + { + "epoch": 0.8, + "learning_rate": 0.00018205025751070875, + "loss": 1.0551, + "step": 536 + }, + { + "epoch": 0.8, + "learning_rate": 0.00018198278797624675, + "loss": 1.0495, + "step": 537 + }, + { + "epoch": 0.8, + "learning_rate": 0.0001819152044288992, + "loss": 1.0589, + "step": 538 + }, + { + "epoch": 0.8, + "learning_rate": 0.00018184750696265408, + "loss": 1.0487, + "step": 539 + }, + { + "epoch": 0.8, + "learning_rate": 0.0001817796956716578, + "loss": 1.0491, + "step": 540 + }, + { + "epoch": 0.81, + "learning_rate": 0.000181711770650215, + "loss": 1.0981, + "step": 541 + }, + { + "epoch": 0.81, + "learning_rate": 0.00018164373199278856, + "loss": 1.1706, + "step": 542 + }, + { + "epoch": 0.81, + "learning_rate": 0.0001815755797939994, + "loss": 1.1024, + "step": 543 + }, + { + "epoch": 0.81, + "learning_rate": 0.00018150731414862622, + "loss": 1.0488, + "step": 544 + }, + { + "epoch": 0.81, + "learning_rate": 0.00018143893515160564, + "loss": 1.165, + "step": 545 + }, + { 
+ "epoch": 0.81, + "learning_rate": 0.00018137044289803181, + "loss": 1.0346, + "step": 546 + }, + { + "epoch": 0.82, + "learning_rate": 0.00018130183748315645, + "loss": 1.1179, + "step": 547 + }, + { + "epoch": 0.82, + "learning_rate": 0.0001812331190023886, + "loss": 1.0027, + "step": 548 + }, + { + "epoch": 0.82, + "learning_rate": 0.00018116428755129459, + "loss": 1.1106, + "step": 549 + }, + { + "epoch": 0.82, + "learning_rate": 0.00018109534322559783, + "loss": 1.0479, + "step": 550 + }, + { + "epoch": 0.82, + "learning_rate": 0.00018102628612117865, + "loss": 1.0046, + "step": 551 + }, + { + "epoch": 0.82, + "learning_rate": 0.0001809571163340744, + "loss": 0.9883, + "step": 552 + }, + { + "epoch": 0.82, + "learning_rate": 0.00018088783396047893, + "loss": 1.1018, + "step": 553 + }, + { + "epoch": 0.83, + "learning_rate": 0.00018081843909674276, + "loss": 1.1389, + "step": 554 + }, + { + "epoch": 0.83, + "learning_rate": 0.00018074893183937283, + "loss": 1.0751, + "step": 555 + }, + { + "epoch": 0.83, + "learning_rate": 0.00018067931228503246, + "loss": 1.1475, + "step": 556 + }, + { + "epoch": 0.83, + "learning_rate": 0.00018060958053054096, + "loss": 1.0829, + "step": 557 + }, + { + "epoch": 0.83, + "learning_rate": 0.00018053973667287387, + "loss": 1.0272, + "step": 558 + }, + { + "epoch": 0.83, + "learning_rate": 0.00018046978080916252, + "loss": 1.0668, + "step": 559 + }, + { + "epoch": 0.83, + "learning_rate": 0.00018039971303669407, + "loss": 1.0988, + "step": 560 + }, + { + "epoch": 0.84, + "learning_rate": 0.00018032953345291123, + "loss": 1.0339, + "step": 561 + }, + { + "epoch": 0.84, + "learning_rate": 0.0001802592421554123, + "loss": 1.0654, + "step": 562 + }, + { + "epoch": 0.84, + "learning_rate": 0.00018018883924195085, + "loss": 1.0157, + "step": 563 + }, + { + "epoch": 0.84, + "learning_rate": 0.00018011832481043576, + "loss": 1.0738, + "step": 564 + }, + { + "epoch": 0.84, + "learning_rate": 0.0001800476989589309, + "loss": 1.0742, + 
"step": 565 + }, + { + "epoch": 0.84, + "learning_rate": 0.0001799769617856552, + "loss": 0.9861, + "step": 566 + }, + { + "epoch": 0.85, + "learning_rate": 0.0001799061133889823, + "loss": 1.0788, + "step": 567 + }, + { + "epoch": 0.85, + "learning_rate": 0.00017983515386744061, + "loss": 1.0539, + "step": 568 + }, + { + "epoch": 0.85, + "learning_rate": 0.00017976408331971298, + "loss": 1.0875, + "step": 569 + }, + { + "epoch": 0.85, + "learning_rate": 0.0001796929018446368, + "loss": 1.0765, + "step": 570 + }, + { + "epoch": 0.85, + "learning_rate": 0.00017962160954120354, + "loss": 1.1336, + "step": 571 + }, + { + "epoch": 0.85, + "learning_rate": 0.000179550206508559, + "loss": 0.9674, + "step": 572 + }, + { + "epoch": 0.85, + "learning_rate": 0.00017947869284600282, + "loss": 1.0607, + "step": 573 + }, + { + "epoch": 0.86, + "learning_rate": 0.0001794070686529886, + "loss": 0.9959, + "step": 574 + }, + { + "epoch": 0.86, + "learning_rate": 0.00017933533402912354, + "loss": 1.038, + "step": 575 + }, + { + "epoch": 0.86, + "learning_rate": 0.0001792634890741685, + "loss": 1.1342, + "step": 576 + }, + { + "epoch": 0.86, + "learning_rate": 0.00017919153388803774, + "loss": 1.0941, + "step": 577 + }, + { + "epoch": 0.86, + "learning_rate": 0.00017911946857079888, + "loss": 1.1286, + "step": 578 + }, + { + "epoch": 0.86, + "learning_rate": 0.00017904729322267256, + "loss": 1.0354, + "step": 579 + }, + { + "epoch": 0.86, + "learning_rate": 0.0001789750079440326, + "loss": 1.1314, + "step": 580 + }, + { + "epoch": 0.87, + "learning_rate": 0.00017890261283540562, + "loss": 1.0365, + "step": 581 + }, + { + "epoch": 0.87, + "learning_rate": 0.00017883010799747099, + "loss": 1.091, + "step": 582 + }, + { + "epoch": 0.87, + "learning_rate": 0.00017875749353106062, + "loss": 0.9995, + "step": 583 + }, + { + "epoch": 0.87, + "learning_rate": 0.000178684769537159, + "loss": 1.0435, + "step": 584 + }, + { + "epoch": 0.87, + "learning_rate": 0.00017861193611690287, + "loss": 
1.0555, + "step": 585 + }, + { + "epoch": 0.87, + "learning_rate": 0.00017853899337158112, + "loss": 1.0637, + "step": 586 + }, + { + "epoch": 0.87, + "learning_rate": 0.00017846594140263474, + "loss": 1.064, + "step": 587 + }, + { + "epoch": 0.88, + "learning_rate": 0.00017839278031165658, + "loss": 0.9879, + "step": 588 + }, + { + "epoch": 0.88, + "learning_rate": 0.00017831951020039126, + "loss": 1.0846, + "step": 589 + }, + { + "epoch": 0.88, + "learning_rate": 0.000178246131170735, + "loss": 1.0373, + "step": 590 + }, + { + "epoch": 0.88, + "learning_rate": 0.00017817264332473546, + "loss": 1.0377, + "step": 591 + }, + { + "epoch": 0.88, + "learning_rate": 0.00017809904676459177, + "loss": 1.0932, + "step": 592 + }, + { + "epoch": 0.88, + "learning_rate": 0.00017802534159265404, + "loss": 1.085, + "step": 593 + }, + { + "epoch": 0.89, + "learning_rate": 0.0001779515279114236, + "loss": 1.0975, + "step": 594 + }, + { + "epoch": 0.89, + "learning_rate": 0.0001778776058235526, + "loss": 1.1283, + "step": 595 + }, + { + "epoch": 0.89, + "learning_rate": 0.00017780357543184397, + "loss": 1.0652, + "step": 596 + }, + { + "epoch": 0.89, + "learning_rate": 0.00017772943683925122, + "loss": 1.0336, + "step": 597 + }, + { + "epoch": 0.89, + "learning_rate": 0.00017765519014887842, + "loss": 0.9761, + "step": 598 + }, + { + "epoch": 0.89, + "learning_rate": 0.0001775808354639799, + "loss": 1.0688, + "step": 599 + }, + { + "epoch": 0.89, + "learning_rate": 0.00017750637288796016, + "loss": 1.1031, + "step": 600 + }, + { + "epoch": 0.9, + "learning_rate": 0.00017743180252437383, + "loss": 1.083, + "step": 601 + }, + { + "epoch": 0.9, + "learning_rate": 0.00017735712447692538, + "loss": 1.1612, + "step": 602 + }, + { + "epoch": 0.9, + "learning_rate": 0.00017728233884946903, + "loss": 1.1618, + "step": 603 + }, + { + "epoch": 0.9, + "learning_rate": 0.00017720744574600863, + "loss": 1.144, + "step": 604 + }, + { + "epoch": 0.9, + "learning_rate": 0.0001771324452706975, + 
"loss": 1.1174, + "step": 605 + }, + { + "epoch": 0.9, + "learning_rate": 0.00017705733752783825, + "loss": 0.9728, + "step": 606 + }, + { + "epoch": 0.9, + "learning_rate": 0.0001769821226218827, + "loss": 1.0599, + "step": 607 + }, + { + "epoch": 0.91, + "learning_rate": 0.0001769068006574317, + "loss": 1.0639, + "step": 608 + }, + { + "epoch": 0.91, + "learning_rate": 0.00017683137173923495, + "loss": 1.1278, + "step": 609 + }, + { + "epoch": 0.91, + "learning_rate": 0.00017675583597219095, + "loss": 0.9925, + "step": 610 + }, + { + "epoch": 0.91, + "learning_rate": 0.0001766801934613467, + "loss": 1.0457, + "step": 611 + }, + { + "epoch": 0.91, + "learning_rate": 0.0001766044443118978, + "loss": 1.0348, + "step": 612 + }, + { + "epoch": 0.91, + "learning_rate": 0.000176528588629188, + "loss": 1.022, + "step": 613 + }, + { + "epoch": 0.92, + "learning_rate": 0.00017645262651870926, + "loss": 1.0027, + "step": 614 + }, + { + "epoch": 0.92, + "learning_rate": 0.00017637655808610156, + "loss": 1.0491, + "step": 615 + }, + { + "epoch": 0.92, + "learning_rate": 0.00017630038343715275, + "loss": 1.0413, + "step": 616 + }, + { + "epoch": 0.92, + "learning_rate": 0.00017622410267779834, + "loss": 1.0358, + "step": 617 + }, + { + "epoch": 0.92, + "learning_rate": 0.00017614771591412148, + "loss": 1.1125, + "step": 618 + }, + { + "epoch": 0.92, + "learning_rate": 0.00017607122325235267, + "loss": 1.1185, + "step": 619 + }, + { + "epoch": 0.92, + "learning_rate": 0.00017599462479886974, + "loss": 1.0738, + "step": 620 + }, + { + "epoch": 0.93, + "learning_rate": 0.00017591792066019765, + "loss": 1.102, + "step": 621 + }, + { + "epoch": 0.93, + "learning_rate": 0.00017584111094300827, + "loss": 1.065, + "step": 622 + }, + { + "epoch": 0.93, + "learning_rate": 0.0001757641957541203, + "loss": 1.0514, + "step": 623 + }, + { + "epoch": 0.93, + "learning_rate": 0.0001756871752004992, + "loss": 1.0396, + "step": 624 + }, + { + "epoch": 0.93, + "learning_rate": 
0.00017561004938925688, + "loss": 1.1027, + "step": 625 + }, + { + "epoch": 0.93, + "learning_rate": 0.00017553281842765169, + "loss": 1.0223, + "step": 626 + }, + { + "epoch": 0.93, + "learning_rate": 0.00017545548242308816, + "loss": 1.1793, + "step": 627 + }, + { + "epoch": 0.94, + "learning_rate": 0.00017537804148311695, + "loss": 1.0642, + "step": 628 + }, + { + "epoch": 0.94, + "learning_rate": 0.00017530049571543464, + "loss": 1.0682, + "step": 629 + }, + { + "epoch": 0.94, + "learning_rate": 0.00017522284522788353, + "loss": 1.0476, + "step": 630 + }, + { + "epoch": 0.94, + "learning_rate": 0.00017514509012845164, + "loss": 1.1064, + "step": 631 + }, + { + "epoch": 0.94, + "learning_rate": 0.00017506723052527242, + "loss": 1.0258, + "step": 632 + }, + { + "epoch": 0.94, + "learning_rate": 0.00017498926652662476, + "loss": 1.1954, + "step": 633 + }, + { + "epoch": 0.94, + "learning_rate": 0.0001749111982409325, + "loss": 1.0637, + "step": 634 + }, + { + "epoch": 0.95, + "learning_rate": 0.00017483302577676475, + "loss": 0.9685, + "step": 635 + }, + { + "epoch": 0.95, + "learning_rate": 0.00017475474924283536, + "loss": 1.0465, + "step": 636 + }, + { + "epoch": 0.95, + "learning_rate": 0.000174676368748003, + "loss": 1.0161, + "step": 637 + }, + { + "epoch": 0.95, + "learning_rate": 0.00017459788440127083, + "loss": 1.0479, + "step": 638 + }, + { + "epoch": 0.95, + "learning_rate": 0.00017451929631178648, + "loss": 1.1166, + "step": 639 + }, + { + "epoch": 0.95, + "learning_rate": 0.0001744406045888419, + "loss": 1.0634, + "step": 640 + }, + { + "epoch": 0.96, + "learning_rate": 0.00017436180934187308, + "loss": 1.0826, + "step": 641 + }, + { + "epoch": 0.96, + "learning_rate": 0.00017428291068046, + "loss": 1.07, + "step": 642 + }, + { + "epoch": 0.96, + "learning_rate": 0.00017420390871432647, + "loss": 1.1167, + "step": 643 + }, + { + "epoch": 0.96, + "learning_rate": 0.00017412480355334005, + "loss": 1.0347, + "step": 644 + }, + { + "epoch": 0.96, + 
"learning_rate": 0.00017404559530751162, + "loss": 1.0393, + "step": 645 + }, + { + "epoch": 0.96, + "learning_rate": 0.00017396628408699555, + "loss": 1.1108, + "step": 646 + }, + { + "epoch": 0.96, + "learning_rate": 0.00017388687000208946, + "loss": 1.006, + "step": 647 + }, + { + "epoch": 0.97, + "learning_rate": 0.0001738073531632339, + "loss": 1.0932, + "step": 648 + }, + { + "epoch": 0.97, + "learning_rate": 0.0001737277336810124, + "loss": 1.0123, + "step": 649 + }, + { + "epoch": 0.97, + "learning_rate": 0.00017364801166615124, + "loss": 1.1273, + "step": 650 + }, + { + "epoch": 0.97, + "learning_rate": 0.0001735681872295192, + "loss": 0.9893, + "step": 651 + }, + { + "epoch": 0.97, + "learning_rate": 0.0001734882604821276, + "loss": 1.0699, + "step": 652 + }, + { + "epoch": 0.97, + "learning_rate": 0.00017340823153513002, + "loss": 1.0901, + "step": 653 + }, + { + "epoch": 0.97, + "learning_rate": 0.00017332810049982208, + "loss": 1.0212, + "step": 654 + }, + { + "epoch": 0.98, + "learning_rate": 0.00017324786748764155, + "loss": 0.9898, + "step": 655 + }, + { + "epoch": 0.98, + "learning_rate": 0.00017316753261016783, + "loss": 1.0899, + "step": 656 + }, + { + "epoch": 0.98, + "learning_rate": 0.00017308709597912213, + "loss": 1.085, + "step": 657 + }, + { + "epoch": 0.98, + "learning_rate": 0.00017300655770636708, + "loss": 1.091, + "step": 658 + }, + { + "epoch": 0.98, + "learning_rate": 0.00017292591790390665, + "loss": 1.0502, + "step": 659 + }, + { + "epoch": 0.98, + "learning_rate": 0.0001728451766838861, + "loss": 1.2131, + "step": 660 + }, + { + "epoch": 0.99, + "learning_rate": 0.00017276433415859167, + "loss": 1.1256, + "step": 661 + }, + { + "epoch": 0.99, + "learning_rate": 0.00017268339044045042, + "loss": 1.0577, + "step": 662 + }, + { + "epoch": 0.99, + "learning_rate": 0.00017260234564203032, + "loss": 1.0012, + "step": 663 + }, + { + "epoch": 0.99, + "learning_rate": 0.00017252119987603973, + "loss": 1.0611, + "step": 664 + }, + { + 
"epoch": 0.99, + "learning_rate": 0.00017243995325532755, + "loss": 1.1251, + "step": 665 + }, + { + "epoch": 0.99, + "learning_rate": 0.00017235860589288277, + "loss": 1.0959, + "step": 666 + }, + { + "epoch": 0.99, + "learning_rate": 0.0001722771579018347, + "loss": 1.1413, + "step": 667 + }, + { + "epoch": 1.0, + "learning_rate": 0.00017219560939545246, + "loss": 1.0728, + "step": 668 + }, + { + "epoch": 1.0, + "learning_rate": 0.00017211396048714498, + "loss": 1.0461, + "step": 669 + }, + { + "epoch": 1.0, + "learning_rate": 0.0001720322112904608, + "loss": 1.1084, + "step": 670 + }, + { + "epoch": 1.0, + "learning_rate": 0.00017195036191908797, + "loss": 1.1316, + "step": 671 + } + ], + "logging_steps": 1, + "max_steps": 2684, + "num_input_tokens_seen": 0, + "num_train_epochs": 4, + "save_steps": 671, + "total_flos": 2.085255620067328e+17, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-671/training_args.bin b/checkpoint-671/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..439fe237329d4c6dab9a083d1f0b3c5d2e07ff34 --- /dev/null +++ b/checkpoint-671/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c9f074b3cd0fbc5cecae753dfd6c83754f9e22c6bc7af03db47b3beb5a1a41c9 +size 4923 diff --git a/config.json b/config.json new file mode 100644 index 0000000000000000000000000000000000000000..333839b0c57e8cb49fa6576df86f04b3499a2afa --- /dev/null +++ b/config.json @@ -0,0 +1,43 @@ +{ + "_name_or_path": "openlm-research/open_llama_3b_v2", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 1, + "eos_token_id": 2, + "hidden_act": "silu", + "hidden_size": 3200, + "initializer_range": 0.02, + "intermediate_size": 8640, + "max_position_embeddings": 2048, + "model_type": "llama", + "num_attention_heads": 32, + "num_hidden_layers": 26, + "num_key_value_heads": 32, + "pad_token_id": 0, + 
"pretraining_tp": 1, + "quantization_config": { + "_load_in_4bit": false, + "_load_in_8bit": true, + "bnb_4bit_compute_dtype": "float32", + "bnb_4bit_quant_type": "fp4", + "bnb_4bit_use_double_quant": false, + "llm_int8_enable_fp32_cpu_offload": false, + "llm_int8_has_fp16_weight": false, + "llm_int8_skip_modules": null, + "llm_int8_threshold": 6.0, + "load_in_4bit": false, + "load_in_8bit": true, + "quant_method": "bitsandbytes" + }, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 10000.0, + "tie_word_embeddings": false, + "torch_dtype": "float16", + "transformers_version": "4.38.0.dev0", + "use_cache": false, + "vocab_size": 32000 +} diff --git a/runs/Feb14_01-57-29_2361a7421646/events.out.tfevents.1707875851.2361a7421646.182.0 b/runs/Feb14_01-57-29_2361a7421646/events.out.tfevents.1707875851.2361a7421646.182.0 new file mode 100644 index 0000000000000000000000000000000000000000..d6e21d8be80bc00940a6b3604fff70dc2f1dd361 --- /dev/null +++ b/runs/Feb14_01-57-29_2361a7421646/events.out.tfevents.1707875851.2361a7421646.182.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:99334ac2a6418de149755cd134e4379cb634820dae7dfb3705354e4bb6b925cf +size 431282 diff --git a/special_tokens_map.json b/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..72ecfeeb7e14d244c936169d2ed139eeae235ef1 --- /dev/null +++ b/special_tokens_map.json @@ -0,0 +1,24 @@ +{ + "bos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": "", + "unk_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/tokenizer.model b/tokenizer.model new file mode 100644 index 0000000000000000000000000000000000000000..98866ff8ae3631f331c57923c921a0c9ad22b97d --- 
/dev/null +++ b/tokenizer.model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:91b289e85fa20fd375d8b33dc12f77616f18abc6359804471d1fafcb425fecb8 +size 511574 diff --git a/tokenizer_config.json b/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..96b32e224dd9cdac495897500d0f9ac70dbb2721 --- /dev/null +++ b/tokenizer_config.json @@ -0,0 +1,43 @@ +{ + "add_bos_token": true, + "add_eos_token": false, + "added_tokens_decoder": { + "0": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "", + "clean_up_tokenization_spaces": false, + "eos_token": "", + "legacy": true, + "model_max_length": 2048, + "pad_token": "", + "sp_model_kwargs": {}, + "spaces_between_special_tokens": false, + "tokenizer_class": "LlamaTokenizer", + "trust_remote_code": false, + "unk_token": "", + "use_default_system_prompt": false, + "use_fast": true +}