diff --git a/.ipynb_checkpoints/README-checkpoint.md b/.ipynb_checkpoints/README-checkpoint.md new file mode 100644 index 0000000000000000000000000000000000000000..53c0fe6b436905ad7a2c060e98fe1ebdeac1a2fe --- /dev/null +++ b/.ipynb_checkpoints/README-checkpoint.md @@ -0,0 +1,15 @@ +--- +library_name: peft +--- +## Training procedure + + +The following `bitsandbytes` quantization config was used during training: +- load_in_8bit: True +- llm_int8_threshold: 6.0 +- llm_int8_skip_modules: None +- llm_int8_enable_fp32_cpu_offload: False +### Framework versions + + +- PEFT 0.5.0 diff --git a/.ipynb_checkpoints/trainer_log-checkpoint.jsonl b/.ipynb_checkpoints/trainer_log-checkpoint.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..7c1cf11720b670aa3a2f609ea9619c5cf328c292 --- /dev/null +++ b/.ipynb_checkpoints/trainer_log-checkpoint.jsonl @@ -0,0 +1,2 @@ +{"current_steps": 5, "total_steps": 5424, "loss": 1.6349, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.9999895164082156e-05, "epoch": 0.0, "percentage": 0.09, "elapsed_time": "0:00:53", "remaining_time": "16:02:24"} +{"current_steps": 10, "total_steps": 5424, "loss": 1.6199, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.999958065720787e-05, "epoch": 0.01, "percentage": 0.18, "elapsed_time": "0:01:45", "remaining_time": "15:54:15"} diff --git a/.ipynb_checkpoints/training_loss-checkpoint.png b/.ipynb_checkpoints/training_loss-checkpoint.png new file mode 100644 index 0000000000000000000000000000000000000000..b0c3594d36fb66a7b04e81abea0c49e55cbd0a73 Binary files /dev/null and b/.ipynb_checkpoints/training_loss-checkpoint.png differ diff --git a/README.md b/README.md new file mode 100644 index 0000000000000000000000000000000000000000..53c0fe6b436905ad7a2c060e98fe1ebdeac1a2fe --- /dev/null +++ b/README.md @@ -0,0 +1,15 @@ +--- +library_name: peft +--- +## Training procedure + + +The following `bitsandbytes` quantization config was used during training: +- load_in_8bit: True +- llm_int8_threshold: 6.0 +- llm_int8_skip_modules: None +- llm_int8_enable_fp32_cpu_offload: False +### Framework versions + + +- PEFT 0.5.0 diff --git a/adapter_config.json b/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..514ae99ac7471cc26016f7575e44dd8304f29b07 --- /dev/null +++ b/adapter_config.json @@ -0,0 +1,21 @@ +{ + "auto_mapping": null, + "base_model_name_or_path": "meta-llama/Llama-2-7b-chat-hf", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "q_proj", + "v_proj" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/adapter_model.bin b/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..a553ab7add3370f03e27b2e5cd4290e7ae645d28 --- /dev/null +++ b/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b1a1ffddcaa42c5b32d83acae8d4ed797725d1955445bb62cfe0a87da0be3a0e +size 16821197 diff --git a/all_results.json b/all_results.json new file mode 100644 index 0000000000000000000000000000000000000000..2d24e725cd885c4d901f65601add6860bc1db71a --- /dev/null +++ b/all_results.json @@ -0,0 +1,7 @@ +{ + "epoch": 3.0, + "train_loss": 0.9648243357649947, + "train_runtime": 56978.2271, + "train_samples_per_second": 1.523, + "train_steps_per_second": 0.095 +} \ No newline at end of file diff --git a/checkpoint-100/README.md b/checkpoint-100/README.md new file mode 100644 index 0000000000000000000000000000000000000000..53c0fe6b436905ad7a2c060e98fe1ebdeac1a2fe --- /dev/null +++ b/checkpoint-100/README.md @@ -0,0 +1,15 @@ +--- +library_name: peft +--- +## Training procedure + + +The following `bitsandbytes` quantization config was used during training: +- load_in_8bit: True +- llm_int8_threshold: 6.0 +- llm_int8_skip_modules: None +- llm_int8_enable_fp32_cpu_offload: False +### Framework versions + + +- PEFT 0.5.0 diff --git a/checkpoint-100/adapter_config.json b/checkpoint-100/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..514ae99ac7471cc26016f7575e44dd8304f29b07 --- /dev/null +++ b/checkpoint-100/adapter_config.json @@ -0,0 +1,21 @@ +{ + "auto_mapping": null, + "base_model_name_or_path": "meta-llama/Llama-2-7b-chat-hf", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "q_proj", + "v_proj" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-100/adapter_model.bin b/checkpoint-100/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..bd033742f7126afcc09170b479c1e37e2f19bd27 --- /dev/null +++ b/checkpoint-100/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ea01c09ff608b5c7099cffa399a08b3450a36b101137f99df08de90dc686a292 +size 16821197 diff --git a/checkpoint-100/finetuning_args.json b/checkpoint-100/finetuning_args.json new file mode 100644 index 0000000000000000000000000000000000000000..d01efc206b59c6f88548e8f3940579f2ed2af33b --- /dev/null +++ b/checkpoint-100/finetuning_args.json @@ -0,0 +1,16 @@ +{ + "dpo_beta": 0.1, + "finetuning_type": "lora", + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "lora_rank": 8, + "lora_target": [ + "q_proj", + "v_proj" + ], + "name_module_trainable": "mlp", + "num_hidden_layers": 32, + "num_layer_trainable": 3, + "ppo_score_norm": false, + "resume_lora_training": true +} diff --git a/checkpoint-100/optimizer.pt b/checkpoint-100/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..4f43793406eebca76bcfb92d7d969db9f44a9a83 --- /dev/null +++ b/checkpoint-100/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c95c01320b9f42048cfa27aa398a98f285e7e8c6bcac04dac33ce2792a03e716 +size 33629765 diff --git a/checkpoint-100/rng_state.pth b/checkpoint-100/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..95eb435cf35c5bfbd6d25e4b25aabc25edc04999 --- /dev/null +++ b/checkpoint-100/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:456575f7c97fbfb5063cd944a34e372bd839e2035bcd8de1a9f57cee31d0ed2b +size 14575 diff --git a/checkpoint-100/scaler.pt b/checkpoint-100/scaler.pt new file mode 100644 index 0000000000000000000000000000000000000000..1e00f588d3f0176a99d362447a49f57ff6e1b1ad --- /dev/null +++ b/checkpoint-100/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cfa44e8523f62833816d29aa6c576eaa7783e3bbdb3e132e248b1d8aaee6132b +size 557 diff --git a/checkpoint-100/scheduler.pt b/checkpoint-100/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..889cd67f3358f3408c118fcce00ddda035a92f65 --- /dev/null +++ b/checkpoint-100/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f315341427af6a189cf3f47ec337dfddc9476659f210947cad5a0049c84cec8b +size 627 diff --git a/checkpoint-100/trainer_state.json b/checkpoint-100/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..93accdb98f1b788a5db669fd53da07ba3fd8a3b1 --- /dev/null +++ b/checkpoint-100/trainer_state.json @@ -0,0 +1,136 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.055294442908487694, + "global_step": 100, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.9999895164082156e-05, + "loss": 1.6349, + "step": 5 + }, + { + "epoch": 0.01, + "learning_rate": 4.999958065720787e-05, + "loss": 1.6199, + "step": 10 + }, + { + "epoch": 0.01, + "learning_rate": 4.999905648201487e-05, + "loss": 1.4834, + "step": 15 + }, + { + "epoch": 0.01, + "learning_rate": 4.999832264289934e-05, + "loss": 1.3882, + "step": 20 + }, + { + "epoch": 0.01, + "learning_rate": 4.999737914601591e-05, + "loss": 1.3679, + "step": 25 + }, + { + "epoch": 0.02, + "learning_rate": 4.999622599927756e-05, + "loss": 1.2396, + "step": 30 + }, + { + "epoch": 0.02, + "learning_rate": 4.999486321235559e-05, + "loss": 1.321, + "step": 35 + }, + { + "epoch": 0.02, + "learning_rate": 4.9993290796679516e-05, + "loss": 1.2874, + "step": 40 + }, + { + "epoch": 0.02, + "learning_rate": 4.999150876543699e-05, + "loss": 1.2607, + "step": 45 + }, + { + "epoch": 0.03, + "learning_rate": 4.9989517133573694e-05, + "loss": 1.2454, + "step": 50 + }, + { + "epoch": 0.03, + "learning_rate": 4.9987315917793174e-05, + "loss": 1.2799, + "step": 55 + }, + { + "epoch": 0.03, + "learning_rate": 4.998490513655676e-05, + "loss": 1.2575, + "step": 60 + }, + { + "epoch": 0.04, + "learning_rate": 4.998228481008337e-05, + "loss": 1.2404, + "step": 65 + }, + { + "epoch": 0.04, + "learning_rate": 4.997945496034934e-05, + "loss": 1.2219, + "step": 70 + }, + { + "epoch": 0.04, + "learning_rate": 4.9976415611088267e-05, + "loss": 1.2241, + "step": 75 + }, + { + "epoch": 0.04, + "learning_rate": 4.997316678779079e-05, + "loss": 1.1716, + "step": 80 + }, + { + "epoch": 0.05, + "learning_rate": 4.996970851770438e-05, + "loss": 1.1883, + "step": 85 + }, + { + "epoch": 0.05, + "learning_rate": 4.9966040829833115e-05, + "loss": 1.205, + "step": 90 + }, + { + "epoch": 0.05, + "learning_rate": 4.9962163754937426e-05, + "loss": 1.1246, + "step": 95 + }, + { + "epoch": 0.06, + "learning_rate": 4.995807732553384e-05, + "loss": 1.1636, + "step": 100 + } + ], + "max_steps": 5424, + "num_train_epochs": 3, + "total_flos": 3.2497031184384e+16, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-100/training_args.bin b/checkpoint-100/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..77433459daca6603950afc463ff989e817f81ac3 --- /dev/null +++ b/checkpoint-100/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0f6a8853599a52b1f15984d75ce4b6434c335fdc67d4509d0b2ca0d8c613590a +size 3362 diff --git a/checkpoint-1000/README.md b/checkpoint-1000/README.md new file mode 100644 index 0000000000000000000000000000000000000000..53c0fe6b436905ad7a2c060e98fe1ebdeac1a2fe --- /dev/null +++ b/checkpoint-1000/README.md @@ -0,0 +1,15 @@ +--- +library_name: peft +--- +## Training procedure + + +The following `bitsandbytes` quantization config was used during training: +- load_in_8bit: True +- llm_int8_threshold: 6.0 +- llm_int8_skip_modules: None +- llm_int8_enable_fp32_cpu_offload: False +### Framework versions + + +- PEFT 0.5.0 diff --git a/checkpoint-1000/adapter_config.json b/checkpoint-1000/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..514ae99ac7471cc26016f7575e44dd8304f29b07 --- /dev/null +++ b/checkpoint-1000/adapter_config.json @@ -0,0 +1,21 @@ +{ + "auto_mapping": null, + "base_model_name_or_path": "meta-llama/Llama-2-7b-chat-hf", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "q_proj", + "v_proj" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-1000/adapter_model.bin b/checkpoint-1000/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..6e0abf1c3d221af72a6f2979b93884fe859af473 --- /dev/null +++ b/checkpoint-1000/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d25e10111b464093549a11f134a59c4ac86510a9f7e3bb4d2127534696bfeaeb +size 16821197 diff --git a/checkpoint-1000/finetuning_args.json b/checkpoint-1000/finetuning_args.json new file mode 100644 index 0000000000000000000000000000000000000000..d01efc206b59c6f88548e8f3940579f2ed2af33b --- /dev/null +++ b/checkpoint-1000/finetuning_args.json @@ -0,0 +1,16 @@ +{ + "dpo_beta": 0.1, + "finetuning_type": "lora", + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "lora_rank": 8, + "lora_target": [ + "q_proj", + "v_proj" + ], + "name_module_trainable": "mlp", + "num_hidden_layers": 32, + "num_layer_trainable": 3, + "ppo_score_norm": false, + "resume_lora_training": true +} diff --git a/checkpoint-1000/optimizer.pt b/checkpoint-1000/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..bf2fc03f5a8fdc6b6348b3ddaa08a629166477b7 --- /dev/null +++ b/checkpoint-1000/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b5121f5f1c38b5973d2409a8547b2d15e2186030c767c45fb3962d1ba9a69c54 +size 33629893 diff --git a/checkpoint-1000/rng_state.pth b/checkpoint-1000/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..94d1e6d53ddd5d8b14d5ad45874232a5a8316283 --- /dev/null +++ b/checkpoint-1000/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:73090c0dec8be67d9b176c7292edb88b14dcff706fff6020d9d9b3c02368ca19 +size 14575 diff --git a/checkpoint-1000/scaler.pt b/checkpoint-1000/scaler.pt new file mode 100644 index 0000000000000000000000000000000000000000..efdbd3c795f6b0d4144e68355e99c220ccdedd09 --- /dev/null +++ b/checkpoint-1000/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:68cff80b680ddf6e7abbef98b5f336b97f9b5963e2209307f639383870e8cc71 +size 557 diff --git a/checkpoint-1000/scheduler.pt b/checkpoint-1000/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..c5a3d196ba757a0825ad02f70ce0915a4dbe9901 --- /dev/null +++ b/checkpoint-1000/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:866e16a03b3e8054e079edfb5555569d68a0ca587e1d2a7c9fddcbdda27f69eb +size 627 diff --git a/checkpoint-1000/trainer_state.json b/checkpoint-1000/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..8785ed56e625867432a7294f9a2d7d5c67d3e164 --- /dev/null +++ b/checkpoint-1000/trainer_state.json @@ -0,0 +1,1216 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.552944429084877, + "global_step": 1000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.9999895164082156e-05, + "loss": 1.6349, + "step": 5 + }, + { + "epoch": 0.01, + "learning_rate": 4.999958065720787e-05, + "loss": 1.6199, + "step": 10 + }, + { + "epoch": 0.01, + "learning_rate": 4.999905648201487e-05, + "loss": 1.4834, + "step": 15 + }, + { + "epoch": 0.01, + "learning_rate": 4.999832264289934e-05, + "loss": 1.3882, + "step": 20 + }, + { + "epoch": 0.01, + "learning_rate": 4.999737914601591e-05, + "loss": 1.3679, + "step": 25 + }, + { + "epoch": 0.02, + "learning_rate": 4.999622599927756e-05, + "loss": 1.2396, + "step": 30 + }, + { + "epoch": 0.02, + "learning_rate": 4.999486321235559e-05, + "loss": 1.321, + "step": 35 + }, + { + "epoch": 0.02, + "learning_rate": 4.9993290796679516e-05, + "loss": 1.2874, + "step": 40 + }, + { + "epoch": 0.02, + "learning_rate": 4.999150876543699e-05, + "loss": 1.2607, + "step": 45 + }, + { + "epoch": 0.03, + "learning_rate": 4.9989517133573694e-05, + "loss": 1.2454, + "step": 50 + }, + { + "epoch": 0.03, + "learning_rate": 4.9987315917793174e-05, + "loss": 1.2799, + "step": 55 + }, + { + "epoch": 0.03, + "learning_rate": 4.998490513655676e-05, + "loss": 1.2575, + "step": 60 + }, + { + "epoch": 0.04, + "learning_rate": 4.998228481008337e-05, + "loss": 1.2404, + "step": 65 + }, + { + "epoch": 0.04, + "learning_rate": 4.997945496034934e-05, + "loss": 1.2219, + "step": 70 + }, + { + "epoch": 0.04, + "learning_rate": 4.9976415611088267e-05, + "loss": 1.2241, + "step": 75 + }, + { + "epoch": 0.04, + "learning_rate": 4.997316678779079e-05, + "loss": 1.1716, + "step": 80 + }, + { + "epoch": 0.05, + "learning_rate": 4.996970851770438e-05, + "loss": 1.1883, + "step": 85 + }, + { + "epoch": 0.05, + "learning_rate": 4.9966040829833115e-05, + "loss": 1.205, + "step": 90 + }, + { + "epoch": 0.05, + "learning_rate": 4.9962163754937426e-05, + "loss": 1.1246, + "step": 95 + }, + { + "epoch": 0.06, + "learning_rate": 4.995807732553384e-05, + "loss": 1.1636, + "step": 100 + }, + { + "epoch": 0.06, + "learning_rate": 4.9953781575894723e-05, + "loss": 1.158, + "step": 105 + }, + { + "epoch": 0.06, + "learning_rate": 4.9949276542048e-05, + "loss": 1.1477, + "step": 110 + }, + { + "epoch": 0.06, + "learning_rate": 4.9944562261776805e-05, + "loss": 1.1678, + "step": 115 + }, + { + "epoch": 0.07, + "learning_rate": 4.9939638774619216e-05, + "loss": 1.1501, + "step": 120 + }, + { + "epoch": 0.07, + "learning_rate": 4.99345061218679e-05, + "loss": 1.1955, + "step": 125 + }, + { + "epoch": 0.07, + "learning_rate": 4.9929164346569756e-05, + "loss": 1.1724, + "step": 130 + }, + { + "epoch": 0.07, + "learning_rate": 4.9923613493525576e-05, + "loss": 1.177, + "step": 135 + }, + { + "epoch": 0.08, + "learning_rate": 4.991785360928968e-05, + "loss": 1.1418, + "step": 140 + }, + { + "epoch": 0.08, + "learning_rate": 4.991188474216947e-05, + "loss": 1.1898, + "step": 145 + }, + { + "epoch": 0.08, + "learning_rate": 4.9905706942225094e-05, + "loss": 1.1479, + "step": 150 + }, + { + "epoch": 0.09, + "learning_rate": 4.9899320261268966e-05, + "loss": 1.1356, + "step": 155 + }, + { + "epoch": 0.09, + "learning_rate": 4.989272475286537e-05, + "loss": 1.1397, + "step": 160 + }, + { + "epoch": 0.09, + "learning_rate": 4.9885920472330004e-05, + "loss": 1.1215, + "step": 165 + }, + { + "epoch": 0.09, + "learning_rate": 4.9878907476729516e-05, + "loss": 1.167, + "step": 170 + }, + { + "epoch": 0.1, + "learning_rate": 4.9871685824881e-05, + "loss": 1.1219, + "step": 175 + }, + { + "epoch": 0.1, + "learning_rate": 4.9864255577351534e-05, + "loss": 1.0835, + "step": 180 + }, + { + "epoch": 0.1, + "learning_rate": 4.985661679645769e-05, + "loss": 1.0721, + "step": 185 + }, + { + "epoch": 0.11, + "learning_rate": 4.9848769546264915e-05, + "loss": 1.0692, + "step": 190 + }, + { + "epoch": 0.11, + "learning_rate": 4.9840713892587146e-05, + "loss": 1.0488, + "step": 195 + }, + { + "epoch": 0.11, + "learning_rate": 4.983244990298609e-05, + "loss": 1.1285, + "step": 200 + }, + { + "epoch": 0.11, + "learning_rate": 4.982397764677081e-05, + "loss": 1.0832, + "step": 205 + }, + { + "epoch": 0.12, + "learning_rate": 4.981529719499704e-05, + "loss": 1.0652, + "step": 210 + }, + { + "epoch": 0.12, + "learning_rate": 4.980640862046663e-05, + "loss": 1.1043, + "step": 215 + }, + { + "epoch": 0.12, + "learning_rate": 4.979731199772693e-05, + "loss": 1.112, + "step": 220 + }, + { + "epoch": 0.12, + "learning_rate": 4.9788007403070146e-05, + "loss": 1.1029, + "step": 225 + }, + { + "epoch": 0.13, + "learning_rate": 4.977849491453277e-05, + "loss": 1.0869, + "step": 230 + }, + { + "epoch": 0.13, + "learning_rate": 4.976877461189481e-05, + "loss": 1.0843, + "step": 235 + }, + { + "epoch": 0.13, + "learning_rate": 4.975884657667922e-05, + "loss": 1.0789, + "step": 240 + }, + { + "epoch": 0.14, + "learning_rate": 4.974871089215118e-05, + "loss": 1.0449, + "step": 245 + }, + { + "epoch": 0.14, + "learning_rate": 4.9738367643317405e-05, + "loss": 1.1053, + "step": 250 + }, + { + "epoch": 0.14, + "learning_rate": 4.9727816916925395e-05, + "loss": 1.0651, + "step": 255 + }, + { + "epoch": 0.14, + "learning_rate": 4.971705880146276e-05, + "loss": 1.0828, + "step": 260 + }, + { + "epoch": 0.15, + "learning_rate": 4.970609338715646e-05, + "loss": 1.0932, + "step": 265 + }, + { + "epoch": 0.15, + "learning_rate": 4.969492076597203e-05, + "loss": 1.0648, + "step": 270 + }, + { + "epoch": 0.15, + "learning_rate": 4.968354103161283e-05, + "loss": 1.0948, + "step": 275 + }, + { + "epoch": 0.15, + "learning_rate": 4.967195427951926e-05, + "loss": 1.0721, + "step": 280 + }, + { + "epoch": 0.16, + "learning_rate": 4.9660160606867936e-05, + "loss": 1.124, + "step": 285 + }, + { + "epoch": 0.16, + "learning_rate": 4.9648160112570896e-05, + "loss": 1.0963, + "step": 290 + }, + { + "epoch": 0.16, + "learning_rate": 4.9635952897274773e-05, + "loss": 1.1078, + "step": 295 + }, + { + "epoch": 0.17, + "learning_rate": 4.9623539063359925e-05, + "loss": 1.1059, + "step": 300 + }, + { + "epoch": 0.17, + "learning_rate": 4.961091871493962e-05, + "loss": 1.1032, + "step": 305 + }, + { + "epoch": 0.17, + "learning_rate": 4.959809195785912e-05, + "loss": 1.0595, + "step": 310 + }, + { + "epoch": 0.17, + "learning_rate": 4.958505889969481e-05, + "loss": 1.1096, + "step": 315 + }, + { + "epoch": 0.18, + "learning_rate": 4.957181964975329e-05, + "loss": 1.0589, + "step": 320 + }, + { + "epoch": 0.18, + "learning_rate": 4.955837431907049e-05, + "loss": 1.0608, + "step": 325 + }, + { + "epoch": 0.18, + "learning_rate": 4.954472302041069e-05, + "loss": 1.0819, + "step": 330 + }, + { + "epoch": 0.19, + "learning_rate": 4.9530865868265605e-05, + "loss": 1.0759, + "step": 335 + }, + { + "epoch": 0.19, + "learning_rate": 4.951680297885342e-05, + "loss": 1.0515, + "step": 340 + }, + { + "epoch": 0.19, + "learning_rate": 4.950253447011779e-05, + "loss": 1.0371, + "step": 345 + }, + { + "epoch": 0.19, + "learning_rate": 4.948806046172691e-05, + "loss": 1.0619, + "step": 350 + }, + { + "epoch": 0.2, + "learning_rate": 4.947338107507245e-05, + "loss": 1.0757, + "step": 355 + }, + { + "epoch": 0.2, + "learning_rate": 4.945849643326857e-05, + "loss": 1.0686, + "step": 360 + }, + { + "epoch": 0.2, + "learning_rate": 4.9443406661150874e-05, + "loss": 1.0809, + "step": 365 + }, + { + "epoch": 0.2, + "learning_rate": 4.942811188527537e-05, + "loss": 1.0704, + "step": 370 + }, + { + "epoch": 0.21, + "learning_rate": 4.941261223391742e-05, + "loss": 1.0655, + "step": 375 + }, + { + "epoch": 0.21, + "learning_rate": 4.939690783707063e-05, + "loss": 1.1182, + "step": 380 + }, + { + "epoch": 0.21, + "learning_rate": 4.938099882644578e-05, + "loss": 1.081, + "step": 385 + }, + { + "epoch": 0.22, + "learning_rate": 4.9364885335469734e-05, + "loss": 1.0792, + "step": 390 + }, + { + "epoch": 0.22, + "learning_rate": 4.93485674992843e-05, + "loss": 1.0244, + "step": 395 + }, + { + "epoch": 0.22, + "learning_rate": 4.933204545474511e-05, + "loss": 1.0531, + "step": 400 + }, + { + "epoch": 0.22, + "learning_rate": 4.9315319340420465e-05, + "loss": 1.048, + "step": 405 + }, + { + "epoch": 0.23, + "learning_rate": 4.929838929659015e-05, + "loss": 1.04, + "step": 410 + }, + { + "epoch": 0.23, + "learning_rate": 4.9281255465244314e-05, + "loss": 1.0686, + "step": 415 + }, + { + "epoch": 0.23, + "learning_rate": 4.926391799008223e-05, + "loss": 1.0933, + "step": 420 + }, + { + "epoch": 0.24, + "learning_rate": 4.924637701651111e-05, + "loss": 1.0313, + "step": 425 + }, + { + "epoch": 0.24, + "learning_rate": 4.9228632691644874e-05, + "loss": 1.0811, + "step": 430 + }, + { + "epoch": 0.24, + "learning_rate": 4.921068516430293e-05, + "loss": 1.0617, + "step": 435 + }, + { + "epoch": 0.24, + "learning_rate": 4.919253458500892e-05, + "loss": 1.0482, + "step": 440 + }, + { + "epoch": 0.25, + "learning_rate": 4.9174181105989445e-05, + "loss": 1.0681, + "step": 445 + }, + { + "epoch": 0.25, + "learning_rate": 4.9155624881172834e-05, + "loss": 1.0476, + "step": 450 + }, + { + "epoch": 0.25, + "learning_rate": 4.913686606618777e-05, + "loss": 1.0463, + "step": 455 + }, + { + "epoch": 0.25, + "learning_rate": 4.911790481836208e-05, + "loss": 1.0213, + "step": 460 + }, + { + "epoch": 0.26, + "learning_rate": 4.909874129672133e-05, + "loss": 0.9855, + "step": 465 + }, + { + "epoch": 0.26, + "learning_rate": 4.907937566198757e-05, + "loss": 1.045, + "step": 470 + }, + { + "epoch": 0.26, + "learning_rate": 4.9059808076577914e-05, + "loss": 1.0442, + "step": 475 + }, + { + "epoch": 0.27, + "learning_rate": 4.904003870460323e-05, + "loss": 1.0354, + "step": 480 + }, + { + "epoch": 0.27, + "learning_rate": 4.9020067711866735e-05, + "loss": 1.0331, + "step": 485 + }, + { + "epoch": 0.27, + "learning_rate": 4.899989526586261e-05, + "loss": 1.0585, + "step": 490 + }, + { + "epoch": 0.27, + "learning_rate": 4.8979521535774636e-05, + "loss": 1.0223, + "step": 495 + }, + { + "epoch": 0.28, + "learning_rate": 4.895894669247468e-05, + "loss": 1.0118, + "step": 500 + }, + { + "epoch": 0.28, + "learning_rate": 4.8938170908521356e-05, + "loss": 1.0508, + "step": 505 + }, + { + "epoch": 0.28, + "learning_rate": 4.8917194358158534e-05, + "loss": 1.0656, + "step": 510 + }, + { + "epoch": 0.28, + "learning_rate": 4.8896017217313886e-05, + "loss": 1.0655, + "step": 515 + }, + { + "epoch": 0.29, + "learning_rate": 4.887463966359741e-05, + "loss": 1.0833, + "step": 520 + }, + { + "epoch": 0.29, + "learning_rate": 4.8853061876299956e-05, + "loss": 1.068, + "step": 525 + }, + { + "epoch": 0.29, + "learning_rate": 4.8831284036391684e-05, + "loss": 1.0487, + "step": 530 + }, + { + "epoch": 0.3, + "learning_rate": 4.880930632652058e-05, + "loss": 1.059, + "step": 535 + }, + { + "epoch": 0.3, + "learning_rate": 4.878712893101092e-05, + "loss": 1.0408, + "step": 540 + }, + { + "epoch": 0.3, + "learning_rate": 4.876475203586171e-05, + "loss": 0.9976, + "step": 545 + }, + { + "epoch": 0.3, + "learning_rate": 4.874217582874514e-05, + "loss": 1.0358, + "step": 550 + }, + { + "epoch": 0.31, + "learning_rate": 4.8719400499005e-05, + "loss": 1.0073, + "step": 555 + }, + { + "epoch": 0.31, + "learning_rate": 4.869642623765509e-05, + "loss": 1.0742, + "step": 560 + }, + { + "epoch": 0.31, + "learning_rate": 4.867325323737765e-05, + "loss": 1.0463, + "step": 565 + }, + { + "epoch": 0.32, + "learning_rate": 4.864988169252168e-05, + "loss": 1.0242, + "step": 570 + }, + { + "epoch": 0.32, + "learning_rate": 4.8626311799101375e-05, + "loss": 1.0773, + "step": 575 + }, + { + "epoch": 0.32, + "learning_rate": 4.860254375479446e-05, + "loss": 1.0371, + "step": 580 + }, + { + "epoch": 0.32, + "learning_rate": 4.8578577758940504e-05, + "loss": 1.0399, + "step": 585 + }, + { + "epoch": 0.33, + "learning_rate": 4.855441401253928e-05, + "loss": 1.0411, + "step": 590 + }, + { + "epoch": 0.33, + "learning_rate": 4.8530052718249076e-05, + "loss": 1.0476, + "step": 595 + }, + { + "epoch": 0.33, + "learning_rate": 4.850549408038498e-05, + "loss": 1.0075, + "step": 600 + }, + { + "epoch": 0.33, + "learning_rate": 4.848073830491717e-05, + "loss": 1.0045, + "step": 605 + }, + { + "epoch": 0.34, + "learning_rate": 4.845578559946923e-05, + "loss": 1.0368, + "step": 610 + }, + { + "epoch": 0.34, + "learning_rate": 4.8430636173316306e-05, + "loss": 1.0156, + "step": 615 + }, + { + "epoch": 0.34, + "learning_rate": 4.840529023738348e-05, + "loss": 1.0334, + "step": 620 + }, + { + "epoch": 0.35, + "learning_rate": 4.837974800424389e-05, + "loss": 0.9994, + "step": 625 + }, + { + "epoch": 0.35, + "learning_rate": 4.8354009688117026e-05, + "loss": 1.0467, + "step": 630 + }, + { + "epoch": 0.35, + "learning_rate": 4.8328075504866874e-05, + "loss": 0.9968, + "step": 635 + }, + { + "epoch": 0.35, + "learning_rate": 4.8301945672000164e-05, + "loss": 1.041, + "step": 640 + }, + { + "epoch": 0.36, + "learning_rate": 4.8275620408664487e-05, + "loss": 0.9811, + "step": 645 + }, + { + "epoch": 0.36, + "learning_rate": 4.8249099935646494e-05, + "loss": 1.0235, + "step": 650 + }, + { + "epoch": 0.36, + "learning_rate": 4.822238447537003e-05, + "loss": 1.0152, + "step": 655 + }, + { + "epoch": 0.36, + "learning_rate": 4.819547425189429e-05, + "loss": 1.038, + "step": 660 + }, + { + "epoch": 0.37, + "learning_rate": 4.81683694909119e-05, + "loss": 1.0387, + "step": 665 + }, + { + "epoch": 0.37, + "learning_rate": 4.814107041974707e-05, + "loss": 1.019, + "step": 670 + }, + { + "epoch": 0.37, + "learning_rate": 4.811357726735366e-05, + "loss": 1.0403, + "step": 675 + }, + { + "epoch": 0.38, + "learning_rate": 4.808589026431324e-05, + "loss": 1.0828, + "step": 680 + }, + { + "epoch": 0.38, + "learning_rate": 4.805800964283322e-05, + "loss": 1.0128, + "step": 685 + }, + { + "epoch": 0.38, + "learning_rate": 4.802993563674483e-05, + "loss": 1.0502, + "step": 690 + }, + { + "epoch": 0.38, + "learning_rate": 4.80016684815012e-05, + "loss": 1.0169, + "step": 695 + }, + { + "epoch": 0.39, + "learning_rate": 4.7973208414175406e-05, + "loss": 1.0316, + "step": 700 + }, + { + "epoch": 0.39, + "learning_rate": 4.794455567345842e-05, + "loss": 1.0263, + "step": 705 + }, + { + "epoch": 0.39, + "learning_rate": 4.791571049965714e-05, + "loss": 1.0801, + "step": 710 + }, + { + "epoch": 0.4, + "learning_rate": 4.7886673134692404e-05, + "loss": 1.0575, + "step": 715 + }, + { + "epoch": 0.4, + "learning_rate": 4.7857443822096905e-05, + "loss": 0.9767, + "step": 720 + }, + { + "epoch": 0.4, + "learning_rate": 4.782802280701319e-05, + "loss": 0.9666, + "step": 725 + }, + { + "epoch": 0.4, + "learning_rate": 4.77984103361916e-05, + "loss": 0.9983, + "step": 730 + }, + { + "epoch": 0.41, + "learning_rate": 4.776860665798816e-05, + "loss": 1.0219, + "step": 735 + }, + { + "epoch": 0.41, + "learning_rate": 4.773861202236257e-05, + "loss": 0.9963, + "step": 740 + }, + { + "epoch": 0.41, + "learning_rate": 4.770842668087602e-05, + "loss": 1.0219, + "step": 745 + }, + { + "epoch": 0.41, + "learning_rate": 4.767805088668916e-05, + "loss": 1.0029, + "step": 750 + }, + { + "epoch": 0.42, + "learning_rate": 4.7647484894559936e-05, + "loss": 1.0077, + "step": 755 + }, + { + "epoch": 0.42, + "learning_rate": 4.7616728960841444e-05, + "loss": 0.9908, + "step": 760 + }, + { + "epoch": 0.42, + "learning_rate": 4.758578334347981e-05, + "loss": 1.0092, + "step": 765 + }, + { + "epoch": 0.43, + "learning_rate": 4.7554648302012015e-05, + "loss": 0.9874, + "step": 770 + }, + { + "epoch": 0.43, + "learning_rate": 4.7523324097563706e-05, + "loss": 1.0185, + "step": 775 + }, + { + "epoch": 0.43, + "learning_rate": 4.749181099284703e-05, + "loss": 1.0533, + "step": 780 + }, + { + "epoch": 0.43, + "learning_rate": 4.746010925215839e-05, + "loss": 1.0083, + "step": 785 + }, + { + "epoch": 0.44, + "learning_rate": 4.74282191413763e-05, + "loss": 1.0191, + "step": 790 + }, + { + "epoch": 0.44, + "learning_rate": 4.7396140927959045e-05, + "loss": 0.9786, + "step": 795 + }, + { + "epoch": 0.44, + "learning_rate": 4.7363874880942574e-05, + "loss": 1.0197, + "step": 800 + }, + { + "epoch": 0.45, + "learning_rate": 4.733142127093813e-05, + "loss": 1.0471, + "step": 805 + }, + { + "epoch": 0.45, + "learning_rate": 4.7298780370130014e-05, + "loss": 1.0134, + "step": 810 + }, + { + "epoch": 0.45, + "learning_rate": 4.726595245227336e-05, + "loss": 0.9801, + "step": 815 + }, + { + "epoch": 0.45, + "learning_rate": 4.723293779269173e-05, + "loss": 0.9944, + "step": 820 + }, + { + "epoch": 0.46, + "learning_rate": 4.7199736668274924e-05, + "loss": 1.0358, + "step": 825 + }, + { + "epoch": 0.46, + "learning_rate": 4.716634935747655e-05, + "loss": 0.9907, + "step": 830 + }, + { + "epoch": 0.46, + "learning_rate": 4.713277614031177e-05, + "loss": 1.0698, + "step": 835 + }, + { + "epoch": 0.46, + "learning_rate": 4.70990172983549e-05, + "loss": 1.0466, + "step": 840 + }, + { + "epoch": 0.47, + "learning_rate": 4.706507311473707e-05, + "loss": 0.9489, + "step": 845 + }, + { + "epoch": 0.47, + "learning_rate": 4.703094387414385e-05, + "loss": 0.9936, + "step": 850 + }, + { + "epoch": 0.47, + "learning_rate": 4.699662986281288e-05, + "loss": 0.9672, + "step": 855 + }, + { + "epoch": 0.48, + "learning_rate": 4.696213136853141e-05, + "loss": 1.0164, + "step": 860 + }, + { + "epoch": 0.48, + "learning_rate": 4.6927448680633954e-05, + "loss": 0.9817, + "step": 865 + }, + { + "epoch": 0.48, + "learning_rate": 4.689258208999983e-05, + "loss": 1.028, + "step": 870 + }, + { + "epoch": 0.48, + "learning_rate": 4.6857531889050716e-05, + "loss": 1.0239, + "step": 875 + }, + { + "epoch": 0.49, + "learning_rate": 4.682229837174821e-05, + "loss": 1.0378, + "step": 880 + }, + { + "epoch": 0.49, + "learning_rate": 4.678688183359135e-05, + "loss": 0.9908, + "step": 885 + }, + { + "epoch": 0.49, + "learning_rate": 4.675128257161418e-05, + "loss": 0.9384, + "step": 890 + }, + { + "epoch": 0.49, + "learning_rate": 4.671550088438319e-05, + "loss": 0.9481, + "step": 895 + }, + { + "epoch": 0.5, + "learning_rate": 4.6679537071994874e-05, + "loss": 0.9541, + "step": 900 + }, + { + "epoch": 0.5, + "learning_rate": 4.6643391436073165e-05, + "loss": 0.9787, + "step": 905 + }, + { + "epoch": 0.5, + "learning_rate": 4.660706427976693e-05, + "loss": 0.9897, + "step": 910 + }, + { + "epoch": 0.51, + "learning_rate": 4.657055590774745e-05, + "loss": 0.9947, + "step": 915 + }, + { + "epoch": 0.51, + "learning_rate": 4.6533866626205805e-05, + "loss": 0.9614, + "step": 920 + }, + { + "epoch": 0.51, + "learning_rate": 4.649699674285036e-05, + "loss": 1.0323, + "step": 925 + }, + { + "epoch": 0.51, + "learning_rate": 4.645994656690417e-05, + "loss": 1.0345, + "step": 930 + }, + { + "epoch": 0.52, + "learning_rate": 4.642271640910235e-05, + "loss": 1.0432, + "step": 935 + }, + { + "epoch": 0.52, + "learning_rate": 4.638530658168954e-05, + "loss": 1.0102, + "step": 940 + }, + { + "epoch": 0.52, + "learning_rate": 4.6347717398417203e-05, + "loss": 1.0361, + "step": 945 + }, + { + "epoch": 0.53, + "learning_rate": 4.6309949174541096e-05, + "loss": 0.9818, + "step": 950 + }, + { + "epoch": 0.53, + "learning_rate": 4.627200222681851e-05, + "loss": 1.0293, + "step": 955 + }, + { + "epoch": 0.53, + "learning_rate": 4.6233876873505694e-05, + "loss": 1.0125, + "step": 960 + }, + { + "epoch": 0.53, + "learning_rate": 4.619557343435516e-05, + "loss": 1.0039, + "step": 965 + }, + { + "epoch": 0.54, + "learning_rate": 4.615709223061302e-05, + "loss": 1.0146, + "step": 970 + }, + { + "epoch": 0.54, + "learning_rate": 4.611843358501624e-05, + "loss": 1.0352, + "step": 975 + }, + { + "epoch": 0.54, + "learning_rate": 4.6079597821789993e-05, + "loss": 1.0405, + "step": 980 + }, + { + "epoch": 0.54, + "learning_rate": 4.604058526664491e-05, + "loss": 0.9834, + "step": 985 + }, + { + "epoch": 0.55, + "learning_rate": 4.600139624677436e-05, + "loss": 1.0252, + "step": 990 + }, + { + "epoch": 0.55, + "learning_rate": 4.596203109085168e-05, + "loss": 1.0516, + "step": 995 + }, + { + "epoch": 0.55, + "learning_rate": 4.5922490129027464e-05, + "loss": 0.9762, + "step": 1000 + } + ], + "max_steps": 5424, + "num_train_epochs": 3, + "total_flos": 3.2497031184384e+17, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-1000/training_args.bin b/checkpoint-1000/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..77433459daca6603950afc463ff989e817f81ac3 --- /dev/null +++ b/checkpoint-1000/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0f6a8853599a52b1f15984d75ce4b6434c335fdc67d4509d0b2ca0d8c613590a +size 3362 diff --git a/checkpoint-1100/README.md b/checkpoint-1100/README.md new file mode 100644 index 0000000000000000000000000000000000000000..53c0fe6b436905ad7a2c060e98fe1ebdeac1a2fe --- /dev/null +++ b/checkpoint-1100/README.md @@ -0,0 +1,15 @@ +--- +library_name: peft +--- +## Training procedure + + +The following `bitsandbytes` quantization config was used during training: +- load_in_8bit: True +- llm_int8_threshold: 6.0 +- llm_int8_skip_modules: None +- llm_int8_enable_fp32_cpu_offload: False +### Framework versions + + +- PEFT 0.5.0 diff --git a/checkpoint-1100/adapter_config.json b/checkpoint-1100/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..514ae99ac7471cc26016f7575e44dd8304f29b07 --- /dev/null +++ b/checkpoint-1100/adapter_config.json @@ -0,0 +1,21 @@ +{ + "auto_mapping": null, + "base_model_name_or_path": "meta-llama/Llama-2-7b-chat-hf", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "q_proj", + "v_proj" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-1100/adapter_model.bin b/checkpoint-1100/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..f18e770dce8fc01859dfff97432e22941a673d5b --- /dev/null +++ b/checkpoint-1100/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f1f39d2917cdf1a2e47072114abfc7e0e13b8b4a01cdb69b12330ff29dc83682 +size 16821197 diff --git a/checkpoint-1100/finetuning_args.json b/checkpoint-1100/finetuning_args.json new file mode 100644 index 0000000000000000000000000000000000000000..d01efc206b59c6f88548e8f3940579f2ed2af33b --- /dev/null +++ b/checkpoint-1100/finetuning_args.json @@ -0,0 +1,16 @@ +{ + "dpo_beta": 0.1, + "finetuning_type": "lora", + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "lora_rank": 8, + "lora_target": [ + "q_proj", + "v_proj" + ], + "name_module_trainable": "mlp", + "num_hidden_layers": 32, + "num_layer_trainable": 3, + "ppo_score_norm": false, + "resume_lora_training": true +} diff --git a/checkpoint-1100/optimizer.pt b/checkpoint-1100/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..751d2bca87968b902524c6edc970c7983b435831 --- /dev/null +++ b/checkpoint-1100/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:debf4fa6b8d820fe570930c6a377cf0649a5e504a4143b130831c8de0e1ac4c9 +size 33629893 diff --git a/checkpoint-1100/rng_state.pth b/checkpoint-1100/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..512ae14ac48c29fe3083c789bffce7b0166aae77 --- /dev/null +++ b/checkpoint-1100/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e287b87acb8fc231da6ca55f911bf5ceb9342f5cb41f129c6c2476f5da07bb01 +size 14575 diff --git a/checkpoint-1100/scaler.pt b/checkpoint-1100/scaler.pt new file mode 100644 index 0000000000000000000000000000000000000000..771593c137af13c8ff804fddebe8fd0c1bd42c8d --- /dev/null +++ b/checkpoint-1100/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:677eca5ad6580d3f557bbfda51765525add80f9acfd6c5ca0edf4a229f0c70eb +size 557 diff --git a/checkpoint-1100/scheduler.pt b/checkpoint-1100/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..e77776024fc7e815552d76a1cb38de18a88908b3 --- /dev/null +++ b/checkpoint-1100/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4eb0c259b7af56a0b5a35ca1140f6215c652a78a4d7f2762afa6896791e5af12 +size 627 diff --git a/checkpoint-1100/trainer_state.json b/checkpoint-1100/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..40af6f8e286840d78c9d711d75d555a0d43eba47 --- /dev/null +++ b/checkpoint-1100/trainer_state.json @@ -0,0 +1,1336 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.6082388719933647, + "global_step": 1100, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.9999895164082156e-05, + "loss": 1.6349, + "step": 5 + }, + { + "epoch": 0.01, + "learning_rate": 4.999958065720787e-05, + "loss": 1.6199, + "step": 10 + }, + { + "epoch": 0.01, + "learning_rate": 4.999905648201487e-05, + "loss": 1.4834, + "step": 15 + }, + { + "epoch": 0.01, + "learning_rate": 4.999832264289934e-05, + "loss": 1.3882, + "step": 20 + }, + { + "epoch": 0.01, + "learning_rate": 4.999737914601591e-05, + "loss": 1.3679, + "step": 25 + }, + { + "epoch": 0.02, + "learning_rate": 4.999622599927756e-05, + "loss": 1.2396, + "step": 30 + }, + { + "epoch": 0.02, + "learning_rate": 4.999486321235559e-05, + "loss": 1.321, + "step": 35 + }, + { + "epoch": 0.02, + "learning_rate": 4.9993290796679516e-05, + "loss": 1.2874, + "step": 40 + }, + { + "epoch": 0.02, + "learning_rate": 4.999150876543699e-05, + "loss": 1.2607, + "step": 45 + }, + { + "epoch": 0.03, + "learning_rate": 4.9989517133573694e-05, + "loss": 1.2454, + "step": 50 + }, + { + "epoch": 0.03, + "learning_rate": 4.9987315917793174e-05, + "loss": 1.2799, + "step": 55 + }, + { + "epoch": 0.03, + "learning_rate": 4.998490513655676e-05, + "loss": 1.2575, + "step": 60 + }, + { + "epoch": 0.04, + "learning_rate": 4.998228481008337e-05, + "loss": 1.2404, + "step": 65 + }, + { + "epoch": 0.04, + "learning_rate": 4.997945496034934e-05, + "loss": 1.2219, + "step": 70 + }, + { + "epoch": 0.04, + "learning_rate": 4.9976415611088267e-05, + "loss": 1.2241, + "step": 75 + }, + { + "epoch": 0.04, + "learning_rate": 4.997316678779079e-05, + "loss": 1.1716, + "step": 80 + }, + { + "epoch": 0.05, + "learning_rate": 4.996970851770438e-05, + "loss": 1.1883, + "step": 85 + }, + { + "epoch": 0.05, + "learning_rate": 4.9966040829833115e-05, + "loss": 1.205, + "step": 90 + }, + { + "epoch": 0.05, + "learning_rate": 4.9962163754937426e-05, + "loss": 1.1246, + "step": 95 + }, + { + "epoch": 0.06, + "learning_rate": 4.995807732553384e-05, + "loss": 1.1636, + "step": 100 + }, + { + "epoch": 0.06, + "learning_rate": 4.9953781575894723e-05, + "loss": 1.158, + "step": 105 + }, + { + "epoch": 0.06, + "learning_rate": 4.9949276542048e-05, + "loss": 1.1477, + "step": 110 + }, + { + "epoch": 0.06, + "learning_rate": 4.9944562261776805e-05, + "loss": 1.1678, + "step": 115 + }, + { + "epoch": 0.07, + "learning_rate": 4.9939638774619216e-05, + "loss": 1.1501, + "step": 120 + }, + { + "epoch": 0.07, + "learning_rate": 4.99345061218679e-05, + "loss": 1.1955, + "step": 125 + }, + { + "epoch": 0.07, + "learning_rate": 4.9929164346569756e-05, + "loss": 1.1724, + "step": 130 + }, + { + "epoch": 0.07, + "learning_rate": 4.9923613493525576e-05, + "loss": 1.177, + "step": 135 + }, + { + "epoch": 0.08, + "learning_rate": 4.991785360928968e-05, + "loss": 1.1418, + "step": 140 + }, + { + "epoch": 0.08, + "learning_rate": 4.991188474216947e-05, + "loss": 1.1898, + "step": 145 + }, + { + "epoch": 0.08, + "learning_rate": 4.9905706942225094e-05, + "loss": 1.1479, + "step": 150 + }, + { + "epoch": 0.09, + "learning_rate": 4.9899320261268966e-05, + "loss": 1.1356, + "step": 155 + }, + { + "epoch": 0.09, + "learning_rate": 4.989272475286537e-05, + "loss": 1.1397, + "step": 160 + }, + { + "epoch": 0.09, + "learning_rate": 4.9885920472330004e-05, + "loss": 1.1215, + "step": 165 + }, + { + "epoch": 0.09, + "learning_rate": 4.9878907476729516e-05, + "loss": 1.167, + "step": 170 + }, + { + "epoch": 0.1, + "learning_rate": 4.9871685824881e-05, + "loss": 1.1219, + "step": 175 + }, + { + "epoch": 0.1, + "learning_rate": 4.9864255577351534e-05, + "loss": 1.0835, + "step": 180 + }, + { + "epoch": 0.1, + "learning_rate": 4.985661679645769e-05, + "loss": 1.0721, + "step": 185 + }, + { + "epoch": 0.11, + "learning_rate": 4.9848769546264915e-05, + "loss": 1.0692, + "step": 190 + }, + { + "epoch": 0.11, + "learning_rate": 4.9840713892587146e-05, + "loss": 1.0488, + "step": 195 + }, + { + "epoch": 0.11, + "learning_rate": 4.983244990298609e-05, + "loss": 1.1285, + "step": 200 + }, + { + "epoch": 0.11, + "learning_rate": 4.982397764677081e-05, + "loss": 1.0832, + "step": 205 + }, + { + "epoch": 0.12, + "learning_rate": 4.981529719499704e-05, + "loss": 1.0652, + "step": 210 + }, + { + "epoch": 0.12, + "learning_rate": 4.980640862046663e-05, + "loss": 1.1043, + "step": 215 + }, + { + "epoch": 0.12, + "learning_rate": 4.979731199772693e-05, + "loss": 1.112, + "step": 220 + }, + { + "epoch": 0.12, + "learning_rate": 4.9788007403070146e-05, + "loss": 1.1029, + "step": 225 + }, + { + "epoch": 0.13, + "learning_rate": 4.977849491453277e-05, + "loss": 1.0869, + "step": 230 + }, + { + "epoch": 0.13, + "learning_rate": 4.976877461189481e-05, + "loss": 1.0843, + "step": 235 + }, + { + "epoch": 0.13, + "learning_rate": 4.975884657667922e-05, + "loss": 1.0789, + "step": 240 + }, + { + "epoch": 0.14, + "learning_rate": 4.974871089215118e-05, + "loss": 1.0449, + "step": 245 + }, + { + "epoch": 0.14, + "learning_rate": 4.9738367643317405e-05, + "loss": 1.1053, + "step": 250 + }, + { + "epoch": 0.14, + "learning_rate": 4.9727816916925395e-05, + "loss": 1.0651, + "step": 255 + }, + { + "epoch": 0.14, + "learning_rate": 4.971705880146276e-05, + "loss": 1.0828, + "step": 260 + }, + { + "epoch": 0.15, + "learning_rate": 4.970609338715646e-05, + "loss": 1.0932, + "step": 265 + }, + { + "epoch": 0.15, + "learning_rate": 4.969492076597203e-05, + "loss": 1.0648, + "step": 270 + }, + { + "epoch": 0.15, + "learning_rate": 4.968354103161283e-05, + "loss": 1.0948, + "step": 275 + }, + { + "epoch": 0.15, + "learning_rate": 4.967195427951926e-05, + "loss": 1.0721, + "step": 280 + }, + { + "epoch": 0.16, + "learning_rate": 4.9660160606867936e-05, + "loss": 1.124, + "step": 285 + }, + { + "epoch": 0.16, + "learning_rate": 4.9648160112570896e-05, + "loss": 1.0963, + "step": 290 + }, + { + "epoch": 0.16, + "learning_rate": 4.9635952897274773e-05, + "loss": 1.1078, + "step": 295 + }, + { + "epoch": 0.17, + "learning_rate": 4.9623539063359925e-05, + "loss": 1.1059, + "step": 300 + }, + { + "epoch": 0.17, + "learning_rate": 4.961091871493962e-05, + "loss": 1.1032, + "step": 305 + }, + { + "epoch": 0.17, + "learning_rate": 4.959809195785912e-05, + "loss": 1.0595, + "step": 310 + }, + { + "epoch": 0.17, + "learning_rate": 4.958505889969481e-05, + "loss": 1.1096, + "step": 315 + }, + { + "epoch": 0.18, + "learning_rate": 4.957181964975329e-05, + "loss": 1.0589, + "step": 320 + }, + { + "epoch": 0.18, + "learning_rate": 4.955837431907049e-05, + "loss": 1.0608, + "step": 325 + }, + { + "epoch": 0.18, + "learning_rate": 4.954472302041069e-05, + "loss": 1.0819, + "step": 330 + }, + { + "epoch": 0.19, + "learning_rate": 4.9530865868265605e-05, + "loss": 1.0759, + "step": 335 + }, + { + "epoch": 0.19, + "learning_rate": 4.951680297885342e-05, + "loss": 1.0515, + "step": 340 + }, + { + "epoch": 0.19, + "learning_rate": 4.950253447011779e-05, + "loss": 1.0371, + "step": 345 + }, + { + "epoch": 0.19, + "learning_rate": 4.948806046172691e-05, + "loss": 1.0619, + "step": 350 + }, + { + "epoch": 0.2, + "learning_rate": 4.947338107507245e-05, + "loss": 1.0757, + "step": 355 + }, + { + "epoch": 0.2, + "learning_rate": 4.945849643326857e-05, + "loss": 1.0686, + "step": 360 + }, + { + "epoch": 0.2, + "learning_rate": 4.9443406661150874e-05, + "loss": 1.0809, + "step": 365 + }, + { + "epoch": 0.2, + "learning_rate": 4.942811188527537e-05, + "loss": 1.0704, + "step": 370 + }, + { + "epoch": 0.21, + "learning_rate": 4.941261223391742e-05, + "loss": 1.0655, + "step": 375 + }, + { + "epoch": 0.21, + "learning_rate": 4.939690783707063e-05, + "loss": 1.1182, + "step": 380 + }, + { + "epoch": 0.21, + "learning_rate": 4.938099882644578e-05, + "loss": 1.081, + "step": 385 + }, + { + "epoch": 0.22, + "learning_rate": 4.9364885335469734e-05, + "loss": 1.0792, + "step": 390 + }, + { + "epoch": 0.22, + "learning_rate": 4.93485674992843e-05, + "loss": 1.0244, + "step": 395 + }, + { + "epoch": 0.22, + "learning_rate": 4.933204545474511e-05, + "loss": 1.0531, + "step": 400 + }, + { + "epoch": 0.22, + "learning_rate": 4.9315319340420465e-05, + "loss": 1.048, + "step": 405 + }, + { + "epoch": 0.23, + "learning_rate": 4.929838929659015e-05, + "loss": 1.04, + "step": 410 + }, + { + "epoch": 0.23, + "learning_rate": 4.9281255465244314e-05, + "loss": 1.0686, + "step": 415 + }, + { + "epoch": 0.23, + "learning_rate": 4.926391799008223e-05, + "loss": 1.0933, + "step": 420 + }, + { + "epoch": 0.24, + "learning_rate": 4.924637701651111e-05, + "loss": 1.0313, + "step": 425 + }, + { + "epoch": 0.24, + "learning_rate": 4.9228632691644874e-05, + "loss": 1.0811, + "step": 430 + }, + { + "epoch": 0.24, + "learning_rate": 4.921068516430293e-05, + "loss": 1.0617, + "step": 435 + }, + { + "epoch": 0.24, + "learning_rate": 4.919253458500892e-05, + "loss": 1.0482, + "step": 440 + }, + { + "epoch": 0.25, + "learning_rate": 4.9174181105989445e-05, + "loss": 1.0681, + "step": 445 + }, + { + "epoch": 0.25, + "learning_rate": 4.9155624881172834e-05, + "loss": 1.0476, + "step": 450 + }, + { + "epoch": 0.25, + "learning_rate": 4.913686606618777e-05, + "loss": 1.0463, + "step": 455 + }, + { + "epoch": 0.25, + "learning_rate": 4.911790481836208e-05, + "loss": 1.0213, + "step": 460 + }, + { + "epoch": 0.26, + "learning_rate": 4.909874129672133e-05, + "loss": 0.9855, + "step": 465 + }, + { + "epoch": 0.26, + "learning_rate": 4.907937566198757e-05, + "loss": 1.045, + "step": 470 + }, + { + "epoch": 0.26, + "learning_rate": 4.9059808076577914e-05, + "loss": 1.0442, + "step": 475 + }, + { + "epoch": 0.27, + "learning_rate": 4.904003870460323e-05, + "loss": 1.0354, + "step": 480 + }, + { + "epoch": 0.27, + "learning_rate": 4.9020067711866735e-05, + "loss": 1.0331, + "step": 485 + }, + { + "epoch": 0.27, + "learning_rate": 4.899989526586261e-05, + "loss": 1.0585, + "step": 490 + }, + { + "epoch": 0.27, + "learning_rate": 4.8979521535774636e-05, + "loss": 1.0223, + "step": 495 + }, + { + "epoch": 0.28, + "learning_rate": 4.895894669247468e-05, + "loss": 1.0118, + "step": 500 + }, + { + "epoch": 0.28, + "learning_rate": 4.8938170908521356e-05, + "loss": 1.0508, + "step": 505 + }, + { + "epoch": 0.28, + "learning_rate": 4.8917194358158534e-05, + "loss": 1.0656, + "step": 510 + }, + { + "epoch": 0.28, + "learning_rate": 4.8896017217313886e-05, + "loss": 1.0655, + "step": 515 + }, + { + "epoch": 0.29, + "learning_rate": 4.887463966359741e-05, + "loss": 1.0833, + "step": 520 + }, + { + "epoch": 0.29, + "learning_rate": 4.8853061876299956e-05, + "loss": 1.068, + "step": 525 + }, + { + "epoch": 0.29, + "learning_rate": 4.8831284036391684e-05, + "loss": 1.0487, + "step": 530 + }, + { + "epoch": 0.3, + "learning_rate": 4.880930632652058e-05, + "loss": 1.059, + "step": 535 + }, + { + "epoch": 0.3, + "learning_rate": 4.878712893101092e-05, + "loss": 1.0408, + "step": 540 + }, + { + "epoch": 0.3, + "learning_rate": 4.876475203586171e-05, + "loss": 0.9976, + "step": 545 + }, + { + "epoch": 0.3, + "learning_rate": 4.874217582874514e-05, + "loss": 1.0358, + "step": 550 + }, + { + "epoch": 0.31, + "learning_rate": 4.8719400499005e-05, + "loss": 1.0073, + "step": 555 + }, + { + "epoch": 0.31, + "learning_rate": 4.869642623765509e-05, + "loss": 1.0742, + "step": 560 + }, + { + "epoch": 0.31, + "learning_rate": 4.867325323737765e-05, + "loss": 1.0463, + "step": 565 + }, + { + "epoch": 0.32, + "learning_rate": 4.864988169252168e-05, + "loss": 1.0242, + "step": 570 + }, + { + "epoch": 0.32, + "learning_rate": 4.8626311799101375e-05, + "loss": 1.0773, + "step": 575 + }, + { + "epoch": 0.32, + "learning_rate": 4.860254375479446e-05, + "loss": 1.0371, + "step": 580 + }, + { + "epoch": 0.32, + "learning_rate": 4.8578577758940504e-05, + "loss": 1.0399, + "step": 585 + }, + { + "epoch": 0.33, + "learning_rate": 4.855441401253928e-05, + "loss": 1.0411, + "step": 590 + }, + { + "epoch": 0.33, + "learning_rate": 4.8530052718249076e-05, + "loss": 1.0476, + "step": 595 + }, + { + "epoch": 0.33, + "learning_rate": 4.850549408038498e-05, + "loss": 1.0075, + "step": 600 + }, + { + "epoch": 0.33, + "learning_rate": 4.848073830491717e-05, + "loss": 1.0045, + "step": 605 + }, + { + "epoch": 0.34, + "learning_rate": 4.845578559946923e-05, + "loss": 1.0368, + "step": 610 + }, + { + "epoch": 0.34, + "learning_rate": 4.8430636173316306e-05, + "loss": 1.0156, + "step": 615 + }, + { + "epoch": 0.34, + "learning_rate": 4.840529023738348e-05, + "loss": 1.0334, + "step": 620 + }, + { + "epoch": 0.35, + "learning_rate": 4.837974800424389e-05, + "loss": 0.9994, + "step": 625 + }, + { + "epoch": 0.35, + "learning_rate": 4.8354009688117026e-05, + "loss": 1.0467, + "step": 630 + }, + { + "epoch": 0.35, + "learning_rate": 4.8328075504866874e-05, + "loss": 0.9968, + "step": 635 + }, + { + "epoch": 0.35, + "learning_rate": 4.8301945672000164e-05, + "loss": 1.041, + "step": 640 + }, + { + "epoch": 0.36, + "learning_rate": 4.8275620408664487e-05, + "loss": 0.9811, + "step": 645 + }, + { + "epoch": 0.36, + "learning_rate": 4.8249099935646494e-05, + "loss": 1.0235, + "step": 650 + }, + { + "epoch": 0.36, + "learning_rate": 4.822238447537003e-05, + "loss": 1.0152, + "step": 655 + }, + { + "epoch": 0.36, + "learning_rate": 4.819547425189429e-05, + "loss": 1.038, + "step": 660 + }, + { + "epoch": 0.37, + "learning_rate": 4.81683694909119e-05, + "loss": 1.0387, + "step": 665 + }, + { + "epoch": 0.37, + "learning_rate": 4.814107041974707e-05, + "loss": 1.019, + "step": 670 + }, + { + "epoch": 0.37, + "learning_rate": 4.811357726735366e-05, + "loss": 1.0403, + "step": 675 + }, + { + "epoch": 0.38, + "learning_rate": 4.808589026431324e-05, + "loss": 1.0828, + "step": 680 + }, + { + "epoch": 0.38, + "learning_rate": 4.805800964283322e-05, + "loss": 1.0128, + "step": 685 + }, + { + "epoch": 0.38, + "learning_rate": 4.802993563674483e-05, + "loss": 1.0502, + "step": 690 + }, + { + "epoch": 0.38, + "learning_rate": 4.80016684815012e-05, + "loss": 1.0169, + "step": 695 + }, + { + "epoch": 0.39, + "learning_rate": 4.7973208414175406e-05, + "loss": 1.0316, + "step": 700 + }, + { + "epoch": 0.39, + "learning_rate": 4.794455567345842e-05, + "loss": 1.0263, + "step": 705 + }, + { + "epoch": 0.39, + "learning_rate": 4.791571049965714e-05, + "loss": 1.0801, + "step": 710 + }, + { + "epoch": 0.4, + "learning_rate": 4.7886673134692404e-05, + "loss": 1.0575, + "step": 715 + }, + { + "epoch": 0.4, + "learning_rate": 4.7857443822096905e-05, + "loss": 0.9767, + "step": 720 + }, + { + "epoch": 0.4, + "learning_rate": 4.782802280701319e-05, + "loss": 0.9666, + "step": 725 + }, + { + "epoch": 0.4, + "learning_rate": 4.77984103361916e-05, + "loss": 0.9983, + "step": 730 + }, + { + "epoch": 0.41, + "learning_rate": 4.776860665798816e-05, + "loss": 1.0219, + "step": 735 + }, + { + "epoch": 0.41, + "learning_rate": 4.773861202236257e-05, + "loss": 0.9963, + "step": 740 + }, + { + "epoch": 0.41, + "learning_rate": 4.770842668087602e-05, + "loss": 1.0219, + "step": 745 + }, + { + "epoch": 0.41, + "learning_rate": 4.767805088668916e-05, + "loss": 1.0029, + "step": 750 + }, + { + "epoch": 0.42, + "learning_rate": 4.7647484894559936e-05, + "loss": 1.0077, + "step": 755 + }, + { + "epoch": 0.42, + "learning_rate": 4.7616728960841444e-05, + "loss": 0.9908, + "step": 760 + }, + { + "epoch": 0.42, + "learning_rate": 4.758578334347981e-05, + "loss": 1.0092, + "step": 765 + }, + { + "epoch": 0.43, + "learning_rate": 4.7554648302012015e-05, + "loss": 0.9874, + "step": 770 + }, + { + "epoch": 0.43, + "learning_rate": 4.7523324097563706e-05, + "loss": 1.0185, + "step": 775 + }, + { + "epoch": 0.43, + "learning_rate": 4.749181099284703e-05, + "loss": 1.0533, + "step": 780 + }, + { + "epoch": 0.43, + "learning_rate": 4.746010925215839e-05, + "loss": 1.0083, + "step": 785 + }, + { + "epoch": 0.44, + "learning_rate": 4.74282191413763e-05, + "loss": 1.0191, + "step": 790 + }, + { + "epoch": 0.44, + "learning_rate": 4.7396140927959045e-05, + "loss": 0.9786, + "step": 795 + }, + { + "epoch": 0.44, + "learning_rate": 4.7363874880942574e-05, + "loss": 1.0197, + "step": 800 + }, + { + "epoch": 0.45, + "learning_rate": 4.733142127093813e-05, + "loss": 1.0471, + "step": 805 + }, + { + "epoch": 0.45, + "learning_rate": 4.7298780370130014e-05, + "loss": 1.0134, + "step": 810 + }, + { + "epoch": 0.45, + "learning_rate": 4.726595245227336e-05, + "loss": 0.9801, + "step": 815 + }, + { + "epoch": 0.45, + "learning_rate": 4.723293779269173e-05, + "loss": 0.9944, + "step": 820 + }, + { + "epoch": 0.46, + "learning_rate": 4.7199736668274924e-05, + "loss": 1.0358, + "step": 825 + }, + { + "epoch": 0.46, + "learning_rate": 4.716634935747655e-05, + "loss": 0.9907, + "step": 830 + }, + { + "epoch": 0.46, + "learning_rate": 4.713277614031177e-05, + "loss": 1.0698, + "step": 835 + }, + { + "epoch": 0.46, + "learning_rate": 4.70990172983549e-05, + "loss": 1.0466, + "step": 840 + }, + { + "epoch": 0.47, + "learning_rate": 4.706507311473707e-05, + "loss": 0.9489, + "step": 845 + }, + { + "epoch": 0.47, + "learning_rate": 4.703094387414385e-05, + "loss": 0.9936, + "step": 850 + }, + { + "epoch": 0.47, + "learning_rate": 4.699662986281288e-05, + "loss": 0.9672, + "step": 855 + }, + { + "epoch": 0.48, + "learning_rate": 4.696213136853141e-05, + "loss": 1.0164, + "step": 860 + }, + { + "epoch": 0.48, + "learning_rate": 4.6927448680633954e-05, + "loss": 0.9817, + "step": 865 + }, + { + "epoch": 0.48, + "learning_rate": 4.689258208999983e-05, + "loss": 1.028, + "step": 870 + }, + { + "epoch": 0.48, + "learning_rate": 4.6857531889050716e-05, + "loss": 1.0239, + "step": 875 + }, + { + "epoch": 0.49, + "learning_rate": 4.682229837174821e-05, + "loss": 1.0378, + "step": 880 + }, + { + "epoch": 0.49, + "learning_rate": 4.678688183359135e-05, + "loss": 0.9908, + "step": 885 + }, + { + "epoch": 0.49, + "learning_rate": 4.675128257161418e-05, + "loss": 0.9384, + "step": 890 + }, + { + "epoch": 0.49, + "learning_rate": 4.671550088438319e-05, + "loss": 0.9481, + "step": 895 + }, + { + "epoch": 0.5, + "learning_rate": 4.6679537071994874e-05, + "loss": 0.9541, + "step": 900 + }, + { + "epoch": 0.5, + "learning_rate": 4.6643391436073165e-05, + "loss": 0.9787, + "step": 905 + }, + { + "epoch": 0.5, + "learning_rate": 4.660706427976693e-05, + "loss": 0.9897, + "step": 910 + }, + { + "epoch": 0.51, + "learning_rate": 4.657055590774745e-05, + "loss": 0.9947, + "step": 915 + }, + { + "epoch": 0.51, + "learning_rate": 4.6533866626205805e-05, + "loss": 0.9614, + "step": 920 + }, + { + "epoch": 0.51, + "learning_rate": 4.649699674285036e-05, + "loss": 1.0323, + "step": 925 + }, + { + "epoch": 0.51, + "learning_rate": 4.645994656690417e-05, + "loss": 1.0345, + "step": 930 + }, + { + "epoch": 0.52, + "learning_rate": 4.642271640910235e-05, + "loss": 1.0432, + "step": 935 + }, + { + "epoch": 0.52, + "learning_rate": 4.638530658168954e-05, + "loss": 1.0102, + "step": 940 + }, + { + "epoch": 0.52, + "learning_rate": 4.6347717398417203e-05, + "loss": 1.0361, + "step": 945 + }, + { + "epoch": 0.53, + "learning_rate": 4.6309949174541096e-05, + "loss": 0.9818, + "step": 950 + }, + { + "epoch": 0.53, + "learning_rate": 4.627200222681851e-05, + "loss": 1.0293, + "step": 955 + }, + { + "epoch": 0.53, + "learning_rate": 4.6233876873505694e-05, + "loss": 1.0125, + "step": 960 + }, + { + "epoch": 0.53, + "learning_rate": 4.619557343435516e-05, + "loss": 1.0039, + "step": 965 + }, + { + "epoch": 0.54, + "learning_rate": 4.615709223061302e-05, + "loss": 1.0146, + "step": 970 + }, + { + "epoch": 0.54, + "learning_rate": 4.611843358501624e-05, + "loss": 1.0352, + "step": 975 + }, + { + "epoch": 0.54, + "learning_rate": 4.6079597821789993e-05, + "loss": 1.0405, + "step": 980 + }, + { + "epoch": 0.54, + "learning_rate": 4.604058526664491e-05, + "loss": 0.9834, + "step": 985 + }, + { + "epoch": 0.55, + "learning_rate": 4.600139624677436e-05, + "loss": 1.0252, + "step": 990 + }, + { + "epoch": 0.55, + "learning_rate": 4.596203109085168e-05, + "loss": 1.0516, + "step": 995 + }, + { + "epoch": 0.55, + "learning_rate": 4.5922490129027464e-05, + "loss": 0.9762, + "step": 1000 + }, + { + "epoch": 0.56, + "learning_rate": 4.588277369292674e-05, + "loss": 0.9801, + "step": 1005 + }, + { + "epoch": 0.56, + "learning_rate": 4.5842882115646234e-05, + "loss": 1.0127, + "step": 1010 + }, + { + "epoch": 0.56, + "learning_rate": 4.580281573175157e-05, + "loss": 1.0415, + "step": 1015 + }, + { + "epoch": 0.56, + "learning_rate": 4.576257487727442e-05, + "loss": 0.9974, + "step": 1020 + }, + { + "epoch": 0.57, + "learning_rate": 4.572215988970974e-05, + "loss": 1.0097, + "step": 1025 + }, + { + "epoch": 0.57, + "learning_rate": 4.568157110801293e-05, + "loss": 1.0004, + "step": 1030 + }, + { + "epoch": 0.57, + "learning_rate": 4.5640808872596944e-05, + "loss": 0.9932, + "step": 1035 + }, + { + "epoch": 0.58, + "learning_rate": 4.5599873525329505e-05, + "loss": 0.9437, + "step": 1040 + }, + { + "epoch": 0.58, + "learning_rate": 4.555876540953019e-05, + "loss": 1.0004, + "step": 1045 + }, + { + "epoch": 0.58, + "learning_rate": 4.551748486996755e-05, + "loss": 0.9743, + "step": 1050 + }, + { + "epoch": 0.58, + "learning_rate": 4.547603225285626e-05, + "loss": 1.0303, + "step": 1055 + }, + { + "epoch": 0.59, + "learning_rate": 4.543440790585417e-05, + "loss": 1.0225, + "step": 1060 + }, + { + "epoch": 0.59, + "learning_rate": 4.539261217805939e-05, + "loss": 1.0102, + "step": 1065 + }, + { + "epoch": 0.59, + "learning_rate": 4.535064542000743e-05, + "loss": 1.0087, + "step": 1070 + }, + { + "epoch": 0.59, + "learning_rate": 4.5308507983668165e-05, + "loss": 0.952, + "step": 1075 + }, + { + "epoch": 0.6, + "learning_rate": 4.526620022244293e-05, + "loss": 0.9767, + "step": 1080 + }, + { + "epoch": 0.6, + "learning_rate": 4.522372249116158e-05, + "loss": 1.0049, + "step": 1085 + }, + { + "epoch": 0.6, + "learning_rate": 4.5181075146079456e-05, + "loss": 1.0397, + "step": 1090 + }, + { + "epoch": 0.61, + "learning_rate": 4.5138258544874455e-05, + "loss": 1.0167, + "step": 1095 + }, + { + "epoch": 0.61, + "learning_rate": 4.5095273046643985e-05, + "loss": 0.9966, + "step": 1100 + } + ], + "max_steps": 5424, + "num_train_epochs": 3, + "total_flos": 3.57467343028224e+17, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-1100/training_args.bin b/checkpoint-1100/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..77433459daca6603950afc463ff989e817f81ac3 --- /dev/null +++ b/checkpoint-1100/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0f6a8853599a52b1f15984d75ce4b6434c335fdc67d4509d0b2ca0d8c613590a +size 3362 diff --git a/checkpoint-1200/README.md b/checkpoint-1200/README.md new file mode 100644 index 0000000000000000000000000000000000000000..53c0fe6b436905ad7a2c060e98fe1ebdeac1a2fe --- /dev/null +++ b/checkpoint-1200/README.md @@ -0,0 +1,15 @@ +--- +library_name: peft +--- +## Training procedure + + +The following `bitsandbytes` quantization config was used during training: +- load_in_8bit: True +- llm_int8_threshold: 6.0 +- llm_int8_skip_modules: None +- llm_int8_enable_fp32_cpu_offload: False +### Framework versions + + +- PEFT 0.5.0 diff --git a/checkpoint-1200/adapter_config.json b/checkpoint-1200/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..514ae99ac7471cc26016f7575e44dd8304f29b07 --- /dev/null +++ b/checkpoint-1200/adapter_config.json @@ -0,0 +1,21 @@ +{ + "auto_mapping": null, + "base_model_name_or_path": "meta-llama/Llama-2-7b-chat-hf", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "q_proj", + "v_proj" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-1200/adapter_model.bin b/checkpoint-1200/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..d43a88ecbc70987f027dffff1e35cf045f2331a2 --- /dev/null +++ b/checkpoint-1200/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:80bd175dd7a77f18c28c84219c23d01119f777040f5bde7e409e8039bbedd76a +size 16821197 diff --git a/checkpoint-1200/finetuning_args.json b/checkpoint-1200/finetuning_args.json new file mode 100644 index 0000000000000000000000000000000000000000..d01efc206b59c6f88548e8f3940579f2ed2af33b --- /dev/null +++ b/checkpoint-1200/finetuning_args.json @@ -0,0 +1,16 @@ +{ + "dpo_beta": 0.1, + "finetuning_type": "lora", + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "lora_rank": 8, + "lora_target": [ + "q_proj", + "v_proj" + ], + "name_module_trainable": "mlp", + "num_hidden_layers": 32, + "num_layer_trainable": 3, + "ppo_score_norm": false, + "resume_lora_training": true +} diff --git a/checkpoint-1200/optimizer.pt b/checkpoint-1200/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..fb6a11ec2db42546426d425065b1411725a644f4 --- /dev/null +++ b/checkpoint-1200/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f42cf432758369c915b3fcce4fe20934523b628260c5c73468bb131f74182b86 +size 33629893 diff --git a/checkpoint-1200/rng_state.pth b/checkpoint-1200/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..fbaffdac139e04ef68c00bbc54428d02d45376e4 --- /dev/null +++ b/checkpoint-1200/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7db1919207ee367db4a2eeeb7e1593dffd38f5b98c3d92d9532ac89f6f8bf7ec +size 14575 diff --git a/checkpoint-1200/scaler.pt b/checkpoint-1200/scaler.pt new file mode 100644 index 0000000000000000000000000000000000000000..c73b6e7148d8ae7026711173634e0a11b1b94e2d --- /dev/null +++ b/checkpoint-1200/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:741cefeca9ef427f92406d2d10b81996655e2a9d50eb7aaa9614e6fdd1c9f529 +size 557 diff --git a/checkpoint-1200/scheduler.pt b/checkpoint-1200/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..eee48ceaa867c591aae2e069f8f97ba3455bb0d4 --- /dev/null +++ b/checkpoint-1200/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:25f5fdd612399be58b9bfcc865f337638ef903ce8239ab85c889e867bd67ae64 +size 627 diff --git a/checkpoint-1200/trainer_state.json b/checkpoint-1200/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..1c34a338ec54859edf9f9779f8b01708c42fdf9f --- /dev/null +++ b/checkpoint-1200/trainer_state.json @@ -0,0 +1,1456 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.6635333149018524, + "global_step": 1200, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.9999895164082156e-05, + "loss": 1.6349, + "step": 5 + }, + { + "epoch": 0.01, + "learning_rate": 4.999958065720787e-05, + "loss": 1.6199, + "step": 10 + }, + { + "epoch": 0.01, + "learning_rate": 4.999905648201487e-05, + "loss": 1.4834, + "step": 15 + }, + { + "epoch": 0.01, + "learning_rate": 4.999832264289934e-05, + "loss": 1.3882, + "step": 20 + }, + { + "epoch": 0.01, + "learning_rate": 4.999737914601591e-05, + "loss": 1.3679, + "step": 25 + }, + { + "epoch": 0.02, + "learning_rate": 4.999622599927756e-05, + "loss": 1.2396, + "step": 30 + }, + { + "epoch": 0.02, + "learning_rate": 4.999486321235559e-05, + "loss": 1.321, + "step": 35 + }, + { + "epoch": 0.02, + "learning_rate": 4.9993290796679516e-05, + "loss": 1.2874, + "step": 40 + }, + { + "epoch": 0.02, + "learning_rate": 4.999150876543699e-05, + "loss": 1.2607, + "step": 45 + }, + { + "epoch": 0.03, + "learning_rate": 4.9989517133573694e-05, + "loss": 1.2454, + "step": 50 + }, + { + "epoch": 0.03, + "learning_rate": 4.9987315917793174e-05, + "loss": 1.2799, + "step": 55 + }, + { + "epoch": 0.03, + "learning_rate": 4.998490513655676e-05, + "loss": 1.2575, + "step": 60 + }, + { + "epoch": 0.04, + "learning_rate": 4.998228481008337e-05, + "loss": 1.2404, + "step": 65 + }, + { + "epoch": 0.04, + "learning_rate": 4.997945496034934e-05, + "loss": 1.2219, + "step": 70 + }, + { + "epoch": 0.04, + "learning_rate": 4.9976415611088267e-05, + "loss": 1.2241, + "step": 75 + }, + { + "epoch": 0.04, + "learning_rate": 4.997316678779079e-05, + "loss": 1.1716, + "step": 80 + }, + { + "epoch": 0.05, + "learning_rate": 4.996970851770438e-05, + "loss": 1.1883, + "step": 85 + }, + { + "epoch": 0.05, + "learning_rate": 4.9966040829833115e-05, + "loss": 1.205, + "step": 90 + }, + { + "epoch": 0.05, + "learning_rate": 4.9962163754937426e-05, + "loss": 1.1246, + "step": 95 + }, + { + "epoch": 0.06, + "learning_rate": 4.995807732553384e-05, + "loss": 1.1636, + "step": 100 + }, + { + "epoch": 0.06, + "learning_rate": 4.9953781575894723e-05, + "loss": 1.158, + "step": 105 + }, + { + "epoch": 0.06, + "learning_rate": 4.9949276542048e-05, + "loss": 1.1477, + "step": 110 + }, + { + "epoch": 0.06, + "learning_rate": 4.9944562261776805e-05, + "loss": 1.1678, + "step": 115 + }, + { + "epoch": 0.07, + "learning_rate": 4.9939638774619216e-05, + "loss": 1.1501, + "step": 120 + }, + { + "epoch": 0.07, + "learning_rate": 4.99345061218679e-05, + "loss": 1.1955, + "step": 125 + }, + { + "epoch": 0.07, + "learning_rate": 4.9929164346569756e-05, + "loss": 1.1724, + "step": 130 + }, + { + "epoch": 0.07, + "learning_rate": 4.9923613493525576e-05, + "loss": 1.177, + "step": 135 + }, + { + "epoch": 0.08, + "learning_rate": 4.991785360928968e-05, + "loss": 1.1418, + "step": 140 + }, + { + "epoch": 0.08, + "learning_rate": 4.991188474216947e-05, + "loss": 1.1898, + "step": 145 + }, + { + "epoch": 0.08, + "learning_rate": 4.9905706942225094e-05, + "loss": 1.1479, + "step": 150 + }, + { + "epoch": 0.09, + "learning_rate": 4.9899320261268966e-05, + "loss": 1.1356, + "step": 155 + }, + { + "epoch": 0.09, + "learning_rate": 4.989272475286537e-05, + "loss": 1.1397, + "step": 160 + }, + { + "epoch": 0.09, + "learning_rate": 4.9885920472330004e-05, + "loss": 1.1215, + "step": 165 + }, + { + "epoch": 0.09, + "learning_rate": 4.9878907476729516e-05, + "loss": 1.167, + "step": 170 + }, + { + "epoch": 0.1, + "learning_rate": 4.9871685824881e-05, + "loss": 1.1219, + "step": 175 + }, + { + "epoch": 0.1, + "learning_rate": 4.9864255577351534e-05, + "loss": 1.0835, + "step": 180 + }, + { + "epoch": 0.1, + "learning_rate": 4.985661679645769e-05, + "loss": 1.0721, + "step": 185 + }, + { + "epoch": 0.11, + "learning_rate": 4.9848769546264915e-05, + "loss": 1.0692, + "step": 190 + }, + { + "epoch": 0.11, + "learning_rate": 4.9840713892587146e-05, + "loss": 1.0488, + "step": 195 + }, + { + "epoch": 0.11, + "learning_rate": 4.983244990298609e-05, + "loss": 1.1285, + "step": 200 + }, + { + "epoch": 0.11, + "learning_rate": 4.982397764677081e-05, + "loss": 1.0832, + "step": 205 + }, + { + "epoch": 0.12, + "learning_rate": 4.981529719499704e-05, + "loss": 1.0652, + "step": 210 + }, + { + "epoch": 0.12, + "learning_rate": 4.980640862046663e-05, + "loss": 1.1043, + "step": 215 + }, + { + "epoch": 0.12, + "learning_rate": 4.979731199772693e-05, + "loss": 1.112, + "step": 220 + }, + { + "epoch": 0.12, + "learning_rate": 4.9788007403070146e-05, + "loss": 1.1029, + "step": 225 + }, + { + "epoch": 0.13, + "learning_rate": 4.977849491453277e-05, + "loss": 1.0869, + "step": 230 + }, + { + "epoch": 0.13, + "learning_rate": 4.976877461189481e-05, + "loss": 1.0843, + "step": 235 + }, + { + "epoch": 0.13, + "learning_rate": 4.975884657667922e-05, + "loss": 1.0789, + "step": 240 + }, + { + "epoch": 0.14, + "learning_rate": 4.974871089215118e-05, + "loss": 1.0449, + "step": 245 + }, + { + "epoch": 0.14, + "learning_rate": 4.9738367643317405e-05, + "loss": 1.1053, + "step": 250 + }, + { + "epoch": 0.14, + "learning_rate": 4.9727816916925395e-05, + "loss": 1.0651, + "step": 255 + }, + { + "epoch": 0.14, + "learning_rate": 4.971705880146276e-05, + "loss": 1.0828, + "step": 260 + }, + { + "epoch": 0.15, + "learning_rate": 4.970609338715646e-05, + "loss": 1.0932, + "step": 265 + }, + { + "epoch": 0.15, + "learning_rate": 4.969492076597203e-05, + "loss": 1.0648, + "step": 270 + }, + { + "epoch": 0.15, + "learning_rate": 4.968354103161283e-05, + "loss": 1.0948, + "step": 275 + }, + { + "epoch": 0.15, + "learning_rate": 4.967195427951926e-05, + "loss": 1.0721, + "step": 280 + }, + { + "epoch": 0.16, + "learning_rate": 4.9660160606867936e-05, + "loss": 1.124, + "step": 285 + }, + { + "epoch": 0.16, + "learning_rate": 4.9648160112570896e-05, + "loss": 1.0963, + "step": 290 + }, + { + "epoch": 0.16, + "learning_rate": 4.9635952897274773e-05, + "loss": 1.1078, + "step": 295 + }, + { + "epoch": 0.17, + "learning_rate": 4.9623539063359925e-05, + "loss": 1.1059, + "step": 300 + }, + { + "epoch": 0.17, + "learning_rate": 4.961091871493962e-05, + "loss": 1.1032, + "step": 305 + }, + { + "epoch": 0.17, + "learning_rate": 4.959809195785912e-05, + "loss": 1.0595, + "step": 310 + }, + { + "epoch": 0.17, + "learning_rate": 4.958505889969481e-05, + "loss": 1.1096, + "step": 315 + }, + { + "epoch": 0.18, + "learning_rate": 4.957181964975329e-05, + "loss": 1.0589, + "step": 320 + }, + { + "epoch": 0.18, + "learning_rate": 4.955837431907049e-05, + "loss": 1.0608, + "step": 325 + }, + { + "epoch": 0.18, + "learning_rate": 4.954472302041069e-05, + "loss": 1.0819, + "step": 330 + }, + { + "epoch": 0.19, + "learning_rate": 4.9530865868265605e-05, + "loss": 1.0759, + "step": 335 + }, + { + "epoch": 0.19, + "learning_rate": 4.951680297885342e-05, + "loss": 1.0515, + "step": 340 + }, + { + "epoch": 0.19, + "learning_rate": 4.950253447011779e-05, + "loss": 1.0371, + "step": 345 + }, + { + "epoch": 0.19, + "learning_rate": 4.948806046172691e-05, + "loss": 1.0619, + "step": 350 + }, + { + "epoch": 0.2, + "learning_rate": 4.947338107507245e-05, + "loss": 1.0757, + "step": 355 + }, + { + "epoch": 0.2, + "learning_rate": 4.945849643326857e-05, + "loss": 1.0686, + "step": 360 + }, + { + "epoch": 0.2, + "learning_rate": 4.9443406661150874e-05, + "loss": 1.0809, + "step": 365 + }, + { + "epoch": 0.2, + "learning_rate": 4.942811188527537e-05, + "loss": 1.0704, + "step": 370 + }, + { + "epoch": 0.21, + "learning_rate": 4.941261223391742e-05, + "loss": 1.0655, + "step": 375 + }, + { + "epoch": 0.21, + "learning_rate": 4.939690783707063e-05, + "loss": 1.1182, + "step": 380 + }, + { + "epoch": 0.21, + "learning_rate": 4.938099882644578e-05, + "loss": 1.081, + "step": 385 + }, + { + "epoch": 0.22, + "learning_rate": 4.9364885335469734e-05, + "loss": 1.0792, + "step": 390 + }, + { + "epoch": 0.22, + "learning_rate": 4.93485674992843e-05, + "loss": 1.0244, + "step": 395 + }, + { + "epoch": 0.22, + "learning_rate": 4.933204545474511e-05, + "loss": 1.0531, + "step": 400 + }, + { + "epoch": 0.22, + "learning_rate": 4.9315319340420465e-05, + "loss": 1.048, + "step": 405 + }, + { + "epoch": 0.23, + "learning_rate": 4.929838929659015e-05, + "loss": 1.04, + "step": 410 + }, + { + "epoch": 0.23, + "learning_rate": 4.9281255465244314e-05, + "loss": 1.0686, + "step": 415 + }, + { + "epoch": 0.23, + "learning_rate": 4.926391799008223e-05, + "loss": 1.0933, + "step": 420 + }, + { + "epoch": 0.24, + "learning_rate": 4.924637701651111e-05, + "loss": 1.0313, + "step": 425 + }, + { + "epoch": 0.24, + "learning_rate": 4.9228632691644874e-05, + "loss": 1.0811, + "step": 430 + }, + { + "epoch": 0.24, + "learning_rate": 4.921068516430293e-05, + "loss": 1.0617, + "step": 435 + }, + { + "epoch": 0.24, + "learning_rate": 4.919253458500892e-05, + "loss": 1.0482, + "step": 440 + }, + { + "epoch": 0.25, + "learning_rate": 4.9174181105989445e-05, + "loss": 1.0681, + "step": 445 + }, + { + "epoch": 0.25, + "learning_rate": 4.9155624881172834e-05, + "loss": 1.0476, + "step": 450 + }, + { + "epoch": 0.25, + "learning_rate": 4.913686606618777e-05, + "loss": 1.0463, + "step": 455 + }, + { + "epoch": 0.25, + "learning_rate": 4.911790481836208e-05, + "loss": 1.0213, + "step": 460 + }, + { + "epoch": 0.26, + "learning_rate": 4.909874129672133e-05, + "loss": 0.9855, + "step": 465 + }, + { + "epoch": 0.26, + "learning_rate": 4.907937566198757e-05, + "loss": 1.045, + "step": 470 + }, + { + "epoch": 0.26, + "learning_rate": 4.9059808076577914e-05, + "loss": 1.0442, + "step": 475 + }, + { + "epoch": 0.27, + "learning_rate": 4.904003870460323e-05, + "loss": 1.0354, + "step": 480 + }, + { + "epoch": 0.27, + "learning_rate": 4.9020067711866735e-05, + "loss": 1.0331, + "step": 485 + }, + { + "epoch": 0.27, + "learning_rate": 4.899989526586261e-05, + "loss": 1.0585, + "step": 490 + }, + { + "epoch": 0.27, + "learning_rate": 4.8979521535774636e-05, + "loss": 1.0223, + "step": 495 + }, + { + "epoch": 0.28, + "learning_rate": 4.895894669247468e-05, + "loss": 1.0118, + "step": 500 + }, + { + "epoch": 0.28, + "learning_rate": 4.8938170908521356e-05, + "loss": 1.0508, + "step": 505 + }, + { + "epoch": 0.28, + "learning_rate": 4.8917194358158534e-05, + "loss": 1.0656, + "step": 510 + }, + { + "epoch": 0.28, + "learning_rate": 4.8896017217313886e-05, + "loss": 1.0655, + "step": 515 + }, + { + "epoch": 0.29, + "learning_rate": 4.887463966359741e-05, + "loss": 1.0833, + "step": 520 + }, + { + "epoch": 0.29, + "learning_rate": 4.8853061876299956e-05, + "loss": 1.068, + "step": 525 + }, + { + "epoch": 0.29, + "learning_rate": 4.8831284036391684e-05, + "loss": 1.0487, + "step": 530 + }, + { + "epoch": 0.3, + "learning_rate": 4.880930632652058e-05, + "loss": 1.059, + "step": 535 + }, + { + "epoch": 0.3, + "learning_rate": 4.878712893101092e-05, + "loss": 1.0408, + "step": 540 + }, + { + "epoch": 0.3, + "learning_rate": 4.876475203586171e-05, + "loss": 0.9976, + "step": 545 + }, + { + "epoch": 0.3, + "learning_rate": 4.874217582874514e-05, + "loss": 1.0358, + "step": 550 + }, + { + "epoch": 0.31, + "learning_rate": 4.8719400499005e-05, + "loss": 1.0073, + "step": 555 + }, + { + "epoch": 0.31, + "learning_rate": 4.869642623765509e-05, + "loss": 1.0742, + "step": 560 + }, + { + "epoch": 0.31, + "learning_rate": 4.867325323737765e-05, + "loss": 1.0463, + "step": 565 + }, + { + "epoch": 0.32, + "learning_rate": 4.864988169252168e-05, + "loss": 1.0242, + "step": 570 + }, + { + "epoch": 0.32, + "learning_rate": 4.8626311799101375e-05, + "loss": 1.0773, + "step": 575 + }, + { + "epoch": 0.32, + "learning_rate": 4.860254375479446e-05, + "loss": 1.0371, + "step": 580 + }, + { + "epoch": 0.32, + "learning_rate": 4.8578577758940504e-05, + "loss": 1.0399, + "step": 585 + }, + { + "epoch": 0.33, + "learning_rate": 4.855441401253928e-05, + "loss": 1.0411, + "step": 590 + }, + { + "epoch": 0.33, + "learning_rate": 4.8530052718249076e-05, + "loss": 1.0476, + "step": 595 + }, + { + "epoch": 0.33, + "learning_rate": 4.850549408038498e-05, + "loss": 1.0075, + "step": 600 + }, + { + "epoch": 0.33, + "learning_rate": 4.848073830491717e-05, + "loss": 1.0045, + "step": 605 + }, + { + "epoch": 0.34, + "learning_rate": 4.845578559946923e-05, + "loss": 1.0368, + "step": 610 + }, + { + "epoch": 0.34, + "learning_rate": 4.8430636173316306e-05, + "loss": 1.0156, + "step": 615 + }, + { + "epoch": 0.34, + "learning_rate": 4.840529023738348e-05, + "loss": 1.0334, + "step": 620 + }, + { + "epoch": 0.35, + "learning_rate": 4.837974800424389e-05, + "loss": 0.9994, + "step": 625 + }, + { + "epoch": 0.35, + "learning_rate": 4.8354009688117026e-05, + "loss": 1.0467, + "step": 630 + }, + { + "epoch": 0.35, + "learning_rate": 4.8328075504866874e-05, + "loss": 0.9968, + "step": 635 + }, + { + "epoch": 0.35, + "learning_rate": 4.8301945672000164e-05, + "loss": 1.041, + "step": 640 + }, + { + "epoch": 0.36, + "learning_rate": 4.8275620408664487e-05, + "loss": 0.9811, + "step": 645 + }, + { + "epoch": 0.36, + "learning_rate": 4.8249099935646494e-05, + "loss": 1.0235, + "step": 650 + }, + { + "epoch": 0.36, + "learning_rate": 4.822238447537003e-05, + "loss": 1.0152, + "step": 655 + }, + { + "epoch": 0.36, + "learning_rate": 4.819547425189429e-05, + "loss": 1.038, + "step": 660 + }, + { + "epoch": 0.37, + "learning_rate": 4.81683694909119e-05, + "loss": 1.0387, + "step": 665 + }, + { + "epoch": 0.37, + "learning_rate": 4.814107041974707e-05, + "loss": 1.019, + "step": 670 + }, + { + "epoch": 0.37, + "learning_rate": 4.811357726735366e-05, + "loss": 1.0403, + "step": 675 + }, + { + "epoch": 0.38, + "learning_rate": 4.808589026431324e-05, + "loss": 1.0828, + "step": 680 + }, + { + "epoch": 0.38, + "learning_rate": 4.805800964283322e-05, + "loss": 1.0128, + "step": 685 + }, + { + "epoch": 0.38, + "learning_rate": 4.802993563674483e-05, + "loss": 1.0502, + "step": 690 + }, + { + "epoch": 0.38, + "learning_rate": 4.80016684815012e-05, + "loss": 1.0169, + "step": 695 + }, + { + "epoch": 0.39, + "learning_rate": 4.7973208414175406e-05, + "loss": 1.0316, + "step": 700 + }, + { + "epoch": 0.39, + "learning_rate": 4.794455567345842e-05, + "loss": 1.0263, + "step": 705 + }, + { + "epoch": 0.39, + "learning_rate": 4.791571049965714e-05, + "loss": 1.0801, + "step": 710 + }, + { + "epoch": 0.4, + "learning_rate": 4.7886673134692404e-05, + "loss": 1.0575, + "step": 715 + }, + { + "epoch": 0.4, + "learning_rate": 4.7857443822096905e-05, + "loss": 0.9767, + "step": 720 + }, + { + "epoch": 0.4, + "learning_rate": 4.782802280701319e-05, + "loss": 0.9666, + "step": 725 + }, + { + "epoch": 0.4, + "learning_rate": 4.77984103361916e-05, + "loss": 0.9983, + "step": 730 + }, + { + "epoch": 0.41, + "learning_rate": 4.776860665798816e-05, + "loss": 1.0219, + "step": 735 + }, + { + "epoch": 0.41, + "learning_rate": 4.773861202236257e-05, + "loss": 0.9963, + "step": 740 + }, + { + "epoch": 0.41, + "learning_rate": 4.770842668087602e-05, + "loss": 1.0219, + "step": 745 + }, + { + "epoch": 0.41, + "learning_rate": 4.767805088668916e-05, + "loss": 1.0029, + "step": 750 + }, + { + "epoch": 0.42, + "learning_rate": 4.7647484894559936e-05, + "loss": 1.0077, + "step": 755 + }, + { + "epoch": 0.42, + "learning_rate": 4.7616728960841444e-05, + "loss": 0.9908, + "step": 760 + }, + { + "epoch": 0.42, + "learning_rate": 4.758578334347981e-05, + "loss": 1.0092, + "step": 765 + }, + { + "epoch": 0.43, + "learning_rate": 4.7554648302012015e-05, + "loss": 0.9874, + "step": 770 + }, + { + "epoch": 0.43, + "learning_rate": 4.7523324097563706e-05, + "loss": 1.0185, + "step": 775 + }, + { + "epoch": 0.43, + "learning_rate": 4.749181099284703e-05, + "loss": 1.0533, + "step": 780 + }, + { + "epoch": 0.43, + "learning_rate": 4.746010925215839e-05, + "loss": 1.0083, + "step": 785 + }, + { + "epoch": 0.44, + "learning_rate": 4.74282191413763e-05, + "loss": 1.0191, + "step": 790 + }, + { + "epoch": 0.44, + "learning_rate": 4.7396140927959045e-05, + "loss": 0.9786, + "step": 795 + }, + { + "epoch": 0.44, + "learning_rate": 4.7363874880942574e-05, + "loss": 1.0197, + "step": 800 + }, + { + "epoch": 0.45, + "learning_rate": 4.733142127093813e-05, + "loss": 1.0471, + "step": 805 + }, + { + "epoch": 0.45, + "learning_rate": 4.7298780370130014e-05, + "loss": 1.0134, + "step": 810 + }, + { + "epoch": 0.45, + "learning_rate": 4.726595245227336e-05, + "loss": 0.9801, + "step": 815 + }, + { + "epoch": 0.45, + "learning_rate": 4.723293779269173e-05, + "loss": 0.9944, + "step": 820 + }, + { + "epoch": 0.46, + "learning_rate": 4.7199736668274924e-05, + "loss": 1.0358, + "step": 825 + }, + { + "epoch": 0.46, + "learning_rate": 4.716634935747655e-05, + "loss": 0.9907, + "step": 830 + }, + { + "epoch": 0.46, + "learning_rate": 4.713277614031177e-05, + "loss": 1.0698, + "step": 835 + }, + { + "epoch": 0.46, + "learning_rate": 4.70990172983549e-05, + "loss": 1.0466, + "step": 840 + }, + { + "epoch": 0.47, + "learning_rate": 4.706507311473707e-05, + "loss": 0.9489, + "step": 845 + }, + { + "epoch": 0.47, + "learning_rate": 4.703094387414385e-05, + "loss": 0.9936, + "step": 850 + }, + { + "epoch": 0.47, + "learning_rate": 4.699662986281288e-05, + "loss": 0.9672, + "step": 855 + }, + { + "epoch": 0.48, + "learning_rate": 4.696213136853141e-05, + "loss": 1.0164, + "step": 860 + }, + { + "epoch": 0.48, + "learning_rate": 4.6927448680633954e-05, + "loss": 0.9817, + "step": 865 + }, + { + "epoch": 0.48, + "learning_rate": 4.689258208999983e-05, + "loss": 1.028, + "step": 870 + }, + { + "epoch": 0.48, + "learning_rate": 4.6857531889050716e-05, + "loss": 1.0239, + "step": 875 + }, + { + "epoch": 0.49, + "learning_rate": 4.682229837174821e-05, + "loss": 1.0378, + "step": 880 + }, + { + "epoch": 0.49, + "learning_rate": 4.678688183359135e-05, + "loss": 0.9908, + "step": 885 + }, + { + "epoch": 0.49, + "learning_rate": 4.675128257161418e-05, + "loss": 0.9384, + "step": 890 + }, + { + "epoch": 0.49, + "learning_rate": 4.671550088438319e-05, + "loss": 0.9481, + "step": 895 + }, + { + "epoch": 0.5, + "learning_rate": 4.6679537071994874e-05, + "loss": 0.9541, + "step": 900 + }, + { + "epoch": 0.5, + "learning_rate": 4.6643391436073165e-05, + "loss": 0.9787, + "step": 905 + }, + { + "epoch": 0.5, + "learning_rate": 4.660706427976693e-05, + "loss": 0.9897, + "step": 910 + }, + { + "epoch": 0.51, + "learning_rate": 4.657055590774745e-05, + "loss": 0.9947, + "step": 915 + }, + { + "epoch": 0.51, + "learning_rate": 4.6533866626205805e-05, + "loss": 0.9614, + "step": 920 + }, + { + "epoch": 0.51, + "learning_rate": 4.649699674285036e-05, + "loss": 1.0323, + "step": 925 + }, + { + "epoch": 0.51, + "learning_rate": 4.645994656690417e-05, + "loss": 1.0345, + "step": 930 + }, + { + "epoch": 0.52, + "learning_rate": 4.642271640910235e-05, + "loss": 1.0432, + "step": 935 + }, + { + "epoch": 0.52, + "learning_rate": 4.638530658168954e-05, + "loss": 1.0102, + "step": 940 + }, + { + "epoch": 0.52, + "learning_rate": 4.6347717398417203e-05, + "loss": 1.0361, + "step": 945 + }, + { + "epoch": 0.53, + "learning_rate": 4.6309949174541096e-05, + "loss": 0.9818, + "step": 950 + }, + { + "epoch": 0.53, + "learning_rate": 4.627200222681851e-05, + "loss": 1.0293, + "step": 955 + }, + { + "epoch": 0.53, + "learning_rate": 4.6233876873505694e-05, + "loss": 1.0125, + "step": 960 + }, + { + "epoch": 0.53, + "learning_rate": 4.619557343435516e-05, + "loss": 1.0039, + "step": 965 + }, + { + "epoch": 0.54, + "learning_rate": 4.615709223061302e-05, + "loss": 1.0146, + "step": 970 + }, + { + "epoch": 0.54, + "learning_rate": 4.611843358501624e-05, + "loss": 1.0352, + "step": 975 + }, + { + "epoch": 0.54, + "learning_rate": 4.6079597821789993e-05, + "loss": 1.0405, + "step": 980 + }, + { + "epoch": 0.54, + "learning_rate": 4.604058526664491e-05, + "loss": 0.9834, + "step": 985 + }, + { + "epoch": 0.55, + "learning_rate": 4.600139624677436e-05, + "loss": 1.0252, + "step": 990 + }, + { + "epoch": 0.55, + "learning_rate": 4.596203109085168e-05, + "loss": 1.0516, + "step": 995 + }, + { + "epoch": 0.55, + "learning_rate": 4.5922490129027464e-05, + "loss": 0.9762, + "step": 1000 + }, + { + "epoch": 0.56, + "learning_rate": 4.588277369292674e-05, + "loss": 0.9801, + "step": 1005 + }, + { + "epoch": 0.56, + "learning_rate": 4.5842882115646234e-05, + "loss": 1.0127, + "step": 1010 + }, + { + "epoch": 0.56, + "learning_rate": 4.580281573175157e-05, + "loss": 1.0415, + "step": 1015 + }, + { + "epoch": 0.56, + "learning_rate": 4.576257487727442e-05, + "loss": 0.9974, + "step": 1020 + }, + { + "epoch": 0.57, + "learning_rate": 4.572215988970974e-05, + "loss": 1.0097, + "step": 1025 + }, + { + "epoch": 0.57, + "learning_rate": 4.568157110801293e-05, + "loss": 1.0004, + "step": 1030 + }, + { + "epoch": 0.57, + "learning_rate": 4.5640808872596944e-05, + "loss": 0.9932, + "step": 1035 + }, + { + "epoch": 0.58, + "learning_rate": 4.5599873525329505e-05, + "loss": 0.9437, + "step": 1040 + }, + { + "epoch": 0.58, + "learning_rate": 4.555876540953019e-05, + "loss": 1.0004, + "step": 1045 + }, + { + "epoch": 0.58, + "learning_rate": 4.551748486996755e-05, + "loss": 0.9743, + "step": 1050 + }, + { + "epoch": 0.58, + "learning_rate": 4.547603225285626e-05, + "loss": 1.0303, + "step": 1055 + }, + { + "epoch": 0.59, + "learning_rate": 4.543440790585417e-05, + "loss": 1.0225, + "step": 1060 + }, + { + "epoch": 0.59, + "learning_rate": 4.539261217805939e-05, + "loss": 1.0102, + "step": 1065 + }, + { + "epoch": 0.59, + "learning_rate": 4.535064542000743e-05, + "loss": 1.0087, + "step": 1070 + }, + { + "epoch": 0.59, + "learning_rate": 4.5308507983668165e-05, + "loss": 0.952, + "step": 1075 + }, + { + "epoch": 0.6, + "learning_rate": 4.526620022244293e-05, + "loss": 0.9767, + "step": 1080 + }, + { + "epoch": 0.6, + "learning_rate": 4.522372249116158e-05, + "loss": 1.0049, + "step": 1085 + }, + { + "epoch": 0.6, + "learning_rate": 4.5181075146079456e-05, + "loss": 1.0397, + "step": 1090 + }, + { + "epoch": 0.61, + "learning_rate": 4.5138258544874455e-05, + "loss": 1.0167, + "step": 1095 + }, + { + "epoch": 0.61, + "learning_rate": 4.5095273046643985e-05, + "loss": 0.9966, + "step": 1100 + }, + { + "epoch": 0.61, + "learning_rate": 4.5052119011901986e-05, + "loss": 0.983, + "step": 1105 + }, + { + "epoch": 0.61, + "learning_rate": 4.500879680257587e-05, + "loss": 1.0301, + "step": 1110 + }, + { + "epoch": 0.62, + "learning_rate": 4.4965306782003535e-05, + "loss": 1.0371, + "step": 1115 + }, + { + "epoch": 0.62, + "learning_rate": 4.492164931493028e-05, + "loss": 1.0083, + "step": 1120 + }, + { + "epoch": 0.62, + "learning_rate": 4.487782476750575e-05, + "loss": 0.9728, + "step": 1125 + }, + { + "epoch": 0.62, + "learning_rate": 4.4833833507280884e-05, + "loss": 0.9892, + "step": 1130 + }, + { + "epoch": 0.63, + "learning_rate": 4.4789675903204805e-05, + "loss": 0.9847, + "step": 1135 + }, + { + "epoch": 0.63, + "learning_rate": 4.474535232562176e-05, + "loss": 1.0545, + "step": 1140 + }, + { + "epoch": 0.63, + "learning_rate": 4.470086314626797e-05, + "loss": 1.0204, + "step": 1145 + }, + { + "epoch": 0.64, + "learning_rate": 4.465620873826856e-05, + "loss": 0.9648, + "step": 1150 + }, + { + "epoch": 0.64, + "learning_rate": 4.46113894761344e-05, + "loss": 0.9545, + "step": 1155 + }, + { + "epoch": 0.64, + "learning_rate": 4.456640573575896e-05, + "loss": 1.0256, + "step": 1160 + }, + { + "epoch": 0.64, + "learning_rate": 4.4521257894415183e-05, + "loss": 0.99, + "step": 1165 + }, + { + "epoch": 0.65, + "learning_rate": 4.44759463307523e-05, + "loss": 0.9906, + "step": 1170 + }, + { + "epoch": 0.65, + "learning_rate": 4.443047142479266e-05, + "loss": 0.9876, + "step": 1175 + }, + { + "epoch": 0.65, + "learning_rate": 4.4384833557928553e-05, + "loss": 1.0495, + "step": 1180 + }, + { + "epoch": 0.66, + "learning_rate": 4.4339033112918966e-05, + "loss": 0.9869, + "step": 1185 + }, + { + "epoch": 0.66, + "learning_rate": 4.4293070473886456e-05, + "loss": 1.0299, + "step": 1190 + }, + { + "epoch": 0.66, + "learning_rate": 4.424694602631385e-05, + "loss": 1.0073, + "step": 1195 + }, + { + "epoch": 0.66, + "learning_rate": 4.420066015704105e-05, + "loss": 1.0023, + "step": 1200 + } + ], + "max_steps": 5424, + "num_train_epochs": 3, + "total_flos": 3.89964374212608e+17, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-1200/training_args.bin b/checkpoint-1200/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..77433459daca6603950afc463ff989e817f81ac3 --- /dev/null +++ b/checkpoint-1200/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0f6a8853599a52b1f15984d75ce4b6434c335fdc67d4509d0b2ca0d8c613590a +size 3362 diff --git a/checkpoint-1300/README.md b/checkpoint-1300/README.md new file mode 100644 index 0000000000000000000000000000000000000000..53c0fe6b436905ad7a2c060e98fe1ebdeac1a2fe --- /dev/null +++ b/checkpoint-1300/README.md @@ -0,0 +1,15 @@ +--- +library_name: peft +--- +## Training procedure + + +The following `bitsandbytes` quantization config was used during training: +- load_in_8bit: True +- llm_int8_threshold: 6.0 +- llm_int8_skip_modules: None +- llm_int8_enable_fp32_cpu_offload: False +### Framework versions + + +- PEFT 0.5.0 diff --git a/checkpoint-1300/adapter_config.json b/checkpoint-1300/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..514ae99ac7471cc26016f7575e44dd8304f29b07 --- /dev/null +++ b/checkpoint-1300/adapter_config.json @@ -0,0 +1,21 @@ +{ + "auto_mapping": null, + "base_model_name_or_path": "meta-llama/Llama-2-7b-chat-hf", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "q_proj", + "v_proj" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-1300/adapter_model.bin b/checkpoint-1300/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..ca2f3b79549103e867d84e92c0f088c2b60957ec --- /dev/null +++ b/checkpoint-1300/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d2278e667677bfbfc7e5bc7aa88f1db8f45950b047e420960a13e2302a5cd6bf +size 16821197 diff --git a/checkpoint-1300/finetuning_args.json b/checkpoint-1300/finetuning_args.json new file mode 100644 index 0000000000000000000000000000000000000000..d01efc206b59c6f88548e8f3940579f2ed2af33b --- /dev/null +++ b/checkpoint-1300/finetuning_args.json @@ -0,0 +1,16 @@ +{ + "dpo_beta": 0.1, + "finetuning_type": "lora", + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "lora_rank": 8, + "lora_target": [ + "q_proj", + "v_proj" + ], + "name_module_trainable": "mlp", + "num_hidden_layers": 32, + "num_layer_trainable": 3, + "ppo_score_norm": false, + "resume_lora_training": true +} diff --git a/checkpoint-1300/optimizer.pt b/checkpoint-1300/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..1f59670b3dd6cb2bdd486c7c869b70639c015c02 --- /dev/null +++ b/checkpoint-1300/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:76481024f47daba7f7116eb4e92f57d2047bd5ec9c0955bbf22ac942a7e48258 +size 33629893 diff --git a/checkpoint-1300/rng_state.pth b/checkpoint-1300/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..a59c40c6ba0b6d77a516f01639ee64fc1a6311d6 --- /dev/null +++ b/checkpoint-1300/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:efc18f4dbdbcac0e9a158b6f0b3813126bd2631c8d2ea4688719574afc39f16f +size 14575 diff --git a/checkpoint-1300/scaler.pt b/checkpoint-1300/scaler.pt new file mode 100644 index 0000000000000000000000000000000000000000..78a198d02db2fc3f1f220e8ac49686d1ce38c9ea --- /dev/null +++ b/checkpoint-1300/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:19731f6c29559eca73772c9d98b9ff9c6bd85ab2a569db52899871e5d1ba6fca +size 557 diff --git a/checkpoint-1300/scheduler.pt b/checkpoint-1300/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..762384743cda2ac725861a5ee5116fcb0b2c6961 --- /dev/null +++ b/checkpoint-1300/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c62eda09f53d572b5dbf838d08c02426f370ce878224db3bd41b59942359a5f5 +size 627 diff --git a/checkpoint-1300/trainer_state.json b/checkpoint-1300/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..725c82933d001ee92c726f897959e1684e604c02 --- /dev/null +++ b/checkpoint-1300/trainer_state.json @@ -0,0 +1,1576 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.71882775781034, + "global_step": 1300, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.9999895164082156e-05, + "loss": 1.6349, + "step": 5 + }, + { + "epoch": 0.01, + "learning_rate": 4.999958065720787e-05, + "loss": 1.6199, + "step": 10 + }, + { + "epoch": 0.01, + "learning_rate": 4.999905648201487e-05, + "loss": 1.4834, + "step": 15 + }, + { + "epoch": 0.01, + "learning_rate": 4.999832264289934e-05, + "loss": 1.3882, + "step": 20 + }, + { + "epoch": 0.01, + "learning_rate": 4.999737914601591e-05, + "loss": 1.3679, + "step": 25 + }, + { + "epoch": 0.02, + "learning_rate": 4.999622599927756e-05, + "loss": 1.2396, + "step": 30 + }, + { + "epoch": 0.02, + "learning_rate": 4.999486321235559e-05, + "loss": 1.321, + "step": 35 + }, + { + "epoch": 0.02, + "learning_rate": 4.9993290796679516e-05, + "loss": 1.2874, + "step": 40 + }, + { + "epoch": 0.02, + "learning_rate": 4.999150876543699e-05, + "loss": 1.2607, + "step": 45 + }, + { + "epoch": 0.03, + "learning_rate": 4.9989517133573694e-05, + "loss": 1.2454, + "step": 50 + }, + { + "epoch": 0.03, + "learning_rate": 4.9987315917793174e-05, + "loss": 1.2799, + "step": 55 + }, + { + "epoch": 0.03, + "learning_rate": 4.998490513655676e-05, + "loss": 1.2575, + "step": 60 + }, + { + "epoch": 0.04, + "learning_rate": 4.998228481008337e-05, + "loss": 1.2404, + "step": 65 + }, + { + "epoch": 0.04, + "learning_rate": 4.997945496034934e-05, + "loss": 1.2219, + "step": 70 + }, + { + "epoch": 0.04, + "learning_rate": 4.9976415611088267e-05, + "loss": 1.2241, + "step": 75 + }, + { + "epoch": 0.04, + "learning_rate": 4.997316678779079e-05, + "loss": 1.1716, + "step": 80 + }, + { + "epoch": 0.05, + "learning_rate": 4.996970851770438e-05, + "loss": 1.1883, + "step": 85 + }, + { + "epoch": 0.05, + "learning_rate": 4.9966040829833115e-05, + "loss": 1.205, + "step": 90 + }, + { + "epoch": 0.05, + "learning_rate": 4.9962163754937426e-05, + "loss": 1.1246, + "step": 95 + }, + { + "epoch": 0.06, + "learning_rate": 4.995807732553384e-05, + "loss": 1.1636, + "step": 100 + }, + { + "epoch": 0.06, + "learning_rate": 4.9953781575894723e-05, + "loss": 1.158, + "step": 105 + }, + { + "epoch": 0.06, + "learning_rate": 4.9949276542048e-05, + "loss": 1.1477, + "step": 110 + }, + { + "epoch": 0.06, + "learning_rate": 4.9944562261776805e-05, + "loss": 1.1678, + "step": 115 + }, + { + "epoch": 0.07, + "learning_rate": 4.9939638774619216e-05, + "loss": 1.1501, + "step": 120 + }, + { + "epoch": 0.07, + "learning_rate": 4.99345061218679e-05, + "loss": 1.1955, + "step": 125 + }, + { + "epoch": 0.07, + "learning_rate": 4.9929164346569756e-05, + "loss": 1.1724, + "step": 130 + }, + { + "epoch": 0.07, + "learning_rate": 4.9923613493525576e-05, + "loss": 1.177, + "step": 135 + }, + { + "epoch": 0.08, + "learning_rate": 4.991785360928968e-05, + "loss": 1.1418, + "step": 140 + }, + { + "epoch": 0.08, + "learning_rate": 4.991188474216947e-05, + "loss": 1.1898, + "step": 145 + }, + { + "epoch": 0.08, + "learning_rate": 4.9905706942225094e-05, + "loss": 1.1479, + "step": 150 + }, + { + "epoch": 0.09, + "learning_rate": 4.9899320261268966e-05, + "loss": 1.1356, + "step": 155 + }, + { + "epoch": 0.09, + "learning_rate": 4.989272475286537e-05, + "loss": 1.1397, + "step": 160 + }, + { + "epoch": 0.09, + "learning_rate": 4.9885920472330004e-05, + "loss": 1.1215, + "step": 165 + }, + { + "epoch": 0.09, + "learning_rate": 4.9878907476729516e-05, + "loss": 1.167, + "step": 170 + }, + { + "epoch": 0.1, + "learning_rate": 4.9871685824881e-05, + "loss": 1.1219, + "step": 175 + }, + { + "epoch": 0.1, + "learning_rate": 4.9864255577351534e-05, + "loss": 1.0835, + "step": 180 + }, + { + "epoch": 0.1, + "learning_rate": 4.985661679645769e-05, + "loss": 1.0721, + "step": 185 + }, + { + "epoch": 0.11, + "learning_rate": 4.9848769546264915e-05, + "loss": 1.0692, + "step": 190 + }, + { + "epoch": 0.11, + "learning_rate": 4.9840713892587146e-05, + "loss": 1.0488, + "step": 195 + }, + { + "epoch": 0.11, + "learning_rate": 4.983244990298609e-05, + "loss": 1.1285, + "step": 200 + }, + { + "epoch": 0.11, + "learning_rate": 4.982397764677081e-05, + "loss": 1.0832, + "step": 205 + }, + { + "epoch": 0.12, + "learning_rate": 4.981529719499704e-05, + "loss": 1.0652, + "step": 210 + }, + { + "epoch": 0.12, + "learning_rate": 4.980640862046663e-05, + "loss": 1.1043, + "step": 215 + }, + { + "epoch": 0.12, + "learning_rate": 4.979731199772693e-05, + "loss": 1.112, + "step": 220 + }, + { + "epoch": 0.12, + "learning_rate": 4.9788007403070146e-05, + "loss": 1.1029, + "step": 225 + }, + { + "epoch": 0.13, + "learning_rate": 4.977849491453277e-05, + "loss": 1.0869, + "step": 230 + }, + { + "epoch": 0.13, + "learning_rate": 4.976877461189481e-05, + "loss": 1.0843, + "step": 235 + }, + { + "epoch": 0.13, + "learning_rate": 4.975884657667922e-05, + "loss": 1.0789, + "step": 240 + }, + { + "epoch": 0.14, + "learning_rate": 4.974871089215118e-05, + "loss": 1.0449, + "step": 245 + }, + { + "epoch": 0.14, + "learning_rate": 4.9738367643317405e-05, + "loss": 1.1053, + "step": 250 + }, + { + "epoch": 0.14, + "learning_rate": 4.9727816916925395e-05, + "loss": 1.0651, + "step": 255 + }, + { + "epoch": 0.14, + "learning_rate": 4.971705880146276e-05, + "loss": 1.0828, + "step": 260 + }, + { + "epoch": 0.15, + "learning_rate": 4.970609338715646e-05, + "loss": 1.0932, + "step": 265 + }, + { + "epoch": 0.15, + "learning_rate": 4.969492076597203e-05, + "loss": 1.0648, + "step": 270 + }, + { + "epoch": 0.15, + "learning_rate": 4.968354103161283e-05, + "loss": 1.0948, + "step": 275 + }, + { + "epoch": 0.15, + "learning_rate": 4.967195427951926e-05, + "loss": 1.0721, + "step": 280 + }, + { + "epoch": 0.16, + "learning_rate": 4.9660160606867936e-05, + "loss": 1.124, + "step": 285 + }, + { + "epoch": 0.16, + "learning_rate": 4.9648160112570896e-05, + "loss": 1.0963, + "step": 290 + }, + { + "epoch": 0.16, + "learning_rate": 4.9635952897274773e-05, + "loss": 1.1078, + "step": 295 + }, + { + "epoch": 0.17, + "learning_rate": 4.9623539063359925e-05, + "loss": 1.1059, + "step": 300 + }, + { + "epoch": 0.17, + "learning_rate": 4.961091871493962e-05, + "loss": 1.1032, + "step": 305 + }, + { + "epoch": 0.17, + "learning_rate": 4.959809195785912e-05, + "loss": 1.0595, + "step": 310 + }, + { + "epoch": 0.17, + "learning_rate": 4.958505889969481e-05, + "loss": 1.1096, + "step": 315 + }, + { + "epoch": 0.18, + "learning_rate": 4.957181964975329e-05, + "loss": 1.0589, + "step": 320 + }, + { + "epoch": 0.18, + "learning_rate": 4.955837431907049e-05, + "loss": 1.0608, + "step": 325 + }, + { + "epoch": 0.18, + "learning_rate": 4.954472302041069e-05, + "loss": 1.0819, + "step": 330 + }, + { + "epoch": 0.19, + "learning_rate": 4.9530865868265605e-05, + "loss": 1.0759, + "step": 335 + }, + { + "epoch": 0.19, + "learning_rate": 4.951680297885342e-05, + "loss": 1.0515, + "step": 340 + }, + { + "epoch": 0.19, + "learning_rate": 4.950253447011779e-05, + "loss": 1.0371, + "step": 345 + }, + { + "epoch": 0.19, + "learning_rate": 4.948806046172691e-05, + "loss": 1.0619, + "step": 350 + }, + { + "epoch": 0.2, + "learning_rate": 4.947338107507245e-05, + "loss": 1.0757, + "step": 355 + }, + { + "epoch": 0.2, + "learning_rate": 4.945849643326857e-05, + "loss": 1.0686, + "step": 360 + }, + { + "epoch": 0.2, + "learning_rate": 4.9443406661150874e-05, + "loss": 1.0809, + "step": 365 + }, + { + "epoch": 0.2, + "learning_rate": 4.942811188527537e-05, + "loss": 1.0704, + "step": 370 + }, + { + "epoch": 0.21, + "learning_rate": 4.941261223391742e-05, + "loss": 1.0655, + "step": 375 + }, + { + "epoch": 0.21, + "learning_rate": 4.939690783707063e-05, + "loss": 1.1182, + "step": 380 + }, + { + "epoch": 0.21, + "learning_rate": 4.938099882644578e-05, + "loss": 1.081, + "step": 385 + }, + { + "epoch": 0.22, + "learning_rate": 4.9364885335469734e-05, + "loss": 1.0792, + "step": 390 + }, + { + "epoch": 0.22, + "learning_rate": 4.93485674992843e-05, + "loss": 1.0244, + "step": 395 + }, + { + "epoch": 0.22, + "learning_rate": 4.933204545474511e-05, + "loss": 1.0531, + "step": 400 + }, + { + "epoch": 0.22, + "learning_rate": 4.9315319340420465e-05, + "loss": 1.048, + "step": 405 + }, + { + "epoch": 0.23, + "learning_rate": 4.929838929659015e-05, + "loss": 1.04, + "step": 410 + }, + { + "epoch": 0.23, + "learning_rate": 4.9281255465244314e-05, + "loss": 1.0686, + "step": 415 + }, + { + "epoch": 0.23, + "learning_rate": 4.926391799008223e-05, + "loss": 1.0933, + "step": 420 + }, + { + "epoch": 0.24, + "learning_rate": 4.924637701651111e-05, + "loss": 1.0313, + "step": 425 + }, + { + "epoch": 0.24, + "learning_rate": 4.9228632691644874e-05, + "loss": 1.0811, + "step": 430 + }, + { + "epoch": 0.24, + "learning_rate": 4.921068516430293e-05, + "loss": 1.0617, + "step": 435 + }, + { + "epoch": 0.24, + "learning_rate": 4.919253458500892e-05, + "loss": 1.0482, + "step": 440 + }, + { + "epoch": 0.25, + "learning_rate": 4.9174181105989445e-05, + "loss": 1.0681, + "step": 445 + }, + { + "epoch": 0.25, + "learning_rate": 4.9155624881172834e-05, + "loss": 1.0476, + "step": 450 + }, + { + "epoch": 0.25, + "learning_rate": 4.913686606618777e-05, + "loss": 1.0463, + "step": 455 + }, + { + "epoch": 0.25, + "learning_rate": 4.911790481836208e-05, + "loss": 1.0213, + "step": 460 + }, + { + "epoch": 0.26, + "learning_rate": 4.909874129672133e-05, + "loss": 0.9855, + "step": 465 + }, + { + "epoch": 0.26, + "learning_rate": 4.907937566198757e-05, + "loss": 1.045, + "step": 470 + }, + { + "epoch": 0.26, + "learning_rate": 4.9059808076577914e-05, + "loss": 1.0442, + "step": 475 + }, + { + "epoch": 0.27, + "learning_rate": 4.904003870460323e-05, + "loss": 1.0354, + "step": 480 + }, + { + "epoch": 0.27, + "learning_rate": 4.9020067711866735e-05, + "loss": 1.0331, + "step": 485 + }, + { + "epoch": 0.27, + "learning_rate": 4.899989526586261e-05, + "loss": 1.0585, + "step": 490 + }, + { + "epoch": 0.27, + "learning_rate": 4.8979521535774636e-05, + "loss": 1.0223, + "step": 495 + }, + { + "epoch": 0.28, + "learning_rate": 4.895894669247468e-05, + "loss": 1.0118, + "step": 500 + }, + { + "epoch": 0.28, + "learning_rate": 4.8938170908521356e-05, + "loss": 1.0508, + "step": 505 + }, + { + "epoch": 0.28, + "learning_rate": 4.8917194358158534e-05, + "loss": 1.0656, + "step": 510 + }, + { + "epoch": 0.28, + "learning_rate": 4.8896017217313886e-05, + "loss": 1.0655, + "step": 515 + }, + { + "epoch": 0.29, + "learning_rate": 4.887463966359741e-05, + "loss": 1.0833, + "step": 520 + }, + { + "epoch": 0.29, + "learning_rate": 4.8853061876299956e-05, + "loss": 1.068, + "step": 525 + }, + { + "epoch": 0.29, + "learning_rate": 4.8831284036391684e-05, + "loss": 1.0487, + "step": 530 + }, + { + "epoch": 0.3, + "learning_rate": 4.880930632652058e-05, + "loss": 1.059, + "step": 535 + }, + { + "epoch": 0.3, + "learning_rate": 4.878712893101092e-05, + "loss": 1.0408, + "step": 540 + }, + { + "epoch": 0.3, + "learning_rate": 4.876475203586171e-05, + "loss": 0.9976, + "step": 545 + }, + { + "epoch": 0.3, + "learning_rate": 4.874217582874514e-05, + "loss": 1.0358, + "step": 550 + }, + { + "epoch": 0.31, + "learning_rate": 4.8719400499005e-05, + "loss": 1.0073, + "step": 555 + }, + { + "epoch": 0.31, + "learning_rate": 4.869642623765509e-05, + "loss": 1.0742, + "step": 560 + }, + { + "epoch": 0.31, + "learning_rate": 4.867325323737765e-05, + "loss": 1.0463, + "step": 565 + }, + { + "epoch": 0.32, + "learning_rate": 4.864988169252168e-05, + "loss": 1.0242, + "step": 570 + }, + { + "epoch": 0.32, + "learning_rate": 4.8626311799101375e-05, + "loss": 1.0773, + "step": 575 + }, + { + "epoch": 0.32, + "learning_rate": 4.860254375479446e-05, + "loss": 1.0371, + "step": 580 + }, + { + "epoch": 0.32, + "learning_rate": 4.8578577758940504e-05, + "loss": 1.0399, + "step": 585 + }, + { + "epoch": 0.33, + "learning_rate": 4.855441401253928e-05, + "loss": 1.0411, + "step": 590 + }, + { + "epoch": 0.33, + "learning_rate": 4.8530052718249076e-05, + "loss": 1.0476, + "step": 595 + }, + { + "epoch": 0.33, + "learning_rate": 4.850549408038498e-05, + "loss": 1.0075, + "step": 600 + }, + { + "epoch": 0.33, + "learning_rate": 4.848073830491717e-05, + "loss": 1.0045, + "step": 605 + }, + { + "epoch": 0.34, + "learning_rate": 4.845578559946923e-05, + "loss": 1.0368, + "step": 610 + }, + { + "epoch": 0.34, + "learning_rate": 4.8430636173316306e-05, + "loss": 1.0156, + "step": 615 + }, + { + "epoch": 0.34, + "learning_rate": 4.840529023738348e-05, + "loss": 1.0334, + "step": 620 + }, + { + "epoch": 0.35, + "learning_rate": 4.837974800424389e-05, + "loss": 0.9994, + "step": 625 + }, + { + "epoch": 0.35, + "learning_rate": 4.8354009688117026e-05, + "loss": 1.0467, + "step": 630 + }, + { + "epoch": 0.35, + "learning_rate": 4.8328075504866874e-05, + "loss": 0.9968, + "step": 635 + }, + { + "epoch": 0.35, + "learning_rate": 4.8301945672000164e-05, + "loss": 1.041, + "step": 640 + }, + { + "epoch": 0.36, + "learning_rate": 4.8275620408664487e-05, + "loss": 0.9811, + "step": 645 + }, + { + "epoch": 0.36, + "learning_rate": 4.8249099935646494e-05, + "loss": 1.0235, + "step": 650 + }, + { + "epoch": 0.36, + "learning_rate": 4.822238447537003e-05, + "loss": 1.0152, + "step": 655 + }, + { + "epoch": 0.36, + "learning_rate": 4.819547425189429e-05, + "loss": 1.038, + "step": 660 + }, + { + "epoch": 0.37, + "learning_rate": 4.81683694909119e-05, + "loss": 1.0387, + "step": 665 + }, + { + "epoch": 0.37, + "learning_rate": 4.814107041974707e-05, + "loss": 1.019, + "step": 670 + }, + { + "epoch": 0.37, + "learning_rate": 4.811357726735366e-05, + "loss": 1.0403, + "step": 675 + }, + { + "epoch": 0.38, + "learning_rate": 4.808589026431324e-05, + "loss": 1.0828, + "step": 680 + }, + { + "epoch": 0.38, + "learning_rate": 4.805800964283322e-05, + "loss": 1.0128, + "step": 685 + }, + { + "epoch": 0.38, + "learning_rate": 4.802993563674483e-05, + "loss": 1.0502, + "step": 690 + }, + { + "epoch": 0.38, + "learning_rate": 4.80016684815012e-05, + "loss": 1.0169, + "step": 695 + }, + { + "epoch": 0.39, + "learning_rate": 4.7973208414175406e-05, + "loss": 1.0316, + "step": 700 + }, + { + "epoch": 0.39, + "learning_rate": 4.794455567345842e-05, + "loss": 1.0263, + "step": 705 + }, + { + "epoch": 0.39, + "learning_rate": 4.791571049965714e-05, + "loss": 1.0801, + "step": 710 + }, + { + "epoch": 0.4, + "learning_rate": 4.7886673134692404e-05, + "loss": 1.0575, + "step": 715 + }, + { + "epoch": 0.4, + "learning_rate": 4.7857443822096905e-05, + "loss": 0.9767, + "step": 720 + }, + { + "epoch": 0.4, + "learning_rate": 4.782802280701319e-05, + "loss": 0.9666, + "step": 725 + }, + { + "epoch": 0.4, + "learning_rate": 4.77984103361916e-05, + "loss": 0.9983, + "step": 730 + }, + { + "epoch": 0.41, + "learning_rate": 4.776860665798816e-05, + "loss": 1.0219, + "step": 735 + }, + { + "epoch": 0.41, + "learning_rate": 4.773861202236257e-05, + "loss": 0.9963, + "step": 740 + }, + { + "epoch": 0.41, + "learning_rate": 4.770842668087602e-05, + "loss": 1.0219, + "step": 745 + }, + { + "epoch": 0.41, + "learning_rate": 4.767805088668916e-05, + "loss": 1.0029, + "step": 750 + }, + { + "epoch": 0.42, + "learning_rate": 4.7647484894559936e-05, + "loss": 1.0077, + "step": 755 + }, + { + "epoch": 0.42, + "learning_rate": 4.7616728960841444e-05, + "loss": 0.9908, + "step": 760 + }, + { + "epoch": 0.42, + "learning_rate": 4.758578334347981e-05, + "loss": 1.0092, + "step": 765 + }, + { + "epoch": 0.43, + "learning_rate": 4.7554648302012015e-05, + "loss": 0.9874, + "step": 770 + }, + { + "epoch": 0.43, + "learning_rate": 4.7523324097563706e-05, + "loss": 1.0185, + "step": 775 + }, + { + "epoch": 0.43, + "learning_rate": 4.749181099284703e-05, + "loss": 1.0533, + "step": 780 + }, + { + "epoch": 0.43, + "learning_rate": 4.746010925215839e-05, + "loss": 1.0083, + "step": 785 + }, + { + "epoch": 0.44, + "learning_rate": 4.74282191413763e-05, + "loss": 1.0191, + "step": 790 + }, + { + "epoch": 0.44, + "learning_rate": 4.7396140927959045e-05, + "loss": 0.9786, + "step": 795 + }, + { + "epoch": 0.44, + "learning_rate": 4.7363874880942574e-05, + "loss": 1.0197, + "step": 800 + }, + { + "epoch": 0.45, + "learning_rate": 4.733142127093813e-05, + "loss": 1.0471, + "step": 805 + }, + { + "epoch": 0.45, + "learning_rate": 4.7298780370130014e-05, + "loss": 1.0134, + "step": 810 + }, + { + "epoch": 0.45, + "learning_rate": 4.726595245227336e-05, + "loss": 0.9801, + "step": 815 + }, + { + "epoch": 0.45, + "learning_rate": 4.723293779269173e-05, + "loss": 0.9944, + "step": 820 + }, + { + "epoch": 0.46, + "learning_rate": 4.7199736668274924e-05, + "loss": 1.0358, + "step": 825 + }, + { + "epoch": 0.46, + "learning_rate": 4.716634935747655e-05, + "loss": 0.9907, + "step": 830 + }, + { + "epoch": 0.46, + "learning_rate": 4.713277614031177e-05, + "loss": 1.0698, + "step": 835 + }, + { + "epoch": 0.46, + "learning_rate": 4.70990172983549e-05, + "loss": 1.0466, + "step": 840 + }, + { + "epoch": 0.47, + "learning_rate": 4.706507311473707e-05, + "loss": 0.9489, + "step": 845 + }, + { + "epoch": 0.47, + "learning_rate": 4.703094387414385e-05, + "loss": 0.9936, + "step": 850 + }, + { + "epoch": 0.47, + "learning_rate": 4.699662986281288e-05, + "loss": 0.9672, + "step": 855 + }, + { + "epoch": 0.48, + "learning_rate": 4.696213136853141e-05, + "loss": 1.0164, + "step": 860 + }, + { + "epoch": 0.48, + "learning_rate": 4.6927448680633954e-05, + "loss": 0.9817, + "step": 865 + }, + { + "epoch": 0.48, + "learning_rate": 4.689258208999983e-05, + "loss": 1.028, + "step": 870 + }, + { + "epoch": 0.48, + "learning_rate": 4.6857531889050716e-05, + "loss": 1.0239, + "step": 875 + }, + { + "epoch": 0.49, + "learning_rate": 4.682229837174821e-05, + "loss": 1.0378, + "step": 880 + }, + { + "epoch": 0.49, + "learning_rate": 4.678688183359135e-05, + "loss": 0.9908, + "step": 885 + }, + { + "epoch": 0.49, + "learning_rate": 4.675128257161418e-05, + "loss": 0.9384, + "step": 890 + }, + { + "epoch": 0.49, + "learning_rate": 4.671550088438319e-05, + "loss": 0.9481, + "step": 895 + }, + { + "epoch": 0.5, + "learning_rate": 4.6679537071994874e-05, + "loss": 0.9541, + "step": 900 + }, + { + "epoch": 0.5, + "learning_rate": 4.6643391436073165e-05, + "loss": 0.9787, + "step": 905 + }, + { + "epoch": 0.5, + "learning_rate": 4.660706427976693e-05, + "loss": 0.9897, + "step": 910 + }, + { + "epoch": 0.51, + "learning_rate": 4.657055590774745e-05, + "loss": 0.9947, + "step": 915 + }, + { + "epoch": 0.51, + "learning_rate": 4.6533866626205805e-05, + "loss": 0.9614, + "step": 920 + }, + { + "epoch": 0.51, + "learning_rate": 4.649699674285036e-05, + "loss": 1.0323, + "step": 925 + }, + { + "epoch": 0.51, + "learning_rate": 4.645994656690417e-05, + "loss": 1.0345, + "step": 930 + }, + { + "epoch": 0.52, + "learning_rate": 4.642271640910235e-05, + "loss": 1.0432, + "step": 935 + }, + { + "epoch": 0.52, + "learning_rate": 4.638530658168954e-05, + "loss": 1.0102, + "step": 940 + }, + { + "epoch": 0.52, + "learning_rate": 4.6347717398417203e-05, + "loss": 1.0361, + "step": 945 + }, + { + "epoch": 0.53, + "learning_rate": 4.6309949174541096e-05, + "loss": 0.9818, + "step": 950 + }, + { + "epoch": 0.53, + "learning_rate": 4.627200222681851e-05, + "loss": 1.0293, + "step": 955 + }, + { + "epoch": 0.53, + "learning_rate": 4.6233876873505694e-05, + "loss": 1.0125, + "step": 960 + }, + { + "epoch": 0.53, + "learning_rate": 4.619557343435516e-05, + "loss": 1.0039, + "step": 965 + }, + { + "epoch": 0.54, + "learning_rate": 4.615709223061302e-05, + "loss": 1.0146, + "step": 970 + }, + { + "epoch": 0.54, + "learning_rate": 4.611843358501624e-05, + "loss": 1.0352, + "step": 975 + }, + { + "epoch": 0.54, + "learning_rate": 4.6079597821789993e-05, + "loss": 1.0405, + "step": 980 + }, + { + "epoch": 0.54, + "learning_rate": 4.604058526664491e-05, + "loss": 0.9834, + "step": 985 + }, + { + "epoch": 0.55, + "learning_rate": 4.600139624677436e-05, + "loss": 1.0252, + "step": 990 + }, + { + "epoch": 0.55, + "learning_rate": 4.596203109085168e-05, + "loss": 1.0516, + "step": 995 + }, + { + "epoch": 0.55, + "learning_rate": 4.5922490129027464e-05, + "loss": 0.9762, + "step": 1000 + }, + { + "epoch": 0.56, + "learning_rate": 4.588277369292674e-05, + "loss": 0.9801, + "step": 1005 + }, + { + "epoch": 0.56, + "learning_rate": 4.5842882115646234e-05, + "loss": 1.0127, + "step": 1010 + }, + { + "epoch": 0.56, + "learning_rate": 4.580281573175157e-05, + "loss": 1.0415, + "step": 1015 + }, + { + "epoch": 0.56, + "learning_rate": 4.576257487727442e-05, + "loss": 0.9974, + "step": 1020 + }, + { + "epoch": 0.57, + "learning_rate": 4.572215988970974e-05, + "loss": 1.0097, + "step": 1025 + }, + { + "epoch": 0.57, + "learning_rate": 4.568157110801293e-05, + "loss": 1.0004, + "step": 1030 + }, + { + "epoch": 0.57, + "learning_rate": 4.5640808872596944e-05, + "loss": 0.9932, + "step": 1035 + }, + { + "epoch": 0.58, + "learning_rate": 4.5599873525329505e-05, + "loss": 0.9437, + "step": 1040 + }, + { + "epoch": 0.58, + "learning_rate": 4.555876540953019e-05, + "loss": 1.0004, + "step": 1045 + }, + { + "epoch": 0.58, + "learning_rate": 4.551748486996755e-05, + "loss": 0.9743, + "step": 1050 + }, + { + "epoch": 0.58, + "learning_rate": 4.547603225285626e-05, + "loss": 1.0303, + "step": 1055 + }, + { + "epoch": 0.59, + "learning_rate": 4.543440790585417e-05, + "loss": 1.0225, + "step": 1060 + }, + { + "epoch": 0.59, + "learning_rate": 4.539261217805939e-05, + "loss": 1.0102, + "step": 1065 + }, + { + "epoch": 0.59, + "learning_rate": 4.535064542000743e-05, + "loss": 1.0087, + "step": 1070 + }, + { + "epoch": 0.59, + "learning_rate": 4.5308507983668165e-05, + "loss": 0.952, + "step": 1075 + }, + { + "epoch": 0.6, + "learning_rate": 4.526620022244293e-05, + "loss": 0.9767, + "step": 1080 + }, + { + "epoch": 0.6, + "learning_rate": 4.522372249116158e-05, + "loss": 1.0049, + "step": 1085 + }, + { + "epoch": 0.6, + "learning_rate": 4.5181075146079456e-05, + "loss": 1.0397, + "step": 1090 + }, + { + "epoch": 0.61, + "learning_rate": 4.5138258544874455e-05, + "loss": 1.0167, + "step": 1095 + }, + { + "epoch": 0.61, + "learning_rate": 4.5095273046643985e-05, + "loss": 0.9966, + "step": 1100 + }, + { + "epoch": 0.61, + "learning_rate": 4.5052119011901986e-05, + "loss": 0.983, + "step": 1105 + }, + { + "epoch": 0.61, + "learning_rate": 4.500879680257587e-05, + "loss": 1.0301, + "step": 1110 + }, + { + "epoch": 0.62, + "learning_rate": 4.4965306782003535e-05, + "loss": 1.0371, + "step": 1115 + }, + { + "epoch": 0.62, + "learning_rate": 4.492164931493028e-05, + "loss": 1.0083, + "step": 1120 + }, + { + "epoch": 0.62, + "learning_rate": 4.487782476750575e-05, + "loss": 0.9728, + "step": 1125 + }, + { + "epoch": 0.62, + "learning_rate": 4.4833833507280884e-05, + "loss": 0.9892, + "step": 1130 + }, + { + "epoch": 0.63, + "learning_rate": 4.4789675903204805e-05, + "loss": 0.9847, + "step": 1135 + }, + { + "epoch": 0.63, + "learning_rate": 4.474535232562176e-05, + "loss": 1.0545, + "step": 1140 + }, + { + "epoch": 0.63, + "learning_rate": 4.470086314626797e-05, + "loss": 1.0204, + "step": 1145 + }, + { + "epoch": 0.64, + "learning_rate": 4.465620873826856e-05, + "loss": 0.9648, + "step": 1150 + }, + { + "epoch": 0.64, + "learning_rate": 4.46113894761344e-05, + "loss": 0.9545, + "step": 1155 + }, + { + "epoch": 0.64, + "learning_rate": 4.456640573575896e-05, + "loss": 1.0256, + "step": 1160 + }, + { + "epoch": 0.64, + "learning_rate": 4.4521257894415183e-05, + "loss": 0.99, + "step": 1165 + }, + { + "epoch": 0.65, + "learning_rate": 4.44759463307523e-05, + "loss": 0.9906, + "step": 1170 + }, + { + "epoch": 0.65, + "learning_rate": 4.443047142479266e-05, + "loss": 0.9876, + "step": 1175 + }, + { + "epoch": 0.65, + "learning_rate": 4.4384833557928553e-05, + "loss": 1.0495, + "step": 1180 + }, + { + "epoch": 0.66, + "learning_rate": 4.4339033112918966e-05, + "loss": 0.9869, + "step": 1185 + }, + { + "epoch": 0.66, + "learning_rate": 4.4293070473886456e-05, + "loss": 1.0299, + "step": 1190 + }, + { + "epoch": 0.66, + "learning_rate": 4.424694602631385e-05, + "loss": 1.0073, + "step": 1195 + }, + { + "epoch": 0.66, + "learning_rate": 4.420066015704105e-05, + "loss": 1.0023, + "step": 1200 + }, + { + "epoch": 0.67, + "learning_rate": 4.41542132542618e-05, + "loss": 0.9462, + "step": 1205 + }, + { + "epoch": 0.67, + "learning_rate": 4.410760570752037e-05, + "loss": 1.0116, + "step": 1210 + }, + { + "epoch": 0.67, + "learning_rate": 4.4060837907708375e-05, + "loss": 0.9652, + "step": 1215 + }, + { + "epoch": 0.67, + "learning_rate": 4.401391024706142e-05, + "loss": 1.0411, + "step": 1220 + }, + { + "epoch": 0.68, + "learning_rate": 4.396682311915586e-05, + "loss": 0.9691, + "step": 1225 + }, + { + "epoch": 0.68, + "learning_rate": 4.3919576918905495e-05, + "loss": 0.961, + "step": 1230 + }, + { + "epoch": 0.68, + "learning_rate": 4.387217204255819e-05, + "loss": 0.9602, + "step": 1235 + }, + { + "epoch": 0.69, + "learning_rate": 4.3824608887692666e-05, + "loss": 1.0197, + "step": 1240 + }, + { + "epoch": 0.69, + "learning_rate": 4.377688785321507e-05, + "loss": 0.9982, + "step": 1245 + }, + { + "epoch": 0.69, + "learning_rate": 4.372900933935569e-05, + "loss": 0.986, + "step": 1250 + }, + { + "epoch": 0.69, + "learning_rate": 4.368097374766556e-05, + "loss": 0.9744, + "step": 1255 + }, + { + "epoch": 0.7, + "learning_rate": 4.3632781481013105e-05, + "loss": 1.0078, + "step": 1260 + }, + { + "epoch": 0.7, + "learning_rate": 4.358443294358077e-05, + "loss": 0.9865, + "step": 1265 + }, + { + "epoch": 0.7, + "learning_rate": 4.35359285408616e-05, + "loss": 0.976, + "step": 1270 + }, + { + "epoch": 0.71, + "learning_rate": 4.348726867965591e-05, + "loss": 1.0141, + "step": 1275 + }, + { + "epoch": 0.71, + "learning_rate": 4.343845376806777e-05, + "loss": 0.952, + "step": 1280 + }, + { + "epoch": 0.71, + "learning_rate": 4.338948421550169e-05, + "loss": 1.0239, + "step": 1285 + }, + { + "epoch": 0.71, + "learning_rate": 4.334036043265909e-05, + "loss": 0.994, + "step": 1290 + }, + { + "epoch": 0.72, + "learning_rate": 4.329108283153492e-05, + "loss": 1.0505, + "step": 1295 + }, + { + "epoch": 0.72, + "learning_rate": 4.3241651825414195e-05, + "loss": 0.9919, + "step": 1300 + } + ], + "max_steps": 5424, + "num_train_epochs": 3, + "total_flos": 4.22461405396992e+17, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-1300/training_args.bin b/checkpoint-1300/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..77433459daca6603950afc463ff989e817f81ac3 --- /dev/null +++ b/checkpoint-1300/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0f6a8853599a52b1f15984d75ce4b6434c335fdc67d4509d0b2ca0d8c613590a +size 3362 diff --git a/checkpoint-1400/README.md b/checkpoint-1400/README.md new file mode 100644 index 0000000000000000000000000000000000000000..53c0fe6b436905ad7a2c060e98fe1ebdeac1a2fe --- /dev/null +++ b/checkpoint-1400/README.md @@ -0,0 +1,15 @@ +--- +library_name: peft +--- +## Training procedure + + +The following `bitsandbytes` quantization config was used during training: +- load_in_8bit: True +- llm_int8_threshold: 6.0 +- llm_int8_skip_modules: None +- llm_int8_enable_fp32_cpu_offload: False +### Framework versions + + +- PEFT 0.5.0 diff --git a/checkpoint-1400/adapter_config.json b/checkpoint-1400/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..514ae99ac7471cc26016f7575e44dd8304f29b07 --- /dev/null +++ b/checkpoint-1400/adapter_config.json @@ -0,0 +1,21 @@ +{ + "auto_mapping": null, + "base_model_name_or_path": "meta-llama/Llama-2-7b-chat-hf", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "q_proj", + "v_proj" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-1400/adapter_model.bin b/checkpoint-1400/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..fea369a2a8e8ca39ac50fcf9fbf6d460ed153484 --- /dev/null +++ b/checkpoint-1400/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1c1b02360269c4b7e1b33a63da9a87525fdd5f51abd58d1de8fd19d82b1fd95e +size 16821197 diff --git a/checkpoint-1400/finetuning_args.json b/checkpoint-1400/finetuning_args.json new file mode 100644 index 0000000000000000000000000000000000000000..d01efc206b59c6f88548e8f3940579f2ed2af33b --- /dev/null +++ b/checkpoint-1400/finetuning_args.json @@ -0,0 +1,16 @@ +{ + "dpo_beta": 0.1, + "finetuning_type": "lora", + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "lora_rank": 8, + "lora_target": [ + "q_proj", + "v_proj" + ], + "name_module_trainable": "mlp", + "num_hidden_layers": 32, + "num_layer_trainable": 3, + "ppo_score_norm": false, + "resume_lora_training": true +} diff --git a/checkpoint-1400/optimizer.pt b/checkpoint-1400/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..8c3d9fa808da9536526c98f1e85f1ff55252dc85 --- /dev/null +++ b/checkpoint-1400/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:826cc711bff8f63b4818f5d3fe5f41bbf03f25f115d3d8920f4ec5d1d2bf8853 +size 33629893 diff --git a/checkpoint-1400/rng_state.pth b/checkpoint-1400/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..2ee4493660b2a6014ff3ae5ea8fb07e4de2483ea --- /dev/null +++ b/checkpoint-1400/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7d3207fcbbbd5a5adf598f379fa320cdd8a34419428791f55894f03b31d882bf +size 14575 diff --git a/checkpoint-1400/scaler.pt b/checkpoint-1400/scaler.pt new file mode 100644 index 0000000000000000000000000000000000000000..a8cb12388904652e2007207f80582007b39a2051 --- /dev/null +++ b/checkpoint-1400/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:16fdfc03b58220402968eacaac23fb5471cdb9061302380bd3c8d4d326c02ade +size 557 diff --git a/checkpoint-1400/scheduler.pt b/checkpoint-1400/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..cb3e3c4b09a29c2f2bf2fe837bb204033f7cb9c5 --- /dev/null +++ b/checkpoint-1400/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1466390a880a2f64b9dd00860dfb3e4729df0aba9c71d542b739064b1a886255 +size 627 diff --git a/checkpoint-1400/trainer_state.json b/checkpoint-1400/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..defb8fb4df6af6b87b3f86a58101f4465358b4e5 --- /dev/null +++ b/checkpoint-1400/trainer_state.json @@ -0,0 +1,1696 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.7741222007188278, + "global_step": 1400, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.9999895164082156e-05, + "loss": 1.6349, + "step": 5 + }, + { + "epoch": 0.01, + "learning_rate": 4.999958065720787e-05, + "loss": 1.6199, + "step": 10 + }, + { + "epoch": 0.01, + "learning_rate": 4.999905648201487e-05, + "loss": 1.4834, + "step": 15 + }, + { + "epoch": 0.01, + "learning_rate": 4.999832264289934e-05, + "loss": 1.3882, + "step": 20 + }, + { + "epoch": 0.01, + "learning_rate": 4.999737914601591e-05, + "loss": 1.3679, + "step": 25 + }, + { + "epoch": 0.02, + "learning_rate": 4.999622599927756e-05, + "loss": 1.2396, + "step": 30 + }, + { + "epoch": 0.02, + "learning_rate": 4.999486321235559e-05, + "loss": 1.321, + "step": 35 + }, + { + "epoch": 0.02, + "learning_rate": 4.9993290796679516e-05, + "loss": 1.2874, + "step": 40 + }, + { + "epoch": 0.02, + "learning_rate": 4.999150876543699e-05, + "loss": 1.2607, + "step": 45 + }, + { + "epoch": 0.03, + "learning_rate": 4.9989517133573694e-05, + "loss": 1.2454, + "step": 50 + }, + { + "epoch": 0.03, + "learning_rate": 4.9987315917793174e-05, + "loss": 1.2799, + "step": 55 + }, + { + "epoch": 0.03, + "learning_rate": 4.998490513655676e-05, + "loss": 1.2575, + "step": 60 + }, + { + "epoch": 0.04, + "learning_rate": 4.998228481008337e-05, + "loss": 1.2404, + "step": 65 + }, + { + "epoch": 0.04, + "learning_rate": 4.997945496034934e-05, + "loss": 1.2219, + "step": 70 + }, + { + "epoch": 0.04, + "learning_rate": 4.9976415611088267e-05, + "loss": 1.2241, + "step": 75 + }, + { + "epoch": 0.04, + "learning_rate": 4.997316678779079e-05, + "loss": 1.1716, + "step": 80 + }, + { + "epoch": 0.05, + "learning_rate": 4.996970851770438e-05, + "loss": 1.1883, + "step": 85 + }, + { + "epoch": 0.05, + "learning_rate": 4.9966040829833115e-05, + "loss": 1.205, + "step": 90 + }, + { + "epoch": 0.05, + "learning_rate": 4.9962163754937426e-05, + "loss": 1.1246, + "step": 95 + }, + { + "epoch": 0.06, + "learning_rate": 4.995807732553384e-05, + "loss": 1.1636, + "step": 100 + }, + { + "epoch": 0.06, + "learning_rate": 4.9953781575894723e-05, + "loss": 1.158, + "step": 105 + }, + { + "epoch": 0.06, + "learning_rate": 4.9949276542048e-05, + "loss": 1.1477, + "step": 110 + }, + { + "epoch": 0.06, + "learning_rate": 4.9944562261776805e-05, + "loss": 1.1678, + "step": 115 + }, + { + "epoch": 0.07, + "learning_rate": 4.9939638774619216e-05, + "loss": 1.1501, + "step": 120 + }, + { + "epoch": 0.07, + "learning_rate": 4.99345061218679e-05, + "loss": 1.1955, + "step": 125 + }, + { + "epoch": 0.07, + "learning_rate": 4.9929164346569756e-05, + "loss": 1.1724, + "step": 130 + }, + { + "epoch": 0.07, + "learning_rate": 4.9923613493525576e-05, + "loss": 1.177, + "step": 135 + }, + { + "epoch": 0.08, + "learning_rate": 4.991785360928968e-05, + "loss": 1.1418, + "step": 140 + }, + { + "epoch": 0.08, + "learning_rate": 4.991188474216947e-05, + "loss": 1.1898, + "step": 145 + }, + { + "epoch": 0.08, + "learning_rate": 4.9905706942225094e-05, + "loss": 1.1479, + "step": 150 + }, + { + "epoch": 0.09, + "learning_rate": 4.9899320261268966e-05, + "loss": 1.1356, + "step": 155 + }, + { + "epoch": 0.09, + "learning_rate": 4.989272475286537e-05, + "loss": 1.1397, + "step": 160 + }, + { + "epoch": 0.09, + "learning_rate": 4.9885920472330004e-05, + "loss": 1.1215, + "step": 165 + }, + { + "epoch": 0.09, + "learning_rate": 4.9878907476729516e-05, + "loss": 1.167, + "step": 170 + }, + { + "epoch": 0.1, + "learning_rate": 4.9871685824881e-05, + "loss": 1.1219, + "step": 175 + }, + { + "epoch": 0.1, + "learning_rate": 4.9864255577351534e-05, + "loss": 1.0835, + "step": 180 + }, + { + "epoch": 0.1, + "learning_rate": 4.985661679645769e-05, + "loss": 1.0721, + "step": 185 + }, + { + "epoch": 0.11, + "learning_rate": 4.9848769546264915e-05, + "loss": 1.0692, + "step": 190 + }, + { + "epoch": 0.11, + "learning_rate": 4.9840713892587146e-05, + "loss": 1.0488, + "step": 195 + }, + { + "epoch": 0.11, + "learning_rate": 4.983244990298609e-05, + "loss": 1.1285, + "step": 200 + }, + { + "epoch": 0.11, + "learning_rate": 4.982397764677081e-05, + "loss": 1.0832, + "step": 205 + }, + { + "epoch": 0.12, + "learning_rate": 4.981529719499704e-05, + "loss": 1.0652, + "step": 210 + }, + { + "epoch": 0.12, + "learning_rate": 4.980640862046663e-05, + "loss": 1.1043, + "step": 215 + }, + { + "epoch": 0.12, + "learning_rate": 4.979731199772693e-05, + "loss": 1.112, + "step": 220 + }, + { + "epoch": 0.12, + "learning_rate": 4.9788007403070146e-05, + "loss": 1.1029, + "step": 225 + }, + { + "epoch": 0.13, + "learning_rate": 4.977849491453277e-05, + "loss": 1.0869, + "step": 230 + }, + { + "epoch": 0.13, + "learning_rate": 4.976877461189481e-05, + "loss": 1.0843, + "step": 235 + }, + { + "epoch": 0.13, + "learning_rate": 4.975884657667922e-05, + "loss": 1.0789, + "step": 240 + }, + { + "epoch": 0.14, + "learning_rate": 4.974871089215118e-05, + "loss": 1.0449, + "step": 245 + }, + { + "epoch": 0.14, + "learning_rate": 4.9738367643317405e-05, + "loss": 1.1053, + "step": 250 + }, + { + "epoch": 0.14, + "learning_rate": 4.9727816916925395e-05, + "loss": 1.0651, + "step": 255 + }, + { + "epoch": 0.14, + "learning_rate": 4.971705880146276e-05, + "loss": 1.0828, + "step": 260 + }, + { + "epoch": 0.15, + "learning_rate": 4.970609338715646e-05, + "loss": 1.0932, + "step": 265 + }, + { + "epoch": 0.15, + "learning_rate": 4.969492076597203e-05, + "loss": 1.0648, + "step": 270 + }, + { + "epoch": 0.15, + "learning_rate": 4.968354103161283e-05, + "loss": 1.0948, + "step": 275 + }, + { + "epoch": 0.15, + "learning_rate": 4.967195427951926e-05, + "loss": 1.0721, + "step": 280 + }, + { + "epoch": 0.16, + "learning_rate": 4.9660160606867936e-05, + "loss": 1.124, + "step": 285 + }, + { + "epoch": 0.16, + "learning_rate": 4.9648160112570896e-05, + "loss": 1.0963, + "step": 290 + }, + { + "epoch": 0.16, + "learning_rate": 4.9635952897274773e-05, + "loss": 1.1078, + "step": 295 + }, + { + "epoch": 0.17, + "learning_rate": 4.9623539063359925e-05, + "loss": 1.1059, + "step": 300 + }, + { + "epoch": 0.17, + "learning_rate": 4.961091871493962e-05, + "loss": 1.1032, + "step": 305 + }, + { + "epoch": 0.17, + "learning_rate": 4.959809195785912e-05, + "loss": 1.0595, + "step": 310 + }, + { + "epoch": 0.17, + "learning_rate": 4.958505889969481e-05, + "loss": 1.1096, + "step": 315 + }, + { + "epoch": 0.18, + "learning_rate": 4.957181964975329e-05, + "loss": 1.0589, + "step": 320 + }, + { + "epoch": 0.18, + "learning_rate": 4.955837431907049e-05, + "loss": 1.0608, + "step": 325 + }, + { + "epoch": 0.18, + "learning_rate": 4.954472302041069e-05, + "loss": 1.0819, + "step": 330 + }, + { + "epoch": 0.19, + "learning_rate": 4.9530865868265605e-05, + "loss": 1.0759, + "step": 335 + }, + { + "epoch": 0.19, + "learning_rate": 4.951680297885342e-05, + "loss": 1.0515, + "step": 340 + }, + { + "epoch": 0.19, + "learning_rate": 4.950253447011779e-05, + "loss": 1.0371, + "step": 345 + }, + { + "epoch": 0.19, + "learning_rate": 4.948806046172691e-05, + "loss": 1.0619, + "step": 350 + }, + { + "epoch": 0.2, + "learning_rate": 4.947338107507245e-05, + "loss": 1.0757, + "step": 355 + }, + { + "epoch": 0.2, + "learning_rate": 4.945849643326857e-05, + "loss": 1.0686, + "step": 360 + }, + { + "epoch": 0.2, + "learning_rate": 4.9443406661150874e-05, + "loss": 1.0809, + "step": 365 + }, + { + "epoch": 0.2, + "learning_rate": 4.942811188527537e-05, + "loss": 1.0704, + "step": 370 + }, + { + "epoch": 0.21, + "learning_rate": 4.941261223391742e-05, + "loss": 1.0655, + "step": 375 + }, + { + "epoch": 0.21, + "learning_rate": 4.939690783707063e-05, + "loss": 1.1182, + "step": 380 + }, + { + "epoch": 0.21, + "learning_rate": 4.938099882644578e-05, + "loss": 1.081, + "step": 385 + }, + { + "epoch": 0.22, + "learning_rate": 4.9364885335469734e-05, + "loss": 1.0792, + "step": 390 + }, + { + "epoch": 0.22, + "learning_rate": 4.93485674992843e-05, + "loss": 1.0244, + "step": 395 + }, + { + "epoch": 0.22, + "learning_rate": 4.933204545474511e-05, + "loss": 1.0531, + "step": 400 + }, + { + "epoch": 0.22, + "learning_rate": 4.9315319340420465e-05, + "loss": 1.048, + "step": 405 + }, + { + "epoch": 0.23, + "learning_rate": 4.929838929659015e-05, + "loss": 1.04, + "step": 410 + }, + { + "epoch": 0.23, + "learning_rate": 4.9281255465244314e-05, + "loss": 1.0686, + "step": 415 + }, + { + "epoch": 0.23, + "learning_rate": 4.926391799008223e-05, + "loss": 1.0933, + "step": 420 + }, + { + "epoch": 0.24, + "learning_rate": 4.924637701651111e-05, + "loss": 1.0313, + "step": 425 + }, + { + "epoch": 0.24, + "learning_rate": 4.9228632691644874e-05, + "loss": 1.0811, + "step": 430 + }, + { + "epoch": 0.24, + "learning_rate": 4.921068516430293e-05, + "loss": 1.0617, + "step": 435 + }, + { + "epoch": 0.24, + "learning_rate": 4.919253458500892e-05, + "loss": 1.0482, + "step": 440 + }, + { + "epoch": 0.25, + "learning_rate": 4.9174181105989445e-05, + "loss": 1.0681, + "step": 445 + }, + { + "epoch": 0.25, + "learning_rate": 4.9155624881172834e-05, + "loss": 1.0476, + "step": 450 + }, + { + "epoch": 0.25, + "learning_rate": 4.913686606618777e-05, + "loss": 1.0463, + "step": 455 + }, + { + "epoch": 0.25, + "learning_rate": 4.911790481836208e-05, + "loss": 1.0213, + "step": 460 + }, + { + "epoch": 0.26, + "learning_rate": 4.909874129672133e-05, + "loss": 0.9855, + "step": 465 + }, + { + "epoch": 0.26, + "learning_rate": 4.907937566198757e-05, + "loss": 1.045, + "step": 470 + }, + { + "epoch": 0.26, + "learning_rate": 4.9059808076577914e-05, + "loss": 1.0442, + "step": 475 + }, + { + "epoch": 0.27, + "learning_rate": 4.904003870460323e-05, + "loss": 1.0354, + "step": 480 + }, + { + "epoch": 0.27, + "learning_rate": 4.9020067711866735e-05, + "loss": 1.0331, + "step": 485 + }, + { + "epoch": 0.27, + "learning_rate": 4.899989526586261e-05, + "loss": 1.0585, + "step": 490 + }, + { + "epoch": 0.27, + "learning_rate": 4.8979521535774636e-05, + "loss": 1.0223, + "step": 495 + }, + { + "epoch": 0.28, + "learning_rate": 4.895894669247468e-05, + "loss": 1.0118, + "step": 500 + }, + { + "epoch": 0.28, + "learning_rate": 4.8938170908521356e-05, + "loss": 1.0508, + "step": 505 + }, + { + "epoch": 0.28, + "learning_rate": 4.8917194358158534e-05, + "loss": 1.0656, + "step": 510 + }, + { + "epoch": 0.28, + "learning_rate": 4.8896017217313886e-05, + "loss": 1.0655, + "step": 515 + }, + { + "epoch": 0.29, + "learning_rate": 4.887463966359741e-05, + "loss": 1.0833, + "step": 520 + }, + { + "epoch": 0.29, + "learning_rate": 4.8853061876299956e-05, + "loss": 1.068, + "step": 525 + }, + { + "epoch": 0.29, + "learning_rate": 4.8831284036391684e-05, + "loss": 1.0487, + "step": 530 + }, + { + "epoch": 0.3, + "learning_rate": 4.880930632652058e-05, + "loss": 1.059, + "step": 535 + }, + { + "epoch": 0.3, + "learning_rate": 4.878712893101092e-05, + "loss": 1.0408, + "step": 540 + }, + { + "epoch": 0.3, + "learning_rate": 4.876475203586171e-05, + "loss": 0.9976, + "step": 545 + }, + { + "epoch": 0.3, + "learning_rate": 4.874217582874514e-05, + "loss": 1.0358, + "step": 550 + }, + { + "epoch": 0.31, + "learning_rate": 4.8719400499005e-05, + "loss": 1.0073, + "step": 555 + }, + { + "epoch": 0.31, + "learning_rate": 4.869642623765509e-05, + "loss": 1.0742, + "step": 560 + }, + { + "epoch": 0.31, + "learning_rate": 4.867325323737765e-05, + "loss": 1.0463, + "step": 565 + }, + { + "epoch": 0.32, + "learning_rate": 4.864988169252168e-05, + "loss": 1.0242, + "step": 570 + }, + { + "epoch": 0.32, + "learning_rate": 4.8626311799101375e-05, + "loss": 1.0773, + "step": 575 + }, + { + "epoch": 0.32, + "learning_rate": 4.860254375479446e-05, + "loss": 1.0371, + "step": 580 + }, + { + "epoch": 0.32, + "learning_rate": 4.8578577758940504e-05, + "loss": 1.0399, + "step": 585 + }, + { + "epoch": 0.33, + "learning_rate": 4.855441401253928e-05, + "loss": 1.0411, + "step": 590 + }, + { + "epoch": 0.33, + "learning_rate": 4.8530052718249076e-05, + "loss": 1.0476, + "step": 595 + }, + { + "epoch": 0.33, + "learning_rate": 4.850549408038498e-05, + "loss": 1.0075, + "step": 600 + }, + { + "epoch": 0.33, + "learning_rate": 4.848073830491717e-05, + "loss": 1.0045, + "step": 605 + }, + { + "epoch": 0.34, + "learning_rate": 4.845578559946923e-05, + "loss": 1.0368, + "step": 610 + }, + { + "epoch": 0.34, + "learning_rate": 4.8430636173316306e-05, + "loss": 1.0156, + "step": 615 + }, + { + "epoch": 0.34, + "learning_rate": 4.840529023738348e-05, + "loss": 1.0334, + "step": 620 + }, + { + "epoch": 0.35, + "learning_rate": 4.837974800424389e-05, + "loss": 0.9994, + "step": 625 + }, + { + "epoch": 0.35, + "learning_rate": 4.8354009688117026e-05, + "loss": 1.0467, + "step": 630 + }, + { + "epoch": 0.35, + "learning_rate": 4.8328075504866874e-05, + "loss": 0.9968, + "step": 635 + }, + { + "epoch": 0.35, + "learning_rate": 4.8301945672000164e-05, + "loss": 1.041, + "step": 640 + }, + { + "epoch": 0.36, + "learning_rate": 4.8275620408664487e-05, + "loss": 0.9811, + "step": 645 + }, + { + "epoch": 0.36, + "learning_rate": 4.8249099935646494e-05, + "loss": 1.0235, + "step": 650 + }, + { + "epoch": 0.36, + "learning_rate": 4.822238447537003e-05, + "loss": 1.0152, + "step": 655 + }, + { + "epoch": 0.36, + "learning_rate": 4.819547425189429e-05, + "loss": 1.038, + "step": 660 + }, + { + "epoch": 0.37, + "learning_rate": 4.81683694909119e-05, + "loss": 1.0387, + "step": 665 + }, + { + "epoch": 0.37, + "learning_rate": 4.814107041974707e-05, + "loss": 1.019, + "step": 670 + }, + { + "epoch": 0.37, + "learning_rate": 4.811357726735366e-05, + "loss": 1.0403, + "step": 675 + }, + { + "epoch": 0.38, + "learning_rate": 4.808589026431324e-05, + "loss": 1.0828, + "step": 680 + }, + { + "epoch": 0.38, + "learning_rate": 4.805800964283322e-05, + "loss": 1.0128, + "step": 685 + }, + { + "epoch": 0.38, + "learning_rate": 4.802993563674483e-05, + "loss": 1.0502, + "step": 690 + }, + { + "epoch": 0.38, + "learning_rate": 4.80016684815012e-05, + "loss": 1.0169, + "step": 695 + }, + { + "epoch": 0.39, + "learning_rate": 4.7973208414175406e-05, + "loss": 1.0316, + "step": 700 + }, + { + "epoch": 0.39, + "learning_rate": 4.794455567345842e-05, + "loss": 1.0263, + "step": 705 + }, + { + "epoch": 0.39, + "learning_rate": 4.791571049965714e-05, + "loss": 1.0801, + "step": 710 + }, + { + "epoch": 0.4, + "learning_rate": 4.7886673134692404e-05, + "loss": 1.0575, + "step": 715 + }, + { + "epoch": 0.4, + "learning_rate": 4.7857443822096905e-05, + "loss": 0.9767, + "step": 720 + }, + { + "epoch": 0.4, + "learning_rate": 4.782802280701319e-05, + "loss": 0.9666, + "step": 725 + }, + { + "epoch": 0.4, + "learning_rate": 4.77984103361916e-05, + "loss": 0.9983, + "step": 730 + }, + { + "epoch": 0.41, + "learning_rate": 4.776860665798816e-05, + "loss": 1.0219, + "step": 735 + }, + { + "epoch": 0.41, + "learning_rate": 4.773861202236257e-05, + "loss": 0.9963, + "step": 740 + }, + { + "epoch": 0.41, + "learning_rate": 4.770842668087602e-05, + "loss": 1.0219, + "step": 745 + }, + { + "epoch": 0.41, + "learning_rate": 4.767805088668916e-05, + "loss": 1.0029, + "step": 750 + }, + { + "epoch": 0.42, + "learning_rate": 4.7647484894559936e-05, + "loss": 1.0077, + "step": 755 + }, + { + "epoch": 0.42, + "learning_rate": 4.7616728960841444e-05, + "loss": 0.9908, + "step": 760 + }, + { + "epoch": 0.42, + "learning_rate": 4.758578334347981e-05, + "loss": 1.0092, + "step": 765 + }, + { + "epoch": 0.43, + "learning_rate": 4.7554648302012015e-05, + "loss": 0.9874, + "step": 770 + }, + { + "epoch": 0.43, + "learning_rate": 4.7523324097563706e-05, + "loss": 1.0185, + "step": 775 + }, + { + "epoch": 0.43, + "learning_rate": 4.749181099284703e-05, + "loss": 1.0533, + "step": 780 + }, + { + "epoch": 0.43, + "learning_rate": 4.746010925215839e-05, + "loss": 1.0083, + "step": 785 + }, + { + "epoch": 0.44, + "learning_rate": 4.74282191413763e-05, + "loss": 1.0191, + "step": 790 + }, + { + "epoch": 0.44, + "learning_rate": 4.7396140927959045e-05, + "loss": 0.9786, + "step": 795 + }, + { + "epoch": 0.44, + "learning_rate": 4.7363874880942574e-05, + "loss": 1.0197, + "step": 800 + }, + { + "epoch": 0.45, + "learning_rate": 4.733142127093813e-05, + "loss": 1.0471, + "step": 805 + }, + { + "epoch": 0.45, + "learning_rate": 4.7298780370130014e-05, + "loss": 1.0134, + "step": 810 + }, + { + "epoch": 0.45, + "learning_rate": 4.726595245227336e-05, + "loss": 0.9801, + "step": 815 + }, + { + "epoch": 0.45, + "learning_rate": 4.723293779269173e-05, + "loss": 0.9944, + "step": 820 + }, + { + "epoch": 0.46, + "learning_rate": 4.7199736668274924e-05, + "loss": 1.0358, + "step": 825 + }, + { + "epoch": 0.46, + "learning_rate": 4.716634935747655e-05, + "loss": 0.9907, + "step": 830 + }, + { + "epoch": 0.46, + "learning_rate": 4.713277614031177e-05, + "loss": 1.0698, + "step": 835 + }, + { + "epoch": 0.46, + "learning_rate": 4.70990172983549e-05, + "loss": 1.0466, + "step": 840 + }, + { + "epoch": 0.47, + "learning_rate": 4.706507311473707e-05, + "loss": 0.9489, + "step": 845 + }, + { + "epoch": 0.47, + "learning_rate": 4.703094387414385e-05, + "loss": 0.9936, + "step": 850 + }, + { + "epoch": 0.47, + "learning_rate": 4.699662986281288e-05, + "loss": 0.9672, + "step": 855 + }, + { + "epoch": 0.48, + "learning_rate": 4.696213136853141e-05, + "loss": 1.0164, + "step": 860 + }, + { + "epoch": 0.48, + "learning_rate": 4.6927448680633954e-05, + "loss": 0.9817, + "step": 865 + }, + { + "epoch": 0.48, + "learning_rate": 4.689258208999983e-05, + "loss": 1.028, + "step": 870 + }, + { + "epoch": 0.48, + "learning_rate": 4.6857531889050716e-05, + "loss": 1.0239, + "step": 875 + }, + { + "epoch": 0.49, + "learning_rate": 4.682229837174821e-05, + "loss": 1.0378, + "step": 880 + }, + { + "epoch": 0.49, + "learning_rate": 4.678688183359135e-05, + "loss": 0.9908, + "step": 885 + }, + { + "epoch": 0.49, + "learning_rate": 4.675128257161418e-05, + "loss": 0.9384, + "step": 890 + }, + { + "epoch": 0.49, + "learning_rate": 4.671550088438319e-05, + "loss": 0.9481, + "step": 895 + }, + { + "epoch": 0.5, + "learning_rate": 4.6679537071994874e-05, + "loss": 0.9541, + "step": 900 + }, + { + "epoch": 0.5, + "learning_rate": 4.6643391436073165e-05, + "loss": 0.9787, + "step": 905 + }, + { + "epoch": 0.5, + "learning_rate": 4.660706427976693e-05, + "loss": 0.9897, + "step": 910 + }, + { + "epoch": 0.51, + "learning_rate": 4.657055590774745e-05, + "loss": 0.9947, + "step": 915 + }, + { + "epoch": 0.51, + "learning_rate": 4.6533866626205805e-05, + "loss": 0.9614, + "step": 920 + }, + { + "epoch": 0.51, + "learning_rate": 4.649699674285036e-05, + "loss": 1.0323, + "step": 925 + }, + { + "epoch": 0.51, + "learning_rate": 4.645994656690417e-05, + "loss": 1.0345, + "step": 930 + }, + { + "epoch": 0.52, + "learning_rate": 4.642271640910235e-05, + "loss": 1.0432, + "step": 935 + }, + { + "epoch": 0.52, + "learning_rate": 4.638530658168954e-05, + "loss": 1.0102, + "step": 940 + }, + { + "epoch": 0.52, + "learning_rate": 4.6347717398417203e-05, + "loss": 1.0361, + "step": 945 + }, + { + "epoch": 0.53, + "learning_rate": 4.6309949174541096e-05, + "loss": 0.9818, + "step": 950 + }, + { + "epoch": 0.53, + "learning_rate": 4.627200222681851e-05, + "loss": 1.0293, + "step": 955 + }, + { + "epoch": 0.53, + "learning_rate": 4.6233876873505694e-05, + "loss": 1.0125, + "step": 960 + }, + { + "epoch": 0.53, + "learning_rate": 4.619557343435516e-05, + "loss": 1.0039, + "step": 965 + }, + { + "epoch": 0.54, + "learning_rate": 4.615709223061302e-05, + "loss": 1.0146, + "step": 970 + }, + { + "epoch": 0.54, + "learning_rate": 4.611843358501624e-05, + "loss": 1.0352, + "step": 975 + }, + { + "epoch": 0.54, + "learning_rate": 4.6079597821789993e-05, + "loss": 1.0405, + "step": 980 + }, + { + "epoch": 0.54, + "learning_rate": 4.604058526664491e-05, + "loss": 0.9834, + "step": 985 + }, + { + "epoch": 0.55, + "learning_rate": 4.600139624677436e-05, + "loss": 1.0252, + "step": 990 + }, + { + "epoch": 0.55, + "learning_rate": 4.596203109085168e-05, + "loss": 1.0516, + "step": 995 + }, + { + "epoch": 0.55, + "learning_rate": 4.5922490129027464e-05, + "loss": 0.9762, + "step": 1000 + }, + { + "epoch": 0.56, + "learning_rate": 4.588277369292674e-05, + "loss": 0.9801, + "step": 1005 + }, + { + "epoch": 0.56, + "learning_rate": 4.5842882115646234e-05, + "loss": 1.0127, + "step": 1010 + }, + { + "epoch": 0.56, + "learning_rate": 4.580281573175157e-05, + "loss": 1.0415, + "step": 1015 + }, + { + "epoch": 0.56, + "learning_rate": 4.576257487727442e-05, + "loss": 0.9974, + "step": 1020 + }, + { + "epoch": 0.57, + "learning_rate": 4.572215988970974e-05, + "loss": 1.0097, + "step": 1025 + }, + { + "epoch": 0.57, + "learning_rate": 4.568157110801293e-05, + "loss": 1.0004, + "step": 1030 + }, + { + "epoch": 0.57, + "learning_rate": 4.5640808872596944e-05, + "loss": 0.9932, + "step": 1035 + }, + { + "epoch": 0.58, + "learning_rate": 4.5599873525329505e-05, + "loss": 0.9437, + "step": 1040 + }, + { + "epoch": 0.58, + "learning_rate": 4.555876540953019e-05, + "loss": 1.0004, + "step": 1045 + }, + { + "epoch": 0.58, + "learning_rate": 4.551748486996755e-05, + "loss": 0.9743, + "step": 1050 + }, + { + "epoch": 0.58, + "learning_rate": 4.547603225285626e-05, + "loss": 1.0303, + "step": 1055 + }, + { + "epoch": 0.59, + "learning_rate": 4.543440790585417e-05, + "loss": 1.0225, + "step": 1060 + }, + { + "epoch": 0.59, + "learning_rate": 4.539261217805939e-05, + "loss": 1.0102, + "step": 1065 + }, + { + "epoch": 0.59, + "learning_rate": 4.535064542000743e-05, + "loss": 1.0087, + "step": 1070 + }, + { + "epoch": 0.59, + "learning_rate": 4.5308507983668165e-05, + "loss": 0.952, + "step": 1075 + }, + { + "epoch": 0.6, + "learning_rate": 4.526620022244293e-05, + "loss": 0.9767, + "step": 1080 + }, + { + "epoch": 0.6, + "learning_rate": 4.522372249116158e-05, + "loss": 1.0049, + "step": 1085 + }, + { + "epoch": 0.6, + "learning_rate": 4.5181075146079456e-05, + "loss": 1.0397, + "step": 1090 + }, + { + "epoch": 0.61, + "learning_rate": 4.5138258544874455e-05, + "loss": 1.0167, + "step": 1095 + }, + { + "epoch": 0.61, + "learning_rate": 4.5095273046643985e-05, + "loss": 0.9966, + "step": 1100 + }, + { + "epoch": 0.61, + "learning_rate": 4.5052119011901986e-05, + "loss": 0.983, + "step": 1105 + }, + { + "epoch": 0.61, + "learning_rate": 4.500879680257587e-05, + "loss": 1.0301, + "step": 1110 + }, + { + "epoch": 0.62, + "learning_rate": 4.4965306782003535e-05, + "loss": 1.0371, + "step": 1115 + }, + { + "epoch": 0.62, + "learning_rate": 4.492164931493028e-05, + "loss": 1.0083, + "step": 1120 + }, + { + "epoch": 0.62, + "learning_rate": 4.487782476750575e-05, + "loss": 0.9728, + "step": 1125 + }, + { + "epoch": 0.62, + "learning_rate": 4.4833833507280884e-05, + "loss": 0.9892, + "step": 1130 + }, + { + "epoch": 0.63, + "learning_rate": 4.4789675903204805e-05, + "loss": 0.9847, + "step": 1135 + }, + { + "epoch": 0.63, + "learning_rate": 4.474535232562176e-05, + "loss": 1.0545, + "step": 1140 + }, + { + "epoch": 0.63, + "learning_rate": 4.470086314626797e-05, + "loss": 1.0204, + "step": 1145 + }, + { + "epoch": 0.64, + "learning_rate": 4.465620873826856e-05, + "loss": 0.9648, + "step": 1150 + }, + { + "epoch": 0.64, + "learning_rate": 4.46113894761344e-05, + "loss": 0.9545, + "step": 1155 + }, + { + "epoch": 0.64, + "learning_rate": 4.456640573575896e-05, + "loss": 1.0256, + "step": 1160 + }, + { + "epoch": 0.64, + "learning_rate": 4.4521257894415183e-05, + "loss": 0.99, + "step": 1165 + }, + { + "epoch": 0.65, + "learning_rate": 4.44759463307523e-05, + "loss": 0.9906, + "step": 1170 + }, + { + "epoch": 0.65, + "learning_rate": 4.443047142479266e-05, + "loss": 0.9876, + "step": 1175 + }, + { + "epoch": 0.65, + "learning_rate": 4.4384833557928553e-05, + "loss": 1.0495, + "step": 1180 + }, + { + "epoch": 0.66, + "learning_rate": 4.4339033112918966e-05, + "loss": 0.9869, + "step": 1185 + }, + { + "epoch": 0.66, + "learning_rate": 4.4293070473886456e-05, + "loss": 1.0299, + "step": 1190 + }, + { + "epoch": 0.66, + "learning_rate": 4.424694602631385e-05, + "loss": 1.0073, + "step": 1195 + }, + { + "epoch": 0.66, + "learning_rate": 4.420066015704105e-05, + "loss": 1.0023, + "step": 1200 + }, + { + "epoch": 0.67, + "learning_rate": 4.41542132542618e-05, + "loss": 0.9462, + "step": 1205 + }, + { + "epoch": 0.67, + "learning_rate": 4.410760570752037e-05, + "loss": 1.0116, + "step": 1210 + }, + { + "epoch": 0.67, + "learning_rate": 4.4060837907708375e-05, + "loss": 0.9652, + "step": 1215 + }, + { + "epoch": 0.67, + "learning_rate": 4.401391024706142e-05, + "loss": 1.0411, + "step": 1220 + }, + { + "epoch": 0.68, + "learning_rate": 4.396682311915586e-05, + "loss": 0.9691, + "step": 1225 + }, + { + "epoch": 0.68, + "learning_rate": 4.3919576918905495e-05, + "loss": 0.961, + "step": 1230 + }, + { + "epoch": 0.68, + "learning_rate": 4.387217204255819e-05, + "loss": 0.9602, + "step": 1235 + }, + { + "epoch": 0.69, + "learning_rate": 4.3824608887692666e-05, + "loss": 1.0197, + "step": 1240 + }, + { + "epoch": 0.69, + "learning_rate": 4.377688785321507e-05, + "loss": 0.9982, + "step": 1245 + }, + { + "epoch": 0.69, + "learning_rate": 4.372900933935569e-05, + "loss": 0.986, + "step": 1250 + }, + { + "epoch": 0.69, + "learning_rate": 4.368097374766556e-05, + "loss": 0.9744, + "step": 1255 + }, + { + "epoch": 0.7, + "learning_rate": 4.3632781481013105e-05, + "loss": 1.0078, + "step": 1260 + }, + { + "epoch": 0.7, + "learning_rate": 4.358443294358077e-05, + "loss": 0.9865, + "step": 1265 + }, + { + "epoch": 0.7, + "learning_rate": 4.35359285408616e-05, + "loss": 0.976, + "step": 1270 + }, + { + "epoch": 0.71, + "learning_rate": 4.348726867965591e-05, + "loss": 1.0141, + "step": 1275 + }, + { + "epoch": 0.71, + "learning_rate": 4.343845376806777e-05, + "loss": 0.952, + "step": 1280 + }, + { + "epoch": 0.71, + "learning_rate": 4.338948421550169e-05, + "loss": 1.0239, + "step": 1285 + }, + { + "epoch": 0.71, + "learning_rate": 4.334036043265909e-05, + "loss": 0.994, + "step": 1290 + }, + { + "epoch": 0.72, + "learning_rate": 4.329108283153492e-05, + "loss": 1.0505, + "step": 1295 + }, + { + "epoch": 0.72, + "learning_rate": 4.3241651825414195e-05, + "loss": 0.9919, + "step": 1300 + }, + { + "epoch": 0.72, + "learning_rate": 4.31920678288685e-05, + "loss": 1.0169, + "step": 1305 + }, + { + "epoch": 0.72, + "learning_rate": 4.3142331257752546e-05, + "loss": 0.9938, + "step": 1310 + }, + { + "epoch": 0.73, + "learning_rate": 4.309244252920064e-05, + "loss": 0.92, + "step": 1315 + }, + { + "epoch": 0.73, + "learning_rate": 4.304240206162326e-05, + "loss": 0.9537, + "step": 1320 + }, + { + "epoch": 0.73, + "learning_rate": 4.299221027470345e-05, + "loss": 0.996, + "step": 1325 + }, + { + "epoch": 0.74, + "learning_rate": 4.294186758939339e-05, + "loss": 0.9652, + "step": 1330 + }, + { + "epoch": 0.74, + "learning_rate": 4.2891374427910795e-05, + "loss": 1.0138, + "step": 1335 + }, + { + "epoch": 0.74, + "learning_rate": 4.284073121373544e-05, + "loss": 0.9267, + "step": 1340 + }, + { + "epoch": 0.74, + "learning_rate": 4.278993837160553e-05, + "loss": 0.9899, + "step": 1345 + }, + { + "epoch": 0.75, + "learning_rate": 4.273899632751422e-05, + "loss": 0.963, + "step": 1350 + }, + { + "epoch": 0.75, + "learning_rate": 4.2687905508705974e-05, + "loss": 1.0066, + "step": 1355 + }, + { + "epoch": 0.75, + "learning_rate": 4.263666634367303e-05, + "loss": 0.9939, + "step": 1360 + }, + { + "epoch": 0.75, + "learning_rate": 4.258527926215178e-05, + "loss": 0.9667, + "step": 1365 + }, + { + "epoch": 0.76, + "learning_rate": 4.253374469511917e-05, + "loss": 0.9999, + "step": 1370 + }, + { + "epoch": 0.76, + "learning_rate": 4.248206307478909e-05, + "loss": 0.9738, + "step": 1375 + }, + { + "epoch": 0.76, + "learning_rate": 4.243023483460875e-05, + "loss": 0.9829, + "step": 1380 + }, + { + "epoch": 0.77, + "learning_rate": 4.237826040925503e-05, + "loss": 1.0107, + "step": 1385 + }, + { + "epoch": 0.77, + "learning_rate": 4.232614023463088e-05, + "loss": 0.9992, + "step": 1390 + }, + { + "epoch": 0.77, + "learning_rate": 4.227387474786159e-05, + "loss": 0.9566, + "step": 1395 + }, + { + "epoch": 0.77, + "learning_rate": 4.222146438729119e-05, + "loss": 0.9722, + "step": 1400 + } + ], + "max_steps": 5424, + "num_train_epochs": 3, + "total_flos": 4.54958436581376e+17, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-1400/training_args.bin b/checkpoint-1400/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..77433459daca6603950afc463ff989e817f81ac3 --- /dev/null +++ b/checkpoint-1400/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0f6a8853599a52b1f15984d75ce4b6434c335fdc67d4509d0b2ca0d8c613590a +size 3362 diff --git a/checkpoint-1500/README.md b/checkpoint-1500/README.md new file mode 100644 index 0000000000000000000000000000000000000000..53c0fe6b436905ad7a2c060e98fe1ebdeac1a2fe --- /dev/null +++ b/checkpoint-1500/README.md @@ -0,0 +1,15 @@ +--- +library_name: peft +--- +## Training procedure + + +The following `bitsandbytes` quantization config was used during training: +- load_in_8bit: True +- llm_int8_threshold: 6.0 +- llm_int8_skip_modules: None +- llm_int8_enable_fp32_cpu_offload: False +### Framework versions + + +- PEFT 0.5.0 diff --git a/checkpoint-1500/adapter_config.json b/checkpoint-1500/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..514ae99ac7471cc26016f7575e44dd8304f29b07 --- /dev/null +++ b/checkpoint-1500/adapter_config.json @@ -0,0 +1,21 @@ +{ + "auto_mapping": null, + "base_model_name_or_path": "meta-llama/Llama-2-7b-chat-hf", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "q_proj", + "v_proj" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-1500/adapter_model.bin b/checkpoint-1500/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..c8f0b1b36dbccb373e423625afaf789d7303d0cf --- /dev/null +++ b/checkpoint-1500/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3f305db8049807ec3ba1066331153491eb0b2fc6f2acb3dcf398dca8183a1f24 +size 16821197 diff --git a/checkpoint-1500/finetuning_args.json b/checkpoint-1500/finetuning_args.json new file mode 100644 index 0000000000000000000000000000000000000000..d01efc206b59c6f88548e8f3940579f2ed2af33b --- /dev/null +++ b/checkpoint-1500/finetuning_args.json @@ -0,0 +1,16 @@ +{ + "dpo_beta": 0.1, + "finetuning_type": "lora", + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "lora_rank": 8, + "lora_target": [ + "q_proj", + "v_proj" + ], + "name_module_trainable": "mlp", + "num_hidden_layers": 32, + "num_layer_trainable": 3, + "ppo_score_norm": false, + "resume_lora_training": true +} diff --git a/checkpoint-1500/optimizer.pt b/checkpoint-1500/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..0745358ac136a9296118c369f7f898cab7490e89 --- /dev/null +++ b/checkpoint-1500/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4be79fc586ab631d854dcb7495260efcd222b26a4de1c5dea0bb3316536330e0 +size 33629893 diff --git a/checkpoint-1500/rng_state.pth b/checkpoint-1500/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..2d66fe2a77c4fae984591f9afc38bad276971e8a --- /dev/null +++ b/checkpoint-1500/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:043e06dbebf0ef9717cb79bce6d0ca57be1db009d602bd746d532ff4c60f2089 +size 14575 diff --git a/checkpoint-1500/scaler.pt b/checkpoint-1500/scaler.pt new file mode 100644 index 0000000000000000000000000000000000000000..69c76b8f27f096d1d1a3d9d0e387af1a9cf5028d --- /dev/null +++ b/checkpoint-1500/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:203a72d6c29f42a0e2964fdddc8d7a98df1eccee78fea9de0fa416613390f5c6 +size 557 diff --git a/checkpoint-1500/scheduler.pt b/checkpoint-1500/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..487d6ba6f8aa0a46ca08756fd1828702684ebfe4 --- /dev/null +++ b/checkpoint-1500/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4eac2f1cba6d9f15cfe55ef488380cc4166789067677937b5e4476bc820fae52 +size 627 diff --git a/checkpoint-1500/trainer_state.json b/checkpoint-1500/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..625ba97730460e165955c0b7db26a416cc98fdec --- /dev/null +++ b/checkpoint-1500/trainer_state.json @@ -0,0 +1,1816 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.8294166436273155, + "global_step": 1500, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.9999895164082156e-05, + "loss": 1.6349, + "step": 5 + }, + { + "epoch": 0.01, + "learning_rate": 4.999958065720787e-05, + "loss": 1.6199, + "step": 10 + }, + { + "epoch": 0.01, + "learning_rate": 4.999905648201487e-05, + "loss": 1.4834, + "step": 15 + }, + { + "epoch": 0.01, + "learning_rate": 4.999832264289934e-05, + "loss": 1.3882, + "step": 20 + }, + { + "epoch": 0.01, + "learning_rate": 4.999737914601591e-05, + "loss": 1.3679, + "step": 25 + }, + { + "epoch": 0.02, + "learning_rate": 4.999622599927756e-05, + "loss": 1.2396, + "step": 30 + }, + { + "epoch": 0.02, + "learning_rate": 4.999486321235559e-05, + "loss": 1.321, + "step": 35 + }, + { + "epoch": 0.02, + "learning_rate": 4.9993290796679516e-05, + "loss": 1.2874, + "step": 40 + }, + { + "epoch": 0.02, + "learning_rate": 4.999150876543699e-05, + "loss": 1.2607, + "step": 45 + }, + { + "epoch": 0.03, + "learning_rate": 4.9989517133573694e-05, + "loss": 1.2454, + "step": 50 + }, + { + "epoch": 0.03, + "learning_rate": 4.9987315917793174e-05, + "loss": 1.2799, + "step": 55 + }, + { + "epoch": 0.03, + "learning_rate": 4.998490513655676e-05, + "loss": 1.2575, + "step": 60 + }, + { + "epoch": 0.04, + "learning_rate": 4.998228481008337e-05, + "loss": 1.2404, + "step": 65 + }, + { + "epoch": 0.04, + "learning_rate": 4.997945496034934e-05, + "loss": 1.2219, + "step": 70 + }, + { + "epoch": 0.04, + "learning_rate": 4.9976415611088267e-05, + "loss": 1.2241, + "step": 75 + }, + { + "epoch": 0.04, + "learning_rate": 4.997316678779079e-05, + "loss": 1.1716, + "step": 80 + }, + { + "epoch": 0.05, + "learning_rate": 4.996970851770438e-05, + "loss": 1.1883, + "step": 85 + }, + { + "epoch": 0.05, + "learning_rate": 4.9966040829833115e-05, + "loss": 1.205, + "step": 90 + }, + { + "epoch": 0.05, + "learning_rate": 4.9962163754937426e-05, + "loss": 1.1246, + "step": 95 + }, + { + "epoch": 0.06, + "learning_rate": 4.995807732553384e-05, + "loss": 1.1636, + "step": 100 + }, + { + "epoch": 0.06, + "learning_rate": 4.9953781575894723e-05, + "loss": 1.158, + "step": 105 + }, + { + "epoch": 0.06, + "learning_rate": 4.9949276542048e-05, + "loss": 1.1477, + "step": 110 + }, + { + "epoch": 0.06, + "learning_rate": 4.9944562261776805e-05, + "loss": 1.1678, + "step": 115 + }, + { + "epoch": 0.07, + "learning_rate": 4.9939638774619216e-05, + "loss": 1.1501, + "step": 120 + }, + { + "epoch": 0.07, + "learning_rate": 4.99345061218679e-05, + "loss": 1.1955, + "step": 125 + }, + { + "epoch": 0.07, + "learning_rate": 4.9929164346569756e-05, + "loss": 1.1724, + "step": 130 + }, + { + "epoch": 0.07, + "learning_rate": 4.9923613493525576e-05, + "loss": 1.177, + "step": 135 + }, + { + "epoch": 0.08, + "learning_rate": 4.991785360928968e-05, + "loss": 1.1418, + "step": 140 + }, + { + "epoch": 0.08, + "learning_rate": 4.991188474216947e-05, + "loss": 1.1898, + "step": 145 + }, + { + "epoch": 0.08, + "learning_rate": 4.9905706942225094e-05, + "loss": 1.1479, + "step": 150 + }, + { + "epoch": 0.09, + "learning_rate": 4.9899320261268966e-05, + "loss": 1.1356, + "step": 155 + }, + { + "epoch": 0.09, + "learning_rate": 4.989272475286537e-05, + "loss": 1.1397, + "step": 160 + }, + { + "epoch": 0.09, + "learning_rate": 4.9885920472330004e-05, + "loss": 1.1215, + "step": 165 + }, + { + "epoch": 0.09, + "learning_rate": 4.9878907476729516e-05, + "loss": 1.167, + "step": 170 + }, + { + "epoch": 0.1, + "learning_rate": 4.9871685824881e-05, + "loss": 1.1219, + "step": 175 + }, + { + "epoch": 0.1, + "learning_rate": 4.9864255577351534e-05, + "loss": 1.0835, + "step": 180 + }, + { + "epoch": 0.1, + "learning_rate": 4.985661679645769e-05, + "loss": 1.0721, + "step": 185 + }, + { + "epoch": 0.11, + "learning_rate": 4.9848769546264915e-05, + "loss": 1.0692, + "step": 190 + }, + { + "epoch": 0.11, + "learning_rate": 4.9840713892587146e-05, + "loss": 1.0488, + "step": 195 + }, + { + "epoch": 0.11, + "learning_rate": 4.983244990298609e-05, + "loss": 1.1285, + "step": 200 + }, + { + "epoch": 0.11, + "learning_rate": 4.982397764677081e-05, + "loss": 1.0832, + "step": 205 + }, + { + "epoch": 0.12, + "learning_rate": 4.981529719499704e-05, + "loss": 1.0652, + "step": 210 + }, + { + "epoch": 0.12, + "learning_rate": 4.980640862046663e-05, + "loss": 1.1043, + "step": 215 + }, + { + "epoch": 0.12, + "learning_rate": 4.979731199772693e-05, + "loss": 1.112, + "step": 220 + }, + { + "epoch": 0.12, + "learning_rate": 4.9788007403070146e-05, + "loss": 1.1029, + "step": 225 + }, + { + "epoch": 0.13, + "learning_rate": 4.977849491453277e-05, + "loss": 1.0869, + "step": 230 + }, + { + "epoch": 0.13, + "learning_rate": 4.976877461189481e-05, + "loss": 1.0843, + "step": 235 + }, + { + "epoch": 0.13, + "learning_rate": 4.975884657667922e-05, + "loss": 1.0789, + "step": 240 + }, + { + "epoch": 0.14, + "learning_rate": 4.974871089215118e-05, + "loss": 1.0449, + "step": 245 + }, + { + "epoch": 0.14, + "learning_rate": 4.9738367643317405e-05, + "loss": 1.1053, + "step": 250 + }, + { + "epoch": 0.14, + "learning_rate": 4.9727816916925395e-05, + "loss": 1.0651, + "step": 255 + }, + { + "epoch": 0.14, + "learning_rate": 4.971705880146276e-05, + "loss": 1.0828, + "step": 260 + }, + { + "epoch": 0.15, + "learning_rate": 4.970609338715646e-05, + "loss": 1.0932, + "step": 265 + }, + { + "epoch": 0.15, + "learning_rate": 4.969492076597203e-05, + "loss": 1.0648, + "step": 270 + }, + { + "epoch": 0.15, + "learning_rate": 4.968354103161283e-05, + "loss": 1.0948, + "step": 275 + }, + { + "epoch": 0.15, + "learning_rate": 4.967195427951926e-05, + "loss": 1.0721, + "step": 280 + }, + { + "epoch": 0.16, + "learning_rate": 4.9660160606867936e-05, + "loss": 1.124, + "step": 285 + }, + { + "epoch": 0.16, + "learning_rate": 4.9648160112570896e-05, + "loss": 1.0963, + "step": 290 + }, + { + "epoch": 0.16, + "learning_rate": 4.9635952897274773e-05, + "loss": 1.1078, + "step": 295 + }, + { + "epoch": 0.17, + "learning_rate": 4.9623539063359925e-05, + "loss": 1.1059, + "step": 300 + }, + { + "epoch": 0.17, + "learning_rate": 4.961091871493962e-05, + "loss": 1.1032, + "step": 305 + }, + { + "epoch": 0.17, + "learning_rate": 4.959809195785912e-05, + "loss": 1.0595, + "step": 310 + }, + { + "epoch": 0.17, + "learning_rate": 4.958505889969481e-05, + "loss": 1.1096, + "step": 315 + }, + { + "epoch": 0.18, + "learning_rate": 4.957181964975329e-05, + "loss": 1.0589, + "step": 320 + }, + { + "epoch": 0.18, + "learning_rate": 4.955837431907049e-05, + "loss": 1.0608, + "step": 325 + }, + { + "epoch": 0.18, + "learning_rate": 4.954472302041069e-05, + "loss": 1.0819, + "step": 330 + }, + { + "epoch": 0.19, + "learning_rate": 4.9530865868265605e-05, + "loss": 1.0759, + "step": 335 + }, + { + "epoch": 0.19, + "learning_rate": 4.951680297885342e-05, + "loss": 1.0515, + "step": 340 + }, + { + "epoch": 0.19, + "learning_rate": 4.950253447011779e-05, + "loss": 1.0371, + "step": 345 + }, + { + "epoch": 0.19, + "learning_rate": 4.948806046172691e-05, + "loss": 1.0619, + "step": 350 + }, + { + "epoch": 0.2, + "learning_rate": 4.947338107507245e-05, + "loss": 1.0757, + "step": 355 + }, + { + "epoch": 0.2, + "learning_rate": 4.945849643326857e-05, + "loss": 1.0686, + "step": 360 + }, + { + "epoch": 0.2, + "learning_rate": 4.9443406661150874e-05, + "loss": 1.0809, + "step": 365 + }, + { + "epoch": 0.2, + "learning_rate": 4.942811188527537e-05, + "loss": 1.0704, + "step": 370 + }, + { + "epoch": 0.21, + "learning_rate": 4.941261223391742e-05, + "loss": 1.0655, + "step": 375 + }, + { + "epoch": 0.21, + "learning_rate": 4.939690783707063e-05, + "loss": 1.1182, + "step": 380 + }, + { + "epoch": 0.21, + "learning_rate": 4.938099882644578e-05, + "loss": 1.081, + "step": 385 + }, + { + "epoch": 0.22, + "learning_rate": 4.9364885335469734e-05, + "loss": 1.0792, + "step": 390 + }, + { + "epoch": 0.22, + "learning_rate": 4.93485674992843e-05, + "loss": 1.0244, + "step": 395 + }, + { + "epoch": 0.22, + "learning_rate": 4.933204545474511e-05, + "loss": 1.0531, + "step": 400 + }, + { + "epoch": 0.22, + "learning_rate": 4.9315319340420465e-05, + "loss": 1.048, + "step": 405 + }, + { + "epoch": 0.23, + "learning_rate": 4.929838929659015e-05, + "loss": 1.04, + "step": 410 + }, + { + "epoch": 0.23, + "learning_rate": 4.9281255465244314e-05, + "loss": 1.0686, + "step": 415 + }, + { + "epoch": 0.23, + "learning_rate": 4.926391799008223e-05, + "loss": 1.0933, + "step": 420 + }, + { + "epoch": 0.24, + "learning_rate": 4.924637701651111e-05, + "loss": 1.0313, + "step": 425 + }, + { + "epoch": 0.24, + "learning_rate": 4.9228632691644874e-05, + "loss": 1.0811, + "step": 430 + }, + { + "epoch": 0.24, + "learning_rate": 4.921068516430293e-05, + "loss": 1.0617, + "step": 435 + }, + { + "epoch": 0.24, + "learning_rate": 4.919253458500892e-05, + "loss": 1.0482, + "step": 440 + }, + { + "epoch": 0.25, + "learning_rate": 4.9174181105989445e-05, + "loss": 1.0681, + "step": 445 + }, + { + "epoch": 0.25, + "learning_rate": 4.9155624881172834e-05, + "loss": 1.0476, + "step": 450 + }, + { + "epoch": 0.25, + "learning_rate": 4.913686606618777e-05, + "loss": 1.0463, + "step": 455 + }, + { + "epoch": 0.25, + "learning_rate": 4.911790481836208e-05, + "loss": 1.0213, + "step": 460 + }, + { + "epoch": 0.26, + "learning_rate": 4.909874129672133e-05, + "loss": 0.9855, + "step": 465 + }, + { + "epoch": 0.26, + "learning_rate": 4.907937566198757e-05, + "loss": 1.045, + "step": 470 + }, + { + "epoch": 0.26, + "learning_rate": 4.9059808076577914e-05, + "loss": 1.0442, + "step": 475 + }, + { + "epoch": 0.27, + "learning_rate": 4.904003870460323e-05, + "loss": 1.0354, + "step": 480 + }, + { + "epoch": 0.27, + "learning_rate": 4.9020067711866735e-05, + "loss": 1.0331, + "step": 485 + }, + { + "epoch": 0.27, + "learning_rate": 4.899989526586261e-05, + "loss": 1.0585, + "step": 490 + }, + { + "epoch": 0.27, + "learning_rate": 4.8979521535774636e-05, + "loss": 1.0223, + "step": 495 + }, + { + "epoch": 0.28, + "learning_rate": 4.895894669247468e-05, + "loss": 1.0118, + "step": 500 + }, + { + "epoch": 0.28, + "learning_rate": 4.8938170908521356e-05, + "loss": 1.0508, + "step": 505 + }, + { + "epoch": 0.28, + "learning_rate": 4.8917194358158534e-05, + "loss": 1.0656, + "step": 510 + }, + { + "epoch": 0.28, + "learning_rate": 4.8896017217313886e-05, + "loss": 1.0655, + "step": 515 + }, + { + "epoch": 0.29, + "learning_rate": 4.887463966359741e-05, + "loss": 1.0833, + "step": 520 + }, + { + "epoch": 0.29, + "learning_rate": 4.8853061876299956e-05, + "loss": 1.068, + "step": 525 + }, + { + "epoch": 0.29, + "learning_rate": 4.8831284036391684e-05, + "loss": 1.0487, + "step": 530 + }, + { + "epoch": 0.3, + "learning_rate": 4.880930632652058e-05, + "loss": 1.059, + "step": 535 + }, + { + "epoch": 0.3, + "learning_rate": 4.878712893101092e-05, + "loss": 1.0408, + "step": 540 + }, + { + "epoch": 0.3, + "learning_rate": 4.876475203586171e-05, + "loss": 0.9976, + "step": 545 + }, + { + "epoch": 0.3, + "learning_rate": 4.874217582874514e-05, + "loss": 1.0358, + "step": 550 + }, + { + "epoch": 0.31, + "learning_rate": 4.8719400499005e-05, + "loss": 1.0073, + "step": 555 + }, + { + "epoch": 0.31, + "learning_rate": 4.869642623765509e-05, + "loss": 1.0742, + "step": 560 + }, + { + "epoch": 0.31, + "learning_rate": 4.867325323737765e-05, + "loss": 1.0463, + "step": 565 + }, + { + "epoch": 0.32, + "learning_rate": 4.864988169252168e-05, + "loss": 1.0242, + "step": 570 + }, + { + "epoch": 0.32, + "learning_rate": 4.8626311799101375e-05, + "loss": 1.0773, + "step": 575 + }, + { + "epoch": 0.32, + "learning_rate": 4.860254375479446e-05, + "loss": 1.0371, + "step": 580 + }, + { + "epoch": 0.32, + "learning_rate": 4.8578577758940504e-05, + "loss": 1.0399, + "step": 585 + }, + { + "epoch": 0.33, + "learning_rate": 4.855441401253928e-05, + "loss": 1.0411, + "step": 590 + }, + { + "epoch": 0.33, + "learning_rate": 4.8530052718249076e-05, + "loss": 1.0476, + "step": 595 + }, + { + "epoch": 0.33, + "learning_rate": 4.850549408038498e-05, + "loss": 1.0075, + "step": 600 + }, + { + "epoch": 0.33, + "learning_rate": 4.848073830491717e-05, + "loss": 1.0045, + "step": 605 + }, + { + "epoch": 0.34, + "learning_rate": 4.845578559946923e-05, + "loss": 1.0368, + "step": 610 + }, + { + "epoch": 0.34, + "learning_rate": 4.8430636173316306e-05, + "loss": 1.0156, + "step": 615 + }, + { + "epoch": 0.34, + "learning_rate": 4.840529023738348e-05, + "loss": 1.0334, + "step": 620 + }, + { + "epoch": 0.35, + "learning_rate": 4.837974800424389e-05, + "loss": 0.9994, + "step": 625 + }, + { + "epoch": 0.35, + "learning_rate": 4.8354009688117026e-05, + "loss": 1.0467, + "step": 630 + }, + { + "epoch": 0.35, + "learning_rate": 4.8328075504866874e-05, + "loss": 0.9968, + "step": 635 + }, + { + "epoch": 0.35, + "learning_rate": 4.8301945672000164e-05, + "loss": 1.041, + "step": 640 + }, + { + "epoch": 0.36, + "learning_rate": 4.8275620408664487e-05, + "loss": 0.9811, + "step": 645 + }, + { + "epoch": 0.36, + "learning_rate": 4.8249099935646494e-05, + "loss": 1.0235, + "step": 650 + }, + { + "epoch": 0.36, + "learning_rate": 4.822238447537003e-05, + "loss": 1.0152, + "step": 655 + }, + { + "epoch": 0.36, + "learning_rate": 4.819547425189429e-05, + "loss": 1.038, + "step": 660 + }, + { + "epoch": 0.37, + "learning_rate": 4.81683694909119e-05, + "loss": 1.0387, + "step": 665 + }, + { + "epoch": 0.37, + "learning_rate": 4.814107041974707e-05, + "loss": 1.019, + "step": 670 + }, + { + "epoch": 0.37, + "learning_rate": 4.811357726735366e-05, + "loss": 1.0403, + "step": 675 + }, + { + "epoch": 0.38, + "learning_rate": 4.808589026431324e-05, + "loss": 1.0828, + "step": 680 + }, + { + "epoch": 0.38, + "learning_rate": 4.805800964283322e-05, + "loss": 1.0128, + "step": 685 + }, + { + "epoch": 0.38, + "learning_rate": 4.802993563674483e-05, + "loss": 1.0502, + "step": 690 + }, + { + "epoch": 0.38, + "learning_rate": 4.80016684815012e-05, + "loss": 1.0169, + "step": 695 + }, + { + "epoch": 0.39, + "learning_rate": 4.7973208414175406e-05, + "loss": 1.0316, + "step": 700 + }, + { + "epoch": 0.39, + "learning_rate": 4.794455567345842e-05, + "loss": 1.0263, + "step": 705 + }, + { + "epoch": 0.39, + "learning_rate": 4.791571049965714e-05, + "loss": 1.0801, + "step": 710 + }, + { + "epoch": 0.4, + "learning_rate": 4.7886673134692404e-05, + "loss": 1.0575, + "step": 715 + }, + { + "epoch": 0.4, + "learning_rate": 4.7857443822096905e-05, + "loss": 0.9767, + "step": 720 + }, + { + "epoch": 0.4, + "learning_rate": 4.782802280701319e-05, + "loss": 0.9666, + "step": 725 + }, + { + "epoch": 0.4, + "learning_rate": 4.77984103361916e-05, + "loss": 0.9983, + "step": 730 + }, + { + "epoch": 0.41, + "learning_rate": 4.776860665798816e-05, + "loss": 1.0219, + "step": 735 + }, + { + "epoch": 0.41, + "learning_rate": 4.773861202236257e-05, + "loss": 0.9963, + "step": 740 + }, + { + "epoch": 0.41, + "learning_rate": 4.770842668087602e-05, + "loss": 1.0219, + "step": 745 + }, + { + "epoch": 0.41, + "learning_rate": 4.767805088668916e-05, + "loss": 1.0029, + "step": 750 + }, + { + "epoch": 0.42, + "learning_rate": 4.7647484894559936e-05, + "loss": 1.0077, + "step": 755 + }, + { + "epoch": 0.42, + "learning_rate": 4.7616728960841444e-05, + "loss": 0.9908, + "step": 760 + }, + { + "epoch": 0.42, + "learning_rate": 4.758578334347981e-05, + "loss": 1.0092, + "step": 765 + }, + { + "epoch": 0.43, + "learning_rate": 4.7554648302012015e-05, + "loss": 0.9874, + "step": 770 + }, + { + "epoch": 0.43, + "learning_rate": 4.7523324097563706e-05, + "loss": 1.0185, + "step": 775 + }, + { + "epoch": 0.43, + "learning_rate": 4.749181099284703e-05, + "loss": 1.0533, + "step": 780 + }, + { + "epoch": 0.43, + "learning_rate": 4.746010925215839e-05, + "loss": 1.0083, + "step": 785 + }, + { + "epoch": 0.44, + "learning_rate": 4.74282191413763e-05, + "loss": 1.0191, + "step": 790 + }, + { + "epoch": 0.44, + "learning_rate": 4.7396140927959045e-05, + "loss": 0.9786, + "step": 795 + }, + { + "epoch": 0.44, + "learning_rate": 4.7363874880942574e-05, + "loss": 1.0197, + "step": 800 + }, + { + "epoch": 0.45, + "learning_rate": 4.733142127093813e-05, + "loss": 1.0471, + "step": 805 + }, + { + "epoch": 0.45, + "learning_rate": 4.7298780370130014e-05, + "loss": 1.0134, + "step": 810 + }, + { + "epoch": 0.45, + "learning_rate": 4.726595245227336e-05, + "loss": 0.9801, + "step": 815 + }, + { + "epoch": 0.45, + "learning_rate": 4.723293779269173e-05, + "loss": 0.9944, + "step": 820 + }, + { + "epoch": 0.46, + "learning_rate": 4.7199736668274924e-05, + "loss": 1.0358, + "step": 825 + }, + { + "epoch": 0.46, + "learning_rate": 4.716634935747655e-05, + "loss": 0.9907, + "step": 830 + }, + { + "epoch": 0.46, + "learning_rate": 4.713277614031177e-05, + "loss": 1.0698, + "step": 835 + }, + { + "epoch": 0.46, + "learning_rate": 4.70990172983549e-05, + "loss": 1.0466, + "step": 840 + }, + { + "epoch": 0.47, + "learning_rate": 4.706507311473707e-05, + "loss": 0.9489, + "step": 845 + }, + { + "epoch": 0.47, + "learning_rate": 4.703094387414385e-05, + "loss": 0.9936, + "step": 850 + }, + { + "epoch": 0.47, + "learning_rate": 4.699662986281288e-05, + "loss": 0.9672, + "step": 855 + }, + { + "epoch": 0.48, + "learning_rate": 4.696213136853141e-05, + "loss": 1.0164, + "step": 860 + }, + { + "epoch": 0.48, + "learning_rate": 4.6927448680633954e-05, + "loss": 0.9817, + "step": 865 + }, + { + "epoch": 0.48, + "learning_rate": 4.689258208999983e-05, + "loss": 1.028, + "step": 870 + }, + { + "epoch": 0.48, + "learning_rate": 4.6857531889050716e-05, + "loss": 1.0239, + "step": 875 + }, + { + "epoch": 0.49, + "learning_rate": 4.682229837174821e-05, + "loss": 1.0378, + "step": 880 + }, + { + "epoch": 0.49, + "learning_rate": 4.678688183359135e-05, + "loss": 0.9908, + "step": 885 + }, + { + "epoch": 0.49, + "learning_rate": 4.675128257161418e-05, + "loss": 0.9384, + "step": 890 + }, + { + "epoch": 0.49, + "learning_rate": 4.671550088438319e-05, + "loss": 0.9481, + "step": 895 + }, + { + "epoch": 0.5, + "learning_rate": 4.6679537071994874e-05, + "loss": 0.9541, + "step": 900 + }, + { + "epoch": 0.5, + "learning_rate": 4.6643391436073165e-05, + "loss": 0.9787, + "step": 905 + }, + { + "epoch": 0.5, + "learning_rate": 4.660706427976693e-05, + "loss": 0.9897, + "step": 910 + }, + { + "epoch": 0.51, + "learning_rate": 4.657055590774745e-05, + "loss": 0.9947, + "step": 915 + }, + { + "epoch": 0.51, + "learning_rate": 4.6533866626205805e-05, + "loss": 0.9614, + "step": 920 + }, + { + "epoch": 0.51, + "learning_rate": 4.649699674285036e-05, + "loss": 1.0323, + "step": 925 + }, + { + "epoch": 0.51, + "learning_rate": 4.645994656690417e-05, + "loss": 1.0345, + "step": 930 + }, + { + "epoch": 0.52, + "learning_rate": 4.642271640910235e-05, + "loss": 1.0432, + "step": 935 + }, + { + "epoch": 0.52, + "learning_rate": 4.638530658168954e-05, + "loss": 1.0102, + "step": 940 + }, + { + "epoch": 0.52, + "learning_rate": 4.6347717398417203e-05, + "loss": 1.0361, + "step": 945 + }, + { + "epoch": 0.53, + "learning_rate": 4.6309949174541096e-05, + "loss": 0.9818, + "step": 950 + }, + { + "epoch": 0.53, + "learning_rate": 4.627200222681851e-05, + "loss": 1.0293, + "step": 955 + }, + { + "epoch": 0.53, + "learning_rate": 4.6233876873505694e-05, + "loss": 1.0125, + "step": 960 + }, + { + "epoch": 0.53, + "learning_rate": 4.619557343435516e-05, + "loss": 1.0039, + "step": 965 + }, + { + "epoch": 0.54, + "learning_rate": 4.615709223061302e-05, + "loss": 1.0146, + "step": 970 + }, + { + "epoch": 0.54, + "learning_rate": 4.611843358501624e-05, + "loss": 1.0352, + "step": 975 + }, + { + "epoch": 0.54, + "learning_rate": 4.6079597821789993e-05, + "loss": 1.0405, + "step": 980 + }, + { + "epoch": 0.54, + "learning_rate": 4.604058526664491e-05, + "loss": 0.9834, + "step": 985 + }, + { + "epoch": 0.55, + "learning_rate": 4.600139624677436e-05, + "loss": 1.0252, + "step": 990 + }, + { + "epoch": 0.55, + "learning_rate": 4.596203109085168e-05, + "loss": 1.0516, + "step": 995 + }, + { + "epoch": 0.55, + "learning_rate": 4.5922490129027464e-05, + "loss": 0.9762, + "step": 1000 + }, + { + "epoch": 0.56, + "learning_rate": 4.588277369292674e-05, + "loss": 0.9801, + "step": 1005 + }, + { + "epoch": 0.56, + "learning_rate": 4.5842882115646234e-05, + "loss": 1.0127, + "step": 1010 + }, + { + "epoch": 0.56, + "learning_rate": 4.580281573175157e-05, + "loss": 1.0415, + "step": 1015 + }, + { + "epoch": 0.56, + "learning_rate": 4.576257487727442e-05, + "loss": 0.9974, + "step": 1020 + }, + { + "epoch": 0.57, + "learning_rate": 4.572215988970974e-05, + "loss": 1.0097, + "step": 1025 + }, + { + "epoch": 0.57, + "learning_rate": 4.568157110801293e-05, + "loss": 1.0004, + "step": 1030 + }, + { + "epoch": 0.57, + "learning_rate": 4.5640808872596944e-05, + "loss": 0.9932, + "step": 1035 + }, + { + "epoch": 0.58, + "learning_rate": 4.5599873525329505e-05, + "loss": 0.9437, + "step": 1040 + }, + { + "epoch": 0.58, + "learning_rate": 4.555876540953019e-05, + "loss": 1.0004, + "step": 1045 + }, + { + "epoch": 0.58, + "learning_rate": 4.551748486996755e-05, + "loss": 0.9743, + "step": 1050 + }, + { + "epoch": 0.58, + "learning_rate": 4.547603225285626e-05, + "loss": 1.0303, + "step": 1055 + }, + { + "epoch": 0.59, + "learning_rate": 4.543440790585417e-05, + "loss": 1.0225, + "step": 1060 + }, + { + "epoch": 0.59, + "learning_rate": 4.539261217805939e-05, + "loss": 1.0102, + "step": 1065 + }, + { + "epoch": 0.59, + "learning_rate": 4.535064542000743e-05, + "loss": 1.0087, + "step": 1070 + }, + { + "epoch": 0.59, + "learning_rate": 4.5308507983668165e-05, + "loss": 0.952, + "step": 1075 + }, + { + "epoch": 0.6, + "learning_rate": 4.526620022244293e-05, + "loss": 0.9767, + "step": 1080 + }, + { + "epoch": 0.6, + "learning_rate": 4.522372249116158e-05, + "loss": 1.0049, + "step": 1085 + }, + { + "epoch": 0.6, + "learning_rate": 4.5181075146079456e-05, + "loss": 1.0397, + "step": 1090 + }, + { + "epoch": 0.61, + "learning_rate": 4.5138258544874455e-05, + "loss": 1.0167, + "step": 1095 + }, + { + "epoch": 0.61, + "learning_rate": 4.5095273046643985e-05, + "loss": 0.9966, + "step": 1100 + }, + { + "epoch": 0.61, + "learning_rate": 4.5052119011901986e-05, + "loss": 0.983, + "step": 1105 + }, + { + "epoch": 0.61, + "learning_rate": 4.500879680257587e-05, + "loss": 1.0301, + "step": 1110 + }, + { + "epoch": 0.62, + "learning_rate": 4.4965306782003535e-05, + "loss": 1.0371, + "step": 1115 + }, + { + "epoch": 0.62, + "learning_rate": 4.492164931493028e-05, + "loss": 1.0083, + "step": 1120 + }, + { + "epoch": 0.62, + "learning_rate": 4.487782476750575e-05, + "loss": 0.9728, + "step": 1125 + }, + { + "epoch": 0.62, + "learning_rate": 4.4833833507280884e-05, + "loss": 0.9892, + "step": 1130 + }, + { + "epoch": 0.63, + "learning_rate": 4.4789675903204805e-05, + "loss": 0.9847, + "step": 1135 + }, + { + "epoch": 0.63, + "learning_rate": 4.474535232562176e-05, + "loss": 1.0545, + "step": 1140 + }, + { + "epoch": 0.63, + "learning_rate": 4.470086314626797e-05, + "loss": 1.0204, + "step": 1145 + }, + { + "epoch": 0.64, + "learning_rate": 4.465620873826856e-05, + "loss": 0.9648, + "step": 1150 + }, + { + "epoch": 0.64, + "learning_rate": 4.46113894761344e-05, + "loss": 0.9545, + "step": 1155 + }, + { + "epoch": 0.64, + "learning_rate": 4.456640573575896e-05, + "loss": 1.0256, + "step": 1160 + }, + { + "epoch": 0.64, + "learning_rate": 4.4521257894415183e-05, + "loss": 0.99, + "step": 1165 + }, + { + "epoch": 0.65, + "learning_rate": 4.44759463307523e-05, + "loss": 0.9906, + "step": 1170 + }, + { + "epoch": 0.65, + "learning_rate": 4.443047142479266e-05, + "loss": 0.9876, + "step": 1175 + }, + { + "epoch": 0.65, + "learning_rate": 4.4384833557928553e-05, + "loss": 1.0495, + "step": 1180 + }, + { + "epoch": 0.66, + "learning_rate": 4.4339033112918966e-05, + "loss": 0.9869, + "step": 1185 + }, + { + "epoch": 0.66, + "learning_rate": 4.4293070473886456e-05, + "loss": 1.0299, + "step": 1190 + }, + { + "epoch": 0.66, + "learning_rate": 4.424694602631385e-05, + "loss": 1.0073, + "step": 1195 + }, + { + "epoch": 0.66, + "learning_rate": 4.420066015704105e-05, + "loss": 1.0023, + "step": 1200 + }, + { + "epoch": 0.67, + "learning_rate": 4.41542132542618e-05, + "loss": 0.9462, + "step": 1205 + }, + { + "epoch": 0.67, + "learning_rate": 4.410760570752037e-05, + "loss": 1.0116, + "step": 1210 + }, + { + "epoch": 0.67, + "learning_rate": 4.4060837907708375e-05, + "loss": 0.9652, + "step": 1215 + }, + { + "epoch": 0.67, + "learning_rate": 4.401391024706142e-05, + "loss": 1.0411, + "step": 1220 + }, + { + "epoch": 0.68, + "learning_rate": 4.396682311915586e-05, + "loss": 0.9691, + "step": 1225 + }, + { + "epoch": 0.68, + "learning_rate": 4.3919576918905495e-05, + "loss": 0.961, + "step": 1230 + }, + { + "epoch": 0.68, + "learning_rate": 4.387217204255819e-05, + "loss": 0.9602, + "step": 1235 + }, + { + "epoch": 0.69, + "learning_rate": 4.3824608887692666e-05, + "loss": 1.0197, + "step": 1240 + }, + { + "epoch": 0.69, + "learning_rate": 4.377688785321507e-05, + "loss": 0.9982, + "step": 1245 + }, + { + "epoch": 0.69, + "learning_rate": 4.372900933935569e-05, + "loss": 0.986, + "step": 1250 + }, + { + "epoch": 0.69, + "learning_rate": 4.368097374766556e-05, + "loss": 0.9744, + "step": 1255 + }, + { + "epoch": 0.7, + "learning_rate": 4.3632781481013105e-05, + "loss": 1.0078, + "step": 1260 + }, + { + "epoch": 0.7, + "learning_rate": 4.358443294358077e-05, + "loss": 0.9865, + "step": 1265 + }, + { + "epoch": 0.7, + "learning_rate": 4.35359285408616e-05, + "loss": 0.976, + "step": 1270 + }, + { + "epoch": 0.71, + "learning_rate": 4.348726867965591e-05, + "loss": 1.0141, + "step": 1275 + }, + { + "epoch": 0.71, + "learning_rate": 4.343845376806777e-05, + "loss": 0.952, + "step": 1280 + }, + { + "epoch": 0.71, + "learning_rate": 4.338948421550169e-05, + "loss": 1.0239, + "step": 1285 + }, + { + "epoch": 0.71, + "learning_rate": 4.334036043265909e-05, + "loss": 0.994, + "step": 1290 + }, + { + "epoch": 0.72, + "learning_rate": 4.329108283153492e-05, + "loss": 1.0505, + "step": 1295 + }, + { + "epoch": 0.72, + "learning_rate": 4.3241651825414195e-05, + "loss": 0.9919, + "step": 1300 + }, + { + "epoch": 0.72, + "learning_rate": 4.31920678288685e-05, + "loss": 1.0169, + "step": 1305 + }, + { + "epoch": 0.72, + "learning_rate": 4.3142331257752546e-05, + "loss": 0.9938, + "step": 1310 + }, + { + "epoch": 0.73, + "learning_rate": 4.309244252920064e-05, + "loss": 0.92, + "step": 1315 + }, + { + "epoch": 0.73, + "learning_rate": 4.304240206162326e-05, + "loss": 0.9537, + "step": 1320 + }, + { + "epoch": 0.73, + "learning_rate": 4.299221027470345e-05, + "loss": 0.996, + "step": 1325 + }, + { + "epoch": 0.74, + "learning_rate": 4.294186758939339e-05, + "loss": 0.9652, + "step": 1330 + }, + { + "epoch": 0.74, + "learning_rate": 4.2891374427910795e-05, + "loss": 1.0138, + "step": 1335 + }, + { + "epoch": 0.74, + "learning_rate": 4.284073121373544e-05, + "loss": 0.9267, + "step": 1340 + }, + { + "epoch": 0.74, + "learning_rate": 4.278993837160553e-05, + "loss": 0.9899, + "step": 1345 + }, + { + "epoch": 0.75, + "learning_rate": 4.273899632751422e-05, + "loss": 0.963, + "step": 1350 + }, + { + "epoch": 0.75, + "learning_rate": 4.2687905508705974e-05, + "loss": 1.0066, + "step": 1355 + }, + { + "epoch": 0.75, + "learning_rate": 4.263666634367303e-05, + "loss": 0.9939, + "step": 1360 + }, + { + "epoch": 0.75, + "learning_rate": 4.258527926215178e-05, + "loss": 0.9667, + "step": 1365 + }, + { + "epoch": 0.76, + "learning_rate": 4.253374469511917e-05, + "loss": 0.9999, + "step": 1370 + }, + { + "epoch": 0.76, + "learning_rate": 4.248206307478909e-05, + "loss": 0.9738, + "step": 1375 + }, + { + "epoch": 0.76, + "learning_rate": 4.243023483460875e-05, + "loss": 0.9829, + "step": 1380 + }, + { + "epoch": 0.77, + "learning_rate": 4.237826040925503e-05, + "loss": 1.0107, + "step": 1385 + }, + { + "epoch": 0.77, + "learning_rate": 4.232614023463088e-05, + "loss": 0.9992, + "step": 1390 + }, + { + "epoch": 0.77, + "learning_rate": 4.227387474786159e-05, + "loss": 0.9566, + "step": 1395 + }, + { + "epoch": 0.77, + "learning_rate": 4.222146438729119e-05, + "loss": 0.9722, + "step": 1400 + }, + { + "epoch": 0.78, + "learning_rate": 4.216890959247873e-05, + "loss": 0.988, + "step": 1405 + }, + { + "epoch": 0.78, + "learning_rate": 4.211621080419463e-05, + "loss": 1.0103, + "step": 1410 + }, + { + "epoch": 0.78, + "learning_rate": 4.206336846441695e-05, + "loss": 0.9805, + "step": 1415 + }, + { + "epoch": 0.79, + "learning_rate": 4.201038301632772e-05, + "loss": 1.0177, + "step": 1420 + }, + { + "epoch": 0.79, + "learning_rate": 4.195725490430917e-05, + "loss": 0.9938, + "step": 1425 + }, + { + "epoch": 0.79, + "learning_rate": 4.190398457394007e-05, + "loss": 1.0276, + "step": 1430 + }, + { + "epoch": 0.79, + "learning_rate": 4.1850572471991924e-05, + "loss": 0.9909, + "step": 1435 + }, + { + "epoch": 0.8, + "learning_rate": 4.1797019046425264e-05, + "loss": 0.9794, + "step": 1440 + }, + { + "epoch": 0.8, + "learning_rate": 4.1743324746385914e-05, + "loss": 1.0073, + "step": 1445 + }, + { + "epoch": 0.8, + "learning_rate": 4.1689490022201154e-05, + "loss": 1.0106, + "step": 1450 + }, + { + "epoch": 0.8, + "learning_rate": 4.163551532537601e-05, + "loss": 1.0241, + "step": 1455 + }, + { + "epoch": 0.81, + "learning_rate": 4.1581401108589425e-05, + "loss": 1.0178, + "step": 1460 + }, + { + "epoch": 0.81, + "learning_rate": 4.1527147825690495e-05, + "loss": 0.9426, + "step": 1465 + }, + { + "epoch": 0.81, + "learning_rate": 4.1472755931694626e-05, + "loss": 0.9642, + "step": 1470 + }, + { + "epoch": 0.82, + "learning_rate": 4.141822588277976e-05, + "loss": 0.9678, + "step": 1475 + }, + { + "epoch": 0.82, + "learning_rate": 4.136355813628251e-05, + "loss": 0.9654, + "step": 1480 + }, + { + "epoch": 0.82, + "learning_rate": 4.130875315069435e-05, + "loss": 0.9748, + "step": 1485 + }, + { + "epoch": 0.82, + "learning_rate": 4.125381138565775e-05, + "loss": 0.9321, + "step": 1490 + }, + { + "epoch": 0.83, + "learning_rate": 4.1198733301962346e-05, + "loss": 0.9885, + "step": 1495 + }, + { + "epoch": 0.83, + "learning_rate": 4.114351936154105e-05, + "loss": 0.9385, + "step": 1500 + } + ], + "max_steps": 5424, + "num_train_epochs": 3, + "total_flos": 4.8745546776576e+17, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-1500/training_args.bin b/checkpoint-1500/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..77433459daca6603950afc463ff989e817f81ac3 --- /dev/null +++ b/checkpoint-1500/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0f6a8853599a52b1f15984d75ce4b6434c335fdc67d4509d0b2ca0d8c613590a +size 3362 diff --git a/checkpoint-1600/README.md b/checkpoint-1600/README.md new file mode 100644 index 0000000000000000000000000000000000000000..53c0fe6b436905ad7a2c060e98fe1ebdeac1a2fe --- /dev/null +++ b/checkpoint-1600/README.md @@ -0,0 +1,15 @@ +--- +library_name: peft +--- +## Training procedure + + +The following `bitsandbytes` quantization config was used during training: +- load_in_8bit: True +- llm_int8_threshold: 6.0 +- llm_int8_skip_modules: None +- llm_int8_enable_fp32_cpu_offload: False +### Framework versions + + +- PEFT 0.5.0 diff --git a/checkpoint-1600/adapter_config.json b/checkpoint-1600/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..514ae99ac7471cc26016f7575e44dd8304f29b07 --- /dev/null +++ b/checkpoint-1600/adapter_config.json @@ -0,0 +1,21 @@ +{ + "auto_mapping": null, + "base_model_name_or_path": "meta-llama/Llama-2-7b-chat-hf", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "q_proj", + "v_proj" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-1600/adapter_model.bin b/checkpoint-1600/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..4eb4ecc367a356b1bd1b5dcb82df8a2a9d1f92f1 --- /dev/null +++ b/checkpoint-1600/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:21c6f633508057d0cb9249f42a4db84dffa4bfc382f7d79be60bc0c5080145ed +size 16821197 diff --git a/checkpoint-1600/finetuning_args.json b/checkpoint-1600/finetuning_args.json new file mode 100644 index 0000000000000000000000000000000000000000..d01efc206b59c6f88548e8f3940579f2ed2af33b --- /dev/null +++ b/checkpoint-1600/finetuning_args.json @@ -0,0 +1,16 @@ +{ + "dpo_beta": 0.1, + "finetuning_type": "lora", + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "lora_rank": 8, + "lora_target": [ + "q_proj", + "v_proj" + ], + "name_module_trainable": "mlp", + "num_hidden_layers": 32, + "num_layer_trainable": 3, + "ppo_score_norm": false, + "resume_lora_training": true +} diff --git a/checkpoint-1600/optimizer.pt b/checkpoint-1600/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..8a39aab009bdd595fed681a12fe4bae58bafafb1 --- /dev/null +++ b/checkpoint-1600/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:323b17a2146f257ea963425dc33afc6c113f93f852c824af5e48c32478f17dbd +size 33629893 diff --git a/checkpoint-1600/rng_state.pth b/checkpoint-1600/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..4fe4e46c716914f5f8ec2061968a9d1c7e3cede9 --- /dev/null +++ b/checkpoint-1600/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:18749aa16d0d80b62e75dce57f29bfc315caecc9c57b3cee09806a9a58409dce +size 14575 diff --git a/checkpoint-1600/scaler.pt b/checkpoint-1600/scaler.pt new file mode 100644 index 0000000000000000000000000000000000000000..2f03c4ce6fd4620d95ae66b6787fdbcf34c6622a --- /dev/null +++ b/checkpoint-1600/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cbbb3b0bb9c64d37bf898d0431b3ed1f0a6f5c9d8c2b563e0f884424fb8bd92a +size 557 diff --git a/checkpoint-1600/scheduler.pt b/checkpoint-1600/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..79bc26f8e83a8e2aff42923a87ed8667bb3b4b68 --- /dev/null +++ b/checkpoint-1600/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cd159ca9420caa44d613b1e1bb9a5dc0a067e433fb7974a1c1b1d32d56d16635 +size 627 diff --git a/checkpoint-1600/trainer_state.json b/checkpoint-1600/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..17fcb4dc54c04561a9a8128968f9a681b9358e64 --- /dev/null +++ b/checkpoint-1600/trainer_state.json @@ -0,0 +1,1936 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.8847110865358031, + "global_step": 1600, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.9999895164082156e-05, + "loss": 1.6349, + "step": 5 + }, + { + "epoch": 0.01, + "learning_rate": 4.999958065720787e-05, + "loss": 1.6199, + "step": 10 + }, + { + "epoch": 0.01, + "learning_rate": 4.999905648201487e-05, + "loss": 1.4834, + "step": 15 + }, + { + "epoch": 0.01, + "learning_rate": 4.999832264289934e-05, + "loss": 1.3882, + "step": 20 + }, + { + "epoch": 0.01, + "learning_rate": 4.999737914601591e-05, + "loss": 1.3679, + "step": 25 + }, + { + "epoch": 0.02, + "learning_rate": 4.999622599927756e-05, + "loss": 1.2396, + "step": 30 + }, + { + "epoch": 0.02, + "learning_rate": 4.999486321235559e-05, + "loss": 1.321, + "step": 35 + }, + { + "epoch": 0.02, + "learning_rate": 4.9993290796679516e-05, + "loss": 1.2874, + "step": 40 + }, + { + "epoch": 0.02, + "learning_rate": 4.999150876543699e-05, + "loss": 1.2607, + "step": 45 + }, + { + "epoch": 0.03, + "learning_rate": 4.9989517133573694e-05, + "loss": 1.2454, + "step": 50 + }, + { + "epoch": 0.03, + "learning_rate": 4.9987315917793174e-05, + "loss": 1.2799, + "step": 55 + }, + { + "epoch": 0.03, + "learning_rate": 4.998490513655676e-05, + "loss": 1.2575, + "step": 60 + }, + { + "epoch": 0.04, + "learning_rate": 4.998228481008337e-05, + "loss": 1.2404, + "step": 65 + }, + { + "epoch": 0.04, + "learning_rate": 4.997945496034934e-05, + "loss": 1.2219, + "step": 70 + }, + { + "epoch": 0.04, + "learning_rate": 4.9976415611088267e-05, + "loss": 1.2241, + "step": 75 + }, + { + "epoch": 0.04, + "learning_rate": 4.997316678779079e-05, + "loss": 1.1716, + "step": 80 + }, + { + "epoch": 0.05, + "learning_rate": 4.996970851770438e-05, + "loss": 1.1883, + "step": 85 + }, + { + "epoch": 0.05, + "learning_rate": 4.9966040829833115e-05, + "loss": 1.205, + "step": 90 + }, + { + "epoch": 0.05, + "learning_rate": 4.9962163754937426e-05, + "loss": 1.1246, + "step": 95 + }, + { + "epoch": 0.06, + "learning_rate": 4.995807732553384e-05, + "loss": 1.1636, + "step": 100 + }, + { + "epoch": 0.06, + "learning_rate": 4.9953781575894723e-05, + "loss": 1.158, + "step": 105 + }, + { + "epoch": 0.06, + "learning_rate": 4.9949276542048e-05, + "loss": 1.1477, + "step": 110 + }, + { + "epoch": 0.06, + "learning_rate": 4.9944562261776805e-05, + "loss": 1.1678, + "step": 115 + }, + { + "epoch": 0.07, + "learning_rate": 4.9939638774619216e-05, + "loss": 1.1501, + "step": 120 + }, + { + "epoch": 0.07, + "learning_rate": 4.99345061218679e-05, + "loss": 1.1955, + "step": 125 + }, + { + "epoch": 0.07, + "learning_rate": 4.9929164346569756e-05, + "loss": 1.1724, + "step": 130 + }, + { + "epoch": 0.07, + "learning_rate": 4.9923613493525576e-05, + "loss": 1.177, + "step": 135 + }, + { + "epoch": 0.08, + "learning_rate": 4.991785360928968e-05, + "loss": 1.1418, + "step": 140 + }, + { + "epoch": 0.08, + "learning_rate": 4.991188474216947e-05, + "loss": 1.1898, + "step": 145 + }, + { + "epoch": 0.08, + "learning_rate": 4.9905706942225094e-05, + "loss": 1.1479, + "step": 150 + }, + { + "epoch": 0.09, + "learning_rate": 4.9899320261268966e-05, + "loss": 1.1356, + "step": 155 + }, + { + "epoch": 0.09, + "learning_rate": 4.989272475286537e-05, + "loss": 1.1397, + "step": 160 + }, + { + "epoch": 0.09, + "learning_rate": 4.9885920472330004e-05, + "loss": 1.1215, + "step": 165 + }, + { + "epoch": 0.09, + "learning_rate": 4.9878907476729516e-05, + "loss": 1.167, + "step": 170 + }, + { + "epoch": 0.1, + "learning_rate": 4.9871685824881e-05, + "loss": 1.1219, + "step": 175 + }, + { + "epoch": 0.1, + "learning_rate": 4.9864255577351534e-05, + "loss": 1.0835, + "step": 180 + }, + { + "epoch": 0.1, + "learning_rate": 4.985661679645769e-05, + "loss": 1.0721, + "step": 185 + }, + { + "epoch": 0.11, + "learning_rate": 4.9848769546264915e-05, + "loss": 1.0692, + "step": 190 + }, + { + "epoch": 0.11, + "learning_rate": 4.9840713892587146e-05, + "loss": 1.0488, + "step": 195 + }, + { + "epoch": 0.11, + "learning_rate": 4.983244990298609e-05, + "loss": 1.1285, + "step": 200 + }, + { + "epoch": 0.11, + "learning_rate": 4.982397764677081e-05, + "loss": 1.0832, + "step": 205 + }, + { + "epoch": 0.12, + "learning_rate": 4.981529719499704e-05, + "loss": 1.0652, + "step": 210 + }, + { + "epoch": 0.12, + "learning_rate": 4.980640862046663e-05, + "loss": 1.1043, + "step": 215 + }, + { + "epoch": 0.12, + "learning_rate": 4.979731199772693e-05, + "loss": 1.112, + "step": 220 + }, + { + "epoch": 0.12, + "learning_rate": 4.9788007403070146e-05, + "loss": 1.1029, + "step": 225 + }, + { + "epoch": 0.13, + "learning_rate": 4.977849491453277e-05, + "loss": 1.0869, + "step": 230 + }, + { + "epoch": 0.13, + "learning_rate": 4.976877461189481e-05, + "loss": 1.0843, + "step": 235 + }, + { + "epoch": 0.13, + "learning_rate": 4.975884657667922e-05, + "loss": 1.0789, + "step": 240 + }, + { + "epoch": 0.14, + "learning_rate": 4.974871089215118e-05, + "loss": 1.0449, + "step": 245 + }, + { + "epoch": 0.14, + "learning_rate": 4.9738367643317405e-05, + "loss": 1.1053, + "step": 250 + }, + { + "epoch": 0.14, + "learning_rate": 4.9727816916925395e-05, + "loss": 1.0651, + "step": 255 + }, + { + "epoch": 0.14, + "learning_rate": 4.971705880146276e-05, + "loss": 1.0828, + "step": 260 + }, + { + "epoch": 0.15, + "learning_rate": 4.970609338715646e-05, + "loss": 1.0932, + "step": 265 + }, + { + "epoch": 0.15, + "learning_rate": 4.969492076597203e-05, + "loss": 1.0648, + "step": 270 + }, + { + "epoch": 0.15, + "learning_rate": 4.968354103161283e-05, + "loss": 1.0948, + "step": 275 + }, + { + "epoch": 0.15, + "learning_rate": 4.967195427951926e-05, + "loss": 1.0721, + "step": 280 + }, + { + "epoch": 0.16, + "learning_rate": 4.9660160606867936e-05, + "loss": 1.124, + "step": 285 + }, + { + "epoch": 0.16, + "learning_rate": 4.9648160112570896e-05, + "loss": 1.0963, + "step": 290 + }, + { + "epoch": 0.16, + "learning_rate": 4.9635952897274773e-05, + "loss": 1.1078, + "step": 295 + }, + { + "epoch": 0.17, + "learning_rate": 4.9623539063359925e-05, + "loss": 1.1059, + "step": 300 + }, + { + "epoch": 0.17, + "learning_rate": 4.961091871493962e-05, + "loss": 1.1032, + "step": 305 + }, + { + "epoch": 0.17, + "learning_rate": 4.959809195785912e-05, + "loss": 1.0595, + "step": 310 + }, + { + "epoch": 0.17, + "learning_rate": 4.958505889969481e-05, + "loss": 1.1096, + "step": 315 + }, + { + "epoch": 0.18, + "learning_rate": 4.957181964975329e-05, + "loss": 1.0589, + "step": 320 + }, + { + "epoch": 0.18, + "learning_rate": 4.955837431907049e-05, + "loss": 1.0608, + "step": 325 + }, + { + "epoch": 0.18, + "learning_rate": 4.954472302041069e-05, + "loss": 1.0819, + "step": 330 + }, + { + "epoch": 0.19, + "learning_rate": 4.9530865868265605e-05, + "loss": 1.0759, + "step": 335 + }, + { + "epoch": 0.19, + "learning_rate": 4.951680297885342e-05, + "loss": 1.0515, + "step": 340 + }, + { + "epoch": 0.19, + "learning_rate": 4.950253447011779e-05, + "loss": 1.0371, + "step": 345 + }, + { + "epoch": 0.19, + "learning_rate": 4.948806046172691e-05, + "loss": 1.0619, + "step": 350 + }, + { + "epoch": 0.2, + "learning_rate": 4.947338107507245e-05, + "loss": 1.0757, + "step": 355 + }, + { + "epoch": 0.2, + "learning_rate": 4.945849643326857e-05, + "loss": 1.0686, + "step": 360 + }, + { + "epoch": 0.2, + "learning_rate": 4.9443406661150874e-05, + "loss": 1.0809, + "step": 365 + }, + { + "epoch": 0.2, + "learning_rate": 4.942811188527537e-05, + "loss": 1.0704, + "step": 370 + }, + { + "epoch": 0.21, + "learning_rate": 4.941261223391742e-05, + "loss": 1.0655, + "step": 375 + }, + { + "epoch": 0.21, + "learning_rate": 4.939690783707063e-05, + "loss": 1.1182, + "step": 380 + }, + { + "epoch": 0.21, + "learning_rate": 4.938099882644578e-05, + "loss": 1.081, + "step": 385 + }, + { + "epoch": 0.22, + "learning_rate": 4.9364885335469734e-05, + "loss": 1.0792, + "step": 390 + }, + { + "epoch": 0.22, + "learning_rate": 4.93485674992843e-05, + "loss": 1.0244, + "step": 395 + }, + { + "epoch": 0.22, + "learning_rate": 4.933204545474511e-05, + "loss": 1.0531, + "step": 400 + }, + { + "epoch": 0.22, + "learning_rate": 4.9315319340420465e-05, + "loss": 1.048, + "step": 405 + }, + { + "epoch": 0.23, + "learning_rate": 4.929838929659015e-05, + "loss": 1.04, + "step": 410 + }, + { + "epoch": 0.23, + "learning_rate": 4.9281255465244314e-05, + "loss": 1.0686, + "step": 415 + }, + { + "epoch": 0.23, + "learning_rate": 4.926391799008223e-05, + "loss": 1.0933, + "step": 420 + }, + { + "epoch": 0.24, + "learning_rate": 4.924637701651111e-05, + "loss": 1.0313, + "step": 425 + }, + { + "epoch": 0.24, + "learning_rate": 4.9228632691644874e-05, + "loss": 1.0811, + "step": 430 + }, + { + "epoch": 0.24, + "learning_rate": 4.921068516430293e-05, + "loss": 1.0617, + "step": 435 + }, + { + "epoch": 0.24, + "learning_rate": 4.919253458500892e-05, + "loss": 1.0482, + "step": 440 + }, + { + "epoch": 0.25, + "learning_rate": 4.9174181105989445e-05, + "loss": 1.0681, + "step": 445 + }, + { + "epoch": 0.25, + "learning_rate": 4.9155624881172834e-05, + "loss": 1.0476, + "step": 450 + }, + { + "epoch": 0.25, + "learning_rate": 4.913686606618777e-05, + "loss": 1.0463, + "step": 455 + }, + { + "epoch": 0.25, + "learning_rate": 4.911790481836208e-05, + "loss": 1.0213, + "step": 460 + }, + { + "epoch": 0.26, + "learning_rate": 4.909874129672133e-05, + "loss": 0.9855, + "step": 465 + }, + { + "epoch": 0.26, + "learning_rate": 4.907937566198757e-05, + "loss": 1.045, + "step": 470 + }, + { + "epoch": 0.26, + "learning_rate": 4.9059808076577914e-05, + "loss": 1.0442, + "step": 475 + }, + { + "epoch": 0.27, + "learning_rate": 4.904003870460323e-05, + "loss": 1.0354, + "step": 480 + }, + { + "epoch": 0.27, + "learning_rate": 4.9020067711866735e-05, + "loss": 1.0331, + "step": 485 + }, + { + "epoch": 0.27, + "learning_rate": 4.899989526586261e-05, + "loss": 1.0585, + "step": 490 + }, + { + "epoch": 0.27, + "learning_rate": 4.8979521535774636e-05, + "loss": 1.0223, + "step": 495 + }, + { + "epoch": 0.28, + "learning_rate": 4.895894669247468e-05, + "loss": 1.0118, + "step": 500 + }, + { + "epoch": 0.28, + "learning_rate": 4.8938170908521356e-05, + "loss": 1.0508, + "step": 505 + }, + { + "epoch": 0.28, + "learning_rate": 4.8917194358158534e-05, + "loss": 1.0656, + "step": 510 + }, + { + "epoch": 0.28, + "learning_rate": 4.8896017217313886e-05, + "loss": 1.0655, + "step": 515 + }, + { + "epoch": 0.29, + "learning_rate": 4.887463966359741e-05, + "loss": 1.0833, + "step": 520 + }, + { + "epoch": 0.29, + "learning_rate": 4.8853061876299956e-05, + "loss": 1.068, + "step": 525 + }, + { + "epoch": 0.29, + "learning_rate": 4.8831284036391684e-05, + "loss": 1.0487, + "step": 530 + }, + { + "epoch": 0.3, + "learning_rate": 4.880930632652058e-05, + "loss": 1.059, + "step": 535 + }, + { + "epoch": 0.3, + "learning_rate": 4.878712893101092e-05, + "loss": 1.0408, + "step": 540 + }, + { + "epoch": 0.3, + "learning_rate": 4.876475203586171e-05, + "loss": 0.9976, + "step": 545 + }, + { + "epoch": 0.3, + "learning_rate": 4.874217582874514e-05, + "loss": 1.0358, + "step": 550 + }, + { + "epoch": 0.31, + "learning_rate": 4.8719400499005e-05, + "loss": 1.0073, + "step": 555 + }, + { + "epoch": 0.31, + "learning_rate": 4.869642623765509e-05, + "loss": 1.0742, + "step": 560 + }, + { + "epoch": 0.31, + "learning_rate": 4.867325323737765e-05, + "loss": 1.0463, + "step": 565 + }, + { + "epoch": 0.32, + "learning_rate": 4.864988169252168e-05, + "loss": 1.0242, + "step": 570 + }, + { + "epoch": 0.32, + "learning_rate": 4.8626311799101375e-05, + "loss": 1.0773, + "step": 575 + }, + { + "epoch": 0.32, + "learning_rate": 4.860254375479446e-05, + "loss": 1.0371, + "step": 580 + }, + { + "epoch": 0.32, + "learning_rate": 4.8578577758940504e-05, + "loss": 1.0399, + "step": 585 + }, + { + "epoch": 0.33, + "learning_rate": 4.855441401253928e-05, + "loss": 1.0411, + "step": 590 + }, + { + "epoch": 0.33, + "learning_rate": 4.8530052718249076e-05, + "loss": 1.0476, + "step": 595 + }, + { + "epoch": 0.33, + "learning_rate": 4.850549408038498e-05, + "loss": 1.0075, + "step": 600 + }, + { + "epoch": 0.33, + "learning_rate": 4.848073830491717e-05, + "loss": 1.0045, + "step": 605 + }, + { + "epoch": 0.34, + "learning_rate": 4.845578559946923e-05, + "loss": 1.0368, + "step": 610 + }, + { + "epoch": 0.34, + "learning_rate": 4.8430636173316306e-05, + "loss": 1.0156, + "step": 615 + }, + { + "epoch": 0.34, + "learning_rate": 4.840529023738348e-05, + "loss": 1.0334, + "step": 620 + }, + { + "epoch": 0.35, + "learning_rate": 4.837974800424389e-05, + "loss": 0.9994, + "step": 625 + }, + { + "epoch": 0.35, + "learning_rate": 4.8354009688117026e-05, + "loss": 1.0467, + "step": 630 + }, + { + "epoch": 0.35, + "learning_rate": 4.8328075504866874e-05, + "loss": 0.9968, + "step": 635 + }, + { + "epoch": 0.35, + "learning_rate": 4.8301945672000164e-05, + "loss": 1.041, + "step": 640 + }, + { + "epoch": 0.36, + "learning_rate": 4.8275620408664487e-05, + "loss": 0.9811, + "step": 645 + }, + { + "epoch": 0.36, + "learning_rate": 4.8249099935646494e-05, + "loss": 1.0235, + "step": 650 + }, + { + "epoch": 0.36, + "learning_rate": 4.822238447537003e-05, + "loss": 1.0152, + "step": 655 + }, + { + "epoch": 0.36, + "learning_rate": 4.819547425189429e-05, + "loss": 1.038, + "step": 660 + }, + { + "epoch": 0.37, + "learning_rate": 4.81683694909119e-05, + "loss": 1.0387, + "step": 665 + }, + { + "epoch": 0.37, + "learning_rate": 4.814107041974707e-05, + "loss": 1.019, + "step": 670 + }, + { + "epoch": 0.37, + "learning_rate": 4.811357726735366e-05, + "loss": 1.0403, + "step": 675 + }, + { + "epoch": 0.38, + "learning_rate": 4.808589026431324e-05, + "loss": 1.0828, + "step": 680 + }, + { + "epoch": 0.38, + "learning_rate": 4.805800964283322e-05, + "loss": 1.0128, + "step": 685 + }, + { + "epoch": 0.38, + "learning_rate": 4.802993563674483e-05, + "loss": 1.0502, + "step": 690 + }, + { + "epoch": 0.38, + "learning_rate": 4.80016684815012e-05, + "loss": 1.0169, + "step": 695 + }, + { + "epoch": 0.39, + "learning_rate": 4.7973208414175406e-05, + "loss": 1.0316, + "step": 700 + }, + { + "epoch": 0.39, + "learning_rate": 4.794455567345842e-05, + "loss": 1.0263, + "step": 705 + }, + { + "epoch": 0.39, + "learning_rate": 4.791571049965714e-05, + "loss": 1.0801, + "step": 710 + }, + { + "epoch": 0.4, + "learning_rate": 4.7886673134692404e-05, + "loss": 1.0575, + "step": 715 + }, + { + "epoch": 0.4, + "learning_rate": 4.7857443822096905e-05, + "loss": 0.9767, + "step": 720 + }, + { + "epoch": 0.4, + "learning_rate": 4.782802280701319e-05, + "loss": 0.9666, + "step": 725 + }, + { + "epoch": 0.4, + "learning_rate": 4.77984103361916e-05, + "loss": 0.9983, + "step": 730 + }, + { + "epoch": 0.41, + "learning_rate": 4.776860665798816e-05, + "loss": 1.0219, + "step": 735 + }, + { + "epoch": 0.41, + "learning_rate": 4.773861202236257e-05, + "loss": 0.9963, + "step": 740 + }, + { + "epoch": 0.41, + "learning_rate": 4.770842668087602e-05, + "loss": 1.0219, + "step": 745 + }, + { + "epoch": 0.41, + "learning_rate": 4.767805088668916e-05, + "loss": 1.0029, + "step": 750 + }, + { + "epoch": 0.42, + "learning_rate": 4.7647484894559936e-05, + "loss": 1.0077, + "step": 755 + }, + { + "epoch": 0.42, + "learning_rate": 4.7616728960841444e-05, + "loss": 0.9908, + "step": 760 + }, + { + "epoch": 0.42, + "learning_rate": 4.758578334347981e-05, + "loss": 1.0092, + "step": 765 + }, + { + "epoch": 0.43, + "learning_rate": 4.7554648302012015e-05, + "loss": 0.9874, + "step": 770 + }, + { + "epoch": 0.43, + "learning_rate": 4.7523324097563706e-05, + "loss": 1.0185, + "step": 775 + }, + { + "epoch": 0.43, + "learning_rate": 4.749181099284703e-05, + "loss": 1.0533, + "step": 780 + }, + { + "epoch": 0.43, + "learning_rate": 4.746010925215839e-05, + "loss": 1.0083, + "step": 785 + }, + { + "epoch": 0.44, + "learning_rate": 4.74282191413763e-05, + "loss": 1.0191, + "step": 790 + }, + { + "epoch": 0.44, + "learning_rate": 4.7396140927959045e-05, + "loss": 0.9786, + "step": 795 + }, + { + "epoch": 0.44, + "learning_rate": 4.7363874880942574e-05, + "loss": 1.0197, + "step": 800 + }, + { + "epoch": 0.45, + "learning_rate": 4.733142127093813e-05, + "loss": 1.0471, + "step": 805 + }, + { + "epoch": 0.45, + "learning_rate": 4.7298780370130014e-05, + "loss": 1.0134, + "step": 810 + }, + { + "epoch": 0.45, + "learning_rate": 4.726595245227336e-05, + "loss": 0.9801, + "step": 815 + }, + { + "epoch": 0.45, + "learning_rate": 4.723293779269173e-05, + "loss": 0.9944, + "step": 820 + }, + { + "epoch": 0.46, + "learning_rate": 4.7199736668274924e-05, + "loss": 1.0358, + "step": 825 + }, + { + "epoch": 0.46, + "learning_rate": 4.716634935747655e-05, + "loss": 0.9907, + "step": 830 + }, + { + "epoch": 0.46, + "learning_rate": 4.713277614031177e-05, + "loss": 1.0698, + "step": 835 + }, + { + "epoch": 0.46, + "learning_rate": 4.70990172983549e-05, + "loss": 1.0466, + "step": 840 + }, + { + "epoch": 0.47, + "learning_rate": 4.706507311473707e-05, + "loss": 0.9489, + "step": 845 + }, + { + "epoch": 0.47, + "learning_rate": 4.703094387414385e-05, + "loss": 0.9936, + "step": 850 + }, + { + "epoch": 0.47, + "learning_rate": 4.699662986281288e-05, + "loss": 0.9672, + "step": 855 + }, + { + "epoch": 0.48, + "learning_rate": 4.696213136853141e-05, + "loss": 1.0164, + "step": 860 + }, + { + "epoch": 0.48, + "learning_rate": 4.6927448680633954e-05, + "loss": 0.9817, + "step": 865 + }, + { + "epoch": 0.48, + "learning_rate": 4.689258208999983e-05, + "loss": 1.028, + "step": 870 + }, + { + "epoch": 0.48, + "learning_rate": 4.6857531889050716e-05, + "loss": 1.0239, + "step": 875 + }, + { + "epoch": 0.49, + "learning_rate": 4.682229837174821e-05, + "loss": 1.0378, + "step": 880 + }, + { + "epoch": 0.49, + "learning_rate": 4.678688183359135e-05, + "loss": 0.9908, + "step": 885 + }, + { + "epoch": 0.49, + "learning_rate": 4.675128257161418e-05, + "loss": 0.9384, + "step": 890 + }, + { + "epoch": 0.49, + "learning_rate": 4.671550088438319e-05, + "loss": 0.9481, + "step": 895 + }, + { + "epoch": 0.5, + "learning_rate": 4.6679537071994874e-05, + "loss": 0.9541, + "step": 900 + }, + { + "epoch": 0.5, + "learning_rate": 4.6643391436073165e-05, + "loss": 0.9787, + "step": 905 + }, + { + "epoch": 0.5, + "learning_rate": 4.660706427976693e-05, + "loss": 0.9897, + "step": 910 + }, + { + "epoch": 0.51, + "learning_rate": 4.657055590774745e-05, + "loss": 0.9947, + "step": 915 + }, + { + "epoch": 0.51, + "learning_rate": 4.6533866626205805e-05, + "loss": 0.9614, + "step": 920 + }, + { + "epoch": 0.51, + "learning_rate": 4.649699674285036e-05, + "loss": 1.0323, + "step": 925 + }, + { + "epoch": 0.51, + "learning_rate": 4.645994656690417e-05, + "loss": 1.0345, + "step": 930 + }, + { + "epoch": 0.52, + "learning_rate": 4.642271640910235e-05, + "loss": 1.0432, + "step": 935 + }, + { + "epoch": 0.52, + "learning_rate": 4.638530658168954e-05, + "loss": 1.0102, + "step": 940 + }, + { + "epoch": 0.52, + "learning_rate": 4.6347717398417203e-05, + "loss": 1.0361, + "step": 945 + }, + { + "epoch": 0.53, + "learning_rate": 4.6309949174541096e-05, + "loss": 0.9818, + "step": 950 + }, + { + "epoch": 0.53, + "learning_rate": 4.627200222681851e-05, + "loss": 1.0293, + "step": 955 + }, + { + "epoch": 0.53, + "learning_rate": 4.6233876873505694e-05, + "loss": 1.0125, + "step": 960 + }, + { + "epoch": 0.53, + "learning_rate": 4.619557343435516e-05, + "loss": 1.0039, + "step": 965 + }, + { + "epoch": 0.54, + "learning_rate": 4.615709223061302e-05, + "loss": 1.0146, + "step": 970 + }, + { + "epoch": 0.54, + "learning_rate": 4.611843358501624e-05, + "loss": 1.0352, + "step": 975 + }, + { + "epoch": 0.54, + "learning_rate": 4.6079597821789993e-05, + "loss": 1.0405, + "step": 980 + }, + { + "epoch": 0.54, + "learning_rate": 4.604058526664491e-05, + "loss": 0.9834, + "step": 985 + }, + { + "epoch": 0.55, + "learning_rate": 4.600139624677436e-05, + "loss": 1.0252, + "step": 990 + }, + { + "epoch": 0.55, + "learning_rate": 4.596203109085168e-05, + "loss": 1.0516, + "step": 995 + }, + { + "epoch": 0.55, + "learning_rate": 4.5922490129027464e-05, + "loss": 0.9762, + "step": 1000 + }, + { + "epoch": 0.56, + "learning_rate": 4.588277369292674e-05, + "loss": 0.9801, + "step": 1005 + }, + { + "epoch": 0.56, + "learning_rate": 4.5842882115646234e-05, + "loss": 1.0127, + "step": 1010 + }, + { + "epoch": 0.56, + "learning_rate": 4.580281573175157e-05, + "loss": 1.0415, + "step": 1015 + }, + { + "epoch": 0.56, + "learning_rate": 4.576257487727442e-05, + "loss": 0.9974, + "step": 1020 + }, + { + "epoch": 0.57, + "learning_rate": 4.572215988970974e-05, + "loss": 1.0097, + "step": 1025 + }, + { + "epoch": 0.57, + "learning_rate": 4.568157110801293e-05, + "loss": 1.0004, + "step": 1030 + }, + { + "epoch": 0.57, + "learning_rate": 4.5640808872596944e-05, + "loss": 0.9932, + "step": 1035 + }, + { + "epoch": 0.58, + "learning_rate": 4.5599873525329505e-05, + "loss": 0.9437, + "step": 1040 + }, + { + "epoch": 0.58, + "learning_rate": 4.555876540953019e-05, + "loss": 1.0004, + "step": 1045 + }, + { + "epoch": 0.58, + "learning_rate": 4.551748486996755e-05, + "loss": 0.9743, + "step": 1050 + }, + { + "epoch": 0.58, + "learning_rate": 4.547603225285626e-05, + "loss": 1.0303, + "step": 1055 + }, + { + "epoch": 0.59, + "learning_rate": 4.543440790585417e-05, + "loss": 1.0225, + "step": 1060 + }, + { + "epoch": 0.59, + "learning_rate": 4.539261217805939e-05, + "loss": 1.0102, + "step": 1065 + }, + { + "epoch": 0.59, + "learning_rate": 4.535064542000743e-05, + "loss": 1.0087, + "step": 1070 + }, + { + "epoch": 0.59, + "learning_rate": 4.5308507983668165e-05, + "loss": 0.952, + "step": 1075 + }, + { + "epoch": 0.6, + "learning_rate": 4.526620022244293e-05, + "loss": 0.9767, + "step": 1080 + }, + { + "epoch": 0.6, + "learning_rate": 4.522372249116158e-05, + "loss": 1.0049, + "step": 1085 + }, + { + "epoch": 0.6, + "learning_rate": 4.5181075146079456e-05, + "loss": 1.0397, + "step": 1090 + }, + { + "epoch": 0.61, + "learning_rate": 4.5138258544874455e-05, + "loss": 1.0167, + "step": 1095 + }, + { + "epoch": 0.61, + "learning_rate": 4.5095273046643985e-05, + "loss": 0.9966, + "step": 1100 + }, + { + "epoch": 0.61, + "learning_rate": 4.5052119011901986e-05, + "loss": 0.983, + "step": 1105 + }, + { + "epoch": 0.61, + "learning_rate": 4.500879680257587e-05, + "loss": 1.0301, + "step": 1110 + }, + { + "epoch": 0.62, + "learning_rate": 4.4965306782003535e-05, + "loss": 1.0371, + "step": 1115 + }, + { + "epoch": 0.62, + "learning_rate": 4.492164931493028e-05, + "loss": 1.0083, + "step": 1120 + }, + { + "epoch": 0.62, + "learning_rate": 4.487782476750575e-05, + "loss": 0.9728, + "step": 1125 + }, + { + "epoch": 0.62, + "learning_rate": 4.4833833507280884e-05, + "loss": 0.9892, + "step": 1130 + }, + { + "epoch": 0.63, + "learning_rate": 4.4789675903204805e-05, + "loss": 0.9847, + "step": 1135 + }, + { + "epoch": 0.63, + "learning_rate": 4.474535232562176e-05, + "loss": 1.0545, + "step": 1140 + }, + { + "epoch": 0.63, + "learning_rate": 4.470086314626797e-05, + "loss": 1.0204, + "step": 1145 + }, + { + "epoch": 0.64, + "learning_rate": 4.465620873826856e-05, + "loss": 0.9648, + "step": 1150 + }, + { + "epoch": 0.64, + "learning_rate": 4.46113894761344e-05, + "loss": 0.9545, + "step": 1155 + }, + { + "epoch": 0.64, + "learning_rate": 4.456640573575896e-05, + "loss": 1.0256, + "step": 1160 + }, + { + "epoch": 0.64, + "learning_rate": 4.4521257894415183e-05, + "loss": 0.99, + "step": 1165 + }, + { + "epoch": 0.65, + "learning_rate": 4.44759463307523e-05, + "loss": 0.9906, + "step": 1170 + }, + { + "epoch": 0.65, + "learning_rate": 4.443047142479266e-05, + "loss": 0.9876, + "step": 1175 + }, + { + "epoch": 0.65, + "learning_rate": 4.4384833557928553e-05, + "loss": 1.0495, + "step": 1180 + }, + { + "epoch": 0.66, + "learning_rate": 4.4339033112918966e-05, + "loss": 0.9869, + "step": 1185 + }, + { + "epoch": 0.66, + "learning_rate": 4.4293070473886456e-05, + "loss": 1.0299, + "step": 1190 + }, + { + "epoch": 0.66, + "learning_rate": 4.424694602631385e-05, + "loss": 1.0073, + "step": 1195 + }, + { + "epoch": 0.66, + "learning_rate": 4.420066015704105e-05, + "loss": 1.0023, + "step": 1200 + }, + { + "epoch": 0.67, + "learning_rate": 4.41542132542618e-05, + "loss": 0.9462, + "step": 1205 + }, + { + "epoch": 0.67, + "learning_rate": 4.410760570752037e-05, + "loss": 1.0116, + "step": 1210 + }, + { + "epoch": 0.67, + "learning_rate": 4.4060837907708375e-05, + "loss": 0.9652, + "step": 1215 + }, + { + "epoch": 0.67, + "learning_rate": 4.401391024706142e-05, + "loss": 1.0411, + "step": 1220 + }, + { + "epoch": 0.68, + "learning_rate": 4.396682311915586e-05, + "loss": 0.9691, + "step": 1225 + }, + { + "epoch": 0.68, + "learning_rate": 4.3919576918905495e-05, + "loss": 0.961, + "step": 1230 + }, + { + "epoch": 0.68, + "learning_rate": 4.387217204255819e-05, + "loss": 0.9602, + "step": 1235 + }, + { + "epoch": 0.69, + "learning_rate": 4.3824608887692666e-05, + "loss": 1.0197, + "step": 1240 + }, + { + "epoch": 0.69, + "learning_rate": 4.377688785321507e-05, + "loss": 0.9982, + "step": 1245 + }, + { + "epoch": 0.69, + "learning_rate": 4.372900933935569e-05, + "loss": 0.986, + "step": 1250 + }, + { + "epoch": 0.69, + "learning_rate": 4.368097374766556e-05, + "loss": 0.9744, + "step": 1255 + }, + { + "epoch": 0.7, + "learning_rate": 4.3632781481013105e-05, + "loss": 1.0078, + "step": 1260 + }, + { + "epoch": 0.7, + "learning_rate": 4.358443294358077e-05, + "loss": 0.9865, + "step": 1265 + }, + { + "epoch": 0.7, + "learning_rate": 4.35359285408616e-05, + "loss": 0.976, + "step": 1270 + }, + { + "epoch": 0.71, + "learning_rate": 4.348726867965591e-05, + "loss": 1.0141, + "step": 1275 + }, + { + "epoch": 0.71, + "learning_rate": 4.343845376806777e-05, + "loss": 0.952, + "step": 1280 + }, + { + "epoch": 0.71, + "learning_rate": 4.338948421550169e-05, + "loss": 1.0239, + "step": 1285 + }, + { + "epoch": 0.71, + "learning_rate": 4.334036043265909e-05, + "loss": 0.994, + "step": 1290 + }, + { + "epoch": 0.72, + "learning_rate": 4.329108283153492e-05, + "loss": 1.0505, + "step": 1295 + }, + { + "epoch": 0.72, + "learning_rate": 4.3241651825414195e-05, + "loss": 0.9919, + "step": 1300 + }, + { + "epoch": 0.72, + "learning_rate": 4.31920678288685e-05, + "loss": 1.0169, + "step": 1305 + }, + { + "epoch": 0.72, + "learning_rate": 4.3142331257752546e-05, + "loss": 0.9938, + "step": 1310 + }, + { + "epoch": 0.73, + "learning_rate": 4.309244252920064e-05, + "loss": 0.92, + "step": 1315 + }, + { + "epoch": 0.73, + "learning_rate": 4.304240206162326e-05, + "loss": 0.9537, + "step": 1320 + }, + { + "epoch": 0.73, + "learning_rate": 4.299221027470345e-05, + "loss": 0.996, + "step": 1325 + }, + { + "epoch": 0.74, + "learning_rate": 4.294186758939339e-05, + "loss": 0.9652, + "step": 1330 + }, + { + "epoch": 0.74, + "learning_rate": 4.2891374427910795e-05, + "loss": 1.0138, + "step": 1335 + }, + { + "epoch": 0.74, + "learning_rate": 4.284073121373544e-05, + "loss": 0.9267, + "step": 1340 + }, + { + "epoch": 0.74, + "learning_rate": 4.278993837160553e-05, + "loss": 0.9899, + "step": 1345 + }, + { + "epoch": 0.75, + "learning_rate": 4.273899632751422e-05, + "loss": 0.963, + "step": 1350 + }, + { + "epoch": 0.75, + "learning_rate": 4.2687905508705974e-05, + "loss": 1.0066, + "step": 1355 + }, + { + "epoch": 0.75, + "learning_rate": 4.263666634367303e-05, + "loss": 0.9939, + "step": 1360 + }, + { + "epoch": 0.75, + "learning_rate": 4.258527926215178e-05, + "loss": 0.9667, + "step": 1365 + }, + { + "epoch": 0.76, + "learning_rate": 4.253374469511917e-05, + "loss": 0.9999, + "step": 1370 + }, + { + "epoch": 0.76, + "learning_rate": 4.248206307478909e-05, + "loss": 0.9738, + "step": 1375 + }, + { + "epoch": 0.76, + "learning_rate": 4.243023483460875e-05, + "loss": 0.9829, + "step": 1380 + }, + { + "epoch": 0.77, + "learning_rate": 4.237826040925503e-05, + "loss": 1.0107, + "step": 1385 + }, + { + "epoch": 0.77, + "learning_rate": 4.232614023463088e-05, + "loss": 0.9992, + "step": 1390 + }, + { + "epoch": 0.77, + "learning_rate": 4.227387474786159e-05, + "loss": 0.9566, + "step": 1395 + }, + { + "epoch": 0.77, + "learning_rate": 4.222146438729119e-05, + "loss": 0.9722, + "step": 1400 + }, + { + "epoch": 0.78, + "learning_rate": 4.216890959247873e-05, + "loss": 0.988, + "step": 1405 + }, + { + "epoch": 0.78, + "learning_rate": 4.211621080419463e-05, + "loss": 1.0103, + "step": 1410 + }, + { + "epoch": 0.78, + "learning_rate": 4.206336846441695e-05, + "loss": 0.9805, + "step": 1415 + }, + { + "epoch": 0.79, + "learning_rate": 4.201038301632772e-05, + "loss": 1.0177, + "step": 1420 + }, + { + "epoch": 0.79, + "learning_rate": 4.195725490430917e-05, + "loss": 0.9938, + "step": 1425 + }, + { + "epoch": 0.79, + "learning_rate": 4.190398457394007e-05, + "loss": 1.0276, + "step": 1430 + }, + { + "epoch": 0.79, + "learning_rate": 4.1850572471991924e-05, + "loss": 0.9909, + "step": 1435 + }, + { + "epoch": 0.8, + "learning_rate": 4.1797019046425264e-05, + "loss": 0.9794, + "step": 1440 + }, + { + "epoch": 0.8, + "learning_rate": 4.1743324746385914e-05, + "loss": 1.0073, + "step": 1445 + }, + { + "epoch": 0.8, + "learning_rate": 4.1689490022201154e-05, + "loss": 1.0106, + "step": 1450 + }, + { + "epoch": 0.8, + "learning_rate": 4.163551532537601e-05, + "loss": 1.0241, + "step": 1455 + }, + { + "epoch": 0.81, + "learning_rate": 4.1581401108589425e-05, + "loss": 1.0178, + "step": 1460 + }, + { + "epoch": 0.81, + "learning_rate": 4.1527147825690495e-05, + "loss": 0.9426, + "step": 1465 + }, + { + "epoch": 0.81, + "learning_rate": 4.1472755931694626e-05, + "loss": 0.9642, + "step": 1470 + }, + { + "epoch": 0.82, + "learning_rate": 4.141822588277976e-05, + "loss": 0.9678, + "step": 1475 + }, + { + "epoch": 0.82, + "learning_rate": 4.136355813628251e-05, + "loss": 0.9654, + "step": 1480 + }, + { + "epoch": 0.82, + "learning_rate": 4.130875315069435e-05, + "loss": 0.9748, + "step": 1485 + }, + { + "epoch": 0.82, + "learning_rate": 4.125381138565775e-05, + "loss": 0.9321, + "step": 1490 + }, + { + "epoch": 0.83, + "learning_rate": 4.1198733301962346e-05, + "loss": 0.9885, + "step": 1495 + }, + { + "epoch": 0.83, + "learning_rate": 4.114351936154105e-05, + "loss": 0.9385, + "step": 1500 + }, + { + "epoch": 0.83, + "learning_rate": 4.108817002746619e-05, + "loss": 0.9362, + "step": 1505 + }, + { + "epoch": 0.83, + "learning_rate": 4.1032685763945625e-05, + "loss": 0.9764, + "step": 1510 + }, + { + "epoch": 0.84, + "learning_rate": 4.097706703631886e-05, + "loss": 0.961, + "step": 1515 + }, + { + "epoch": 0.84, + "learning_rate": 4.092131431105312e-05, + "loss": 0.9818, + "step": 1520 + }, + { + "epoch": 0.84, + "learning_rate": 4.086542805573945e-05, + "loss": 0.9855, + "step": 1525 + }, + { + "epoch": 0.85, + "learning_rate": 4.080940873908881e-05, + "loss": 0.9618, + "step": 1530 + }, + { + "epoch": 0.85, + "learning_rate": 4.07532568309281e-05, + "loss": 1.0102, + "step": 1535 + }, + { + "epoch": 0.85, + "learning_rate": 4.069697280219628e-05, + "loss": 0.9773, + "step": 1540 + }, + { + "epoch": 0.85, + "learning_rate": 4.0640557124940376e-05, + "loss": 0.9687, + "step": 1545 + }, + { + "epoch": 0.86, + "learning_rate": 4.058401027231152e-05, + "loss": 0.9818, + "step": 1550 + }, + { + "epoch": 0.86, + "learning_rate": 4.052733271856103e-05, + "loss": 0.9911, + "step": 1555 + }, + { + "epoch": 0.86, + "learning_rate": 4.0470524939036355e-05, + "loss": 0.9694, + "step": 1560 + }, + { + "epoch": 0.87, + "learning_rate": 4.0413587410177155e-05, + "loss": 0.9374, + "step": 1565 + }, + { + "epoch": 0.87, + "learning_rate": 4.035652060951128e-05, + "loss": 0.956, + "step": 1570 + }, + { + "epoch": 0.87, + "learning_rate": 4.0299325015650774e-05, + "loss": 0.9491, + "step": 1575 + }, + { + "epoch": 0.87, + "learning_rate": 4.024200110828783e-05, + "loss": 0.9569, + "step": 1580 + }, + { + "epoch": 0.88, + "learning_rate": 4.018454936819082e-05, + "loss": 0.9627, + "step": 1585 + }, + { + "epoch": 0.88, + "learning_rate": 4.012697027720018e-05, + "loss": 0.9922, + "step": 1590 + }, + { + "epoch": 0.88, + "learning_rate": 4.0069264318224506e-05, + "loss": 0.9703, + "step": 1595 + }, + { + "epoch": 0.88, + "learning_rate": 4.0011431975236337e-05, + "loss": 0.9436, + "step": 1600 + } + ], + "max_steps": 5424, + "num_train_epochs": 3, + "total_flos": 5.19952498950144e+17, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-1600/training_args.bin b/checkpoint-1600/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..77433459daca6603950afc463ff989e817f81ac3 --- /dev/null +++ b/checkpoint-1600/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0f6a8853599a52b1f15984d75ce4b6434c335fdc67d4509d0b2ca0d8c613590a +size 3362 diff --git a/checkpoint-1700/README.md b/checkpoint-1700/README.md new file mode 100644 index 0000000000000000000000000000000000000000..53c0fe6b436905ad7a2c060e98fe1ebdeac1a2fe --- /dev/null +++ b/checkpoint-1700/README.md @@ -0,0 +1,15 @@ +--- +library_name: peft +--- +## Training procedure + + +The following `bitsandbytes` quantization config was used during training: +- load_in_8bit: True +- llm_int8_threshold: 6.0 +- llm_int8_skip_modules: None +- llm_int8_enable_fp32_cpu_offload: False +### Framework versions + + +- PEFT 0.5.0 diff --git a/checkpoint-1700/adapter_config.json b/checkpoint-1700/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..514ae99ac7471cc26016f7575e44dd8304f29b07 --- /dev/null +++ b/checkpoint-1700/adapter_config.json @@ -0,0 +1,21 @@ +{ + "auto_mapping": null, + "base_model_name_or_path": "meta-llama/Llama-2-7b-chat-hf", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "q_proj", + "v_proj" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-1700/adapter_model.bin b/checkpoint-1700/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..b470ac3af813b5dbb3f12a10c9c0cdf738b44602 --- /dev/null +++ b/checkpoint-1700/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e23a84a778e38fd117efea5598dc7824198bdc2edc1d99e249516598c187dc87 +size 16821197 diff --git a/checkpoint-1700/finetuning_args.json b/checkpoint-1700/finetuning_args.json new file mode 100644 index 0000000000000000000000000000000000000000..d01efc206b59c6f88548e8f3940579f2ed2af33b --- /dev/null +++ b/checkpoint-1700/finetuning_args.json @@ -0,0 +1,16 @@ +{ + "dpo_beta": 0.1, + "finetuning_type": "lora", + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "lora_rank": 8, + "lora_target": [ + "q_proj", + "v_proj" + ], + "name_module_trainable": "mlp", + "num_hidden_layers": 32, + "num_layer_trainable": 3, + "ppo_score_norm": false, + "resume_lora_training": true +} diff --git a/checkpoint-1700/optimizer.pt b/checkpoint-1700/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..2bbcca9e2828ac14b85db7040543655ffa8bd139 --- /dev/null +++ b/checkpoint-1700/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:55ced3bc1230b9e9e7b5d9d0ab71f1c7fef3ba4d3a248d8a8eaa6bc1b98108fa +size 33629893 diff --git a/checkpoint-1700/rng_state.pth b/checkpoint-1700/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..b96ec4cfa1c80387039454f5768a6db59a4ca827 --- /dev/null +++ b/checkpoint-1700/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b5fb0a90ee236ffe22b6ad8bc29c2144be68e2aacb9697c1d0f1629b6b130f73 +size 14575 diff --git a/checkpoint-1700/scaler.pt b/checkpoint-1700/scaler.pt new file mode 100644 index 0000000000000000000000000000000000000000..1994ea58742917cc2b26abb388e3a67e77e632e7 --- /dev/null +++ b/checkpoint-1700/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:570f84cc2ee81db74fdd5c7a9757e8395fb87f7d79b528b92f95c936b75df760 +size 557 diff --git a/checkpoint-1700/scheduler.pt b/checkpoint-1700/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..a706748290e3e72867c505411dfcab6add7e2788 --- /dev/null +++ b/checkpoint-1700/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ec19701d8041a93883bce2e2f417075a5bba010e1e2458c8682173e75980869b +size 627 diff --git a/checkpoint-1700/trainer_state.json b/checkpoint-1700/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..6a1b0bfede9c111938b05767ac835745b668412f --- /dev/null +++ b/checkpoint-1700/trainer_state.json @@ -0,0 +1,2056 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.9400055294442908, + "global_step": 1700, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.9999895164082156e-05, + "loss": 1.6349, + "step": 5 + }, + { + "epoch": 0.01, + "learning_rate": 4.999958065720787e-05, + "loss": 1.6199, + "step": 10 + }, + { + "epoch": 0.01, + "learning_rate": 4.999905648201487e-05, + "loss": 1.4834, + "step": 15 + }, + { + "epoch": 0.01, + "learning_rate": 4.999832264289934e-05, + "loss": 1.3882, + "step": 20 + }, + { + "epoch": 0.01, + "learning_rate": 4.999737914601591e-05, + "loss": 1.3679, + "step": 25 + }, + { + "epoch": 0.02, + "learning_rate": 4.999622599927756e-05, + "loss": 1.2396, + "step": 30 + }, + { + "epoch": 0.02, + "learning_rate": 4.999486321235559e-05, + "loss": 1.321, + "step": 35 + }, + { + "epoch": 0.02, + "learning_rate": 4.9993290796679516e-05, + "loss": 1.2874, + "step": 40 + }, + { + "epoch": 0.02, + "learning_rate": 4.999150876543699e-05, + "loss": 1.2607, + "step": 45 + }, + { + "epoch": 0.03, + "learning_rate": 4.9989517133573694e-05, + "loss": 1.2454, + "step": 50 + }, + { + "epoch": 0.03, + "learning_rate": 4.9987315917793174e-05, + "loss": 1.2799, + "step": 55 + }, + { + "epoch": 0.03, + "learning_rate": 4.998490513655676e-05, + "loss": 1.2575, + "step": 60 + }, + { + "epoch": 0.04, + "learning_rate": 4.998228481008337e-05, + "loss": 1.2404, + "step": 65 + }, + { + "epoch": 0.04, + "learning_rate": 4.997945496034934e-05, + "loss": 1.2219, + "step": 70 + }, + { + "epoch": 0.04, + "learning_rate": 4.9976415611088267e-05, + "loss": 1.2241, + "step": 75 + }, + { + "epoch": 0.04, + "learning_rate": 4.997316678779079e-05, + "loss": 1.1716, + "step": 80 + }, + { + "epoch": 0.05, + "learning_rate": 4.996970851770438e-05, + "loss": 1.1883, + "step": 85 + }, + { + "epoch": 0.05, + "learning_rate": 4.9966040829833115e-05, + "loss": 1.205, + "step": 90 + }, + { + "epoch": 0.05, + "learning_rate": 4.9962163754937426e-05, + "loss": 1.1246, + "step": 95 + }, + { + "epoch": 0.06, + "learning_rate": 4.995807732553384e-05, + "loss": 1.1636, + "step": 100 + }, + { + "epoch": 0.06, + "learning_rate": 4.9953781575894723e-05, + "loss": 1.158, + "step": 105 + }, + { + "epoch": 0.06, + "learning_rate": 4.9949276542048e-05, + "loss": 1.1477, + "step": 110 + }, + { + "epoch": 0.06, + "learning_rate": 4.9944562261776805e-05, + "loss": 1.1678, + "step": 115 + }, + { + "epoch": 0.07, + "learning_rate": 4.9939638774619216e-05, + "loss": 1.1501, + "step": 120 + }, + { + "epoch": 0.07, + "learning_rate": 4.99345061218679e-05, + "loss": 1.1955, + "step": 125 + }, + { + "epoch": 0.07, + "learning_rate": 4.9929164346569756e-05, + "loss": 1.1724, + "step": 130 + }, + { + "epoch": 0.07, + "learning_rate": 4.9923613493525576e-05, + "loss": 1.177, + "step": 135 + }, + { + "epoch": 0.08, + "learning_rate": 4.991785360928968e-05, + "loss": 1.1418, + "step": 140 + }, + { + "epoch": 0.08, + "learning_rate": 4.991188474216947e-05, + "loss": 1.1898, + "step": 145 + }, + { + "epoch": 0.08, + "learning_rate": 4.9905706942225094e-05, + "loss": 1.1479, + "step": 150 + }, + { + "epoch": 0.09, + "learning_rate": 4.9899320261268966e-05, + "loss": 1.1356, + "step": 155 + }, + { + "epoch": 0.09, + "learning_rate": 4.989272475286537e-05, + "loss": 1.1397, + "step": 160 + }, + { + "epoch": 0.09, + "learning_rate": 4.9885920472330004e-05, + "loss": 1.1215, + "step": 165 + }, + { + "epoch": 0.09, + "learning_rate": 4.9878907476729516e-05, + "loss": 1.167, + "step": 170 + }, + { + "epoch": 0.1, + "learning_rate": 4.9871685824881e-05, + "loss": 1.1219, + "step": 175 + }, + { + "epoch": 0.1, + "learning_rate": 4.9864255577351534e-05, + "loss": 1.0835, + "step": 180 + }, + { + "epoch": 0.1, + "learning_rate": 4.985661679645769e-05, + "loss": 1.0721, + "step": 185 + }, + { + "epoch": 0.11, + "learning_rate": 4.9848769546264915e-05, + "loss": 1.0692, + "step": 190 + }, + { + "epoch": 0.11, + "learning_rate": 4.9840713892587146e-05, + "loss": 1.0488, + "step": 195 + }, + { + "epoch": 0.11, + "learning_rate": 4.983244990298609e-05, + "loss": 1.1285, + "step": 200 + }, + { + "epoch": 0.11, + "learning_rate": 4.982397764677081e-05, + "loss": 1.0832, + "step": 205 + }, + { + "epoch": 0.12, + "learning_rate": 4.981529719499704e-05, + "loss": 1.0652, + "step": 210 + }, + { + "epoch": 0.12, + "learning_rate": 4.980640862046663e-05, + "loss": 1.1043, + "step": 215 + }, + { + "epoch": 0.12, + "learning_rate": 4.979731199772693e-05, + "loss": 1.112, + "step": 220 + }, + { + "epoch": 0.12, + "learning_rate": 4.9788007403070146e-05, + "loss": 1.1029, + "step": 225 + }, + { + "epoch": 0.13, + "learning_rate": 4.977849491453277e-05, + "loss": 1.0869, + "step": 230 + }, + { + "epoch": 0.13, + "learning_rate": 4.976877461189481e-05, + "loss": 1.0843, + "step": 235 + }, + { + "epoch": 0.13, + "learning_rate": 4.975884657667922e-05, + "loss": 1.0789, + "step": 240 + }, + { + "epoch": 0.14, + "learning_rate": 4.974871089215118e-05, + "loss": 1.0449, + "step": 245 + }, + { + "epoch": 0.14, + "learning_rate": 4.9738367643317405e-05, + "loss": 1.1053, + "step": 250 + }, + { + "epoch": 0.14, + "learning_rate": 4.9727816916925395e-05, + "loss": 1.0651, + "step": 255 + }, + { + "epoch": 0.14, + "learning_rate": 4.971705880146276e-05, + "loss": 1.0828, + "step": 260 + }, + { + "epoch": 0.15, + "learning_rate": 4.970609338715646e-05, + "loss": 1.0932, + "step": 265 + }, + { + "epoch": 0.15, + "learning_rate": 4.969492076597203e-05, + "loss": 1.0648, + "step": 270 + }, + { + "epoch": 0.15, + "learning_rate": 4.968354103161283e-05, + "loss": 1.0948, + "step": 275 + }, + { + "epoch": 0.15, + "learning_rate": 4.967195427951926e-05, + "loss": 1.0721, + "step": 280 + }, + { + "epoch": 0.16, + "learning_rate": 4.9660160606867936e-05, + "loss": 1.124, + "step": 285 + }, + { + "epoch": 0.16, + "learning_rate": 4.9648160112570896e-05, + "loss": 1.0963, + "step": 290 + }, + { + "epoch": 0.16, + "learning_rate": 4.9635952897274773e-05, + "loss": 1.1078, + "step": 295 + }, + { + "epoch": 0.17, + "learning_rate": 4.9623539063359925e-05, + "loss": 1.1059, + "step": 300 + }, + { + "epoch": 0.17, + "learning_rate": 4.961091871493962e-05, + "loss": 1.1032, + "step": 305 + }, + { + "epoch": 0.17, + "learning_rate": 4.959809195785912e-05, + "loss": 1.0595, + "step": 310 + }, + { + "epoch": 0.17, + "learning_rate": 4.958505889969481e-05, + "loss": 1.1096, + "step": 315 + }, + { + "epoch": 0.18, + "learning_rate": 4.957181964975329e-05, + "loss": 1.0589, + "step": 320 + }, + { + "epoch": 0.18, + "learning_rate": 4.955837431907049e-05, + "loss": 1.0608, + "step": 325 + }, + { + "epoch": 0.18, + "learning_rate": 4.954472302041069e-05, + "loss": 1.0819, + "step": 330 + }, + { + "epoch": 0.19, + "learning_rate": 4.9530865868265605e-05, + "loss": 1.0759, + "step": 335 + }, + { + "epoch": 0.19, + "learning_rate": 4.951680297885342e-05, + "loss": 1.0515, + "step": 340 + }, + { + "epoch": 0.19, + "learning_rate": 4.950253447011779e-05, + "loss": 1.0371, + "step": 345 + }, + { + "epoch": 0.19, + "learning_rate": 4.948806046172691e-05, + "loss": 1.0619, + "step": 350 + }, + { + "epoch": 0.2, + "learning_rate": 4.947338107507245e-05, + "loss": 1.0757, + "step": 355 + }, + { + "epoch": 0.2, + "learning_rate": 4.945849643326857e-05, + "loss": 1.0686, + "step": 360 + }, + { + "epoch": 0.2, + "learning_rate": 4.9443406661150874e-05, + "loss": 1.0809, + "step": 365 + }, + { + "epoch": 0.2, + "learning_rate": 4.942811188527537e-05, + "loss": 1.0704, + "step": 370 + }, + { + "epoch": 0.21, + "learning_rate": 4.941261223391742e-05, + "loss": 1.0655, + "step": 375 + }, + { + "epoch": 0.21, + "learning_rate": 4.939690783707063e-05, + "loss": 1.1182, + "step": 380 + }, + { + "epoch": 0.21, + "learning_rate": 4.938099882644578e-05, + "loss": 1.081, + "step": 385 + }, + { + "epoch": 0.22, + "learning_rate": 4.9364885335469734e-05, + "loss": 1.0792, + "step": 390 + }, + { + "epoch": 0.22, + "learning_rate": 4.93485674992843e-05, + "loss": 1.0244, + "step": 395 + }, + { + "epoch": 0.22, + "learning_rate": 4.933204545474511e-05, + "loss": 1.0531, + "step": 400 + }, + { + "epoch": 0.22, + "learning_rate": 4.9315319340420465e-05, + "loss": 1.048, + "step": 405 + }, + { + "epoch": 0.23, + "learning_rate": 4.929838929659015e-05, + "loss": 1.04, + "step": 410 + }, + { + "epoch": 0.23, + "learning_rate": 4.9281255465244314e-05, + "loss": 1.0686, + "step": 415 + }, + { + "epoch": 0.23, + "learning_rate": 4.926391799008223e-05, + "loss": 1.0933, + "step": 420 + }, + { + "epoch": 0.24, + "learning_rate": 4.924637701651111e-05, + "loss": 1.0313, + "step": 425 + }, + { + "epoch": 0.24, + "learning_rate": 4.9228632691644874e-05, + "loss": 1.0811, + "step": 430 + }, + { + "epoch": 0.24, + "learning_rate": 4.921068516430293e-05, + "loss": 1.0617, + "step": 435 + }, + { + "epoch": 0.24, + "learning_rate": 4.919253458500892e-05, + "loss": 1.0482, + "step": 440 + }, + { + "epoch": 0.25, + "learning_rate": 4.9174181105989445e-05, + "loss": 1.0681, + "step": 445 + }, + { + "epoch": 0.25, + "learning_rate": 4.9155624881172834e-05, + "loss": 1.0476, + "step": 450 + }, + { + "epoch": 0.25, + "learning_rate": 4.913686606618777e-05, + "loss": 1.0463, + "step": 455 + }, + { + "epoch": 0.25, + "learning_rate": 4.911790481836208e-05, + "loss": 1.0213, + "step": 460 + }, + { + "epoch": 0.26, + "learning_rate": 4.909874129672133e-05, + "loss": 0.9855, + "step": 465 + }, + { + "epoch": 0.26, + "learning_rate": 4.907937566198757e-05, + "loss": 1.045, + "step": 470 + }, + { + "epoch": 0.26, + "learning_rate": 4.9059808076577914e-05, + "loss": 1.0442, + "step": 475 + }, + { + "epoch": 0.27, + "learning_rate": 4.904003870460323e-05, + "loss": 1.0354, + "step": 480 + }, + { + "epoch": 0.27, + "learning_rate": 4.9020067711866735e-05, + "loss": 1.0331, + "step": 485 + }, + { + "epoch": 0.27, + "learning_rate": 4.899989526586261e-05, + "loss": 1.0585, + "step": 490 + }, + { + "epoch": 0.27, + "learning_rate": 4.8979521535774636e-05, + "loss": 1.0223, + "step": 495 + }, + { + "epoch": 0.28, + "learning_rate": 4.895894669247468e-05, + "loss": 1.0118, + "step": 500 + }, + { + "epoch": 0.28, + "learning_rate": 4.8938170908521356e-05, + "loss": 1.0508, + "step": 505 + }, + { + "epoch": 0.28, + "learning_rate": 4.8917194358158534e-05, + "loss": 1.0656, + "step": 510 + }, + { + "epoch": 0.28, + "learning_rate": 4.8896017217313886e-05, + "loss": 1.0655, + "step": 515 + }, + { + "epoch": 0.29, + "learning_rate": 4.887463966359741e-05, + "loss": 1.0833, + "step": 520 + }, + { + "epoch": 0.29, + "learning_rate": 4.8853061876299956e-05, + "loss": 1.068, + "step": 525 + }, + { + "epoch": 0.29, + "learning_rate": 4.8831284036391684e-05, + "loss": 1.0487, + "step": 530 + }, + { + "epoch": 0.3, + "learning_rate": 4.880930632652058e-05, + "loss": 1.059, + "step": 535 + }, + { + "epoch": 0.3, + "learning_rate": 4.878712893101092e-05, + "loss": 1.0408, + "step": 540 + }, + { + "epoch": 0.3, + "learning_rate": 4.876475203586171e-05, + "loss": 0.9976, + "step": 545 + }, + { + "epoch": 0.3, + "learning_rate": 4.874217582874514e-05, + "loss": 1.0358, + "step": 550 + }, + { + "epoch": 0.31, + "learning_rate": 4.8719400499005e-05, + "loss": 1.0073, + "step": 555 + }, + { + "epoch": 0.31, + "learning_rate": 4.869642623765509e-05, + "loss": 1.0742, + "step": 560 + }, + { + "epoch": 0.31, + "learning_rate": 4.867325323737765e-05, + "loss": 1.0463, + "step": 565 + }, + { + "epoch": 0.32, + "learning_rate": 4.864988169252168e-05, + "loss": 1.0242, + "step": 570 + }, + { + "epoch": 0.32, + "learning_rate": 4.8626311799101375e-05, + "loss": 1.0773, + "step": 575 + }, + { + "epoch": 0.32, + "learning_rate": 4.860254375479446e-05, + "loss": 1.0371, + "step": 580 + }, + { + "epoch": 0.32, + "learning_rate": 4.8578577758940504e-05, + "loss": 1.0399, + "step": 585 + }, + { + "epoch": 0.33, + "learning_rate": 4.855441401253928e-05, + "loss": 1.0411, + "step": 590 + }, + { + "epoch": 0.33, + "learning_rate": 4.8530052718249076e-05, + "loss": 1.0476, + "step": 595 + }, + { + "epoch": 0.33, + "learning_rate": 4.850549408038498e-05, + "loss": 1.0075, + "step": 600 + }, + { + "epoch": 0.33, + "learning_rate": 4.848073830491717e-05, + "loss": 1.0045, + "step": 605 + }, + { + "epoch": 0.34, + "learning_rate": 4.845578559946923e-05, + "loss": 1.0368, + "step": 610 + }, + { + "epoch": 0.34, + "learning_rate": 4.8430636173316306e-05, + "loss": 1.0156, + "step": 615 + }, + { + "epoch": 0.34, + "learning_rate": 4.840529023738348e-05, + "loss": 1.0334, + "step": 620 + }, + { + "epoch": 0.35, + "learning_rate": 4.837974800424389e-05, + "loss": 0.9994, + "step": 625 + }, + { + "epoch": 0.35, + "learning_rate": 4.8354009688117026e-05, + "loss": 1.0467, + "step": 630 + }, + { + "epoch": 0.35, + "learning_rate": 4.8328075504866874e-05, + "loss": 0.9968, + "step": 635 + }, + { + "epoch": 0.35, + "learning_rate": 4.8301945672000164e-05, + "loss": 1.041, + "step": 640 + }, + { + "epoch": 0.36, + "learning_rate": 4.8275620408664487e-05, + "loss": 0.9811, + "step": 645 + }, + { + "epoch": 0.36, + "learning_rate": 4.8249099935646494e-05, + "loss": 1.0235, + "step": 650 + }, + { + "epoch": 0.36, + "learning_rate": 4.822238447537003e-05, + "loss": 1.0152, + "step": 655 + }, + { + "epoch": 0.36, + "learning_rate": 4.819547425189429e-05, + "loss": 1.038, + "step": 660 + }, + { + "epoch": 0.37, + "learning_rate": 4.81683694909119e-05, + "loss": 1.0387, + "step": 665 + }, + { + "epoch": 0.37, + "learning_rate": 4.814107041974707e-05, + "loss": 1.019, + "step": 670 + }, + { + "epoch": 0.37, + "learning_rate": 4.811357726735366e-05, + "loss": 1.0403, + "step": 675 + }, + { + "epoch": 0.38, + "learning_rate": 4.808589026431324e-05, + "loss": 1.0828, + "step": 680 + }, + { + "epoch": 0.38, + "learning_rate": 4.805800964283322e-05, + "loss": 1.0128, + "step": 685 + }, + { + "epoch": 0.38, + "learning_rate": 4.802993563674483e-05, + "loss": 1.0502, + "step": 690 + }, + { + "epoch": 0.38, + "learning_rate": 4.80016684815012e-05, + "loss": 1.0169, + "step": 695 + }, + { + "epoch": 0.39, + "learning_rate": 4.7973208414175406e-05, + "loss": 1.0316, + "step": 700 + }, + { + "epoch": 0.39, + "learning_rate": 4.794455567345842e-05, + "loss": 1.0263, + "step": 705 + }, + { + "epoch": 0.39, + "learning_rate": 4.791571049965714e-05, + "loss": 1.0801, + "step": 710 + }, + { + "epoch": 0.4, + "learning_rate": 4.7886673134692404e-05, + "loss": 1.0575, + "step": 715 + }, + { + "epoch": 0.4, + "learning_rate": 4.7857443822096905e-05, + "loss": 0.9767, + "step": 720 + }, + { + "epoch": 0.4, + "learning_rate": 4.782802280701319e-05, + "loss": 0.9666, + "step": 725 + }, + { + "epoch": 0.4, + "learning_rate": 4.77984103361916e-05, + "loss": 0.9983, + "step": 730 + }, + { + "epoch": 0.41, + "learning_rate": 4.776860665798816e-05, + "loss": 1.0219, + "step": 735 + }, + { + "epoch": 0.41, + "learning_rate": 4.773861202236257e-05, + "loss": 0.9963, + "step": 740 + }, + { + "epoch": 0.41, + "learning_rate": 4.770842668087602e-05, + "loss": 1.0219, + "step": 745 + }, + { + "epoch": 0.41, + "learning_rate": 4.767805088668916e-05, + "loss": 1.0029, + "step": 750 + }, + { + "epoch": 0.42, + "learning_rate": 4.7647484894559936e-05, + "loss": 1.0077, + "step": 755 + }, + { + "epoch": 0.42, + "learning_rate": 4.7616728960841444e-05, + "loss": 0.9908, + "step": 760 + }, + { + "epoch": 0.42, + "learning_rate": 4.758578334347981e-05, + "loss": 1.0092, + "step": 765 + }, + { + "epoch": 0.43, + "learning_rate": 4.7554648302012015e-05, + "loss": 0.9874, + "step": 770 + }, + { + "epoch": 0.43, + "learning_rate": 4.7523324097563706e-05, + "loss": 1.0185, + "step": 775 + }, + { + "epoch": 0.43, + "learning_rate": 4.749181099284703e-05, + "loss": 1.0533, + "step": 780 + }, + { + "epoch": 0.43, + "learning_rate": 4.746010925215839e-05, + "loss": 1.0083, + "step": 785 + }, + { + "epoch": 0.44, + "learning_rate": 4.74282191413763e-05, + "loss": 1.0191, + "step": 790 + }, + { + "epoch": 0.44, + "learning_rate": 4.7396140927959045e-05, + "loss": 0.9786, + "step": 795 + }, + { + "epoch": 0.44, + "learning_rate": 4.7363874880942574e-05, + "loss": 1.0197, + "step": 800 + }, + { + "epoch": 0.45, + "learning_rate": 4.733142127093813e-05, + "loss": 1.0471, + "step": 805 + }, + { + "epoch": 0.45, + "learning_rate": 4.7298780370130014e-05, + "loss": 1.0134, + "step": 810 + }, + { + "epoch": 0.45, + "learning_rate": 4.726595245227336e-05, + "loss": 0.9801, + "step": 815 + }, + { + "epoch": 0.45, + "learning_rate": 4.723293779269173e-05, + "loss": 0.9944, + "step": 820 + }, + { + "epoch": 0.46, + "learning_rate": 4.7199736668274924e-05, + "loss": 1.0358, + "step": 825 + }, + { + "epoch": 0.46, + "learning_rate": 4.716634935747655e-05, + "loss": 0.9907, + "step": 830 + }, + { + "epoch": 0.46, + "learning_rate": 4.713277614031177e-05, + "loss": 1.0698, + "step": 835 + }, + { + "epoch": 0.46, + "learning_rate": 4.70990172983549e-05, + "loss": 1.0466, + "step": 840 + }, + { + "epoch": 0.47, + "learning_rate": 4.706507311473707e-05, + "loss": 0.9489, + "step": 845 + }, + { + "epoch": 0.47, + "learning_rate": 4.703094387414385e-05, + "loss": 0.9936, + "step": 850 + }, + { + "epoch": 0.47, + "learning_rate": 4.699662986281288e-05, + "loss": 0.9672, + "step": 855 + }, + { + "epoch": 0.48, + "learning_rate": 4.696213136853141e-05, + "loss": 1.0164, + "step": 860 + }, + { + "epoch": 0.48, + "learning_rate": 4.6927448680633954e-05, + "loss": 0.9817, + "step": 865 + }, + { + "epoch": 0.48, + "learning_rate": 4.689258208999983e-05, + "loss": 1.028, + "step": 870 + }, + { + "epoch": 0.48, + "learning_rate": 4.6857531889050716e-05, + "loss": 1.0239, + "step": 875 + }, + { + "epoch": 0.49, + "learning_rate": 4.682229837174821e-05, + "loss": 1.0378, + "step": 880 + }, + { + "epoch": 0.49, + "learning_rate": 4.678688183359135e-05, + "loss": 0.9908, + "step": 885 + }, + { + "epoch": 0.49, + "learning_rate": 4.675128257161418e-05, + "loss": 0.9384, + "step": 890 + }, + { + "epoch": 0.49, + "learning_rate": 4.671550088438319e-05, + "loss": 0.9481, + "step": 895 + }, + { + "epoch": 0.5, + "learning_rate": 4.6679537071994874e-05, + "loss": 0.9541, + "step": 900 + }, + { + "epoch": 0.5, + "learning_rate": 4.6643391436073165e-05, + "loss": 0.9787, + "step": 905 + }, + { + "epoch": 0.5, + "learning_rate": 4.660706427976693e-05, + "loss": 0.9897, + "step": 910 + }, + { + "epoch": 0.51, + "learning_rate": 4.657055590774745e-05, + "loss": 0.9947, + "step": 915 + }, + { + "epoch": 0.51, + "learning_rate": 4.6533866626205805e-05, + "loss": 0.9614, + "step": 920 + }, + { + "epoch": 0.51, + "learning_rate": 4.649699674285036e-05, + "loss": 1.0323, + "step": 925 + }, + { + "epoch": 0.51, + "learning_rate": 4.645994656690417e-05, + "loss": 1.0345, + "step": 930 + }, + { + "epoch": 0.52, + "learning_rate": 4.642271640910235e-05, + "loss": 1.0432, + "step": 935 + }, + { + "epoch": 0.52, + "learning_rate": 4.638530658168954e-05, + "loss": 1.0102, + "step": 940 + }, + { + "epoch": 0.52, + "learning_rate": 4.6347717398417203e-05, + "loss": 1.0361, + "step": 945 + }, + { + "epoch": 0.53, + "learning_rate": 4.6309949174541096e-05, + "loss": 0.9818, + "step": 950 + }, + { + "epoch": 0.53, + "learning_rate": 4.627200222681851e-05, + "loss": 1.0293, + "step": 955 + }, + { + "epoch": 0.53, + "learning_rate": 4.6233876873505694e-05, + "loss": 1.0125, + "step": 960 + }, + { + "epoch": 0.53, + "learning_rate": 4.619557343435516e-05, + "loss": 1.0039, + "step": 965 + }, + { + "epoch": 0.54, + "learning_rate": 4.615709223061302e-05, + "loss": 1.0146, + "step": 970 + }, + { + "epoch": 0.54, + "learning_rate": 4.611843358501624e-05, + "loss": 1.0352, + "step": 975 + }, + { + "epoch": 0.54, + "learning_rate": 4.6079597821789993e-05, + "loss": 1.0405, + "step": 980 + }, + { + "epoch": 0.54, + "learning_rate": 4.604058526664491e-05, + "loss": 0.9834, + "step": 985 + }, + { + "epoch": 0.55, + "learning_rate": 4.600139624677436e-05, + "loss": 1.0252, + "step": 990 + }, + { + "epoch": 0.55, + "learning_rate": 4.596203109085168e-05, + "loss": 1.0516, + "step": 995 + }, + { + "epoch": 0.55, + "learning_rate": 4.5922490129027464e-05, + "loss": 0.9762, + "step": 1000 + }, + { + "epoch": 0.56, + "learning_rate": 4.588277369292674e-05, + "loss": 0.9801, + "step": 1005 + }, + { + "epoch": 0.56, + "learning_rate": 4.5842882115646234e-05, + "loss": 1.0127, + "step": 1010 + }, + { + "epoch": 0.56, + "learning_rate": 4.580281573175157e-05, + "loss": 1.0415, + "step": 1015 + }, + { + "epoch": 0.56, + "learning_rate": 4.576257487727442e-05, + "loss": 0.9974, + "step": 1020 + }, + { + "epoch": 0.57, + "learning_rate": 4.572215988970974e-05, + "loss": 1.0097, + "step": 1025 + }, + { + "epoch": 0.57, + "learning_rate": 4.568157110801293e-05, + "loss": 1.0004, + "step": 1030 + }, + { + "epoch": 0.57, + "learning_rate": 4.5640808872596944e-05, + "loss": 0.9932, + "step": 1035 + }, + { + "epoch": 0.58, + "learning_rate": 4.5599873525329505e-05, + "loss": 0.9437, + "step": 1040 + }, + { + "epoch": 0.58, + "learning_rate": 4.555876540953019e-05, + "loss": 1.0004, + "step": 1045 + }, + { + "epoch": 0.58, + "learning_rate": 4.551748486996755e-05, + "loss": 0.9743, + "step": 1050 + }, + { + "epoch": 0.58, + "learning_rate": 4.547603225285626e-05, + "loss": 1.0303, + "step": 1055 + }, + { + "epoch": 0.59, + "learning_rate": 4.543440790585417e-05, + "loss": 1.0225, + "step": 1060 + }, + { + "epoch": 0.59, + "learning_rate": 4.539261217805939e-05, + "loss": 1.0102, + "step": 1065 + }, + { + "epoch": 0.59, + "learning_rate": 4.535064542000743e-05, + "loss": 1.0087, + "step": 1070 + }, + { + "epoch": 0.59, + "learning_rate": 4.5308507983668165e-05, + "loss": 0.952, + "step": 1075 + }, + { + "epoch": 0.6, + "learning_rate": 4.526620022244293e-05, + "loss": 0.9767, + "step": 1080 + }, + { + "epoch": 0.6, + "learning_rate": 4.522372249116158e-05, + "loss": 1.0049, + "step": 1085 + }, + { + "epoch": 0.6, + "learning_rate": 4.5181075146079456e-05, + "loss": 1.0397, + "step": 1090 + }, + { + "epoch": 0.61, + "learning_rate": 4.5138258544874455e-05, + "loss": 1.0167, + "step": 1095 + }, + { + "epoch": 0.61, + "learning_rate": 4.5095273046643985e-05, + "loss": 0.9966, + "step": 1100 + }, + { + "epoch": 0.61, + "learning_rate": 4.5052119011901986e-05, + "loss": 0.983, + "step": 1105 + }, + { + "epoch": 0.61, + "learning_rate": 4.500879680257587e-05, + "loss": 1.0301, + "step": 1110 + }, + { + "epoch": 0.62, + "learning_rate": 4.4965306782003535e-05, + "loss": 1.0371, + "step": 1115 + }, + { + "epoch": 0.62, + "learning_rate": 4.492164931493028e-05, + "loss": 1.0083, + "step": 1120 + }, + { + "epoch": 0.62, + "learning_rate": 4.487782476750575e-05, + "loss": 0.9728, + "step": 1125 + }, + { + "epoch": 0.62, + "learning_rate": 4.4833833507280884e-05, + "loss": 0.9892, + "step": 1130 + }, + { + "epoch": 0.63, + "learning_rate": 4.4789675903204805e-05, + "loss": 0.9847, + "step": 1135 + }, + { + "epoch": 0.63, + "learning_rate": 4.474535232562176e-05, + "loss": 1.0545, + "step": 1140 + }, + { + "epoch": 0.63, + "learning_rate": 4.470086314626797e-05, + "loss": 1.0204, + "step": 1145 + }, + { + "epoch": 0.64, + "learning_rate": 4.465620873826856e-05, + "loss": 0.9648, + "step": 1150 + }, + { + "epoch": 0.64, + "learning_rate": 4.46113894761344e-05, + "loss": 0.9545, + "step": 1155 + }, + { + "epoch": 0.64, + "learning_rate": 4.456640573575896e-05, + "loss": 1.0256, + "step": 1160 + }, + { + "epoch": 0.64, + "learning_rate": 4.4521257894415183e-05, + "loss": 0.99, + "step": 1165 + }, + { + "epoch": 0.65, + "learning_rate": 4.44759463307523e-05, + "loss": 0.9906, + "step": 1170 + }, + { + "epoch": 0.65, + "learning_rate": 4.443047142479266e-05, + "loss": 0.9876, + "step": 1175 + }, + { + "epoch": 0.65, + "learning_rate": 4.4384833557928553e-05, + "loss": 1.0495, + "step": 1180 + }, + { + "epoch": 0.66, + "learning_rate": 4.4339033112918966e-05, + "loss": 0.9869, + "step": 1185 + }, + { + "epoch": 0.66, + "learning_rate": 4.4293070473886456e-05, + "loss": 1.0299, + "step": 1190 + }, + { + "epoch": 0.66, + "learning_rate": 4.424694602631385e-05, + "loss": 1.0073, + "step": 1195 + }, + { + "epoch": 0.66, + "learning_rate": 4.420066015704105e-05, + "loss": 1.0023, + "step": 1200 + }, + { + "epoch": 0.67, + "learning_rate": 4.41542132542618e-05, + "loss": 0.9462, + "step": 1205 + }, + { + "epoch": 0.67, + "learning_rate": 4.410760570752037e-05, + "loss": 1.0116, + "step": 1210 + }, + { + "epoch": 0.67, + "learning_rate": 4.4060837907708375e-05, + "loss": 0.9652, + "step": 1215 + }, + { + "epoch": 0.67, + "learning_rate": 4.401391024706142e-05, + "loss": 1.0411, + "step": 1220 + }, + { + "epoch": 0.68, + "learning_rate": 4.396682311915586e-05, + "loss": 0.9691, + "step": 1225 + }, + { + "epoch": 0.68, + "learning_rate": 4.3919576918905495e-05, + "loss": 0.961, + "step": 1230 + }, + { + "epoch": 0.68, + "learning_rate": 4.387217204255819e-05, + "loss": 0.9602, + "step": 1235 + }, + { + "epoch": 0.69, + "learning_rate": 4.3824608887692666e-05, + "loss": 1.0197, + "step": 1240 + }, + { + "epoch": 0.69, + "learning_rate": 4.377688785321507e-05, + "loss": 0.9982, + "step": 1245 + }, + { + "epoch": 0.69, + "learning_rate": 4.372900933935569e-05, + "loss": 0.986, + "step": 1250 + }, + { + "epoch": 0.69, + "learning_rate": 4.368097374766556e-05, + "loss": 0.9744, + "step": 1255 + }, + { + "epoch": 0.7, + "learning_rate": 4.3632781481013105e-05, + "loss": 1.0078, + "step": 1260 + }, + { + "epoch": 0.7, + "learning_rate": 4.358443294358077e-05, + "loss": 0.9865, + "step": 1265 + }, + { + "epoch": 0.7, + "learning_rate": 4.35359285408616e-05, + "loss": 0.976, + "step": 1270 + }, + { + "epoch": 0.71, + "learning_rate": 4.348726867965591e-05, + "loss": 1.0141, + "step": 1275 + }, + { + "epoch": 0.71, + "learning_rate": 4.343845376806777e-05, + "loss": 0.952, + "step": 1280 + }, + { + "epoch": 0.71, + "learning_rate": 4.338948421550169e-05, + "loss": 1.0239, + "step": 1285 + }, + { + "epoch": 0.71, + "learning_rate": 4.334036043265909e-05, + "loss": 0.994, + "step": 1290 + }, + { + "epoch": 0.72, + "learning_rate": 4.329108283153492e-05, + "loss": 1.0505, + "step": 1295 + }, + { + "epoch": 0.72, + "learning_rate": 4.3241651825414195e-05, + "loss": 0.9919, + "step": 1300 + }, + { + "epoch": 0.72, + "learning_rate": 4.31920678288685e-05, + "loss": 1.0169, + "step": 1305 + }, + { + "epoch": 0.72, + "learning_rate": 4.3142331257752546e-05, + "loss": 0.9938, + "step": 1310 + }, + { + "epoch": 0.73, + "learning_rate": 4.309244252920064e-05, + "loss": 0.92, + "step": 1315 + }, + { + "epoch": 0.73, + "learning_rate": 4.304240206162326e-05, + "loss": 0.9537, + "step": 1320 + }, + { + "epoch": 0.73, + "learning_rate": 4.299221027470345e-05, + "loss": 0.996, + "step": 1325 + }, + { + "epoch": 0.74, + "learning_rate": 4.294186758939339e-05, + "loss": 0.9652, + "step": 1330 + }, + { + "epoch": 0.74, + "learning_rate": 4.2891374427910795e-05, + "loss": 1.0138, + "step": 1335 + }, + { + "epoch": 0.74, + "learning_rate": 4.284073121373544e-05, + "loss": 0.9267, + "step": 1340 + }, + { + "epoch": 0.74, + "learning_rate": 4.278993837160553e-05, + "loss": 0.9899, + "step": 1345 + }, + { + "epoch": 0.75, + "learning_rate": 4.273899632751422e-05, + "loss": 0.963, + "step": 1350 + }, + { + "epoch": 0.75, + "learning_rate": 4.2687905508705974e-05, + "loss": 1.0066, + "step": 1355 + }, + { + "epoch": 0.75, + "learning_rate": 4.263666634367303e-05, + "loss": 0.9939, + "step": 1360 + }, + { + "epoch": 0.75, + "learning_rate": 4.258527926215178e-05, + "loss": 0.9667, + "step": 1365 + }, + { + "epoch": 0.76, + "learning_rate": 4.253374469511917e-05, + "loss": 0.9999, + "step": 1370 + }, + { + "epoch": 0.76, + "learning_rate": 4.248206307478909e-05, + "loss": 0.9738, + "step": 1375 + }, + { + "epoch": 0.76, + "learning_rate": 4.243023483460875e-05, + "loss": 0.9829, + "step": 1380 + }, + { + "epoch": 0.77, + "learning_rate": 4.237826040925503e-05, + "loss": 1.0107, + "step": 1385 + }, + { + "epoch": 0.77, + "learning_rate": 4.232614023463088e-05, + "loss": 0.9992, + "step": 1390 + }, + { + "epoch": 0.77, + "learning_rate": 4.227387474786159e-05, + "loss": 0.9566, + "step": 1395 + }, + { + "epoch": 0.77, + "learning_rate": 4.222146438729119e-05, + "loss": 0.9722, + "step": 1400 + }, + { + "epoch": 0.78, + "learning_rate": 4.216890959247873e-05, + "loss": 0.988, + "step": 1405 + }, + { + "epoch": 0.78, + "learning_rate": 4.211621080419463e-05, + "loss": 1.0103, + "step": 1410 + }, + { + "epoch": 0.78, + "learning_rate": 4.206336846441695e-05, + "loss": 0.9805, + "step": 1415 + }, + { + "epoch": 0.79, + "learning_rate": 4.201038301632772e-05, + "loss": 1.0177, + "step": 1420 + }, + { + "epoch": 0.79, + "learning_rate": 4.195725490430917e-05, + "loss": 0.9938, + "step": 1425 + }, + { + "epoch": 0.79, + "learning_rate": 4.190398457394007e-05, + "loss": 1.0276, + "step": 1430 + }, + { + "epoch": 0.79, + "learning_rate": 4.1850572471991924e-05, + "loss": 0.9909, + "step": 1435 + }, + { + "epoch": 0.8, + "learning_rate": 4.1797019046425264e-05, + "loss": 0.9794, + "step": 1440 + }, + { + "epoch": 0.8, + "learning_rate": 4.1743324746385914e-05, + "loss": 1.0073, + "step": 1445 + }, + { + "epoch": 0.8, + "learning_rate": 4.1689490022201154e-05, + "loss": 1.0106, + "step": 1450 + }, + { + "epoch": 0.8, + "learning_rate": 4.163551532537601e-05, + "loss": 1.0241, + "step": 1455 + }, + { + "epoch": 0.81, + "learning_rate": 4.1581401108589425e-05, + "loss": 1.0178, + "step": 1460 + }, + { + "epoch": 0.81, + "learning_rate": 4.1527147825690495e-05, + "loss": 0.9426, + "step": 1465 + }, + { + "epoch": 0.81, + "learning_rate": 4.1472755931694626e-05, + "loss": 0.9642, + "step": 1470 + }, + { + "epoch": 0.82, + "learning_rate": 4.141822588277976e-05, + "loss": 0.9678, + "step": 1475 + }, + { + "epoch": 0.82, + "learning_rate": 4.136355813628251e-05, + "loss": 0.9654, + "step": 1480 + }, + { + "epoch": 0.82, + "learning_rate": 4.130875315069435e-05, + "loss": 0.9748, + "step": 1485 + }, + { + "epoch": 0.82, + "learning_rate": 4.125381138565775e-05, + "loss": 0.9321, + "step": 1490 + }, + { + "epoch": 0.83, + "learning_rate": 4.1198733301962346e-05, + "loss": 0.9885, + "step": 1495 + }, + { + "epoch": 0.83, + "learning_rate": 4.114351936154105e-05, + "loss": 0.9385, + "step": 1500 + }, + { + "epoch": 0.83, + "learning_rate": 4.108817002746619e-05, + "loss": 0.9362, + "step": 1505 + }, + { + "epoch": 0.83, + "learning_rate": 4.1032685763945625e-05, + "loss": 0.9764, + "step": 1510 + }, + { + "epoch": 0.84, + "learning_rate": 4.097706703631886e-05, + "loss": 0.961, + "step": 1515 + }, + { + "epoch": 0.84, + "learning_rate": 4.092131431105312e-05, + "loss": 0.9818, + "step": 1520 + }, + { + "epoch": 0.84, + "learning_rate": 4.086542805573945e-05, + "loss": 0.9855, + "step": 1525 + }, + { + "epoch": 0.85, + "learning_rate": 4.080940873908881e-05, + "loss": 0.9618, + "step": 1530 + }, + { + "epoch": 0.85, + "learning_rate": 4.07532568309281e-05, + "loss": 1.0102, + "step": 1535 + }, + { + "epoch": 0.85, + "learning_rate": 4.069697280219628e-05, + "loss": 0.9773, + "step": 1540 + }, + { + "epoch": 0.85, + "learning_rate": 4.0640557124940376e-05, + "loss": 0.9687, + "step": 1545 + }, + { + "epoch": 0.86, + "learning_rate": 4.058401027231152e-05, + "loss": 0.9818, + "step": 1550 + }, + { + "epoch": 0.86, + "learning_rate": 4.052733271856103e-05, + "loss": 0.9911, + "step": 1555 + }, + { + "epoch": 0.86, + "learning_rate": 4.0470524939036355e-05, + "loss": 0.9694, + "step": 1560 + }, + { + "epoch": 0.87, + "learning_rate": 4.0413587410177155e-05, + "loss": 0.9374, + "step": 1565 + }, + { + "epoch": 0.87, + "learning_rate": 4.035652060951128e-05, + "loss": 0.956, + "step": 1570 + }, + { + "epoch": 0.87, + "learning_rate": 4.0299325015650774e-05, + "loss": 0.9491, + "step": 1575 + }, + { + "epoch": 0.87, + "learning_rate": 4.024200110828783e-05, + "loss": 0.9569, + "step": 1580 + }, + { + "epoch": 0.88, + "learning_rate": 4.018454936819082e-05, + "loss": 0.9627, + "step": 1585 + }, + { + "epoch": 0.88, + "learning_rate": 4.012697027720018e-05, + "loss": 0.9922, + "step": 1590 + }, + { + "epoch": 0.88, + "learning_rate": 4.0069264318224506e-05, + "loss": 0.9703, + "step": 1595 + }, + { + "epoch": 0.88, + "learning_rate": 4.0011431975236337e-05, + "loss": 0.9436, + "step": 1600 + }, + { + "epoch": 0.89, + "learning_rate": 3.995347373326822e-05, + "loss": 0.961, + "step": 1605 + }, + { + "epoch": 0.89, + "learning_rate": 3.989539007840861e-05, + "loss": 0.9247, + "step": 1610 + }, + { + "epoch": 0.89, + "learning_rate": 3.983718149779775e-05, + "loss": 0.9537, + "step": 1615 + }, + { + "epoch": 0.9, + "learning_rate": 3.9778848479623656e-05, + "loss": 0.9415, + "step": 1620 + }, + { + "epoch": 0.9, + "learning_rate": 3.972039151311795e-05, + "loss": 0.9954, + "step": 1625 + }, + { + "epoch": 0.9, + "learning_rate": 3.966181108855183e-05, + "loss": 0.9451, + "step": 1630 + }, + { + "epoch": 0.9, + "learning_rate": 3.960310769723189e-05, + "loss": 0.9977, + "step": 1635 + }, + { + "epoch": 0.91, + "learning_rate": 3.9544281831496034e-05, + "loss": 0.9806, + "step": 1640 + }, + { + "epoch": 0.91, + "learning_rate": 3.9485333984709374e-05, + "loss": 0.9851, + "step": 1645 + }, + { + "epoch": 0.91, + "learning_rate": 3.942626465126001e-05, + "loss": 1.0089, + "step": 1650 + }, + { + "epoch": 0.92, + "learning_rate": 3.9367074326555e-05, + "loss": 0.9562, + "step": 1655 + }, + { + "epoch": 0.92, + "learning_rate": 3.930776350701609e-05, + "loss": 0.9892, + "step": 1660 + }, + { + "epoch": 0.92, + "learning_rate": 3.92483326900756e-05, + "loss": 0.9875, + "step": 1665 + }, + { + "epoch": 0.92, + "learning_rate": 3.91887823741723e-05, + "loss": 1.0012, + "step": 1670 + }, + { + "epoch": 0.93, + "learning_rate": 3.9129113058747136e-05, + "loss": 0.97, + "step": 1675 + }, + { + "epoch": 0.93, + "learning_rate": 3.9069325244239095e-05, + "loss": 0.9793, + "step": 1680 + }, + { + "epoch": 0.93, + "learning_rate": 3.900941943208103e-05, + "loss": 0.9864, + "step": 1685 + }, + { + "epoch": 0.93, + "learning_rate": 3.894939612469539e-05, + "loss": 0.9596, + "step": 1690 + }, + { + "epoch": 0.94, + "learning_rate": 3.888925582549006e-05, + "loss": 0.9679, + "step": 1695 + }, + { + "epoch": 0.94, + "learning_rate": 3.882899903885412e-05, + "loss": 0.9395, + "step": 1700 + } + ], + "max_steps": 5424, + "num_train_epochs": 3, + "total_flos": 5.52449530134528e+17, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-1700/training_args.bin b/checkpoint-1700/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..77433459daca6603950afc463ff989e817f81ac3 --- /dev/null +++ b/checkpoint-1700/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0f6a8853599a52b1f15984d75ce4b6434c335fdc67d4509d0b2ca0d8c613590a +size 3362 diff --git a/checkpoint-1800/README.md b/checkpoint-1800/README.md new file mode 100644 index 0000000000000000000000000000000000000000..53c0fe6b436905ad7a2c060e98fe1ebdeac1a2fe --- /dev/null +++ b/checkpoint-1800/README.md @@ -0,0 +1,15 @@ +--- +library_name: peft +--- +## Training procedure + + +The following `bitsandbytes` quantization config was used during training: +- load_in_8bit: True +- llm_int8_threshold: 6.0 +- llm_int8_skip_modules: None +- llm_int8_enable_fp32_cpu_offload: False +### Framework versions + + +- PEFT 0.5.0 diff --git a/checkpoint-1800/adapter_config.json b/checkpoint-1800/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..514ae99ac7471cc26016f7575e44dd8304f29b07 --- /dev/null +++ b/checkpoint-1800/adapter_config.json @@ -0,0 +1,21 @@ +{ + "auto_mapping": null, + "base_model_name_or_path": "meta-llama/Llama-2-7b-chat-hf", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "q_proj", + "v_proj" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-1800/adapter_model.bin b/checkpoint-1800/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..ba40789cf8c82feaec86d7399ad8a4a2971944b4 --- /dev/null +++ b/checkpoint-1800/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e44ec5df9df116b0810100e05fcb36c14895fc55c744b273376b947fd3d66e3b +size 16821197 diff --git a/checkpoint-1800/finetuning_args.json b/checkpoint-1800/finetuning_args.json new file mode 100644 index 0000000000000000000000000000000000000000..d01efc206b59c6f88548e8f3940579f2ed2af33b --- /dev/null +++ b/checkpoint-1800/finetuning_args.json @@ -0,0 +1,16 @@ +{ + "dpo_beta": 0.1, + "finetuning_type": "lora", + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "lora_rank": 8, + "lora_target": [ + "q_proj", + "v_proj" + ], + "name_module_trainable": "mlp", + "num_hidden_layers": 32, + "num_layer_trainable": 3, + "ppo_score_norm": false, + "resume_lora_training": true +} diff --git a/checkpoint-1800/optimizer.pt b/checkpoint-1800/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..cc13b941a34cbef06a89817b4fa5e5ba961328de --- /dev/null +++ b/checkpoint-1800/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ba34d50489d9b74bc8e2ac4572b2334c16a4da2e7b429fe0cfc97dbeb23df9b8 +size 33629893 diff --git a/checkpoint-1800/rng_state.pth b/checkpoint-1800/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..a0b4992ea823eef8997382f81031a328ed93b30a --- /dev/null +++ b/checkpoint-1800/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:59e1efe9aefc3e3317212550b5c1d49c5fb52f497d2a0ceb8e0873720e6d50bc +size 14575 diff --git a/checkpoint-1800/scaler.pt b/checkpoint-1800/scaler.pt new file mode 100644 index 0000000000000000000000000000000000000000..32ab56657afdaa3a8cb368a35fa39d6dcf41f3e2 --- /dev/null +++ b/checkpoint-1800/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6e372dbdefa78360aa0c173dac32ed07b6adf2d79936f267acae43d837649895 +size 557 diff --git a/checkpoint-1800/scheduler.pt b/checkpoint-1800/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..c1588ac8c135dc4ac5c302d2ced4b260a181fc65 --- /dev/null +++ b/checkpoint-1800/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ff3a1c03eb5002392be8506dff36c13f52e7b419b7da1f218a24d8d9d6b49ef1 +size 627 diff --git a/checkpoint-1800/trainer_state.json b/checkpoint-1800/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..90d82ce396a4491be51d424cad107bfe1489135c --- /dev/null +++ b/checkpoint-1800/trainer_state.json @@ -0,0 +1,2176 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.9952999723527786, + "global_step": 1800, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.9999895164082156e-05, + "loss": 1.6349, + "step": 5 + }, + { + "epoch": 0.01, + "learning_rate": 4.999958065720787e-05, + "loss": 1.6199, + "step": 10 + }, + { + "epoch": 0.01, + "learning_rate": 4.999905648201487e-05, + "loss": 1.4834, + "step": 15 + }, + { + "epoch": 0.01, + "learning_rate": 4.999832264289934e-05, + "loss": 1.3882, + "step": 20 + }, + { + "epoch": 0.01, + "learning_rate": 4.999737914601591e-05, + "loss": 1.3679, + "step": 25 + }, + { + "epoch": 0.02, + "learning_rate": 4.999622599927756e-05, + "loss": 1.2396, + "step": 30 + }, + { + "epoch": 0.02, + "learning_rate": 4.999486321235559e-05, + "loss": 1.321, + "step": 35 + }, + { + "epoch": 0.02, + "learning_rate": 4.9993290796679516e-05, + "loss": 1.2874, + "step": 40 + }, + { + "epoch": 0.02, + "learning_rate": 4.999150876543699e-05, + "loss": 1.2607, + "step": 45 + }, + { + "epoch": 0.03, + "learning_rate": 4.9989517133573694e-05, + "loss": 1.2454, + "step": 50 + }, + { + "epoch": 0.03, + "learning_rate": 4.9987315917793174e-05, + "loss": 1.2799, + "step": 55 + }, + { + "epoch": 0.03, + "learning_rate": 4.998490513655676e-05, + "loss": 1.2575, + "step": 60 + }, + { + "epoch": 0.04, + "learning_rate": 4.998228481008337e-05, + "loss": 1.2404, + "step": 65 + }, + { + "epoch": 0.04, + "learning_rate": 4.997945496034934e-05, + "loss": 1.2219, + "step": 70 + }, + { + "epoch": 0.04, + "learning_rate": 4.9976415611088267e-05, + "loss": 1.2241, + "step": 75 + }, + { + "epoch": 0.04, + "learning_rate": 4.997316678779079e-05, + "loss": 1.1716, + "step": 80 + }, + { + "epoch": 0.05, + "learning_rate": 4.996970851770438e-05, + "loss": 1.1883, + "step": 85 + }, + { + "epoch": 0.05, + "learning_rate": 4.9966040829833115e-05, + "loss": 1.205, + "step": 90 + }, + { + "epoch": 0.05, + "learning_rate": 4.9962163754937426e-05, + "loss": 1.1246, + "step": 95 + }, + { + "epoch": 0.06, + "learning_rate": 4.995807732553384e-05, + "loss": 1.1636, + "step": 100 + }, + { + "epoch": 0.06, + "learning_rate": 4.9953781575894723e-05, + "loss": 1.158, + "step": 105 + }, + { + "epoch": 0.06, + "learning_rate": 4.9949276542048e-05, + "loss": 1.1477, + "step": 110 + }, + { + "epoch": 0.06, + "learning_rate": 4.9944562261776805e-05, + "loss": 1.1678, + "step": 115 + }, + { + "epoch": 0.07, + "learning_rate": 4.9939638774619216e-05, + "loss": 1.1501, + "step": 120 + }, + { + "epoch": 0.07, + "learning_rate": 4.99345061218679e-05, + "loss": 1.1955, + "step": 125 + }, + { + "epoch": 0.07, + "learning_rate": 4.9929164346569756e-05, + "loss": 1.1724, + "step": 130 + }, + { + "epoch": 0.07, + "learning_rate": 4.9923613493525576e-05, + "loss": 1.177, + "step": 135 + }, + { + "epoch": 0.08, + "learning_rate": 4.991785360928968e-05, + "loss": 1.1418, + "step": 140 + }, + { + "epoch": 0.08, + "learning_rate": 4.991188474216947e-05, + "loss": 1.1898, + "step": 145 + }, + { + "epoch": 0.08, + "learning_rate": 4.9905706942225094e-05, + "loss": 1.1479, + "step": 150 + }, + { + "epoch": 0.09, + "learning_rate": 4.9899320261268966e-05, + "loss": 1.1356, + "step": 155 + }, + { + "epoch": 0.09, + "learning_rate": 4.989272475286537e-05, + "loss": 1.1397, + "step": 160 + }, + { + "epoch": 0.09, + "learning_rate": 4.9885920472330004e-05, + "loss": 1.1215, + "step": 165 + }, + { + "epoch": 0.09, + "learning_rate": 4.9878907476729516e-05, + "loss": 1.167, + "step": 170 + }, + { + "epoch": 0.1, + "learning_rate": 4.9871685824881e-05, + "loss": 1.1219, + "step": 175 + }, + { + "epoch": 0.1, + "learning_rate": 4.9864255577351534e-05, + "loss": 1.0835, + "step": 180 + }, + { + "epoch": 0.1, + "learning_rate": 4.985661679645769e-05, + "loss": 1.0721, + "step": 185 + }, + { + "epoch": 0.11, + "learning_rate": 4.9848769546264915e-05, + "loss": 1.0692, + "step": 190 + }, + { + "epoch": 0.11, + "learning_rate": 4.9840713892587146e-05, + "loss": 1.0488, + "step": 195 + }, + { + "epoch": 0.11, + "learning_rate": 4.983244990298609e-05, + "loss": 1.1285, + "step": 200 + }, + { + "epoch": 0.11, + "learning_rate": 4.982397764677081e-05, + "loss": 1.0832, + "step": 205 + }, + { + "epoch": 0.12, + "learning_rate": 4.981529719499704e-05, + "loss": 1.0652, + "step": 210 + }, + { + "epoch": 0.12, + "learning_rate": 4.980640862046663e-05, + "loss": 1.1043, + "step": 215 + }, + { + "epoch": 0.12, + "learning_rate": 4.979731199772693e-05, + "loss": 1.112, + "step": 220 + }, + { + "epoch": 0.12, + "learning_rate": 4.9788007403070146e-05, + "loss": 1.1029, + "step": 225 + }, + { + "epoch": 0.13, + "learning_rate": 4.977849491453277e-05, + "loss": 1.0869, + "step": 230 + }, + { + "epoch": 0.13, + "learning_rate": 4.976877461189481e-05, + "loss": 1.0843, + "step": 235 + }, + { + "epoch": 0.13, + "learning_rate": 4.975884657667922e-05, + "loss": 1.0789, + "step": 240 + }, + { + "epoch": 0.14, + "learning_rate": 4.974871089215118e-05, + "loss": 1.0449, + "step": 245 + }, + { + "epoch": 0.14, + "learning_rate": 4.9738367643317405e-05, + "loss": 1.1053, + "step": 250 + }, + { + "epoch": 0.14, + "learning_rate": 4.9727816916925395e-05, + "loss": 1.0651, + "step": 255 + }, + { + "epoch": 0.14, + "learning_rate": 4.971705880146276e-05, + "loss": 1.0828, + "step": 260 + }, + { + "epoch": 0.15, + "learning_rate": 4.970609338715646e-05, + "loss": 1.0932, + "step": 265 + }, + { + "epoch": 0.15, + "learning_rate": 4.969492076597203e-05, + "loss": 1.0648, + "step": 270 + }, + { + "epoch": 0.15, + "learning_rate": 4.968354103161283e-05, + "loss": 1.0948, + "step": 275 + }, + { + "epoch": 0.15, + "learning_rate": 4.967195427951926e-05, + "loss": 1.0721, + "step": 280 + }, + { + "epoch": 0.16, + "learning_rate": 4.9660160606867936e-05, + "loss": 1.124, + "step": 285 + }, + { + "epoch": 0.16, + "learning_rate": 4.9648160112570896e-05, + "loss": 1.0963, + "step": 290 + }, + { + "epoch": 0.16, + "learning_rate": 4.9635952897274773e-05, + "loss": 1.1078, + "step": 295 + }, + { + "epoch": 0.17, + "learning_rate": 4.9623539063359925e-05, + "loss": 1.1059, + "step": 300 + }, + { + "epoch": 0.17, + "learning_rate": 4.961091871493962e-05, + "loss": 1.1032, + "step": 305 + }, + { + "epoch": 0.17, + "learning_rate": 4.959809195785912e-05, + "loss": 1.0595, + "step": 310 + }, + { + "epoch": 0.17, + "learning_rate": 4.958505889969481e-05, + "loss": 1.1096, + "step": 315 + }, + { + "epoch": 0.18, + "learning_rate": 4.957181964975329e-05, + "loss": 1.0589, + "step": 320 + }, + { + "epoch": 0.18, + "learning_rate": 4.955837431907049e-05, + "loss": 1.0608, + "step": 325 + }, + { + "epoch": 0.18, + "learning_rate": 4.954472302041069e-05, + "loss": 1.0819, + "step": 330 + }, + { + "epoch": 0.19, + "learning_rate": 4.9530865868265605e-05, + "loss": 1.0759, + "step": 335 + }, + { + "epoch": 0.19, + "learning_rate": 4.951680297885342e-05, + "loss": 1.0515, + "step": 340 + }, + { + "epoch": 0.19, + "learning_rate": 4.950253447011779e-05, + "loss": 1.0371, + "step": 345 + }, + { + "epoch": 0.19, + "learning_rate": 4.948806046172691e-05, + "loss": 1.0619, + "step": 350 + }, + { + "epoch": 0.2, + "learning_rate": 4.947338107507245e-05, + "loss": 1.0757, + "step": 355 + }, + { + "epoch": 0.2, + "learning_rate": 4.945849643326857e-05, + "loss": 1.0686, + "step": 360 + }, + { + "epoch": 0.2, + "learning_rate": 4.9443406661150874e-05, + "loss": 1.0809, + "step": 365 + }, + { + "epoch": 0.2, + "learning_rate": 4.942811188527537e-05, + "loss": 1.0704, + "step": 370 + }, + { + "epoch": 0.21, + "learning_rate": 4.941261223391742e-05, + "loss": 1.0655, + "step": 375 + }, + { + "epoch": 0.21, + "learning_rate": 4.939690783707063e-05, + "loss": 1.1182, + "step": 380 + }, + { + "epoch": 0.21, + "learning_rate": 4.938099882644578e-05, + "loss": 1.081, + "step": 385 + }, + { + "epoch": 0.22, + "learning_rate": 4.9364885335469734e-05, + "loss": 1.0792, + "step": 390 + }, + { + "epoch": 0.22, + "learning_rate": 4.93485674992843e-05, + "loss": 1.0244, + "step": 395 + }, + { + "epoch": 0.22, + "learning_rate": 4.933204545474511e-05, + "loss": 1.0531, + "step": 400 + }, + { + "epoch": 0.22, + "learning_rate": 4.9315319340420465e-05, + "loss": 1.048, + "step": 405 + }, + { + "epoch": 0.23, + "learning_rate": 4.929838929659015e-05, + "loss": 1.04, + "step": 410 + }, + { + "epoch": 0.23, + "learning_rate": 4.9281255465244314e-05, + "loss": 1.0686, + "step": 415 + }, + { + "epoch": 0.23, + "learning_rate": 4.926391799008223e-05, + "loss": 1.0933, + "step": 420 + }, + { + "epoch": 0.24, + "learning_rate": 4.924637701651111e-05, + "loss": 1.0313, + "step": 425 + }, + { + "epoch": 0.24, + "learning_rate": 4.9228632691644874e-05, + "loss": 1.0811, + "step": 430 + }, + { + "epoch": 0.24, + "learning_rate": 4.921068516430293e-05, + "loss": 1.0617, + "step": 435 + }, + { + "epoch": 0.24, + "learning_rate": 4.919253458500892e-05, + "loss": 1.0482, + "step": 440 + }, + { + "epoch": 0.25, + "learning_rate": 4.9174181105989445e-05, + "loss": 1.0681, + "step": 445 + }, + { + "epoch": 0.25, + "learning_rate": 4.9155624881172834e-05, + "loss": 1.0476, + "step": 450 + }, + { + "epoch": 0.25, + "learning_rate": 4.913686606618777e-05, + "loss": 1.0463, + "step": 455 + }, + { + "epoch": 0.25, + "learning_rate": 4.911790481836208e-05, + "loss": 1.0213, + "step": 460 + }, + { + "epoch": 0.26, + "learning_rate": 4.909874129672133e-05, + "loss": 0.9855, + "step": 465 + }, + { + "epoch": 0.26, + "learning_rate": 4.907937566198757e-05, + "loss": 1.045, + "step": 470 + }, + { + "epoch": 0.26, + "learning_rate": 4.9059808076577914e-05, + "loss": 1.0442, + "step": 475 + }, + { + "epoch": 0.27, + "learning_rate": 4.904003870460323e-05, + "loss": 1.0354, + "step": 480 + }, + { + "epoch": 0.27, + "learning_rate": 4.9020067711866735e-05, + "loss": 1.0331, + "step": 485 + }, + { + "epoch": 0.27, + "learning_rate": 4.899989526586261e-05, + "loss": 1.0585, + "step": 490 + }, + { + "epoch": 0.27, + "learning_rate": 4.8979521535774636e-05, + "loss": 1.0223, + "step": 495 + }, + { + "epoch": 0.28, + "learning_rate": 4.895894669247468e-05, + "loss": 1.0118, + "step": 500 + }, + { + "epoch": 0.28, + "learning_rate": 4.8938170908521356e-05, + "loss": 1.0508, + "step": 505 + }, + { + "epoch": 0.28, + "learning_rate": 4.8917194358158534e-05, + "loss": 1.0656, + "step": 510 + }, + { + "epoch": 0.28, + "learning_rate": 4.8896017217313886e-05, + "loss": 1.0655, + "step": 515 + }, + { + "epoch": 0.29, + "learning_rate": 4.887463966359741e-05, + "loss": 1.0833, + "step": 520 + }, + { + "epoch": 0.29, + "learning_rate": 4.8853061876299956e-05, + "loss": 1.068, + "step": 525 + }, + { + "epoch": 0.29, + "learning_rate": 4.8831284036391684e-05, + "loss": 1.0487, + "step": 530 + }, + { + "epoch": 0.3, + "learning_rate": 4.880930632652058e-05, + "loss": 1.059, + "step": 535 + }, + { + "epoch": 0.3, + "learning_rate": 4.878712893101092e-05, + "loss": 1.0408, + "step": 540 + }, + { + "epoch": 0.3, + "learning_rate": 4.876475203586171e-05, + "loss": 0.9976, + "step": 545 + }, + { + "epoch": 0.3, + "learning_rate": 4.874217582874514e-05, + "loss": 1.0358, + "step": 550 + }, + { + "epoch": 0.31, + "learning_rate": 4.8719400499005e-05, + "loss": 1.0073, + "step": 555 + }, + { + "epoch": 0.31, + "learning_rate": 4.869642623765509e-05, + "loss": 1.0742, + "step": 560 + }, + { + "epoch": 0.31, + "learning_rate": 4.867325323737765e-05, + "loss": 1.0463, + "step": 565 + }, + { + "epoch": 0.32, + "learning_rate": 4.864988169252168e-05, + "loss": 1.0242, + "step": 570 + }, + { + "epoch": 0.32, + "learning_rate": 4.8626311799101375e-05, + "loss": 1.0773, + "step": 575 + }, + { + "epoch": 0.32, + "learning_rate": 4.860254375479446e-05, + "loss": 1.0371, + "step": 580 + }, + { + "epoch": 0.32, + "learning_rate": 4.8578577758940504e-05, + "loss": 1.0399, + "step": 585 + }, + { + "epoch": 0.33, + "learning_rate": 4.855441401253928e-05, + "loss": 1.0411, + "step": 590 + }, + { + "epoch": 0.33, + "learning_rate": 4.8530052718249076e-05, + "loss": 1.0476, + "step": 595 + }, + { + "epoch": 0.33, + "learning_rate": 4.850549408038498e-05, + "loss": 1.0075, + "step": 600 + }, + { + "epoch": 0.33, + "learning_rate": 4.848073830491717e-05, + "loss": 1.0045, + "step": 605 + }, + { + "epoch": 0.34, + "learning_rate": 4.845578559946923e-05, + "loss": 1.0368, + "step": 610 + }, + { + "epoch": 0.34, + "learning_rate": 4.8430636173316306e-05, + "loss": 1.0156, + "step": 615 + }, + { + "epoch": 0.34, + "learning_rate": 4.840529023738348e-05, + "loss": 1.0334, + "step": 620 + }, + { + "epoch": 0.35, + "learning_rate": 4.837974800424389e-05, + "loss": 0.9994, + "step": 625 + }, + { + "epoch": 0.35, + "learning_rate": 4.8354009688117026e-05, + "loss": 1.0467, + "step": 630 + }, + { + "epoch": 0.35, + "learning_rate": 4.8328075504866874e-05, + "loss": 0.9968, + "step": 635 + }, + { + "epoch": 0.35, + "learning_rate": 4.8301945672000164e-05, + "loss": 1.041, + "step": 640 + }, + { + "epoch": 0.36, + "learning_rate": 4.8275620408664487e-05, + "loss": 0.9811, + "step": 645 + }, + { + "epoch": 0.36, + "learning_rate": 4.8249099935646494e-05, + "loss": 1.0235, + "step": 650 + }, + { + "epoch": 0.36, + "learning_rate": 4.822238447537003e-05, + "loss": 1.0152, + "step": 655 + }, + { + "epoch": 0.36, + "learning_rate": 4.819547425189429e-05, + "loss": 1.038, + "step": 660 + }, + { + "epoch": 0.37, + "learning_rate": 4.81683694909119e-05, + "loss": 1.0387, + "step": 665 + }, + { + "epoch": 0.37, + "learning_rate": 4.814107041974707e-05, + "loss": 1.019, + "step": 670 + }, + { + "epoch": 0.37, + "learning_rate": 4.811357726735366e-05, + "loss": 1.0403, + "step": 675 + }, + { + "epoch": 0.38, + "learning_rate": 4.808589026431324e-05, + "loss": 1.0828, + "step": 680 + }, + { + "epoch": 0.38, + "learning_rate": 4.805800964283322e-05, + "loss": 1.0128, + "step": 685 + }, + { + "epoch": 0.38, + "learning_rate": 4.802993563674483e-05, + "loss": 1.0502, + "step": 690 + }, + { + "epoch": 0.38, + "learning_rate": 4.80016684815012e-05, + "loss": 1.0169, + "step": 695 + }, + { + "epoch": 0.39, + "learning_rate": 4.7973208414175406e-05, + "loss": 1.0316, + "step": 700 + }, + { + "epoch": 0.39, + "learning_rate": 4.794455567345842e-05, + "loss": 1.0263, + "step": 705 + }, + { + "epoch": 0.39, + "learning_rate": 4.791571049965714e-05, + "loss": 1.0801, + "step": 710 + }, + { + "epoch": 0.4, + "learning_rate": 4.7886673134692404e-05, + "loss": 1.0575, + "step": 715 + }, + { + "epoch": 0.4, + "learning_rate": 4.7857443822096905e-05, + "loss": 0.9767, + "step": 720 + }, + { + "epoch": 0.4, + "learning_rate": 4.782802280701319e-05, + "loss": 0.9666, + "step": 725 + }, + { + "epoch": 0.4, + "learning_rate": 4.77984103361916e-05, + "loss": 0.9983, + "step": 730 + }, + { + "epoch": 0.41, + "learning_rate": 4.776860665798816e-05, + "loss": 1.0219, + "step": 735 + }, + { + "epoch": 0.41, + "learning_rate": 4.773861202236257e-05, + "loss": 0.9963, + "step": 740 + }, + { + "epoch": 0.41, + "learning_rate": 4.770842668087602e-05, + "loss": 1.0219, + "step": 745 + }, + { + "epoch": 0.41, + "learning_rate": 4.767805088668916e-05, + "loss": 1.0029, + "step": 750 + }, + { + "epoch": 0.42, + "learning_rate": 4.7647484894559936e-05, + "loss": 1.0077, + "step": 755 + }, + { + "epoch": 0.42, + "learning_rate": 4.7616728960841444e-05, + "loss": 0.9908, + "step": 760 + }, + { + "epoch": 0.42, + "learning_rate": 4.758578334347981e-05, + "loss": 1.0092, + "step": 765 + }, + { + "epoch": 0.43, + "learning_rate": 4.7554648302012015e-05, + "loss": 0.9874, + "step": 770 + }, + { + "epoch": 0.43, + "learning_rate": 4.7523324097563706e-05, + "loss": 1.0185, + "step": 775 + }, + { + "epoch": 0.43, + "learning_rate": 4.749181099284703e-05, + "loss": 1.0533, + "step": 780 + }, + { + "epoch": 0.43, + "learning_rate": 4.746010925215839e-05, + "loss": 1.0083, + "step": 785 + }, + { + "epoch": 0.44, + "learning_rate": 4.74282191413763e-05, + "loss": 1.0191, + "step": 790 + }, + { + "epoch": 0.44, + "learning_rate": 4.7396140927959045e-05, + "loss": 0.9786, + "step": 795 + }, + { + "epoch": 0.44, + "learning_rate": 4.7363874880942574e-05, + "loss": 1.0197, + "step": 800 + }, + { + "epoch": 0.45, + "learning_rate": 4.733142127093813e-05, + "loss": 1.0471, + "step": 805 + }, + { + "epoch": 0.45, + "learning_rate": 4.7298780370130014e-05, + "loss": 1.0134, + "step": 810 + }, + { + "epoch": 0.45, + "learning_rate": 4.726595245227336e-05, + "loss": 0.9801, + "step": 815 + }, + { + "epoch": 0.45, + "learning_rate": 4.723293779269173e-05, + "loss": 0.9944, + "step": 820 + }, + { + "epoch": 0.46, + "learning_rate": 4.7199736668274924e-05, + "loss": 1.0358, + "step": 825 + }, + { + "epoch": 0.46, + "learning_rate": 4.716634935747655e-05, + "loss": 0.9907, + "step": 830 + }, + { + "epoch": 0.46, + "learning_rate": 4.713277614031177e-05, + "loss": 1.0698, + "step": 835 + }, + { + "epoch": 0.46, + "learning_rate": 4.70990172983549e-05, + "loss": 1.0466, + "step": 840 + }, + { + "epoch": 0.47, + "learning_rate": 4.706507311473707e-05, + "loss": 0.9489, + "step": 845 + }, + { + "epoch": 0.47, + "learning_rate": 4.703094387414385e-05, + "loss": 0.9936, + "step": 850 + }, + { + "epoch": 0.47, + "learning_rate": 4.699662986281288e-05, + "loss": 0.9672, + "step": 855 + }, + { + "epoch": 0.48, + "learning_rate": 4.696213136853141e-05, + "loss": 1.0164, + "step": 860 + }, + { + "epoch": 0.48, + "learning_rate": 4.6927448680633954e-05, + "loss": 0.9817, + "step": 865 + }, + { + "epoch": 0.48, + "learning_rate": 4.689258208999983e-05, + "loss": 1.028, + "step": 870 + }, + { + "epoch": 0.48, + "learning_rate": 4.6857531889050716e-05, + "loss": 1.0239, + "step": 875 + }, + { + "epoch": 0.49, + "learning_rate": 4.682229837174821e-05, + "loss": 1.0378, + "step": 880 + }, + { + "epoch": 0.49, + "learning_rate": 4.678688183359135e-05, + "loss": 0.9908, + "step": 885 + }, + { + "epoch": 0.49, + "learning_rate": 4.675128257161418e-05, + "loss": 0.9384, + "step": 890 + }, + { + "epoch": 0.49, + "learning_rate": 4.671550088438319e-05, + "loss": 0.9481, + "step": 895 + }, + { + "epoch": 0.5, + "learning_rate": 4.6679537071994874e-05, + "loss": 0.9541, + "step": 900 + }, + { + "epoch": 0.5, + "learning_rate": 4.6643391436073165e-05, + "loss": 0.9787, + "step": 905 + }, + { + "epoch": 0.5, + "learning_rate": 4.660706427976693e-05, + "loss": 0.9897, + "step": 910 + }, + { + "epoch": 0.51, + "learning_rate": 4.657055590774745e-05, + "loss": 0.9947, + "step": 915 + }, + { + "epoch": 0.51, + "learning_rate": 4.6533866626205805e-05, + "loss": 0.9614, + "step": 920 + }, + { + "epoch": 0.51, + "learning_rate": 4.649699674285036e-05, + "loss": 1.0323, + "step": 925 + }, + { + "epoch": 0.51, + "learning_rate": 4.645994656690417e-05, + "loss": 1.0345, + "step": 930 + }, + { + "epoch": 0.52, + "learning_rate": 4.642271640910235e-05, + "loss": 1.0432, + "step": 935 + }, + { + "epoch": 0.52, + "learning_rate": 4.638530658168954e-05, + "loss": 1.0102, + "step": 940 + }, + { + "epoch": 0.52, + "learning_rate": 4.6347717398417203e-05, + "loss": 1.0361, + "step": 945 + }, + { + "epoch": 0.53, + "learning_rate": 4.6309949174541096e-05, + "loss": 0.9818, + "step": 950 + }, + { + "epoch": 0.53, + "learning_rate": 4.627200222681851e-05, + "loss": 1.0293, + "step": 955 + }, + { + "epoch": 0.53, + "learning_rate": 4.6233876873505694e-05, + "loss": 1.0125, + "step": 960 + }, + { + "epoch": 0.53, + "learning_rate": 4.619557343435516e-05, + "loss": 1.0039, + "step": 965 + }, + { + "epoch": 0.54, + "learning_rate": 4.615709223061302e-05, + "loss": 1.0146, + "step": 970 + }, + { + "epoch": 0.54, + "learning_rate": 4.611843358501624e-05, + "loss": 1.0352, + "step": 975 + }, + { + "epoch": 0.54, + "learning_rate": 4.6079597821789993e-05, + "loss": 1.0405, + "step": 980 + }, + { + "epoch": 0.54, + "learning_rate": 4.604058526664491e-05, + "loss": 0.9834, + "step": 985 + }, + { + "epoch": 0.55, + "learning_rate": 4.600139624677436e-05, + "loss": 1.0252, + "step": 990 + }, + { + "epoch": 0.55, + "learning_rate": 4.596203109085168e-05, + "loss": 1.0516, + "step": 995 + }, + { + "epoch": 0.55, + "learning_rate": 4.5922490129027464e-05, + "loss": 0.9762, + "step": 1000 + }, + { + "epoch": 0.56, + "learning_rate": 4.588277369292674e-05, + "loss": 0.9801, + "step": 1005 + }, + { + "epoch": 0.56, + "learning_rate": 4.5842882115646234e-05, + "loss": 1.0127, + "step": 1010 + }, + { + "epoch": 0.56, + "learning_rate": 4.580281573175157e-05, + "loss": 1.0415, + "step": 1015 + }, + { + "epoch": 0.56, + "learning_rate": 4.576257487727442e-05, + "loss": 0.9974, + "step": 1020 + }, + { + "epoch": 0.57, + "learning_rate": 4.572215988970974e-05, + "loss": 1.0097, + "step": 1025 + }, + { + "epoch": 0.57, + "learning_rate": 4.568157110801293e-05, + "loss": 1.0004, + "step": 1030 + }, + { + "epoch": 0.57, + "learning_rate": 4.5640808872596944e-05, + "loss": 0.9932, + "step": 1035 + }, + { + "epoch": 0.58, + "learning_rate": 4.5599873525329505e-05, + "loss": 0.9437, + "step": 1040 + }, + { + "epoch": 0.58, + "learning_rate": 4.555876540953019e-05, + "loss": 1.0004, + "step": 1045 + }, + { + "epoch": 0.58, + "learning_rate": 4.551748486996755e-05, + "loss": 0.9743, + "step": 1050 + }, + { + "epoch": 0.58, + "learning_rate": 4.547603225285626e-05, + "loss": 1.0303, + "step": 1055 + }, + { + "epoch": 0.59, + "learning_rate": 4.543440790585417e-05, + "loss": 1.0225, + "step": 1060 + }, + { + "epoch": 0.59, + "learning_rate": 4.539261217805939e-05, + "loss": 1.0102, + "step": 1065 + }, + { + "epoch": 0.59, + "learning_rate": 4.535064542000743e-05, + "loss": 1.0087, + "step": 1070 + }, + { + "epoch": 0.59, + "learning_rate": 4.5308507983668165e-05, + "loss": 0.952, + "step": 1075 + }, + { + "epoch": 0.6, + "learning_rate": 4.526620022244293e-05, + "loss": 0.9767, + "step": 1080 + }, + { + "epoch": 0.6, + "learning_rate": 4.522372249116158e-05, + "loss": 1.0049, + "step": 1085 + }, + { + "epoch": 0.6, + "learning_rate": 4.5181075146079456e-05, + "loss": 1.0397, + "step": 1090 + }, + { + "epoch": 0.61, + "learning_rate": 4.5138258544874455e-05, + "loss": 1.0167, + "step": 1095 + }, + { + "epoch": 0.61, + "learning_rate": 4.5095273046643985e-05, + "loss": 0.9966, + "step": 1100 + }, + { + "epoch": 0.61, + "learning_rate": 4.5052119011901986e-05, + "loss": 0.983, + "step": 1105 + }, + { + "epoch": 0.61, + "learning_rate": 4.500879680257587e-05, + "loss": 1.0301, + "step": 1110 + }, + { + "epoch": 0.62, + "learning_rate": 4.4965306782003535e-05, + "loss": 1.0371, + "step": 1115 + }, + { + "epoch": 0.62, + "learning_rate": 4.492164931493028e-05, + "loss": 1.0083, + "step": 1120 + }, + { + "epoch": 0.62, + "learning_rate": 4.487782476750575e-05, + "loss": 0.9728, + "step": 1125 + }, + { + "epoch": 0.62, + "learning_rate": 4.4833833507280884e-05, + "loss": 0.9892, + "step": 1130 + }, + { + "epoch": 0.63, + "learning_rate": 4.4789675903204805e-05, + "loss": 0.9847, + "step": 1135 + }, + { + "epoch": 0.63, + "learning_rate": 4.474535232562176e-05, + "loss": 1.0545, + "step": 1140 + }, + { + "epoch": 0.63, + "learning_rate": 4.470086314626797e-05, + "loss": 1.0204, + "step": 1145 + }, + { + "epoch": 0.64, + "learning_rate": 4.465620873826856e-05, + "loss": 0.9648, + "step": 1150 + }, + { + "epoch": 0.64, + "learning_rate": 4.46113894761344e-05, + "loss": 0.9545, + "step": 1155 + }, + { + "epoch": 0.64, + "learning_rate": 4.456640573575896e-05, + "loss": 1.0256, + "step": 1160 + }, + { + "epoch": 0.64, + "learning_rate": 4.4521257894415183e-05, + "loss": 0.99, + "step": 1165 + }, + { + "epoch": 0.65, + "learning_rate": 4.44759463307523e-05, + "loss": 0.9906, + "step": 1170 + }, + { + "epoch": 0.65, + "learning_rate": 4.443047142479266e-05, + "loss": 0.9876, + "step": 1175 + }, + { + "epoch": 0.65, + "learning_rate": 4.4384833557928553e-05, + "loss": 1.0495, + "step": 1180 + }, + { + "epoch": 0.66, + "learning_rate": 4.4339033112918966e-05, + "loss": 0.9869, + "step": 1185 + }, + { + "epoch": 0.66, + "learning_rate": 4.4293070473886456e-05, + "loss": 1.0299, + "step": 1190 + }, + { + "epoch": 0.66, + "learning_rate": 4.424694602631385e-05, + "loss": 1.0073, + "step": 1195 + }, + { + "epoch": 0.66, + "learning_rate": 4.420066015704105e-05, + "loss": 1.0023, + "step": 1200 + }, + { + "epoch": 0.67, + "learning_rate": 4.41542132542618e-05, + "loss": 0.9462, + "step": 1205 + }, + { + "epoch": 0.67, + "learning_rate": 4.410760570752037e-05, + "loss": 1.0116, + "step": 1210 + }, + { + "epoch": 0.67, + "learning_rate": 4.4060837907708375e-05, + "loss": 0.9652, + "step": 1215 + }, + { + "epoch": 0.67, + "learning_rate": 4.401391024706142e-05, + "loss": 1.0411, + "step": 1220 + }, + { + "epoch": 0.68, + "learning_rate": 4.396682311915586e-05, + "loss": 0.9691, + "step": 1225 + }, + { + "epoch": 0.68, + "learning_rate": 4.3919576918905495e-05, + "loss": 0.961, + "step": 1230 + }, + { + "epoch": 0.68, + "learning_rate": 4.387217204255819e-05, + "loss": 0.9602, + "step": 1235 + }, + { + "epoch": 0.69, + "learning_rate": 4.3824608887692666e-05, + "loss": 1.0197, + "step": 1240 + }, + { + "epoch": 0.69, + "learning_rate": 4.377688785321507e-05, + "loss": 0.9982, + "step": 1245 + }, + { + "epoch": 0.69, + "learning_rate": 4.372900933935569e-05, + "loss": 0.986, + "step": 1250 + }, + { + "epoch": 0.69, + "learning_rate": 4.368097374766556e-05, + "loss": 0.9744, + "step": 1255 + }, + { + "epoch": 0.7, + "learning_rate": 4.3632781481013105e-05, + "loss": 1.0078, + "step": 1260 + }, + { + "epoch": 0.7, + "learning_rate": 4.358443294358077e-05, + "loss": 0.9865, + "step": 1265 + }, + { + "epoch": 0.7, + "learning_rate": 4.35359285408616e-05, + "loss": 0.976, + "step": 1270 + }, + { + "epoch": 0.71, + "learning_rate": 4.348726867965591e-05, + "loss": 1.0141, + "step": 1275 + }, + { + "epoch": 0.71, + "learning_rate": 4.343845376806777e-05, + "loss": 0.952, + "step": 1280 + }, + { + "epoch": 0.71, + "learning_rate": 4.338948421550169e-05, + "loss": 1.0239, + "step": 1285 + }, + { + "epoch": 0.71, + "learning_rate": 4.334036043265909e-05, + "loss": 0.994, + "step": 1290 + }, + { + "epoch": 0.72, + "learning_rate": 4.329108283153492e-05, + "loss": 1.0505, + "step": 1295 + }, + { + "epoch": 0.72, + "learning_rate": 4.3241651825414195e-05, + "loss": 0.9919, + "step": 1300 + }, + { + "epoch": 0.72, + "learning_rate": 4.31920678288685e-05, + "loss": 1.0169, + "step": 1305 + }, + { + "epoch": 0.72, + "learning_rate": 4.3142331257752546e-05, + "loss": 0.9938, + "step": 1310 + }, + { + "epoch": 0.73, + "learning_rate": 4.309244252920064e-05, + "loss": 0.92, + "step": 1315 + }, + { + "epoch": 0.73, + "learning_rate": 4.304240206162326e-05, + "loss": 0.9537, + "step": 1320 + }, + { + "epoch": 0.73, + "learning_rate": 4.299221027470345e-05, + "loss": 0.996, + "step": 1325 + }, + { + "epoch": 0.74, + "learning_rate": 4.294186758939339e-05, + "loss": 0.9652, + "step": 1330 + }, + { + "epoch": 0.74, + "learning_rate": 4.2891374427910795e-05, + "loss": 1.0138, + "step": 1335 + }, + { + "epoch": 0.74, + "learning_rate": 4.284073121373544e-05, + "loss": 0.9267, + "step": 1340 + }, + { + "epoch": 0.74, + "learning_rate": 4.278993837160553e-05, + "loss": 0.9899, + "step": 1345 + }, + { + "epoch": 0.75, + "learning_rate": 4.273899632751422e-05, + "loss": 0.963, + "step": 1350 + }, + { + "epoch": 0.75, + "learning_rate": 4.2687905508705974e-05, + "loss": 1.0066, + "step": 1355 + }, + { + "epoch": 0.75, + "learning_rate": 4.263666634367303e-05, + "loss": 0.9939, + "step": 1360 + }, + { + "epoch": 0.75, + "learning_rate": 4.258527926215178e-05, + "loss": 0.9667, + "step": 1365 + }, + { + "epoch": 0.76, + "learning_rate": 4.253374469511917e-05, + "loss": 0.9999, + "step": 1370 + }, + { + "epoch": 0.76, + "learning_rate": 4.248206307478909e-05, + "loss": 0.9738, + "step": 1375 + }, + { + "epoch": 0.76, + "learning_rate": 4.243023483460875e-05, + "loss": 0.9829, + "step": 1380 + }, + { + "epoch": 0.77, + "learning_rate": 4.237826040925503e-05, + "loss": 1.0107, + "step": 1385 + }, + { + "epoch": 0.77, + "learning_rate": 4.232614023463088e-05, + "loss": 0.9992, + "step": 1390 + }, + { + "epoch": 0.77, + "learning_rate": 4.227387474786159e-05, + "loss": 0.9566, + "step": 1395 + }, + { + "epoch": 0.77, + "learning_rate": 4.222146438729119e-05, + "loss": 0.9722, + "step": 1400 + }, + { + "epoch": 0.78, + "learning_rate": 4.216890959247873e-05, + "loss": 0.988, + "step": 1405 + }, + { + "epoch": 0.78, + "learning_rate": 4.211621080419463e-05, + "loss": 1.0103, + "step": 1410 + }, + { + "epoch": 0.78, + "learning_rate": 4.206336846441695e-05, + "loss": 0.9805, + "step": 1415 + }, + { + "epoch": 0.79, + "learning_rate": 4.201038301632772e-05, + "loss": 1.0177, + "step": 1420 + }, + { + "epoch": 0.79, + "learning_rate": 4.195725490430917e-05, + "loss": 0.9938, + "step": 1425 + }, + { + "epoch": 0.79, + "learning_rate": 4.190398457394007e-05, + "loss": 1.0276, + "step": 1430 + }, + { + "epoch": 0.79, + "learning_rate": 4.1850572471991924e-05, + "loss": 0.9909, + "step": 1435 + }, + { + "epoch": 0.8, + "learning_rate": 4.1797019046425264e-05, + "loss": 0.9794, + "step": 1440 + }, + { + "epoch": 0.8, + "learning_rate": 4.1743324746385914e-05, + "loss": 1.0073, + "step": 1445 + }, + { + "epoch": 0.8, + "learning_rate": 4.1689490022201154e-05, + "loss": 1.0106, + "step": 1450 + }, + { + "epoch": 0.8, + "learning_rate": 4.163551532537601e-05, + "loss": 1.0241, + "step": 1455 + }, + { + "epoch": 0.81, + "learning_rate": 4.1581401108589425e-05, + "loss": 1.0178, + "step": 1460 + }, + { + "epoch": 0.81, + "learning_rate": 4.1527147825690495e-05, + "loss": 0.9426, + "step": 1465 + }, + { + "epoch": 0.81, + "learning_rate": 4.1472755931694626e-05, + "loss": 0.9642, + "step": 1470 + }, + { + "epoch": 0.82, + "learning_rate": 4.141822588277976e-05, + "loss": 0.9678, + "step": 1475 + }, + { + "epoch": 0.82, + "learning_rate": 4.136355813628251e-05, + "loss": 0.9654, + "step": 1480 + }, + { + "epoch": 0.82, + "learning_rate": 4.130875315069435e-05, + "loss": 0.9748, + "step": 1485 + }, + { + "epoch": 0.82, + "learning_rate": 4.125381138565775e-05, + "loss": 0.9321, + "step": 1490 + }, + { + "epoch": 0.83, + "learning_rate": 4.1198733301962346e-05, + "loss": 0.9885, + "step": 1495 + }, + { + "epoch": 0.83, + "learning_rate": 4.114351936154105e-05, + "loss": 0.9385, + "step": 1500 + }, + { + "epoch": 0.83, + "learning_rate": 4.108817002746619e-05, + "loss": 0.9362, + "step": 1505 + }, + { + "epoch": 0.83, + "learning_rate": 4.1032685763945625e-05, + "loss": 0.9764, + "step": 1510 + }, + { + "epoch": 0.84, + "learning_rate": 4.097706703631886e-05, + "loss": 0.961, + "step": 1515 + }, + { + "epoch": 0.84, + "learning_rate": 4.092131431105312e-05, + "loss": 0.9818, + "step": 1520 + }, + { + "epoch": 0.84, + "learning_rate": 4.086542805573945e-05, + "loss": 0.9855, + "step": 1525 + }, + { + "epoch": 0.85, + "learning_rate": 4.080940873908881e-05, + "loss": 0.9618, + "step": 1530 + }, + { + "epoch": 0.85, + "learning_rate": 4.07532568309281e-05, + "loss": 1.0102, + "step": 1535 + }, + { + "epoch": 0.85, + "learning_rate": 4.069697280219628e-05, + "loss": 0.9773, + "step": 1540 + }, + { + "epoch": 0.85, + "learning_rate": 4.0640557124940376e-05, + "loss": 0.9687, + "step": 1545 + }, + { + "epoch": 0.86, + "learning_rate": 4.058401027231152e-05, + "loss": 0.9818, + "step": 1550 + }, + { + "epoch": 0.86, + "learning_rate": 4.052733271856103e-05, + "loss": 0.9911, + "step": 1555 + }, + { + "epoch": 0.86, + "learning_rate": 4.0470524939036355e-05, + "loss": 0.9694, + "step": 1560 + }, + { + "epoch": 0.87, + "learning_rate": 4.0413587410177155e-05, + "loss": 0.9374, + "step": 1565 + }, + { + "epoch": 0.87, + "learning_rate": 4.035652060951128e-05, + "loss": 0.956, + "step": 1570 + }, + { + "epoch": 0.87, + "learning_rate": 4.0299325015650774e-05, + "loss": 0.9491, + "step": 1575 + }, + { + "epoch": 0.87, + "learning_rate": 4.024200110828783e-05, + "loss": 0.9569, + "step": 1580 + }, + { + "epoch": 0.88, + "learning_rate": 4.018454936819082e-05, + "loss": 0.9627, + "step": 1585 + }, + { + "epoch": 0.88, + "learning_rate": 4.012697027720018e-05, + "loss": 0.9922, + "step": 1590 + }, + { + "epoch": 0.88, + "learning_rate": 4.0069264318224506e-05, + "loss": 0.9703, + "step": 1595 + }, + { + "epoch": 0.88, + "learning_rate": 4.0011431975236337e-05, + "loss": 0.9436, + "step": 1600 + }, + { + "epoch": 0.89, + "learning_rate": 3.995347373326822e-05, + "loss": 0.961, + "step": 1605 + }, + { + "epoch": 0.89, + "learning_rate": 3.989539007840861e-05, + "loss": 0.9247, + "step": 1610 + }, + { + "epoch": 0.89, + "learning_rate": 3.983718149779775e-05, + "loss": 0.9537, + "step": 1615 + }, + { + "epoch": 0.9, + "learning_rate": 3.9778848479623656e-05, + "loss": 0.9415, + "step": 1620 + }, + { + "epoch": 0.9, + "learning_rate": 3.972039151311795e-05, + "loss": 0.9954, + "step": 1625 + }, + { + "epoch": 0.9, + "learning_rate": 3.966181108855183e-05, + "loss": 0.9451, + "step": 1630 + }, + { + "epoch": 0.9, + "learning_rate": 3.960310769723189e-05, + "loss": 0.9977, + "step": 1635 + }, + { + "epoch": 0.91, + "learning_rate": 3.9544281831496034e-05, + "loss": 0.9806, + "step": 1640 + }, + { + "epoch": 0.91, + "learning_rate": 3.9485333984709374e-05, + "loss": 0.9851, + "step": 1645 + }, + { + "epoch": 0.91, + "learning_rate": 3.942626465126001e-05, + "loss": 1.0089, + "step": 1650 + }, + { + "epoch": 0.92, + "learning_rate": 3.9367074326555e-05, + "loss": 0.9562, + "step": 1655 + }, + { + "epoch": 0.92, + "learning_rate": 3.930776350701609e-05, + "loss": 0.9892, + "step": 1660 + }, + { + "epoch": 0.92, + "learning_rate": 3.92483326900756e-05, + "loss": 0.9875, + "step": 1665 + }, + { + "epoch": 0.92, + "learning_rate": 3.91887823741723e-05, + "loss": 1.0012, + "step": 1670 + }, + { + "epoch": 0.93, + "learning_rate": 3.9129113058747136e-05, + "loss": 0.97, + "step": 1675 + }, + { + "epoch": 0.93, + "learning_rate": 3.9069325244239095e-05, + "loss": 0.9793, + "step": 1680 + }, + { + "epoch": 0.93, + "learning_rate": 3.900941943208103e-05, + "loss": 0.9864, + "step": 1685 + }, + { + "epoch": 0.93, + "learning_rate": 3.894939612469539e-05, + "loss": 0.9596, + "step": 1690 + }, + { + "epoch": 0.94, + "learning_rate": 3.888925582549006e-05, + "loss": 0.9679, + "step": 1695 + }, + { + "epoch": 0.94, + "learning_rate": 3.882899903885412e-05, + "loss": 0.9395, + "step": 1700 + }, + { + "epoch": 0.94, + "learning_rate": 3.876862627015361e-05, + "loss": 0.9774, + "step": 1705 + }, + { + "epoch": 0.95, + "learning_rate": 3.87081380257273e-05, + "loss": 0.9686, + "step": 1710 + }, + { + "epoch": 0.95, + "learning_rate": 3.864753481288244e-05, + "loss": 0.953, + "step": 1715 + }, + { + "epoch": 0.95, + "learning_rate": 3.8586817139890515e-05, + "loss": 0.9719, + "step": 1720 + }, + { + "epoch": 0.95, + "learning_rate": 3.852598551598294e-05, + "loss": 0.9748, + "step": 1725 + }, + { + "epoch": 0.96, + "learning_rate": 3.8465040451346874e-05, + "loss": 0.9741, + "step": 1730 + }, + { + "epoch": 0.96, + "learning_rate": 3.8403982457120836e-05, + "loss": 0.9747, + "step": 1735 + }, + { + "epoch": 0.96, + "learning_rate": 3.834281204539051e-05, + "loss": 0.9791, + "step": 1740 + }, + { + "epoch": 0.96, + "learning_rate": 3.828152972918438e-05, + "loss": 0.9704, + "step": 1745 + }, + { + "epoch": 0.97, + "learning_rate": 3.82201360224695e-05, + "loss": 0.9556, + "step": 1750 + }, + { + "epoch": 0.97, + "learning_rate": 3.815863144014711e-05, + "loss": 0.9552, + "step": 1755 + }, + { + "epoch": 0.97, + "learning_rate": 3.809701649804834e-05, + "loss": 0.9594, + "step": 1760 + }, + { + "epoch": 0.98, + "learning_rate": 3.8035291712929926e-05, + "loss": 0.959, + "step": 1765 + }, + { + "epoch": 0.98, + "learning_rate": 3.797345760246982e-05, + "loss": 1.0254, + "step": 1770 + }, + { + "epoch": 0.98, + "learning_rate": 3.791151468526289e-05, + "loss": 0.9925, + "step": 1775 + }, + { + "epoch": 0.98, + "learning_rate": 3.784946348081654e-05, + "loss": 0.9516, + "step": 1780 + }, + { + "epoch": 0.99, + "learning_rate": 3.7787304509546365e-05, + "loss": 0.954, + "step": 1785 + }, + { + "epoch": 0.99, + "learning_rate": 3.7725038292771774e-05, + "loss": 0.9465, + "step": 1790 + }, + { + "epoch": 0.99, + "learning_rate": 3.766266535271167e-05, + "loss": 0.9792, + "step": 1795 + }, + { + "epoch": 1.0, + "learning_rate": 3.760018621248e-05, + "loss": 0.964, + "step": 1800 + } + ], + "max_steps": 5424, + "num_train_epochs": 3, + "total_flos": 5.84946561318912e+17, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-1800/training_args.bin b/checkpoint-1800/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..77433459daca6603950afc463ff989e817f81ac3 --- /dev/null +++ b/checkpoint-1800/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0f6a8853599a52b1f15984d75ce4b6434c335fdc67d4509d0b2ca0d8c613590a +size 3362 diff --git a/checkpoint-1900/README.md b/checkpoint-1900/README.md new file mode 100644 index 0000000000000000000000000000000000000000..53c0fe6b436905ad7a2c060e98fe1ebdeac1a2fe --- /dev/null +++ b/checkpoint-1900/README.md @@ -0,0 +1,15 @@ +--- +library_name: peft +--- +## Training procedure + + +The following `bitsandbytes` quantization config was used during training: +- load_in_8bit: True +- llm_int8_threshold: 6.0 +- llm_int8_skip_modules: None +- llm_int8_enable_fp32_cpu_offload: False +### Framework versions + + +- PEFT 0.5.0 diff --git a/checkpoint-1900/adapter_config.json b/checkpoint-1900/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..514ae99ac7471cc26016f7575e44dd8304f29b07 --- /dev/null +++ b/checkpoint-1900/adapter_config.json @@ -0,0 +1,21 @@ +{ + "auto_mapping": null, + "base_model_name_or_path": "meta-llama/Llama-2-7b-chat-hf", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "q_proj", + "v_proj" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-1900/adapter_model.bin b/checkpoint-1900/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..6f5c005e58b424fa30593f5ab7278402ea52271f --- /dev/null +++ b/checkpoint-1900/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5d1e64b0bd633eae5ec34b4138d9788c17d9002d840ee6d5edfc0e3131a7f63d +size 16821197 diff --git a/checkpoint-1900/finetuning_args.json b/checkpoint-1900/finetuning_args.json new file mode 100644 index 0000000000000000000000000000000000000000..d01efc206b59c6f88548e8f3940579f2ed2af33b --- /dev/null +++ b/checkpoint-1900/finetuning_args.json @@ -0,0 +1,16 @@ +{ + "dpo_beta": 0.1, + "finetuning_type": "lora", + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "lora_rank": 8, + "lora_target": [ + "q_proj", + "v_proj" + ], + "name_module_trainable": "mlp", + "num_hidden_layers": 32, + "num_layer_trainable": 3, + "ppo_score_norm": false, + "resume_lora_training": true +} diff --git a/checkpoint-1900/optimizer.pt b/checkpoint-1900/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..9e1d20007abe03e4b289ac491610c562240106db --- /dev/null +++ b/checkpoint-1900/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5305799a10f2f29b1ba7604049a8ddebcf5259b0bcedecc378f9325ebae045f8 +size 33629893 diff --git a/checkpoint-1900/rng_state.pth b/checkpoint-1900/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..48ba965be902afb4078f50329ba15f2bdc6890e5 --- /dev/null +++ b/checkpoint-1900/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:88d9f6438a062aeda147e024366c371a1e7903b4df091268d0eb7eed31a0fc48 +size 14575 diff --git a/checkpoint-1900/scaler.pt b/checkpoint-1900/scaler.pt new file mode 100644 index 0000000000000000000000000000000000000000..ab4fda662c52cecd08bd079a51b7f956c487f0d7 --- /dev/null +++ b/checkpoint-1900/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5e357cd04133d4fbb24404183ed6dc0dbd45f8f23dbb5f3ab48500ab3008d334 +size 557 diff --git a/checkpoint-1900/scheduler.pt b/checkpoint-1900/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..96f2628bd14bc6054fd29f63168d50c26d377cd2 --- /dev/null +++ b/checkpoint-1900/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:515bf249a39fd81a75dcf7da279e56862bc516dd796c281d8057fbc2d0ea1595 +size 627 diff --git a/checkpoint-1900/trainer_state.json b/checkpoint-1900/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..815aa0b900feb6b3a1064516d51b46ae71e3f3ba --- /dev/null +++ b/checkpoint-1900/trainer_state.json @@ -0,0 +1,2296 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.0505944152612663, + "global_step": 1900, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.9999895164082156e-05, + "loss": 1.6349, + "step": 5 + }, + { + "epoch": 0.01, + "learning_rate": 4.999958065720787e-05, + "loss": 1.6199, + "step": 10 + }, + { + "epoch": 0.01, + "learning_rate": 4.999905648201487e-05, + "loss": 1.4834, + "step": 15 + }, + { + "epoch": 0.01, + "learning_rate": 4.999832264289934e-05, + "loss": 1.3882, + "step": 20 + }, + { + "epoch": 0.01, + "learning_rate": 4.999737914601591e-05, + "loss": 1.3679, + "step": 25 + }, + { + "epoch": 0.02, + "learning_rate": 4.999622599927756e-05, + "loss": 1.2396, + "step": 30 + }, + { + "epoch": 0.02, + "learning_rate": 4.999486321235559e-05, + "loss": 1.321, + "step": 35 + }, + { + "epoch": 0.02, + "learning_rate": 4.9993290796679516e-05, + "loss": 1.2874, + "step": 40 + }, + { + "epoch": 0.02, + "learning_rate": 4.999150876543699e-05, + "loss": 1.2607, + "step": 45 + }, + { + "epoch": 0.03, + "learning_rate": 4.9989517133573694e-05, + "loss": 1.2454, + "step": 50 + }, + { + "epoch": 0.03, + "learning_rate": 4.9987315917793174e-05, + "loss": 1.2799, + "step": 55 + }, + { + "epoch": 0.03, + "learning_rate": 4.998490513655676e-05, + "loss": 1.2575, + "step": 60 + }, + { + "epoch": 0.04, + "learning_rate": 4.998228481008337e-05, + "loss": 1.2404, + "step": 65 + }, + { + "epoch": 0.04, + "learning_rate": 4.997945496034934e-05, + "loss": 1.2219, + "step": 70 + }, + { + "epoch": 0.04, + "learning_rate": 4.9976415611088267e-05, + "loss": 1.2241, + "step": 75 + }, + { + "epoch": 0.04, + "learning_rate": 4.997316678779079e-05, + "loss": 1.1716, + "step": 80 + }, + { + "epoch": 0.05, + "learning_rate": 4.996970851770438e-05, + "loss": 1.1883, + "step": 85 + }, + { + "epoch": 0.05, + "learning_rate": 4.9966040829833115e-05, + "loss": 1.205, + "step": 90 + }, + { + "epoch": 0.05, + "learning_rate": 4.9962163754937426e-05, + "loss": 1.1246, + "step": 95 + }, + { + "epoch": 0.06, + "learning_rate": 4.995807732553384e-05, + "loss": 1.1636, + "step": 100 + }, + { + "epoch": 0.06, + "learning_rate": 4.9953781575894723e-05, + "loss": 1.158, + "step": 105 + }, + { + "epoch": 0.06, + "learning_rate": 4.9949276542048e-05, + "loss": 1.1477, + "step": 110 + }, + { + "epoch": 0.06, + "learning_rate": 4.9944562261776805e-05, + "loss": 1.1678, + "step": 115 + }, + { + "epoch": 0.07, + "learning_rate": 4.9939638774619216e-05, + "loss": 1.1501, + "step": 120 + }, + { + "epoch": 0.07, + "learning_rate": 4.99345061218679e-05, + "loss": 1.1955, + "step": 125 + }, + { + "epoch": 0.07, + "learning_rate": 4.9929164346569756e-05, + "loss": 1.1724, + "step": 130 + }, + { + "epoch": 0.07, + "learning_rate": 4.9923613493525576e-05, + "loss": 1.177, + "step": 135 + }, + { + "epoch": 0.08, + "learning_rate": 4.991785360928968e-05, + "loss": 1.1418, + "step": 140 + }, + { + "epoch": 0.08, + "learning_rate": 4.991188474216947e-05, + "loss": 1.1898, + "step": 145 + }, + { + "epoch": 0.08, + "learning_rate": 4.9905706942225094e-05, + "loss": 1.1479, + "step": 150 + }, + { + "epoch": 0.09, + "learning_rate": 4.9899320261268966e-05, + "loss": 1.1356, + "step": 155 + }, + { + "epoch": 0.09, + "learning_rate": 4.989272475286537e-05, + "loss": 1.1397, + "step": 160 + }, + { + "epoch": 0.09, + "learning_rate": 4.9885920472330004e-05, + "loss": 1.1215, + "step": 165 + }, + { + "epoch": 0.09, + "learning_rate": 4.9878907476729516e-05, + "loss": 1.167, + "step": 170 + }, + { + "epoch": 0.1, + "learning_rate": 4.9871685824881e-05, + "loss": 1.1219, + "step": 175 + }, + { + "epoch": 0.1, + "learning_rate": 4.9864255577351534e-05, + "loss": 1.0835, + "step": 180 + }, + { + "epoch": 0.1, + "learning_rate": 4.985661679645769e-05, + "loss": 1.0721, + "step": 185 + }, + { + "epoch": 0.11, + "learning_rate": 4.9848769546264915e-05, + "loss": 1.0692, + "step": 190 + }, + { + "epoch": 0.11, + "learning_rate": 4.9840713892587146e-05, + "loss": 1.0488, + "step": 195 + }, + { + "epoch": 0.11, + "learning_rate": 4.983244990298609e-05, + "loss": 1.1285, + "step": 200 + }, + { + "epoch": 0.11, + "learning_rate": 4.982397764677081e-05, + "loss": 1.0832, + "step": 205 + }, + { + "epoch": 0.12, + "learning_rate": 4.981529719499704e-05, + "loss": 1.0652, + "step": 210 + }, + { + "epoch": 0.12, + "learning_rate": 4.980640862046663e-05, + "loss": 1.1043, + "step": 215 + }, + { + "epoch": 0.12, + "learning_rate": 4.979731199772693e-05, + "loss": 1.112, + "step": 220 + }, + { + "epoch": 0.12, + "learning_rate": 4.9788007403070146e-05, + "loss": 1.1029, + "step": 225 + }, + { + "epoch": 0.13, + "learning_rate": 4.977849491453277e-05, + "loss": 1.0869, + "step": 230 + }, + { + "epoch": 0.13, + "learning_rate": 4.976877461189481e-05, + "loss": 1.0843, + "step": 235 + }, + { + "epoch": 0.13, + "learning_rate": 4.975884657667922e-05, + "loss": 1.0789, + "step": 240 + }, + { + "epoch": 0.14, + "learning_rate": 4.974871089215118e-05, + "loss": 1.0449, + "step": 245 + }, + { + "epoch": 0.14, + "learning_rate": 4.9738367643317405e-05, + "loss": 1.1053, + "step": 250 + }, + { + "epoch": 0.14, + "learning_rate": 4.9727816916925395e-05, + "loss": 1.0651, + "step": 255 + }, + { + "epoch": 0.14, + "learning_rate": 4.971705880146276e-05, + "loss": 1.0828, + "step": 260 + }, + { + "epoch": 0.15, + "learning_rate": 4.970609338715646e-05, + "loss": 1.0932, + "step": 265 + }, + { + "epoch": 0.15, + "learning_rate": 4.969492076597203e-05, + "loss": 1.0648, + "step": 270 + }, + { + "epoch": 0.15, + "learning_rate": 4.968354103161283e-05, + "loss": 1.0948, + "step": 275 + }, + { + "epoch": 0.15, + "learning_rate": 4.967195427951926e-05, + "loss": 1.0721, + "step": 280 + }, + { + "epoch": 0.16, + "learning_rate": 4.9660160606867936e-05, + "loss": 1.124, + "step": 285 + }, + { + "epoch": 0.16, + "learning_rate": 4.9648160112570896e-05, + "loss": 1.0963, + "step": 290 + }, + { + "epoch": 0.16, + "learning_rate": 4.9635952897274773e-05, + "loss": 1.1078, + "step": 295 + }, + { + "epoch": 0.17, + "learning_rate": 4.9623539063359925e-05, + "loss": 1.1059, + "step": 300 + }, + { + "epoch": 0.17, + "learning_rate": 4.961091871493962e-05, + "loss": 1.1032, + "step": 305 + }, + { + "epoch": 0.17, + "learning_rate": 4.959809195785912e-05, + "loss": 1.0595, + "step": 310 + }, + { + "epoch": 0.17, + "learning_rate": 4.958505889969481e-05, + "loss": 1.1096, + "step": 315 + }, + { + "epoch": 0.18, + "learning_rate": 4.957181964975329e-05, + "loss": 1.0589, + "step": 320 + }, + { + "epoch": 0.18, + "learning_rate": 4.955837431907049e-05, + "loss": 1.0608, + "step": 325 + }, + { + "epoch": 0.18, + "learning_rate": 4.954472302041069e-05, + "loss": 1.0819, + "step": 330 + }, + { + "epoch": 0.19, + "learning_rate": 4.9530865868265605e-05, + "loss": 1.0759, + "step": 335 + }, + { + "epoch": 0.19, + "learning_rate": 4.951680297885342e-05, + "loss": 1.0515, + "step": 340 + }, + { + "epoch": 0.19, + "learning_rate": 4.950253447011779e-05, + "loss": 1.0371, + "step": 345 + }, + { + "epoch": 0.19, + "learning_rate": 4.948806046172691e-05, + "loss": 1.0619, + "step": 350 + }, + { + "epoch": 0.2, + "learning_rate": 4.947338107507245e-05, + "loss": 1.0757, + "step": 355 + }, + { + "epoch": 0.2, + "learning_rate": 4.945849643326857e-05, + "loss": 1.0686, + "step": 360 + }, + { + "epoch": 0.2, + "learning_rate": 4.9443406661150874e-05, + "loss": 1.0809, + "step": 365 + }, + { + "epoch": 0.2, + "learning_rate": 4.942811188527537e-05, + "loss": 1.0704, + "step": 370 + }, + { + "epoch": 0.21, + "learning_rate": 4.941261223391742e-05, + "loss": 1.0655, + "step": 375 + }, + { + "epoch": 0.21, + "learning_rate": 4.939690783707063e-05, + "loss": 1.1182, + "step": 380 + }, + { + "epoch": 0.21, + "learning_rate": 4.938099882644578e-05, + "loss": 1.081, + "step": 385 + }, + { + "epoch": 0.22, + "learning_rate": 4.9364885335469734e-05, + "loss": 1.0792, + "step": 390 + }, + { + "epoch": 0.22, + "learning_rate": 4.93485674992843e-05, + "loss": 1.0244, + "step": 395 + }, + { + "epoch": 0.22, + "learning_rate": 4.933204545474511e-05, + "loss": 1.0531, + "step": 400 + }, + { + "epoch": 0.22, + "learning_rate": 4.9315319340420465e-05, + "loss": 1.048, + "step": 405 + }, + { + "epoch": 0.23, + "learning_rate": 4.929838929659015e-05, + "loss": 1.04, + "step": 410 + }, + { + "epoch": 0.23, + "learning_rate": 4.9281255465244314e-05, + "loss": 1.0686, + "step": 415 + }, + { + "epoch": 0.23, + "learning_rate": 4.926391799008223e-05, + "loss": 1.0933, + "step": 420 + }, + { + "epoch": 0.24, + "learning_rate": 4.924637701651111e-05, + "loss": 1.0313, + "step": 425 + }, + { + "epoch": 0.24, + "learning_rate": 4.9228632691644874e-05, + "loss": 1.0811, + "step": 430 + }, + { + "epoch": 0.24, + "learning_rate": 4.921068516430293e-05, + "loss": 1.0617, + "step": 435 + }, + { + "epoch": 0.24, + "learning_rate": 4.919253458500892e-05, + "loss": 1.0482, + "step": 440 + }, + { + "epoch": 0.25, + "learning_rate": 4.9174181105989445e-05, + "loss": 1.0681, + "step": 445 + }, + { + "epoch": 0.25, + "learning_rate": 4.9155624881172834e-05, + "loss": 1.0476, + "step": 450 + }, + { + "epoch": 0.25, + "learning_rate": 4.913686606618777e-05, + "loss": 1.0463, + "step": 455 + }, + { + "epoch": 0.25, + "learning_rate": 4.911790481836208e-05, + "loss": 1.0213, + "step": 460 + }, + { + "epoch": 0.26, + "learning_rate": 4.909874129672133e-05, + "loss": 0.9855, + "step": 465 + }, + { + "epoch": 0.26, + "learning_rate": 4.907937566198757e-05, + "loss": 1.045, + "step": 470 + }, + { + "epoch": 0.26, + "learning_rate": 4.9059808076577914e-05, + "loss": 1.0442, + "step": 475 + }, + { + "epoch": 0.27, + "learning_rate": 4.904003870460323e-05, + "loss": 1.0354, + "step": 480 + }, + { + "epoch": 0.27, + "learning_rate": 4.9020067711866735e-05, + "loss": 1.0331, + "step": 485 + }, + { + "epoch": 0.27, + "learning_rate": 4.899989526586261e-05, + "loss": 1.0585, + "step": 490 + }, + { + "epoch": 0.27, + "learning_rate": 4.8979521535774636e-05, + "loss": 1.0223, + "step": 495 + }, + { + "epoch": 0.28, + "learning_rate": 4.895894669247468e-05, + "loss": 1.0118, + "step": 500 + }, + { + "epoch": 0.28, + "learning_rate": 4.8938170908521356e-05, + "loss": 1.0508, + "step": 505 + }, + { + "epoch": 0.28, + "learning_rate": 4.8917194358158534e-05, + "loss": 1.0656, + "step": 510 + }, + { + "epoch": 0.28, + "learning_rate": 4.8896017217313886e-05, + "loss": 1.0655, + "step": 515 + }, + { + "epoch": 0.29, + "learning_rate": 4.887463966359741e-05, + "loss": 1.0833, + "step": 520 + }, + { + "epoch": 0.29, + "learning_rate": 4.8853061876299956e-05, + "loss": 1.068, + "step": 525 + }, + { + "epoch": 0.29, + "learning_rate": 4.8831284036391684e-05, + "loss": 1.0487, + "step": 530 + }, + { + "epoch": 0.3, + "learning_rate": 4.880930632652058e-05, + "loss": 1.059, + "step": 535 + }, + { + "epoch": 0.3, + "learning_rate": 4.878712893101092e-05, + "loss": 1.0408, + "step": 540 + }, + { + "epoch": 0.3, + "learning_rate": 4.876475203586171e-05, + "loss": 0.9976, + "step": 545 + }, + { + "epoch": 0.3, + "learning_rate": 4.874217582874514e-05, + "loss": 1.0358, + "step": 550 + }, + { + "epoch": 0.31, + "learning_rate": 4.8719400499005e-05, + "loss": 1.0073, + "step": 555 + }, + { + "epoch": 0.31, + "learning_rate": 4.869642623765509e-05, + "loss": 1.0742, + "step": 560 + }, + { + "epoch": 0.31, + "learning_rate": 4.867325323737765e-05, + "loss": 1.0463, + "step": 565 + }, + { + "epoch": 0.32, + "learning_rate": 4.864988169252168e-05, + "loss": 1.0242, + "step": 570 + }, + { + "epoch": 0.32, + "learning_rate": 4.8626311799101375e-05, + "loss": 1.0773, + "step": 575 + }, + { + "epoch": 0.32, + "learning_rate": 4.860254375479446e-05, + "loss": 1.0371, + "step": 580 + }, + { + "epoch": 0.32, + "learning_rate": 4.8578577758940504e-05, + "loss": 1.0399, + "step": 585 + }, + { + "epoch": 0.33, + "learning_rate": 4.855441401253928e-05, + "loss": 1.0411, + "step": 590 + }, + { + "epoch": 0.33, + "learning_rate": 4.8530052718249076e-05, + "loss": 1.0476, + "step": 595 + }, + { + "epoch": 0.33, + "learning_rate": 4.850549408038498e-05, + "loss": 1.0075, + "step": 600 + }, + { + "epoch": 0.33, + "learning_rate": 4.848073830491717e-05, + "loss": 1.0045, + "step": 605 + }, + { + "epoch": 0.34, + "learning_rate": 4.845578559946923e-05, + "loss": 1.0368, + "step": 610 + }, + { + "epoch": 0.34, + "learning_rate": 4.8430636173316306e-05, + "loss": 1.0156, + "step": 615 + }, + { + "epoch": 0.34, + "learning_rate": 4.840529023738348e-05, + "loss": 1.0334, + "step": 620 + }, + { + "epoch": 0.35, + "learning_rate": 4.837974800424389e-05, + "loss": 0.9994, + "step": 625 + }, + { + "epoch": 0.35, + "learning_rate": 4.8354009688117026e-05, + "loss": 1.0467, + "step": 630 + }, + { + "epoch": 0.35, + "learning_rate": 4.8328075504866874e-05, + "loss": 0.9968, + "step": 635 + }, + { + "epoch": 0.35, + "learning_rate": 4.8301945672000164e-05, + "loss": 1.041, + "step": 640 + }, + { + "epoch": 0.36, + "learning_rate": 4.8275620408664487e-05, + "loss": 0.9811, + "step": 645 + }, + { + "epoch": 0.36, + "learning_rate": 4.8249099935646494e-05, + "loss": 1.0235, + "step": 650 + }, + { + "epoch": 0.36, + "learning_rate": 4.822238447537003e-05, + "loss": 1.0152, + "step": 655 + }, + { + "epoch": 0.36, + "learning_rate": 4.819547425189429e-05, + "loss": 1.038, + "step": 660 + }, + { + "epoch": 0.37, + "learning_rate": 4.81683694909119e-05, + "loss": 1.0387, + "step": 665 + }, + { + "epoch": 0.37, + "learning_rate": 4.814107041974707e-05, + "loss": 1.019, + "step": 670 + }, + { + "epoch": 0.37, + "learning_rate": 4.811357726735366e-05, + "loss": 1.0403, + "step": 675 + }, + { + "epoch": 0.38, + "learning_rate": 4.808589026431324e-05, + "loss": 1.0828, + "step": 680 + }, + { + "epoch": 0.38, + "learning_rate": 4.805800964283322e-05, + "loss": 1.0128, + "step": 685 + }, + { + "epoch": 0.38, + "learning_rate": 4.802993563674483e-05, + "loss": 1.0502, + "step": 690 + }, + { + "epoch": 0.38, + "learning_rate": 4.80016684815012e-05, + "loss": 1.0169, + "step": 695 + }, + { + "epoch": 0.39, + "learning_rate": 4.7973208414175406e-05, + "loss": 1.0316, + "step": 700 + }, + { + "epoch": 0.39, + "learning_rate": 4.794455567345842e-05, + "loss": 1.0263, + "step": 705 + }, + { + "epoch": 0.39, + "learning_rate": 4.791571049965714e-05, + "loss": 1.0801, + "step": 710 + }, + { + "epoch": 0.4, + "learning_rate": 4.7886673134692404e-05, + "loss": 1.0575, + "step": 715 + }, + { + "epoch": 0.4, + "learning_rate": 4.7857443822096905e-05, + "loss": 0.9767, + "step": 720 + }, + { + "epoch": 0.4, + "learning_rate": 4.782802280701319e-05, + "loss": 0.9666, + "step": 725 + }, + { + "epoch": 0.4, + "learning_rate": 4.77984103361916e-05, + "loss": 0.9983, + "step": 730 + }, + { + "epoch": 0.41, + "learning_rate": 4.776860665798816e-05, + "loss": 1.0219, + "step": 735 + }, + { + "epoch": 0.41, + "learning_rate": 4.773861202236257e-05, + "loss": 0.9963, + "step": 740 + }, + { + "epoch": 0.41, + "learning_rate": 4.770842668087602e-05, + "loss": 1.0219, + "step": 745 + }, + { + "epoch": 0.41, + "learning_rate": 4.767805088668916e-05, + "loss": 1.0029, + "step": 750 + }, + { + "epoch": 0.42, + "learning_rate": 4.7647484894559936e-05, + "loss": 1.0077, + "step": 755 + }, + { + "epoch": 0.42, + "learning_rate": 4.7616728960841444e-05, + "loss": 0.9908, + "step": 760 + }, + { + "epoch": 0.42, + "learning_rate": 4.758578334347981e-05, + "loss": 1.0092, + "step": 765 + }, + { + "epoch": 0.43, + "learning_rate": 4.7554648302012015e-05, + "loss": 0.9874, + "step": 770 + }, + { + "epoch": 0.43, + "learning_rate": 4.7523324097563706e-05, + "loss": 1.0185, + "step": 775 + }, + { + "epoch": 0.43, + "learning_rate": 4.749181099284703e-05, + "loss": 1.0533, + "step": 780 + }, + { + "epoch": 0.43, + "learning_rate": 4.746010925215839e-05, + "loss": 1.0083, + "step": 785 + }, + { + "epoch": 0.44, + "learning_rate": 4.74282191413763e-05, + "loss": 1.0191, + "step": 790 + }, + { + "epoch": 0.44, + "learning_rate": 4.7396140927959045e-05, + "loss": 0.9786, + "step": 795 + }, + { + "epoch": 0.44, + "learning_rate": 4.7363874880942574e-05, + "loss": 1.0197, + "step": 800 + }, + { + "epoch": 0.45, + "learning_rate": 4.733142127093813e-05, + "loss": 1.0471, + "step": 805 + }, + { + "epoch": 0.45, + "learning_rate": 4.7298780370130014e-05, + "loss": 1.0134, + "step": 810 + }, + { + "epoch": 0.45, + "learning_rate": 4.726595245227336e-05, + "loss": 0.9801, + "step": 815 + }, + { + "epoch": 0.45, + "learning_rate": 4.723293779269173e-05, + "loss": 0.9944, + "step": 820 + }, + { + "epoch": 0.46, + "learning_rate": 4.7199736668274924e-05, + "loss": 1.0358, + "step": 825 + }, + { + "epoch": 0.46, + "learning_rate": 4.716634935747655e-05, + "loss": 0.9907, + "step": 830 + }, + { + "epoch": 0.46, + "learning_rate": 4.713277614031177e-05, + "loss": 1.0698, + "step": 835 + }, + { + "epoch": 0.46, + "learning_rate": 4.70990172983549e-05, + "loss": 1.0466, + "step": 840 + }, + { + "epoch": 0.47, + "learning_rate": 4.706507311473707e-05, + "loss": 0.9489, + "step": 845 + }, + { + "epoch": 0.47, + "learning_rate": 4.703094387414385e-05, + "loss": 0.9936, + "step": 850 + }, + { + "epoch": 0.47, + "learning_rate": 4.699662986281288e-05, + "loss": 0.9672, + "step": 855 + }, + { + "epoch": 0.48, + "learning_rate": 4.696213136853141e-05, + "loss": 1.0164, + "step": 860 + }, + { + "epoch": 0.48, + "learning_rate": 4.6927448680633954e-05, + "loss": 0.9817, + "step": 865 + }, + { + "epoch": 0.48, + "learning_rate": 4.689258208999983e-05, + "loss": 1.028, + "step": 870 + }, + { + "epoch": 0.48, + "learning_rate": 4.6857531889050716e-05, + "loss": 1.0239, + "step": 875 + }, + { + "epoch": 0.49, + "learning_rate": 4.682229837174821e-05, + "loss": 1.0378, + "step": 880 + }, + { + "epoch": 0.49, + "learning_rate": 4.678688183359135e-05, + "loss": 0.9908, + "step": 885 + }, + { + "epoch": 0.49, + "learning_rate": 4.675128257161418e-05, + "loss": 0.9384, + "step": 890 + }, + { + "epoch": 0.49, + "learning_rate": 4.671550088438319e-05, + "loss": 0.9481, + "step": 895 + }, + { + "epoch": 0.5, + "learning_rate": 4.6679537071994874e-05, + "loss": 0.9541, + "step": 900 + }, + { + "epoch": 0.5, + "learning_rate": 4.6643391436073165e-05, + "loss": 0.9787, + "step": 905 + }, + { + "epoch": 0.5, + "learning_rate": 4.660706427976693e-05, + "loss": 0.9897, + "step": 910 + }, + { + "epoch": 0.51, + "learning_rate": 4.657055590774745e-05, + "loss": 0.9947, + "step": 915 + }, + { + "epoch": 0.51, + "learning_rate": 4.6533866626205805e-05, + "loss": 0.9614, + "step": 920 + }, + { + "epoch": 0.51, + "learning_rate": 4.649699674285036e-05, + "loss": 1.0323, + "step": 925 + }, + { + "epoch": 0.51, + "learning_rate": 4.645994656690417e-05, + "loss": 1.0345, + "step": 930 + }, + { + "epoch": 0.52, + "learning_rate": 4.642271640910235e-05, + "loss": 1.0432, + "step": 935 + }, + { + "epoch": 0.52, + "learning_rate": 4.638530658168954e-05, + "loss": 1.0102, + "step": 940 + }, + { + "epoch": 0.52, + "learning_rate": 4.6347717398417203e-05, + "loss": 1.0361, + "step": 945 + }, + { + "epoch": 0.53, + "learning_rate": 4.6309949174541096e-05, + "loss": 0.9818, + "step": 950 + }, + { + "epoch": 0.53, + "learning_rate": 4.627200222681851e-05, + "loss": 1.0293, + "step": 955 + }, + { + "epoch": 0.53, + "learning_rate": 4.6233876873505694e-05, + "loss": 1.0125, + "step": 960 + }, + { + "epoch": 0.53, + "learning_rate": 4.619557343435516e-05, + "loss": 1.0039, + "step": 965 + }, + { + "epoch": 0.54, + "learning_rate": 4.615709223061302e-05, + "loss": 1.0146, + "step": 970 + }, + { + "epoch": 0.54, + "learning_rate": 4.611843358501624e-05, + "loss": 1.0352, + "step": 975 + }, + { + "epoch": 0.54, + "learning_rate": 4.6079597821789993e-05, + "loss": 1.0405, + "step": 980 + }, + { + "epoch": 0.54, + "learning_rate": 4.604058526664491e-05, + "loss": 0.9834, + "step": 985 + }, + { + "epoch": 0.55, + "learning_rate": 4.600139624677436e-05, + "loss": 1.0252, + "step": 990 + }, + { + "epoch": 0.55, + "learning_rate": 4.596203109085168e-05, + "loss": 1.0516, + "step": 995 + }, + { + "epoch": 0.55, + "learning_rate": 4.5922490129027464e-05, + "loss": 0.9762, + "step": 1000 + }, + { + "epoch": 0.56, + "learning_rate": 4.588277369292674e-05, + "loss": 0.9801, + "step": 1005 + }, + { + "epoch": 0.56, + "learning_rate": 4.5842882115646234e-05, + "loss": 1.0127, + "step": 1010 + }, + { + "epoch": 0.56, + "learning_rate": 4.580281573175157e-05, + "loss": 1.0415, + "step": 1015 + }, + { + "epoch": 0.56, + "learning_rate": 4.576257487727442e-05, + "loss": 0.9974, + "step": 1020 + }, + { + "epoch": 0.57, + "learning_rate": 4.572215988970974e-05, + "loss": 1.0097, + "step": 1025 + }, + { + "epoch": 0.57, + "learning_rate": 4.568157110801293e-05, + "loss": 1.0004, + "step": 1030 + }, + { + "epoch": 0.57, + "learning_rate": 4.5640808872596944e-05, + "loss": 0.9932, + "step": 1035 + }, + { + "epoch": 0.58, + "learning_rate": 4.5599873525329505e-05, + "loss": 0.9437, + "step": 1040 + }, + { + "epoch": 0.58, + "learning_rate": 4.555876540953019e-05, + "loss": 1.0004, + "step": 1045 + }, + { + "epoch": 0.58, + "learning_rate": 4.551748486996755e-05, + "loss": 0.9743, + "step": 1050 + }, + { + "epoch": 0.58, + "learning_rate": 4.547603225285626e-05, + "loss": 1.0303, + "step": 1055 + }, + { + "epoch": 0.59, + "learning_rate": 4.543440790585417e-05, + "loss": 1.0225, + "step": 1060 + }, + { + "epoch": 0.59, + "learning_rate": 4.539261217805939e-05, + "loss": 1.0102, + "step": 1065 + }, + { + "epoch": 0.59, + "learning_rate": 4.535064542000743e-05, + "loss": 1.0087, + "step": 1070 + }, + { + "epoch": 0.59, + "learning_rate": 4.5308507983668165e-05, + "loss": 0.952, + "step": 1075 + }, + { + "epoch": 0.6, + "learning_rate": 4.526620022244293e-05, + "loss": 0.9767, + "step": 1080 + }, + { + "epoch": 0.6, + "learning_rate": 4.522372249116158e-05, + "loss": 1.0049, + "step": 1085 + }, + { + "epoch": 0.6, + "learning_rate": 4.5181075146079456e-05, + "loss": 1.0397, + "step": 1090 + }, + { + "epoch": 0.61, + "learning_rate": 4.5138258544874455e-05, + "loss": 1.0167, + "step": 1095 + }, + { + "epoch": 0.61, + "learning_rate": 4.5095273046643985e-05, + "loss": 0.9966, + "step": 1100 + }, + { + "epoch": 0.61, + "learning_rate": 4.5052119011901986e-05, + "loss": 0.983, + "step": 1105 + }, + { + "epoch": 0.61, + "learning_rate": 4.500879680257587e-05, + "loss": 1.0301, + "step": 1110 + }, + { + "epoch": 0.62, + "learning_rate": 4.4965306782003535e-05, + "loss": 1.0371, + "step": 1115 + }, + { + "epoch": 0.62, + "learning_rate": 4.492164931493028e-05, + "loss": 1.0083, + "step": 1120 + }, + { + "epoch": 0.62, + "learning_rate": 4.487782476750575e-05, + "loss": 0.9728, + "step": 1125 + }, + { + "epoch": 0.62, + "learning_rate": 4.4833833507280884e-05, + "loss": 0.9892, + "step": 1130 + }, + { + "epoch": 0.63, + "learning_rate": 4.4789675903204805e-05, + "loss": 0.9847, + "step": 1135 + }, + { + "epoch": 0.63, + "learning_rate": 4.474535232562176e-05, + "loss": 1.0545, + "step": 1140 + }, + { + "epoch": 0.63, + "learning_rate": 4.470086314626797e-05, + "loss": 1.0204, + "step": 1145 + }, + { + "epoch": 0.64, + "learning_rate": 4.465620873826856e-05, + "loss": 0.9648, + "step": 1150 + }, + { + "epoch": 0.64, + "learning_rate": 4.46113894761344e-05, + "loss": 0.9545, + "step": 1155 + }, + { + "epoch": 0.64, + "learning_rate": 4.456640573575896e-05, + "loss": 1.0256, + "step": 1160 + }, + { + "epoch": 0.64, + "learning_rate": 4.4521257894415183e-05, + "loss": 0.99, + "step": 1165 + }, + { + "epoch": 0.65, + "learning_rate": 4.44759463307523e-05, + "loss": 0.9906, + "step": 1170 + }, + { + "epoch": 0.65, + "learning_rate": 4.443047142479266e-05, + "loss": 0.9876, + "step": 1175 + }, + { + "epoch": 0.65, + "learning_rate": 4.4384833557928553e-05, + "loss": 1.0495, + "step": 1180 + }, + { + "epoch": 0.66, + "learning_rate": 4.4339033112918966e-05, + "loss": 0.9869, + "step": 1185 + }, + { + "epoch": 0.66, + "learning_rate": 4.4293070473886456e-05, + "loss": 1.0299, + "step": 1190 + }, + { + "epoch": 0.66, + "learning_rate": 4.424694602631385e-05, + "loss": 1.0073, + "step": 1195 + }, + { + "epoch": 0.66, + "learning_rate": 4.420066015704105e-05, + "loss": 1.0023, + "step": 1200 + }, + { + "epoch": 0.67, + "learning_rate": 4.41542132542618e-05, + "loss": 0.9462, + "step": 1205 + }, + { + "epoch": 0.67, + "learning_rate": 4.410760570752037e-05, + "loss": 1.0116, + "step": 1210 + }, + { + "epoch": 0.67, + "learning_rate": 4.4060837907708375e-05, + "loss": 0.9652, + "step": 1215 + }, + { + "epoch": 0.67, + "learning_rate": 4.401391024706142e-05, + "loss": 1.0411, + "step": 1220 + }, + { + "epoch": 0.68, + "learning_rate": 4.396682311915586e-05, + "loss": 0.9691, + "step": 1225 + }, + { + "epoch": 0.68, + "learning_rate": 4.3919576918905495e-05, + "loss": 0.961, + "step": 1230 + }, + { + "epoch": 0.68, + "learning_rate": 4.387217204255819e-05, + "loss": 0.9602, + "step": 1235 + }, + { + "epoch": 0.69, + "learning_rate": 4.3824608887692666e-05, + "loss": 1.0197, + "step": 1240 + }, + { + "epoch": 0.69, + "learning_rate": 4.377688785321507e-05, + "loss": 0.9982, + "step": 1245 + }, + { + "epoch": 0.69, + "learning_rate": 4.372900933935569e-05, + "loss": 0.986, + "step": 1250 + }, + { + "epoch": 0.69, + "learning_rate": 4.368097374766556e-05, + "loss": 0.9744, + "step": 1255 + }, + { + "epoch": 0.7, + "learning_rate": 4.3632781481013105e-05, + "loss": 1.0078, + "step": 1260 + }, + { + "epoch": 0.7, + "learning_rate": 4.358443294358077e-05, + "loss": 0.9865, + "step": 1265 + }, + { + "epoch": 0.7, + "learning_rate": 4.35359285408616e-05, + "loss": 0.976, + "step": 1270 + }, + { + "epoch": 0.71, + "learning_rate": 4.348726867965591e-05, + "loss": 1.0141, + "step": 1275 + }, + { + "epoch": 0.71, + "learning_rate": 4.343845376806777e-05, + "loss": 0.952, + "step": 1280 + }, + { + "epoch": 0.71, + "learning_rate": 4.338948421550169e-05, + "loss": 1.0239, + "step": 1285 + }, + { + "epoch": 0.71, + "learning_rate": 4.334036043265909e-05, + "loss": 0.994, + "step": 1290 + }, + { + "epoch": 0.72, + "learning_rate": 4.329108283153492e-05, + "loss": 1.0505, + "step": 1295 + }, + { + "epoch": 0.72, + "learning_rate": 4.3241651825414195e-05, + "loss": 0.9919, + "step": 1300 + }, + { + "epoch": 0.72, + "learning_rate": 4.31920678288685e-05, + "loss": 1.0169, + "step": 1305 + }, + { + "epoch": 0.72, + "learning_rate": 4.3142331257752546e-05, + "loss": 0.9938, + "step": 1310 + }, + { + "epoch": 0.73, + "learning_rate": 4.309244252920064e-05, + "loss": 0.92, + "step": 1315 + }, + { + "epoch": 0.73, + "learning_rate": 4.304240206162326e-05, + "loss": 0.9537, + "step": 1320 + }, + { + "epoch": 0.73, + "learning_rate": 4.299221027470345e-05, + "loss": 0.996, + "step": 1325 + }, + { + "epoch": 0.74, + "learning_rate": 4.294186758939339e-05, + "loss": 0.9652, + "step": 1330 + }, + { + "epoch": 0.74, + "learning_rate": 4.2891374427910795e-05, + "loss": 1.0138, + "step": 1335 + }, + { + "epoch": 0.74, + "learning_rate": 4.284073121373544e-05, + "loss": 0.9267, + "step": 1340 + }, + { + "epoch": 0.74, + "learning_rate": 4.278993837160553e-05, + "loss": 0.9899, + "step": 1345 + }, + { + "epoch": 0.75, + "learning_rate": 4.273899632751422e-05, + "loss": 0.963, + "step": 1350 + }, + { + "epoch": 0.75, + "learning_rate": 4.2687905508705974e-05, + "loss": 1.0066, + "step": 1355 + }, + { + "epoch": 0.75, + "learning_rate": 4.263666634367303e-05, + "loss": 0.9939, + "step": 1360 + }, + { + "epoch": 0.75, + "learning_rate": 4.258527926215178e-05, + "loss": 0.9667, + "step": 1365 + }, + { + "epoch": 0.76, + "learning_rate": 4.253374469511917e-05, + "loss": 0.9999, + "step": 1370 + }, + { + "epoch": 0.76, + "learning_rate": 4.248206307478909e-05, + "loss": 0.9738, + "step": 1375 + }, + { + "epoch": 0.76, + "learning_rate": 4.243023483460875e-05, + "loss": 0.9829, + "step": 1380 + }, + { + "epoch": 0.77, + "learning_rate": 4.237826040925503e-05, + "loss": 1.0107, + "step": 1385 + }, + { + "epoch": 0.77, + "learning_rate": 4.232614023463088e-05, + "loss": 0.9992, + "step": 1390 + }, + { + "epoch": 0.77, + "learning_rate": 4.227387474786159e-05, + "loss": 0.9566, + "step": 1395 + }, + { + "epoch": 0.77, + "learning_rate": 4.222146438729119e-05, + "loss": 0.9722, + "step": 1400 + }, + { + "epoch": 0.78, + "learning_rate": 4.216890959247873e-05, + "loss": 0.988, + "step": 1405 + }, + { + "epoch": 0.78, + "learning_rate": 4.211621080419463e-05, + "loss": 1.0103, + "step": 1410 + }, + { + "epoch": 0.78, + "learning_rate": 4.206336846441695e-05, + "loss": 0.9805, + "step": 1415 + }, + { + "epoch": 0.79, + "learning_rate": 4.201038301632772e-05, + "loss": 1.0177, + "step": 1420 + }, + { + "epoch": 0.79, + "learning_rate": 4.195725490430917e-05, + "loss": 0.9938, + "step": 1425 + }, + { + "epoch": 0.79, + "learning_rate": 4.190398457394007e-05, + "loss": 1.0276, + "step": 1430 + }, + { + "epoch": 0.79, + "learning_rate": 4.1850572471991924e-05, + "loss": 0.9909, + "step": 1435 + }, + { + "epoch": 0.8, + "learning_rate": 4.1797019046425264e-05, + "loss": 0.9794, + "step": 1440 + }, + { + "epoch": 0.8, + "learning_rate": 4.1743324746385914e-05, + "loss": 1.0073, + "step": 1445 + }, + { + "epoch": 0.8, + "learning_rate": 4.1689490022201154e-05, + "loss": 1.0106, + "step": 1450 + }, + { + "epoch": 0.8, + "learning_rate": 4.163551532537601e-05, + "loss": 1.0241, + "step": 1455 + }, + { + "epoch": 0.81, + "learning_rate": 4.1581401108589425e-05, + "loss": 1.0178, + "step": 1460 + }, + { + "epoch": 0.81, + "learning_rate": 4.1527147825690495e-05, + "loss": 0.9426, + "step": 1465 + }, + { + "epoch": 0.81, + "learning_rate": 4.1472755931694626e-05, + "loss": 0.9642, + "step": 1470 + }, + { + "epoch": 0.82, + "learning_rate": 4.141822588277976e-05, + "loss": 0.9678, + "step": 1475 + }, + { + "epoch": 0.82, + "learning_rate": 4.136355813628251e-05, + "loss": 0.9654, + "step": 1480 + }, + { + "epoch": 0.82, + "learning_rate": 4.130875315069435e-05, + "loss": 0.9748, + "step": 1485 + }, + { + "epoch": 0.82, + "learning_rate": 4.125381138565775e-05, + "loss": 0.9321, + "step": 1490 + }, + { + "epoch": 0.83, + "learning_rate": 4.1198733301962346e-05, + "loss": 0.9885, + "step": 1495 + }, + { + "epoch": 0.83, + "learning_rate": 4.114351936154105e-05, + "loss": 0.9385, + "step": 1500 + }, + { + "epoch": 0.83, + "learning_rate": 4.108817002746619e-05, + "loss": 0.9362, + "step": 1505 + }, + { + "epoch": 0.83, + "learning_rate": 4.1032685763945625e-05, + "loss": 0.9764, + "step": 1510 + }, + { + "epoch": 0.84, + "learning_rate": 4.097706703631886e-05, + "loss": 0.961, + "step": 1515 + }, + { + "epoch": 0.84, + "learning_rate": 4.092131431105312e-05, + "loss": 0.9818, + "step": 1520 + }, + { + "epoch": 0.84, + "learning_rate": 4.086542805573945e-05, + "loss": 0.9855, + "step": 1525 + }, + { + "epoch": 0.85, + "learning_rate": 4.080940873908881e-05, + "loss": 0.9618, + "step": 1530 + }, + { + "epoch": 0.85, + "learning_rate": 4.07532568309281e-05, + "loss": 1.0102, + "step": 1535 + }, + { + "epoch": 0.85, + "learning_rate": 4.069697280219628e-05, + "loss": 0.9773, + "step": 1540 + }, + { + "epoch": 0.85, + "learning_rate": 4.0640557124940376e-05, + "loss": 0.9687, + "step": 1545 + }, + { + "epoch": 0.86, + "learning_rate": 4.058401027231152e-05, + "loss": 0.9818, + "step": 1550 + }, + { + "epoch": 0.86, + "learning_rate": 4.052733271856103e-05, + "loss": 0.9911, + "step": 1555 + }, + { + "epoch": 0.86, + "learning_rate": 4.0470524939036355e-05, + "loss": 0.9694, + "step": 1560 + }, + { + "epoch": 0.87, + "learning_rate": 4.0413587410177155e-05, + "loss": 0.9374, + "step": 1565 + }, + { + "epoch": 0.87, + "learning_rate": 4.035652060951128e-05, + "loss": 0.956, + "step": 1570 + }, + { + "epoch": 0.87, + "learning_rate": 4.0299325015650774e-05, + "loss": 0.9491, + "step": 1575 + }, + { + "epoch": 0.87, + "learning_rate": 4.024200110828783e-05, + "loss": 0.9569, + "step": 1580 + }, + { + "epoch": 0.88, + "learning_rate": 4.018454936819082e-05, + "loss": 0.9627, + "step": 1585 + }, + { + "epoch": 0.88, + "learning_rate": 4.012697027720018e-05, + "loss": 0.9922, + "step": 1590 + }, + { + "epoch": 0.88, + "learning_rate": 4.0069264318224506e-05, + "loss": 0.9703, + "step": 1595 + }, + { + "epoch": 0.88, + "learning_rate": 4.0011431975236337e-05, + "loss": 0.9436, + "step": 1600 + }, + { + "epoch": 0.89, + "learning_rate": 3.995347373326822e-05, + "loss": 0.961, + "step": 1605 + }, + { + "epoch": 0.89, + "learning_rate": 3.989539007840861e-05, + "loss": 0.9247, + "step": 1610 + }, + { + "epoch": 0.89, + "learning_rate": 3.983718149779775e-05, + "loss": 0.9537, + "step": 1615 + }, + { + "epoch": 0.9, + "learning_rate": 3.9778848479623656e-05, + "loss": 0.9415, + "step": 1620 + }, + { + "epoch": 0.9, + "learning_rate": 3.972039151311795e-05, + "loss": 0.9954, + "step": 1625 + }, + { + "epoch": 0.9, + "learning_rate": 3.966181108855183e-05, + "loss": 0.9451, + "step": 1630 + }, + { + "epoch": 0.9, + "learning_rate": 3.960310769723189e-05, + "loss": 0.9977, + "step": 1635 + }, + { + "epoch": 0.91, + "learning_rate": 3.9544281831496034e-05, + "loss": 0.9806, + "step": 1640 + }, + { + "epoch": 0.91, + "learning_rate": 3.9485333984709374e-05, + "loss": 0.9851, + "step": 1645 + }, + { + "epoch": 0.91, + "learning_rate": 3.942626465126001e-05, + "loss": 1.0089, + "step": 1650 + }, + { + "epoch": 0.92, + "learning_rate": 3.9367074326555e-05, + "loss": 0.9562, + "step": 1655 + }, + { + "epoch": 0.92, + "learning_rate": 3.930776350701609e-05, + "loss": 0.9892, + "step": 1660 + }, + { + "epoch": 0.92, + "learning_rate": 3.92483326900756e-05, + "loss": 0.9875, + "step": 1665 + }, + { + "epoch": 0.92, + "learning_rate": 3.91887823741723e-05, + "loss": 1.0012, + "step": 1670 + }, + { + "epoch": 0.93, + "learning_rate": 3.9129113058747136e-05, + "loss": 0.97, + "step": 1675 + }, + { + "epoch": 0.93, + "learning_rate": 3.9069325244239095e-05, + "loss": 0.9793, + "step": 1680 + }, + { + "epoch": 0.93, + "learning_rate": 3.900941943208103e-05, + "loss": 0.9864, + "step": 1685 + }, + { + "epoch": 0.93, + "learning_rate": 3.894939612469539e-05, + "loss": 0.9596, + "step": 1690 + }, + { + "epoch": 0.94, + "learning_rate": 3.888925582549006e-05, + "loss": 0.9679, + "step": 1695 + }, + { + "epoch": 0.94, + "learning_rate": 3.882899903885412e-05, + "loss": 0.9395, + "step": 1700 + }, + { + "epoch": 0.94, + "learning_rate": 3.876862627015361e-05, + "loss": 0.9774, + "step": 1705 + }, + { + "epoch": 0.95, + "learning_rate": 3.87081380257273e-05, + "loss": 0.9686, + "step": 1710 + }, + { + "epoch": 0.95, + "learning_rate": 3.864753481288244e-05, + "loss": 0.953, + "step": 1715 + }, + { + "epoch": 0.95, + "learning_rate": 3.8586817139890515e-05, + "loss": 0.9719, + "step": 1720 + }, + { + "epoch": 0.95, + "learning_rate": 3.852598551598294e-05, + "loss": 0.9748, + "step": 1725 + }, + { + "epoch": 0.96, + "learning_rate": 3.8465040451346874e-05, + "loss": 0.9741, + "step": 1730 + }, + { + "epoch": 0.96, + "learning_rate": 3.8403982457120836e-05, + "loss": 0.9747, + "step": 1735 + }, + { + "epoch": 0.96, + "learning_rate": 3.834281204539051e-05, + "loss": 0.9791, + "step": 1740 + }, + { + "epoch": 0.96, + "learning_rate": 3.828152972918438e-05, + "loss": 0.9704, + "step": 1745 + }, + { + "epoch": 0.97, + "learning_rate": 3.82201360224695e-05, + "loss": 0.9556, + "step": 1750 + }, + { + "epoch": 0.97, + "learning_rate": 3.815863144014711e-05, + "loss": 0.9552, + "step": 1755 + }, + { + "epoch": 0.97, + "learning_rate": 3.809701649804834e-05, + "loss": 0.9594, + "step": 1760 + }, + { + "epoch": 0.98, + "learning_rate": 3.8035291712929926e-05, + "loss": 0.959, + "step": 1765 + }, + { + "epoch": 0.98, + "learning_rate": 3.797345760246982e-05, + "loss": 1.0254, + "step": 1770 + }, + { + "epoch": 0.98, + "learning_rate": 3.791151468526289e-05, + "loss": 0.9925, + "step": 1775 + }, + { + "epoch": 0.98, + "learning_rate": 3.784946348081654e-05, + "loss": 0.9516, + "step": 1780 + }, + { + "epoch": 0.99, + "learning_rate": 3.7787304509546365e-05, + "loss": 0.954, + "step": 1785 + }, + { + "epoch": 0.99, + "learning_rate": 3.7725038292771774e-05, + "loss": 0.9465, + "step": 1790 + }, + { + "epoch": 0.99, + "learning_rate": 3.766266535271167e-05, + "loss": 0.9792, + "step": 1795 + }, + { + "epoch": 1.0, + "learning_rate": 3.760018621248e-05, + "loss": 0.964, + "step": 1800 + }, + { + "epoch": 1.0, + "learning_rate": 3.75376013960814e-05, + "loss": 0.9419, + "step": 1805 + }, + { + "epoch": 1.0, + "learning_rate": 3.747491142840681e-05, + "loss": 0.9818, + "step": 1810 + }, + { + "epoch": 1.0, + "learning_rate": 3.741211683522904e-05, + "loss": 0.9153, + "step": 1815 + }, + { + "epoch": 1.01, + "learning_rate": 3.734921814319841e-05, + "loss": 0.955, + "step": 1820 + }, + { + "epoch": 1.01, + "learning_rate": 3.728621587983828e-05, + "loss": 0.9467, + "step": 1825 + }, + { + "epoch": 1.01, + "learning_rate": 3.722311057354067e-05, + "loss": 0.9816, + "step": 1830 + }, + { + "epoch": 1.01, + "learning_rate": 3.715990275356178e-05, + "loss": 0.9727, + "step": 1835 + }, + { + "epoch": 1.02, + "learning_rate": 3.7096592950017617e-05, + "loss": 0.9822, + "step": 1840 + }, + { + "epoch": 1.02, + "learning_rate": 3.703318169387947e-05, + "loss": 0.942, + "step": 1845 + }, + { + "epoch": 1.02, + "learning_rate": 3.696966951696952e-05, + "loss": 0.9306, + "step": 1850 + }, + { + "epoch": 1.03, + "learning_rate": 3.690605695195637e-05, + "loss": 0.9654, + "step": 1855 + }, + { + "epoch": 1.03, + "learning_rate": 3.684234453235054e-05, + "loss": 0.9592, + "step": 1860 + }, + { + "epoch": 1.03, + "learning_rate": 3.677853279250003e-05, + "loss": 0.988, + "step": 1865 + }, + { + "epoch": 1.03, + "learning_rate": 3.671462226758583e-05, + "loss": 0.9462, + "step": 1870 + }, + { + "epoch": 1.04, + "learning_rate": 3.665061349361742e-05, + "loss": 0.9685, + "step": 1875 + }, + { + "epoch": 1.04, + "learning_rate": 3.658650700742828e-05, + "loss": 0.9772, + "step": 1880 + }, + { + "epoch": 1.04, + "learning_rate": 3.6522303346671404e-05, + "loss": 0.9482, + "step": 1885 + }, + { + "epoch": 1.05, + "learning_rate": 3.645800304981477e-05, + "loss": 1.0069, + "step": 1890 + }, + { + "epoch": 1.05, + "learning_rate": 3.639360665613683e-05, + "loss": 0.9003, + "step": 1895 + }, + { + "epoch": 1.05, + "learning_rate": 3.632911470572197e-05, + "loss": 0.9279, + "step": 1900 + } + ], + "max_steps": 5424, + "num_train_epochs": 3, + "total_flos": 6.174232818588058e+17, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-1900/training_args.bin b/checkpoint-1900/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..77433459daca6603950afc463ff989e817f81ac3 --- /dev/null +++ b/checkpoint-1900/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0f6a8853599a52b1f15984d75ce4b6434c335fdc67d4509d0b2ca0d8c613590a +size 3362 diff --git a/checkpoint-200/README.md b/checkpoint-200/README.md new file mode 100644 index 0000000000000000000000000000000000000000..53c0fe6b436905ad7a2c060e98fe1ebdeac1a2fe --- /dev/null +++ b/checkpoint-200/README.md @@ -0,0 +1,15 @@ +--- +library_name: peft +--- +## Training procedure + + +The following `bitsandbytes` quantization config was used during training: +- load_in_8bit: True +- llm_int8_threshold: 6.0 +- llm_int8_skip_modules: None +- llm_int8_enable_fp32_cpu_offload: False +### Framework versions + + +- PEFT 0.5.0 diff --git a/checkpoint-200/adapter_config.json b/checkpoint-200/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..514ae99ac7471cc26016f7575e44dd8304f29b07 --- /dev/null +++ b/checkpoint-200/adapter_config.json @@ -0,0 +1,21 @@ +{ + "auto_mapping": null, + "base_model_name_or_path": "meta-llama/Llama-2-7b-chat-hf", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "q_proj", + "v_proj" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-200/adapter_model.bin b/checkpoint-200/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..81daae1474949d54d34e08edfe8f2e257486d0a5 --- /dev/null +++ b/checkpoint-200/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fd2d842c148c34903560212f0fe90cb8f7701bee03727ca7bbb5a34f4d269845 +size 16821197 diff --git a/checkpoint-200/finetuning_args.json b/checkpoint-200/finetuning_args.json new file mode 100644 index 0000000000000000000000000000000000000000..d01efc206b59c6f88548e8f3940579f2ed2af33b --- /dev/null +++ b/checkpoint-200/finetuning_args.json @@ -0,0 +1,16 @@ +{ + "dpo_beta": 0.1, + "finetuning_type": "lora", + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "lora_rank": 8, + "lora_target": [ + "q_proj", + "v_proj" + ], + "name_module_trainable": "mlp", + "num_hidden_layers": 32, + "num_layer_trainable": 3, + "ppo_score_norm": false, + "resume_lora_training": true +} diff --git a/checkpoint-200/optimizer.pt b/checkpoint-200/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..cd0ca2e483e16409bb804a6491593fbfa53ef0cf --- /dev/null +++ b/checkpoint-200/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1180b74046e5cef9f4ced87381dd202ad5fcf6c02db677e81636c7ecb79a209e +size 33629765 diff --git a/checkpoint-200/rng_state.pth b/checkpoint-200/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..48e6e67f9fe7342f901a686941f7836d3c866ce8 --- /dev/null +++ b/checkpoint-200/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6d02de9936ecaaf49875d4fe56124cae0a6769db4bfcc1adc9d6b2f802dc06f2 +size 14575 diff --git a/checkpoint-200/scaler.pt b/checkpoint-200/scaler.pt new file mode 100644 index 0000000000000000000000000000000000000000..87710d0ddc627c070366fcb3112b07dc60d97295 --- /dev/null +++ b/checkpoint-200/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4fccf0f9be1bb8f24861e4393745b3e09cc2687125a69e3757955fb0f0925ea5 +size 557 diff --git a/checkpoint-200/scheduler.pt b/checkpoint-200/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..515c02d83d26983c06a8feff93329ee7034df8e8 --- /dev/null +++ b/checkpoint-200/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a95f9614881bc91e6037f672db0bc581684c89ca48002138ca0af76b078c581d +size 627 diff --git a/checkpoint-200/trainer_state.json b/checkpoint-200/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..d54b9e16279600b7962678f5cf59d6e74de8711d --- /dev/null +++ b/checkpoint-200/trainer_state.json @@ -0,0 +1,256 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.11058888581697539, + "global_step": 200, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.9999895164082156e-05, + "loss": 1.6349, + "step": 5 + }, + { + "epoch": 0.01, + "learning_rate": 4.999958065720787e-05, + "loss": 1.6199, + "step": 10 + }, + { + "epoch": 0.01, + "learning_rate": 4.999905648201487e-05, + "loss": 1.4834, + "step": 15 + }, + { + "epoch": 0.01, + "learning_rate": 4.999832264289934e-05, + "loss": 1.3882, + "step": 20 + }, + { + "epoch": 0.01, + "learning_rate": 4.999737914601591e-05, + "loss": 1.3679, + "step": 25 + }, + { + "epoch": 0.02, + "learning_rate": 4.999622599927756e-05, + "loss": 1.2396, + "step": 30 + }, + { + "epoch": 0.02, + "learning_rate": 4.999486321235559e-05, + "loss": 1.321, + "step": 35 + }, + { + "epoch": 0.02, + "learning_rate": 4.9993290796679516e-05, + "loss": 1.2874, + "step": 40 + }, + { + "epoch": 0.02, + "learning_rate": 4.999150876543699e-05, + "loss": 1.2607, + "step": 45 + }, + { + "epoch": 0.03, + "learning_rate": 4.9989517133573694e-05, + "loss": 1.2454, + "step": 50 + }, + { + "epoch": 0.03, + "learning_rate": 4.9987315917793174e-05, + "loss": 1.2799, + "step": 55 + }, + { + "epoch": 0.03, + "learning_rate": 4.998490513655676e-05, + "loss": 1.2575, + "step": 60 + }, + { + "epoch": 0.04, + "learning_rate": 4.998228481008337e-05, + "loss": 1.2404, + "step": 65 + }, + { + "epoch": 0.04, + "learning_rate": 4.997945496034934e-05, + "loss": 1.2219, + "step": 70 + }, + { + "epoch": 0.04, + "learning_rate": 4.9976415611088267e-05, + "loss": 1.2241, + "step": 75 + }, + { + "epoch": 0.04, + "learning_rate": 4.997316678779079e-05, + "loss": 1.1716, + "step": 80 + }, + { + "epoch": 0.05, + "learning_rate": 4.996970851770438e-05, + "loss": 1.1883, + "step": 85 + }, + { + "epoch": 0.05, + "learning_rate": 4.9966040829833115e-05, + "loss": 1.205, + "step": 90 + }, + { + "epoch": 0.05, + "learning_rate": 4.9962163754937426e-05, + "loss": 1.1246, + "step": 95 + }, + { + "epoch": 0.06, + "learning_rate": 4.995807732553384e-05, + "loss": 1.1636, + "step": 100 + }, + { + "epoch": 0.06, + "learning_rate": 4.9953781575894723e-05, + "loss": 1.158, + "step": 105 + }, + { + "epoch": 0.06, + "learning_rate": 4.9949276542048e-05, + "loss": 1.1477, + "step": 110 + }, + { + "epoch": 0.06, + "learning_rate": 4.9944562261776805e-05, + "loss": 1.1678, + "step": 115 + }, + { + "epoch": 0.07, + "learning_rate": 4.9939638774619216e-05, + "loss": 1.1501, + "step": 120 + }, + { + "epoch": 0.07, + "learning_rate": 4.99345061218679e-05, + "loss": 1.1955, + "step": 125 + }, + { + "epoch": 0.07, + "learning_rate": 4.9929164346569756e-05, + "loss": 1.1724, + "step": 130 + }, + { + "epoch": 0.07, + "learning_rate": 4.9923613493525576e-05, + "loss": 1.177, + "step": 135 + }, + { + "epoch": 0.08, + "learning_rate": 4.991785360928968e-05, + "loss": 1.1418, + "step": 140 + }, + { + "epoch": 0.08, + "learning_rate": 4.991188474216947e-05, + "loss": 1.1898, + "step": 145 + }, + { + "epoch": 0.08, + "learning_rate": 4.9905706942225094e-05, + "loss": 1.1479, + "step": 150 + }, + { + "epoch": 0.09, + "learning_rate": 4.9899320261268966e-05, + "loss": 1.1356, + "step": 155 + }, + { + "epoch": 0.09, + "learning_rate": 4.989272475286537e-05, + "loss": 1.1397, + "step": 160 + }, + { + "epoch": 0.09, + "learning_rate": 4.9885920472330004e-05, + "loss": 1.1215, + "step": 165 + }, + { + "epoch": 0.09, + "learning_rate": 4.9878907476729516e-05, + "loss": 1.167, + "step": 170 + }, + { + "epoch": 0.1, + "learning_rate": 4.9871685824881e-05, + "loss": 1.1219, + "step": 175 + }, + { + "epoch": 0.1, + "learning_rate": 4.9864255577351534e-05, + "loss": 1.0835, + "step": 180 + }, + { + "epoch": 0.1, + "learning_rate": 4.985661679645769e-05, + "loss": 1.0721, + "step": 185 + }, + { + "epoch": 0.11, + "learning_rate": 4.9848769546264915e-05, + "loss": 1.0692, + "step": 190 + }, + { + "epoch": 0.11, + "learning_rate": 4.9840713892587146e-05, + "loss": 1.0488, + "step": 195 + }, + { + "epoch": 0.11, + "learning_rate": 4.983244990298609e-05, + "loss": 1.1285, + "step": 200 + } + ], + "max_steps": 5424, + "num_train_epochs": 3, + "total_flos": 6.4994062368768e+16, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-200/training_args.bin b/checkpoint-200/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..77433459daca6603950afc463ff989e817f81ac3 --- /dev/null +++ b/checkpoint-200/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0f6a8853599a52b1f15984d75ce4b6434c335fdc67d4509d0b2ca0d8c613590a +size 3362 diff --git a/checkpoint-2000/README.md b/checkpoint-2000/README.md new file mode 100644 index 0000000000000000000000000000000000000000..53c0fe6b436905ad7a2c060e98fe1ebdeac1a2fe --- /dev/null +++ b/checkpoint-2000/README.md @@ -0,0 +1,15 @@ +--- +library_name: peft +--- +## Training procedure + + +The following `bitsandbytes` quantization config was used during training: +- load_in_8bit: True +- llm_int8_threshold: 6.0 +- llm_int8_skip_modules: None +- llm_int8_enable_fp32_cpu_offload: False +### Framework versions + + +- PEFT 0.5.0 diff --git a/checkpoint-2000/adapter_config.json b/checkpoint-2000/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..514ae99ac7471cc26016f7575e44dd8304f29b07 --- /dev/null +++ b/checkpoint-2000/adapter_config.json @@ -0,0 +1,21 @@ +{ + "auto_mapping": null, + "base_model_name_or_path": "meta-llama/Llama-2-7b-chat-hf", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "q_proj", + "v_proj" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-2000/adapter_model.bin b/checkpoint-2000/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..de73ede0438bdbf98d727d6eb06408f3b92a5b5e --- /dev/null +++ b/checkpoint-2000/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6fec93922f5d6f0ddc7b5d9328db89aac822271d26033ed2ab663cfa41a20d93 +size 16821197 diff --git a/checkpoint-2000/finetuning_args.json b/checkpoint-2000/finetuning_args.json new file mode 100644 index 0000000000000000000000000000000000000000..d01efc206b59c6f88548e8f3940579f2ed2af33b --- /dev/null +++ b/checkpoint-2000/finetuning_args.json @@ -0,0 +1,16 @@ +{ + "dpo_beta": 0.1, + "finetuning_type": "lora", + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "lora_rank": 8, + "lora_target": [ + "q_proj", + "v_proj" + ], + "name_module_trainable": "mlp", + "num_hidden_layers": 32, + "num_layer_trainable": 3, + "ppo_score_norm": false, + "resume_lora_training": true +} diff --git a/checkpoint-2000/optimizer.pt b/checkpoint-2000/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..94138e83a0e563734e542698f7b818b9baad600c --- /dev/null +++ b/checkpoint-2000/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9dc411bbab5acda18e7a97a113992989971b36aa5bab7d1572edae5e472dc378 +size 33629893 diff --git a/checkpoint-2000/rng_state.pth b/checkpoint-2000/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..3d23677ac9999be0465fc13163752d819321810f --- /dev/null +++ b/checkpoint-2000/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ccfb3729f52db9d1d38a1974f1656a0cd37b97ad2c4524db1ca07de1439405ee +size 14575 diff --git a/checkpoint-2000/scaler.pt b/checkpoint-2000/scaler.pt new file mode 100644 index 0000000000000000000000000000000000000000..21ece35416ce79724d347155f11cfa297b97cabc --- /dev/null +++ b/checkpoint-2000/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dd2de9749828adacdf103bf6e9592702bb7067a2c1df27dd62ab38c1eb8c070f +size 557 diff --git a/checkpoint-2000/scheduler.pt b/checkpoint-2000/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..174ddc034d1dd2f71f32a615c70aa0718830f3f2 --- /dev/null +++ b/checkpoint-2000/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:34714cafee13f3adec6a27118b37809e19e77c9588fe35ed6c224901ce798f01 +size 627 diff --git a/checkpoint-2000/trainer_state.json b/checkpoint-2000/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..e3cc0357dfb565ecc0e2d0ba4b2243a1b7bc9f4e --- /dev/null +++ b/checkpoint-2000/trainer_state.json @@ -0,0 +1,2416 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.105888858169754, + "global_step": 2000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.9999895164082156e-05, + "loss": 1.6349, + "step": 5 + }, + { + "epoch": 0.01, + "learning_rate": 4.999958065720787e-05, + "loss": 1.6199, + "step": 10 + }, + { + "epoch": 0.01, + "learning_rate": 4.999905648201487e-05, + "loss": 1.4834, + "step": 15 + }, + { + "epoch": 0.01, + "learning_rate": 4.999832264289934e-05, + "loss": 1.3882, + "step": 20 + }, + { + "epoch": 0.01, + "learning_rate": 4.999737914601591e-05, + "loss": 1.3679, + "step": 25 + }, + { + "epoch": 0.02, + "learning_rate": 4.999622599927756e-05, + "loss": 1.2396, + "step": 30 + }, + { + "epoch": 0.02, + "learning_rate": 4.999486321235559e-05, + "loss": 1.321, + "step": 35 + }, + { + "epoch": 0.02, + "learning_rate": 4.9993290796679516e-05, + "loss": 1.2874, + "step": 40 + }, + { + "epoch": 0.02, + "learning_rate": 4.999150876543699e-05, + "loss": 1.2607, + "step": 45 + }, + { + "epoch": 0.03, + "learning_rate": 4.9989517133573694e-05, + "loss": 1.2454, + "step": 50 + }, + { + "epoch": 0.03, + "learning_rate": 4.9987315917793174e-05, + "loss": 1.2799, + "step": 55 + }, + { + "epoch": 0.03, + "learning_rate": 4.998490513655676e-05, + "loss": 1.2575, + "step": 60 + }, + { + "epoch": 0.04, + "learning_rate": 4.998228481008337e-05, + "loss": 1.2404, + "step": 65 + }, + { + "epoch": 0.04, + "learning_rate": 4.997945496034934e-05, + "loss": 1.2219, + "step": 70 + }, + { + "epoch": 0.04, + "learning_rate": 4.9976415611088267e-05, + "loss": 1.2241, + "step": 75 + }, + { + "epoch": 0.04, + "learning_rate": 4.997316678779079e-05, + "loss": 1.1716, + "step": 80 + }, + { + "epoch": 0.05, + "learning_rate": 4.996970851770438e-05, + "loss": 1.1883, + "step": 85 + }, + { + "epoch": 0.05, + "learning_rate": 4.9966040829833115e-05, + "loss": 1.205, + "step": 90 + }, + { + "epoch": 0.05, + "learning_rate": 4.9962163754937426e-05, + "loss": 1.1246, + "step": 95 + }, + { + "epoch": 0.06, + "learning_rate": 4.995807732553384e-05, + "loss": 1.1636, + "step": 100 + }, + { + "epoch": 0.06, + "learning_rate": 4.9953781575894723e-05, + "loss": 1.158, + "step": 105 + }, + { + "epoch": 0.06, + "learning_rate": 4.9949276542048e-05, + "loss": 1.1477, + "step": 110 + }, + { + "epoch": 0.06, + "learning_rate": 4.9944562261776805e-05, + "loss": 1.1678, + "step": 115 + }, + { + "epoch": 0.07, + "learning_rate": 4.9939638774619216e-05, + "loss": 1.1501, + "step": 120 + }, + { + "epoch": 0.07, + "learning_rate": 4.99345061218679e-05, + "loss": 1.1955, + "step": 125 + }, + { + "epoch": 0.07, + "learning_rate": 4.9929164346569756e-05, + "loss": 1.1724, + "step": 130 + }, + { + "epoch": 0.07, + "learning_rate": 4.9923613493525576e-05, + "loss": 1.177, + "step": 135 + }, + { + "epoch": 0.08, + "learning_rate": 4.991785360928968e-05, + "loss": 1.1418, + "step": 140 + }, + { + "epoch": 0.08, + "learning_rate": 4.991188474216947e-05, + "loss": 1.1898, + "step": 145 + }, + { + "epoch": 0.08, + "learning_rate": 4.9905706942225094e-05, + "loss": 1.1479, + "step": 150 + }, + { + "epoch": 0.09, + "learning_rate": 4.9899320261268966e-05, + "loss": 1.1356, + "step": 155 + }, + { + "epoch": 0.09, + "learning_rate": 4.989272475286537e-05, + "loss": 1.1397, + "step": 160 + }, + { + "epoch": 0.09, + "learning_rate": 4.9885920472330004e-05, + "loss": 1.1215, + "step": 165 + }, + { + "epoch": 0.09, + "learning_rate": 4.9878907476729516e-05, + "loss": 1.167, + "step": 170 + }, + { + "epoch": 0.1, + "learning_rate": 4.9871685824881e-05, + "loss": 1.1219, + "step": 175 + }, + { + "epoch": 0.1, + "learning_rate": 4.9864255577351534e-05, + "loss": 1.0835, + "step": 180 + }, + { + "epoch": 0.1, + "learning_rate": 4.985661679645769e-05, + "loss": 1.0721, + "step": 185 + }, + { + "epoch": 0.11, + "learning_rate": 4.9848769546264915e-05, + "loss": 1.0692, + "step": 190 + }, + { + "epoch": 0.11, + "learning_rate": 4.9840713892587146e-05, + "loss": 1.0488, + "step": 195 + }, + { + "epoch": 0.11, + "learning_rate": 4.983244990298609e-05, + "loss": 1.1285, + "step": 200 + }, + { + "epoch": 0.11, + "learning_rate": 4.982397764677081e-05, + "loss": 1.0832, + "step": 205 + }, + { + "epoch": 0.12, + "learning_rate": 4.981529719499704e-05, + "loss": 1.0652, + "step": 210 + }, + { + "epoch": 0.12, + "learning_rate": 4.980640862046663e-05, + "loss": 1.1043, + "step": 215 + }, + { + "epoch": 0.12, + "learning_rate": 4.979731199772693e-05, + "loss": 1.112, + "step": 220 + }, + { + "epoch": 0.12, + "learning_rate": 4.9788007403070146e-05, + "loss": 1.1029, + "step": 225 + }, + { + "epoch": 0.13, + "learning_rate": 4.977849491453277e-05, + "loss": 1.0869, + "step": 230 + }, + { + "epoch": 0.13, + "learning_rate": 4.976877461189481e-05, + "loss": 1.0843, + "step": 235 + }, + { + "epoch": 0.13, + "learning_rate": 4.975884657667922e-05, + "loss": 1.0789, + "step": 240 + }, + { + "epoch": 0.14, + "learning_rate": 4.974871089215118e-05, + "loss": 1.0449, + "step": 245 + }, + { + "epoch": 0.14, + "learning_rate": 4.9738367643317405e-05, + "loss": 1.1053, + "step": 250 + }, + { + "epoch": 0.14, + "learning_rate": 4.9727816916925395e-05, + "loss": 1.0651, + "step": 255 + }, + { + "epoch": 0.14, + "learning_rate": 4.971705880146276e-05, + "loss": 1.0828, + "step": 260 + }, + { + "epoch": 0.15, + "learning_rate": 4.970609338715646e-05, + "loss": 1.0932, + "step": 265 + }, + { + "epoch": 0.15, + "learning_rate": 4.969492076597203e-05, + "loss": 1.0648, + "step": 270 + }, + { + "epoch": 0.15, + "learning_rate": 4.968354103161283e-05, + "loss": 1.0948, + "step": 275 + }, + { + "epoch": 0.15, + "learning_rate": 4.967195427951926e-05, + "loss": 1.0721, + "step": 280 + }, + { + "epoch": 0.16, + "learning_rate": 4.9660160606867936e-05, + "loss": 1.124, + "step": 285 + }, + { + "epoch": 0.16, + "learning_rate": 4.9648160112570896e-05, + "loss": 1.0963, + "step": 290 + }, + { + "epoch": 0.16, + "learning_rate": 4.9635952897274773e-05, + "loss": 1.1078, + "step": 295 + }, + { + "epoch": 0.17, + "learning_rate": 4.9623539063359925e-05, + "loss": 1.1059, + "step": 300 + }, + { + "epoch": 0.17, + "learning_rate": 4.961091871493962e-05, + "loss": 1.1032, + "step": 305 + }, + { + "epoch": 0.17, + "learning_rate": 4.959809195785912e-05, + "loss": 1.0595, + "step": 310 + }, + { + "epoch": 0.17, + "learning_rate": 4.958505889969481e-05, + "loss": 1.1096, + "step": 315 + }, + { + "epoch": 0.18, + "learning_rate": 4.957181964975329e-05, + "loss": 1.0589, + "step": 320 + }, + { + "epoch": 0.18, + "learning_rate": 4.955837431907049e-05, + "loss": 1.0608, + "step": 325 + }, + { + "epoch": 0.18, + "learning_rate": 4.954472302041069e-05, + "loss": 1.0819, + "step": 330 + }, + { + "epoch": 0.19, + "learning_rate": 4.9530865868265605e-05, + "loss": 1.0759, + "step": 335 + }, + { + "epoch": 0.19, + "learning_rate": 4.951680297885342e-05, + "loss": 1.0515, + "step": 340 + }, + { + "epoch": 0.19, + "learning_rate": 4.950253447011779e-05, + "loss": 1.0371, + "step": 345 + }, + { + "epoch": 0.19, + "learning_rate": 4.948806046172691e-05, + "loss": 1.0619, + "step": 350 + }, + { + "epoch": 0.2, + "learning_rate": 4.947338107507245e-05, + "loss": 1.0757, + "step": 355 + }, + { + "epoch": 0.2, + "learning_rate": 4.945849643326857e-05, + "loss": 1.0686, + "step": 360 + }, + { + "epoch": 0.2, + "learning_rate": 4.9443406661150874e-05, + "loss": 1.0809, + "step": 365 + }, + { + "epoch": 0.2, + "learning_rate": 4.942811188527537e-05, + "loss": 1.0704, + "step": 370 + }, + { + "epoch": 0.21, + "learning_rate": 4.941261223391742e-05, + "loss": 1.0655, + "step": 375 + }, + { + "epoch": 0.21, + "learning_rate": 4.939690783707063e-05, + "loss": 1.1182, + "step": 380 + }, + { + "epoch": 0.21, + "learning_rate": 4.938099882644578e-05, + "loss": 1.081, + "step": 385 + }, + { + "epoch": 0.22, + "learning_rate": 4.9364885335469734e-05, + "loss": 1.0792, + "step": 390 + }, + { + "epoch": 0.22, + "learning_rate": 4.93485674992843e-05, + "loss": 1.0244, + "step": 395 + }, + { + "epoch": 0.22, + "learning_rate": 4.933204545474511e-05, + "loss": 1.0531, + "step": 400 + }, + { + "epoch": 0.22, + "learning_rate": 4.9315319340420465e-05, + "loss": 1.048, + "step": 405 + }, + { + "epoch": 0.23, + "learning_rate": 4.929838929659015e-05, + "loss": 1.04, + "step": 410 + }, + { + "epoch": 0.23, + "learning_rate": 4.9281255465244314e-05, + "loss": 1.0686, + "step": 415 + }, + { + "epoch": 0.23, + "learning_rate": 4.926391799008223e-05, + "loss": 1.0933, + "step": 420 + }, + { + "epoch": 0.24, + "learning_rate": 4.924637701651111e-05, + "loss": 1.0313, + "step": 425 + }, + { + "epoch": 0.24, + "learning_rate": 4.9228632691644874e-05, + "loss": 1.0811, + "step": 430 + }, + { + "epoch": 0.24, + "learning_rate": 4.921068516430293e-05, + "loss": 1.0617, + "step": 435 + }, + { + "epoch": 0.24, + "learning_rate": 4.919253458500892e-05, + "loss": 1.0482, + "step": 440 + }, + { + "epoch": 0.25, + "learning_rate": 4.9174181105989445e-05, + "loss": 1.0681, + "step": 445 + }, + { + "epoch": 0.25, + "learning_rate": 4.9155624881172834e-05, + "loss": 1.0476, + "step": 450 + }, + { + "epoch": 0.25, + "learning_rate": 4.913686606618777e-05, + "loss": 1.0463, + "step": 455 + }, + { + "epoch": 0.25, + "learning_rate": 4.911790481836208e-05, + "loss": 1.0213, + "step": 460 + }, + { + "epoch": 0.26, + "learning_rate": 4.909874129672133e-05, + "loss": 0.9855, + "step": 465 + }, + { + "epoch": 0.26, + "learning_rate": 4.907937566198757e-05, + "loss": 1.045, + "step": 470 + }, + { + "epoch": 0.26, + "learning_rate": 4.9059808076577914e-05, + "loss": 1.0442, + "step": 475 + }, + { + "epoch": 0.27, + "learning_rate": 4.904003870460323e-05, + "loss": 1.0354, + "step": 480 + }, + { + "epoch": 0.27, + "learning_rate": 4.9020067711866735e-05, + "loss": 1.0331, + "step": 485 + }, + { + "epoch": 0.27, + "learning_rate": 4.899989526586261e-05, + "loss": 1.0585, + "step": 490 + }, + { + "epoch": 0.27, + "learning_rate": 4.8979521535774636e-05, + "loss": 1.0223, + "step": 495 + }, + { + "epoch": 0.28, + "learning_rate": 4.895894669247468e-05, + "loss": 1.0118, + "step": 500 + }, + { + "epoch": 0.28, + "learning_rate": 4.8938170908521356e-05, + "loss": 1.0508, + "step": 505 + }, + { + "epoch": 0.28, + "learning_rate": 4.8917194358158534e-05, + "loss": 1.0656, + "step": 510 + }, + { + "epoch": 0.28, + "learning_rate": 4.8896017217313886e-05, + "loss": 1.0655, + "step": 515 + }, + { + "epoch": 0.29, + "learning_rate": 4.887463966359741e-05, + "loss": 1.0833, + "step": 520 + }, + { + "epoch": 0.29, + "learning_rate": 4.8853061876299956e-05, + "loss": 1.068, + "step": 525 + }, + { + "epoch": 0.29, + "learning_rate": 4.8831284036391684e-05, + "loss": 1.0487, + "step": 530 + }, + { + "epoch": 0.3, + "learning_rate": 4.880930632652058e-05, + "loss": 1.059, + "step": 535 + }, + { + "epoch": 0.3, + "learning_rate": 4.878712893101092e-05, + "loss": 1.0408, + "step": 540 + }, + { + "epoch": 0.3, + "learning_rate": 4.876475203586171e-05, + "loss": 0.9976, + "step": 545 + }, + { + "epoch": 0.3, + "learning_rate": 4.874217582874514e-05, + "loss": 1.0358, + "step": 550 + }, + { + "epoch": 0.31, + "learning_rate": 4.8719400499005e-05, + "loss": 1.0073, + "step": 555 + }, + { + "epoch": 0.31, + "learning_rate": 4.869642623765509e-05, + "loss": 1.0742, + "step": 560 + }, + { + "epoch": 0.31, + "learning_rate": 4.867325323737765e-05, + "loss": 1.0463, + "step": 565 + }, + { + "epoch": 0.32, + "learning_rate": 4.864988169252168e-05, + "loss": 1.0242, + "step": 570 + }, + { + "epoch": 0.32, + "learning_rate": 4.8626311799101375e-05, + "loss": 1.0773, + "step": 575 + }, + { + "epoch": 0.32, + "learning_rate": 4.860254375479446e-05, + "loss": 1.0371, + "step": 580 + }, + { + "epoch": 0.32, + "learning_rate": 4.8578577758940504e-05, + "loss": 1.0399, + "step": 585 + }, + { + "epoch": 0.33, + "learning_rate": 4.855441401253928e-05, + "loss": 1.0411, + "step": 590 + }, + { + "epoch": 0.33, + "learning_rate": 4.8530052718249076e-05, + "loss": 1.0476, + "step": 595 + }, + { + "epoch": 0.33, + "learning_rate": 4.850549408038498e-05, + "loss": 1.0075, + "step": 600 + }, + { + "epoch": 0.33, + "learning_rate": 4.848073830491717e-05, + "loss": 1.0045, + "step": 605 + }, + { + "epoch": 0.34, + "learning_rate": 4.845578559946923e-05, + "loss": 1.0368, + "step": 610 + }, + { + "epoch": 0.34, + "learning_rate": 4.8430636173316306e-05, + "loss": 1.0156, + "step": 615 + }, + { + "epoch": 0.34, + "learning_rate": 4.840529023738348e-05, + "loss": 1.0334, + "step": 620 + }, + { + "epoch": 0.35, + "learning_rate": 4.837974800424389e-05, + "loss": 0.9994, + "step": 625 + }, + { + "epoch": 0.35, + "learning_rate": 4.8354009688117026e-05, + "loss": 1.0467, + "step": 630 + }, + { + "epoch": 0.35, + "learning_rate": 4.8328075504866874e-05, + "loss": 0.9968, + "step": 635 + }, + { + "epoch": 0.35, + "learning_rate": 4.8301945672000164e-05, + "loss": 1.041, + "step": 640 + }, + { + "epoch": 0.36, + "learning_rate": 4.8275620408664487e-05, + "loss": 0.9811, + "step": 645 + }, + { + "epoch": 0.36, + "learning_rate": 4.8249099935646494e-05, + "loss": 1.0235, + "step": 650 + }, + { + "epoch": 0.36, + "learning_rate": 4.822238447537003e-05, + "loss": 1.0152, + "step": 655 + }, + { + "epoch": 0.36, + "learning_rate": 4.819547425189429e-05, + "loss": 1.038, + "step": 660 + }, + { + "epoch": 0.37, + "learning_rate": 4.81683694909119e-05, + "loss": 1.0387, + "step": 665 + }, + { + "epoch": 0.37, + "learning_rate": 4.814107041974707e-05, + "loss": 1.019, + "step": 670 + }, + { + "epoch": 0.37, + "learning_rate": 4.811357726735366e-05, + "loss": 1.0403, + "step": 675 + }, + { + "epoch": 0.38, + "learning_rate": 4.808589026431324e-05, + "loss": 1.0828, + "step": 680 + }, + { + "epoch": 0.38, + "learning_rate": 4.805800964283322e-05, + "loss": 1.0128, + "step": 685 + }, + { + "epoch": 0.38, + "learning_rate": 4.802993563674483e-05, + "loss": 1.0502, + "step": 690 + }, + { + "epoch": 0.38, + "learning_rate": 4.80016684815012e-05, + "loss": 1.0169, + "step": 695 + }, + { + "epoch": 0.39, + "learning_rate": 4.7973208414175406e-05, + "loss": 1.0316, + "step": 700 + }, + { + "epoch": 0.39, + "learning_rate": 4.794455567345842e-05, + "loss": 1.0263, + "step": 705 + }, + { + "epoch": 0.39, + "learning_rate": 4.791571049965714e-05, + "loss": 1.0801, + "step": 710 + }, + { + "epoch": 0.4, + "learning_rate": 4.7886673134692404e-05, + "loss": 1.0575, + "step": 715 + }, + { + "epoch": 0.4, + "learning_rate": 4.7857443822096905e-05, + "loss": 0.9767, + "step": 720 + }, + { + "epoch": 0.4, + "learning_rate": 4.782802280701319e-05, + "loss": 0.9666, + "step": 725 + }, + { + "epoch": 0.4, + "learning_rate": 4.77984103361916e-05, + "loss": 0.9983, + "step": 730 + }, + { + "epoch": 0.41, + "learning_rate": 4.776860665798816e-05, + "loss": 1.0219, + "step": 735 + }, + { + "epoch": 0.41, + "learning_rate": 4.773861202236257e-05, + "loss": 0.9963, + "step": 740 + }, + { + "epoch": 0.41, + "learning_rate": 4.770842668087602e-05, + "loss": 1.0219, + "step": 745 + }, + { + "epoch": 0.41, + "learning_rate": 4.767805088668916e-05, + "loss": 1.0029, + "step": 750 + }, + { + "epoch": 0.42, + "learning_rate": 4.7647484894559936e-05, + "loss": 1.0077, + "step": 755 + }, + { + "epoch": 0.42, + "learning_rate": 4.7616728960841444e-05, + "loss": 0.9908, + "step": 760 + }, + { + "epoch": 0.42, + "learning_rate": 4.758578334347981e-05, + "loss": 1.0092, + "step": 765 + }, + { + "epoch": 0.43, + "learning_rate": 4.7554648302012015e-05, + "loss": 0.9874, + "step": 770 + }, + { + "epoch": 0.43, + "learning_rate": 4.7523324097563706e-05, + "loss": 1.0185, + "step": 775 + }, + { + "epoch": 0.43, + "learning_rate": 4.749181099284703e-05, + "loss": 1.0533, + "step": 780 + }, + { + "epoch": 0.43, + "learning_rate": 4.746010925215839e-05, + "loss": 1.0083, + "step": 785 + }, + { + "epoch": 0.44, + "learning_rate": 4.74282191413763e-05, + "loss": 1.0191, + "step": 790 + }, + { + "epoch": 0.44, + "learning_rate": 4.7396140927959045e-05, + "loss": 0.9786, + "step": 795 + }, + { + "epoch": 0.44, + "learning_rate": 4.7363874880942574e-05, + "loss": 1.0197, + "step": 800 + }, + { + "epoch": 0.45, + "learning_rate": 4.733142127093813e-05, + "loss": 1.0471, + "step": 805 + }, + { + "epoch": 0.45, + "learning_rate": 4.7298780370130014e-05, + "loss": 1.0134, + "step": 810 + }, + { + "epoch": 0.45, + "learning_rate": 4.726595245227336e-05, + "loss": 0.9801, + "step": 815 + }, + { + "epoch": 0.45, + "learning_rate": 4.723293779269173e-05, + "loss": 0.9944, + "step": 820 + }, + { + "epoch": 0.46, + "learning_rate": 4.7199736668274924e-05, + "loss": 1.0358, + "step": 825 + }, + { + "epoch": 0.46, + "learning_rate": 4.716634935747655e-05, + "loss": 0.9907, + "step": 830 + }, + { + "epoch": 0.46, + "learning_rate": 4.713277614031177e-05, + "loss": 1.0698, + "step": 835 + }, + { + "epoch": 0.46, + "learning_rate": 4.70990172983549e-05, + "loss": 1.0466, + "step": 840 + }, + { + "epoch": 0.47, + "learning_rate": 4.706507311473707e-05, + "loss": 0.9489, + "step": 845 + }, + { + "epoch": 0.47, + "learning_rate": 4.703094387414385e-05, + "loss": 0.9936, + "step": 850 + }, + { + "epoch": 0.47, + "learning_rate": 4.699662986281288e-05, + "loss": 0.9672, + "step": 855 + }, + { + "epoch": 0.48, + "learning_rate": 4.696213136853141e-05, + "loss": 1.0164, + "step": 860 + }, + { + "epoch": 0.48, + "learning_rate": 4.6927448680633954e-05, + "loss": 0.9817, + "step": 865 + }, + { + "epoch": 0.48, + "learning_rate": 4.689258208999983e-05, + "loss": 1.028, + "step": 870 + }, + { + "epoch": 0.48, + "learning_rate": 4.6857531889050716e-05, + "loss": 1.0239, + "step": 875 + }, + { + "epoch": 0.49, + "learning_rate": 4.682229837174821e-05, + "loss": 1.0378, + "step": 880 + }, + { + "epoch": 0.49, + "learning_rate": 4.678688183359135e-05, + "loss": 0.9908, + "step": 885 + }, + { + "epoch": 0.49, + "learning_rate": 4.675128257161418e-05, + "loss": 0.9384, + "step": 890 + }, + { + "epoch": 0.49, + "learning_rate": 4.671550088438319e-05, + "loss": 0.9481, + "step": 895 + }, + { + "epoch": 0.5, + "learning_rate": 4.6679537071994874e-05, + "loss": 0.9541, + "step": 900 + }, + { + "epoch": 0.5, + "learning_rate": 4.6643391436073165e-05, + "loss": 0.9787, + "step": 905 + }, + { + "epoch": 0.5, + "learning_rate": 4.660706427976693e-05, + "loss": 0.9897, + "step": 910 + }, + { + "epoch": 0.51, + "learning_rate": 4.657055590774745e-05, + "loss": 0.9947, + "step": 915 + }, + { + "epoch": 0.51, + "learning_rate": 4.6533866626205805e-05, + "loss": 0.9614, + "step": 920 + }, + { + "epoch": 0.51, + "learning_rate": 4.649699674285036e-05, + "loss": 1.0323, + "step": 925 + }, + { + "epoch": 0.51, + "learning_rate": 4.645994656690417e-05, + "loss": 1.0345, + "step": 930 + }, + { + "epoch": 0.52, + "learning_rate": 4.642271640910235e-05, + "loss": 1.0432, + "step": 935 + }, + { + "epoch": 0.52, + "learning_rate": 4.638530658168954e-05, + "loss": 1.0102, + "step": 940 + }, + { + "epoch": 0.52, + "learning_rate": 4.6347717398417203e-05, + "loss": 1.0361, + "step": 945 + }, + { + "epoch": 0.53, + "learning_rate": 4.6309949174541096e-05, + "loss": 0.9818, + "step": 950 + }, + { + "epoch": 0.53, + "learning_rate": 4.627200222681851e-05, + "loss": 1.0293, + "step": 955 + }, + { + "epoch": 0.53, + "learning_rate": 4.6233876873505694e-05, + "loss": 1.0125, + "step": 960 + }, + { + "epoch": 0.53, + "learning_rate": 4.619557343435516e-05, + "loss": 1.0039, + "step": 965 + }, + { + "epoch": 0.54, + "learning_rate": 4.615709223061302e-05, + "loss": 1.0146, + "step": 970 + }, + { + "epoch": 0.54, + "learning_rate": 4.611843358501624e-05, + "loss": 1.0352, + "step": 975 + }, + { + "epoch": 0.54, + "learning_rate": 4.6079597821789993e-05, + "loss": 1.0405, + "step": 980 + }, + { + "epoch": 0.54, + "learning_rate": 4.604058526664491e-05, + "loss": 0.9834, + "step": 985 + }, + { + "epoch": 0.55, + "learning_rate": 4.600139624677436e-05, + "loss": 1.0252, + "step": 990 + }, + { + "epoch": 0.55, + "learning_rate": 4.596203109085168e-05, + "loss": 1.0516, + "step": 995 + }, + { + "epoch": 0.55, + "learning_rate": 4.5922490129027464e-05, + "loss": 0.9762, + "step": 1000 + }, + { + "epoch": 0.56, + "learning_rate": 4.588277369292674e-05, + "loss": 0.9801, + "step": 1005 + }, + { + "epoch": 0.56, + "learning_rate": 4.5842882115646234e-05, + "loss": 1.0127, + "step": 1010 + }, + { + "epoch": 0.56, + "learning_rate": 4.580281573175157e-05, + "loss": 1.0415, + "step": 1015 + }, + { + "epoch": 0.56, + "learning_rate": 4.576257487727442e-05, + "loss": 0.9974, + "step": 1020 + }, + { + "epoch": 0.57, + "learning_rate": 4.572215988970974e-05, + "loss": 1.0097, + "step": 1025 + }, + { + "epoch": 0.57, + "learning_rate": 4.568157110801293e-05, + "loss": 1.0004, + "step": 1030 + }, + { + "epoch": 0.57, + "learning_rate": 4.5640808872596944e-05, + "loss": 0.9932, + "step": 1035 + }, + { + "epoch": 0.58, + "learning_rate": 4.5599873525329505e-05, + "loss": 0.9437, + "step": 1040 + }, + { + "epoch": 0.58, + "learning_rate": 4.555876540953019e-05, + "loss": 1.0004, + "step": 1045 + }, + { + "epoch": 0.58, + "learning_rate": 4.551748486996755e-05, + "loss": 0.9743, + "step": 1050 + }, + { + "epoch": 0.58, + "learning_rate": 4.547603225285626e-05, + "loss": 1.0303, + "step": 1055 + }, + { + "epoch": 0.59, + "learning_rate": 4.543440790585417e-05, + "loss": 1.0225, + "step": 1060 + }, + { + "epoch": 0.59, + "learning_rate": 4.539261217805939e-05, + "loss": 1.0102, + "step": 1065 + }, + { + "epoch": 0.59, + "learning_rate": 4.535064542000743e-05, + "loss": 1.0087, + "step": 1070 + }, + { + "epoch": 0.59, + "learning_rate": 4.5308507983668165e-05, + "loss": 0.952, + "step": 1075 + }, + { + "epoch": 0.6, + "learning_rate": 4.526620022244293e-05, + "loss": 0.9767, + "step": 1080 + }, + { + "epoch": 0.6, + "learning_rate": 4.522372249116158e-05, + "loss": 1.0049, + "step": 1085 + }, + { + "epoch": 0.6, + "learning_rate": 4.5181075146079456e-05, + "loss": 1.0397, + "step": 1090 + }, + { + "epoch": 0.61, + "learning_rate": 4.5138258544874455e-05, + "loss": 1.0167, + "step": 1095 + }, + { + "epoch": 0.61, + "learning_rate": 4.5095273046643985e-05, + "loss": 0.9966, + "step": 1100 + }, + { + "epoch": 0.61, + "learning_rate": 4.5052119011901986e-05, + "loss": 0.983, + "step": 1105 + }, + { + "epoch": 0.61, + "learning_rate": 4.500879680257587e-05, + "loss": 1.0301, + "step": 1110 + }, + { + "epoch": 0.62, + "learning_rate": 4.4965306782003535e-05, + "loss": 1.0371, + "step": 1115 + }, + { + "epoch": 0.62, + "learning_rate": 4.492164931493028e-05, + "loss": 1.0083, + "step": 1120 + }, + { + "epoch": 0.62, + "learning_rate": 4.487782476750575e-05, + "loss": 0.9728, + "step": 1125 + }, + { + "epoch": 0.62, + "learning_rate": 4.4833833507280884e-05, + "loss": 0.9892, + "step": 1130 + }, + { + "epoch": 0.63, + "learning_rate": 4.4789675903204805e-05, + "loss": 0.9847, + "step": 1135 + }, + { + "epoch": 0.63, + "learning_rate": 4.474535232562176e-05, + "loss": 1.0545, + "step": 1140 + }, + { + "epoch": 0.63, + "learning_rate": 4.470086314626797e-05, + "loss": 1.0204, + "step": 1145 + }, + { + "epoch": 0.64, + "learning_rate": 4.465620873826856e-05, + "loss": 0.9648, + "step": 1150 + }, + { + "epoch": 0.64, + "learning_rate": 4.46113894761344e-05, + "loss": 0.9545, + "step": 1155 + }, + { + "epoch": 0.64, + "learning_rate": 4.456640573575896e-05, + "loss": 1.0256, + "step": 1160 + }, + { + "epoch": 0.64, + "learning_rate": 4.4521257894415183e-05, + "loss": 0.99, + "step": 1165 + }, + { + "epoch": 0.65, + "learning_rate": 4.44759463307523e-05, + "loss": 0.9906, + "step": 1170 + }, + { + "epoch": 0.65, + "learning_rate": 4.443047142479266e-05, + "loss": 0.9876, + "step": 1175 + }, + { + "epoch": 0.65, + "learning_rate": 4.4384833557928553e-05, + "loss": 1.0495, + "step": 1180 + }, + { + "epoch": 0.66, + "learning_rate": 4.4339033112918966e-05, + "loss": 0.9869, + "step": 1185 + }, + { + "epoch": 0.66, + "learning_rate": 4.4293070473886456e-05, + "loss": 1.0299, + "step": 1190 + }, + { + "epoch": 0.66, + "learning_rate": 4.424694602631385e-05, + "loss": 1.0073, + "step": 1195 + }, + { + "epoch": 0.66, + "learning_rate": 4.420066015704105e-05, + "loss": 1.0023, + "step": 1200 + }, + { + "epoch": 0.67, + "learning_rate": 4.41542132542618e-05, + "loss": 0.9462, + "step": 1205 + }, + { + "epoch": 0.67, + "learning_rate": 4.410760570752037e-05, + "loss": 1.0116, + "step": 1210 + }, + { + "epoch": 0.67, + "learning_rate": 4.4060837907708375e-05, + "loss": 0.9652, + "step": 1215 + }, + { + "epoch": 0.67, + "learning_rate": 4.401391024706142e-05, + "loss": 1.0411, + "step": 1220 + }, + { + "epoch": 0.68, + "learning_rate": 4.396682311915586e-05, + "loss": 0.9691, + "step": 1225 + }, + { + "epoch": 0.68, + "learning_rate": 4.3919576918905495e-05, + "loss": 0.961, + "step": 1230 + }, + { + "epoch": 0.68, + "learning_rate": 4.387217204255819e-05, + "loss": 0.9602, + "step": 1235 + }, + { + "epoch": 0.69, + "learning_rate": 4.3824608887692666e-05, + "loss": 1.0197, + "step": 1240 + }, + { + "epoch": 0.69, + "learning_rate": 4.377688785321507e-05, + "loss": 0.9982, + "step": 1245 + }, + { + "epoch": 0.69, + "learning_rate": 4.372900933935569e-05, + "loss": 0.986, + "step": 1250 + }, + { + "epoch": 0.69, + "learning_rate": 4.368097374766556e-05, + "loss": 0.9744, + "step": 1255 + }, + { + "epoch": 0.7, + "learning_rate": 4.3632781481013105e-05, + "loss": 1.0078, + "step": 1260 + }, + { + "epoch": 0.7, + "learning_rate": 4.358443294358077e-05, + "loss": 0.9865, + "step": 1265 + }, + { + "epoch": 0.7, + "learning_rate": 4.35359285408616e-05, + "loss": 0.976, + "step": 1270 + }, + { + "epoch": 0.71, + "learning_rate": 4.348726867965591e-05, + "loss": 1.0141, + "step": 1275 + }, + { + "epoch": 0.71, + "learning_rate": 4.343845376806777e-05, + "loss": 0.952, + "step": 1280 + }, + { + "epoch": 0.71, + "learning_rate": 4.338948421550169e-05, + "loss": 1.0239, + "step": 1285 + }, + { + "epoch": 0.71, + "learning_rate": 4.334036043265909e-05, + "loss": 0.994, + "step": 1290 + }, + { + "epoch": 0.72, + "learning_rate": 4.329108283153492e-05, + "loss": 1.0505, + "step": 1295 + }, + { + "epoch": 0.72, + "learning_rate": 4.3241651825414195e-05, + "loss": 0.9919, + "step": 1300 + }, + { + "epoch": 0.72, + "learning_rate": 4.31920678288685e-05, + "loss": 1.0169, + "step": 1305 + }, + { + "epoch": 0.72, + "learning_rate": 4.3142331257752546e-05, + "loss": 0.9938, + "step": 1310 + }, + { + "epoch": 0.73, + "learning_rate": 4.309244252920064e-05, + "loss": 0.92, + "step": 1315 + }, + { + "epoch": 0.73, + "learning_rate": 4.304240206162326e-05, + "loss": 0.9537, + "step": 1320 + }, + { + "epoch": 0.73, + "learning_rate": 4.299221027470345e-05, + "loss": 0.996, + "step": 1325 + }, + { + "epoch": 0.74, + "learning_rate": 4.294186758939339e-05, + "loss": 0.9652, + "step": 1330 + }, + { + "epoch": 0.74, + "learning_rate": 4.2891374427910795e-05, + "loss": 1.0138, + "step": 1335 + }, + { + "epoch": 0.74, + "learning_rate": 4.284073121373544e-05, + "loss": 0.9267, + "step": 1340 + }, + { + "epoch": 0.74, + "learning_rate": 4.278993837160553e-05, + "loss": 0.9899, + "step": 1345 + }, + { + "epoch": 0.75, + "learning_rate": 4.273899632751422e-05, + "loss": 0.963, + "step": 1350 + }, + { + "epoch": 0.75, + "learning_rate": 4.2687905508705974e-05, + "loss": 1.0066, + "step": 1355 + }, + { + "epoch": 0.75, + "learning_rate": 4.263666634367303e-05, + "loss": 0.9939, + "step": 1360 + }, + { + "epoch": 0.75, + "learning_rate": 4.258527926215178e-05, + "loss": 0.9667, + "step": 1365 + }, + { + "epoch": 0.76, + "learning_rate": 4.253374469511917e-05, + "loss": 0.9999, + "step": 1370 + }, + { + "epoch": 0.76, + "learning_rate": 4.248206307478909e-05, + "loss": 0.9738, + "step": 1375 + }, + { + "epoch": 0.76, + "learning_rate": 4.243023483460875e-05, + "loss": 0.9829, + "step": 1380 + }, + { + "epoch": 0.77, + "learning_rate": 4.237826040925503e-05, + "loss": 1.0107, + "step": 1385 + }, + { + "epoch": 0.77, + "learning_rate": 4.232614023463088e-05, + "loss": 0.9992, + "step": 1390 + }, + { + "epoch": 0.77, + "learning_rate": 4.227387474786159e-05, + "loss": 0.9566, + "step": 1395 + }, + { + "epoch": 0.77, + "learning_rate": 4.222146438729119e-05, + "loss": 0.9722, + "step": 1400 + }, + { + "epoch": 0.78, + "learning_rate": 4.216890959247873e-05, + "loss": 0.988, + "step": 1405 + }, + { + "epoch": 0.78, + "learning_rate": 4.211621080419463e-05, + "loss": 1.0103, + "step": 1410 + }, + { + "epoch": 0.78, + "learning_rate": 4.206336846441695e-05, + "loss": 0.9805, + "step": 1415 + }, + { + "epoch": 0.79, + "learning_rate": 4.201038301632772e-05, + "loss": 1.0177, + "step": 1420 + }, + { + "epoch": 0.79, + "learning_rate": 4.195725490430917e-05, + "loss": 0.9938, + "step": 1425 + }, + { + "epoch": 0.79, + "learning_rate": 4.190398457394007e-05, + "loss": 1.0276, + "step": 1430 + }, + { + "epoch": 0.79, + "learning_rate": 4.1850572471991924e-05, + "loss": 0.9909, + "step": 1435 + }, + { + "epoch": 0.8, + "learning_rate": 4.1797019046425264e-05, + "loss": 0.9794, + "step": 1440 + }, + { + "epoch": 0.8, + "learning_rate": 4.1743324746385914e-05, + "loss": 1.0073, + "step": 1445 + }, + { + "epoch": 0.8, + "learning_rate": 4.1689490022201154e-05, + "loss": 1.0106, + "step": 1450 + }, + { + "epoch": 0.8, + "learning_rate": 4.163551532537601e-05, + "loss": 1.0241, + "step": 1455 + }, + { + "epoch": 0.81, + "learning_rate": 4.1581401108589425e-05, + "loss": 1.0178, + "step": 1460 + }, + { + "epoch": 0.81, + "learning_rate": 4.1527147825690495e-05, + "loss": 0.9426, + "step": 1465 + }, + { + "epoch": 0.81, + "learning_rate": 4.1472755931694626e-05, + "loss": 0.9642, + "step": 1470 + }, + { + "epoch": 0.82, + "learning_rate": 4.141822588277976e-05, + "loss": 0.9678, + "step": 1475 + }, + { + "epoch": 0.82, + "learning_rate": 4.136355813628251e-05, + "loss": 0.9654, + "step": 1480 + }, + { + "epoch": 0.82, + "learning_rate": 4.130875315069435e-05, + "loss": 0.9748, + "step": 1485 + }, + { + "epoch": 0.82, + "learning_rate": 4.125381138565775e-05, + "loss": 0.9321, + "step": 1490 + }, + { + "epoch": 0.83, + "learning_rate": 4.1198733301962346e-05, + "loss": 0.9885, + "step": 1495 + }, + { + "epoch": 0.83, + "learning_rate": 4.114351936154105e-05, + "loss": 0.9385, + "step": 1500 + }, + { + "epoch": 0.83, + "learning_rate": 4.108817002746619e-05, + "loss": 0.9362, + "step": 1505 + }, + { + "epoch": 0.83, + "learning_rate": 4.1032685763945625e-05, + "loss": 0.9764, + "step": 1510 + }, + { + "epoch": 0.84, + "learning_rate": 4.097706703631886e-05, + "loss": 0.961, + "step": 1515 + }, + { + "epoch": 0.84, + "learning_rate": 4.092131431105312e-05, + "loss": 0.9818, + "step": 1520 + }, + { + "epoch": 0.84, + "learning_rate": 4.086542805573945e-05, + "loss": 0.9855, + "step": 1525 + }, + { + "epoch": 0.85, + "learning_rate": 4.080940873908881e-05, + "loss": 0.9618, + "step": 1530 + }, + { + "epoch": 0.85, + "learning_rate": 4.07532568309281e-05, + "loss": 1.0102, + "step": 1535 + }, + { + "epoch": 0.85, + "learning_rate": 4.069697280219628e-05, + "loss": 0.9773, + "step": 1540 + }, + { + "epoch": 0.85, + "learning_rate": 4.0640557124940376e-05, + "loss": 0.9687, + "step": 1545 + }, + { + "epoch": 0.86, + "learning_rate": 4.058401027231152e-05, + "loss": 0.9818, + "step": 1550 + }, + { + "epoch": 0.86, + "learning_rate": 4.052733271856103e-05, + "loss": 0.9911, + "step": 1555 + }, + { + "epoch": 0.86, + "learning_rate": 4.0470524939036355e-05, + "loss": 0.9694, + "step": 1560 + }, + { + "epoch": 0.87, + "learning_rate": 4.0413587410177155e-05, + "loss": 0.9374, + "step": 1565 + }, + { + "epoch": 0.87, + "learning_rate": 4.035652060951128e-05, + "loss": 0.956, + "step": 1570 + }, + { + "epoch": 0.87, + "learning_rate": 4.0299325015650774e-05, + "loss": 0.9491, + "step": 1575 + }, + { + "epoch": 0.87, + "learning_rate": 4.024200110828783e-05, + "loss": 0.9569, + "step": 1580 + }, + { + "epoch": 0.88, + "learning_rate": 4.018454936819082e-05, + "loss": 0.9627, + "step": 1585 + }, + { + "epoch": 0.88, + "learning_rate": 4.012697027720018e-05, + "loss": 0.9922, + "step": 1590 + }, + { + "epoch": 0.88, + "learning_rate": 4.0069264318224506e-05, + "loss": 0.9703, + "step": 1595 + }, + { + "epoch": 0.88, + "learning_rate": 4.0011431975236337e-05, + "loss": 0.9436, + "step": 1600 + }, + { + "epoch": 0.89, + "learning_rate": 3.995347373326822e-05, + "loss": 0.961, + "step": 1605 + }, + { + "epoch": 0.89, + "learning_rate": 3.989539007840861e-05, + "loss": 0.9247, + "step": 1610 + }, + { + "epoch": 0.89, + "learning_rate": 3.983718149779775e-05, + "loss": 0.9537, + "step": 1615 + }, + { + "epoch": 0.9, + "learning_rate": 3.9778848479623656e-05, + "loss": 0.9415, + "step": 1620 + }, + { + "epoch": 0.9, + "learning_rate": 3.972039151311795e-05, + "loss": 0.9954, + "step": 1625 + }, + { + "epoch": 0.9, + "learning_rate": 3.966181108855183e-05, + "loss": 0.9451, + "step": 1630 + }, + { + "epoch": 0.9, + "learning_rate": 3.960310769723189e-05, + "loss": 0.9977, + "step": 1635 + }, + { + "epoch": 0.91, + "learning_rate": 3.9544281831496034e-05, + "loss": 0.9806, + "step": 1640 + }, + { + "epoch": 0.91, + "learning_rate": 3.9485333984709374e-05, + "loss": 0.9851, + "step": 1645 + }, + { + "epoch": 0.91, + "learning_rate": 3.942626465126001e-05, + "loss": 1.0089, + "step": 1650 + }, + { + "epoch": 0.92, + "learning_rate": 3.9367074326555e-05, + "loss": 0.9562, + "step": 1655 + }, + { + "epoch": 0.92, + "learning_rate": 3.930776350701609e-05, + "loss": 0.9892, + "step": 1660 + }, + { + "epoch": 0.92, + "learning_rate": 3.92483326900756e-05, + "loss": 0.9875, + "step": 1665 + }, + { + "epoch": 0.92, + "learning_rate": 3.91887823741723e-05, + "loss": 1.0012, + "step": 1670 + }, + { + "epoch": 0.93, + "learning_rate": 3.9129113058747136e-05, + "loss": 0.97, + "step": 1675 + }, + { + "epoch": 0.93, + "learning_rate": 3.9069325244239095e-05, + "loss": 0.9793, + "step": 1680 + }, + { + "epoch": 0.93, + "learning_rate": 3.900941943208103e-05, + "loss": 0.9864, + "step": 1685 + }, + { + "epoch": 0.93, + "learning_rate": 3.894939612469539e-05, + "loss": 0.9596, + "step": 1690 + }, + { + "epoch": 0.94, + "learning_rate": 3.888925582549006e-05, + "loss": 0.9679, + "step": 1695 + }, + { + "epoch": 0.94, + "learning_rate": 3.882899903885412e-05, + "loss": 0.9395, + "step": 1700 + }, + { + "epoch": 0.94, + "learning_rate": 3.876862627015361e-05, + "loss": 0.9774, + "step": 1705 + }, + { + "epoch": 0.95, + "learning_rate": 3.87081380257273e-05, + "loss": 0.9686, + "step": 1710 + }, + { + "epoch": 0.95, + "learning_rate": 3.864753481288244e-05, + "loss": 0.953, + "step": 1715 + }, + { + "epoch": 0.95, + "learning_rate": 3.8586817139890515e-05, + "loss": 0.9719, + "step": 1720 + }, + { + "epoch": 0.95, + "learning_rate": 3.852598551598294e-05, + "loss": 0.9748, + "step": 1725 + }, + { + "epoch": 0.96, + "learning_rate": 3.8465040451346874e-05, + "loss": 0.9741, + "step": 1730 + }, + { + "epoch": 0.96, + "learning_rate": 3.8403982457120836e-05, + "loss": 0.9747, + "step": 1735 + }, + { + "epoch": 0.96, + "learning_rate": 3.834281204539051e-05, + "loss": 0.9791, + "step": 1740 + }, + { + "epoch": 0.96, + "learning_rate": 3.828152972918438e-05, + "loss": 0.9704, + "step": 1745 + }, + { + "epoch": 0.97, + "learning_rate": 3.82201360224695e-05, + "loss": 0.9556, + "step": 1750 + }, + { + "epoch": 0.97, + "learning_rate": 3.815863144014711e-05, + "loss": 0.9552, + "step": 1755 + }, + { + "epoch": 0.97, + "learning_rate": 3.809701649804834e-05, + "loss": 0.9594, + "step": 1760 + }, + { + "epoch": 0.98, + "learning_rate": 3.8035291712929926e-05, + "loss": 0.959, + "step": 1765 + }, + { + "epoch": 0.98, + "learning_rate": 3.797345760246982e-05, + "loss": 1.0254, + "step": 1770 + }, + { + "epoch": 0.98, + "learning_rate": 3.791151468526289e-05, + "loss": 0.9925, + "step": 1775 + }, + { + "epoch": 0.98, + "learning_rate": 3.784946348081654e-05, + "loss": 0.9516, + "step": 1780 + }, + { + "epoch": 0.99, + "learning_rate": 3.7787304509546365e-05, + "loss": 0.954, + "step": 1785 + }, + { + "epoch": 0.99, + "learning_rate": 3.7725038292771774e-05, + "loss": 0.9465, + "step": 1790 + }, + { + "epoch": 0.99, + "learning_rate": 3.766266535271167e-05, + "loss": 0.9792, + "step": 1795 + }, + { + "epoch": 1.0, + "learning_rate": 3.760018621248e-05, + "loss": 0.964, + "step": 1800 + }, + { + "epoch": 1.0, + "learning_rate": 3.75376013960814e-05, + "loss": 0.9419, + "step": 1805 + }, + { + "epoch": 1.0, + "learning_rate": 3.747491142840681e-05, + "loss": 0.9818, + "step": 1810 + }, + { + "epoch": 1.0, + "learning_rate": 3.741211683522904e-05, + "loss": 0.9153, + "step": 1815 + }, + { + "epoch": 1.01, + "learning_rate": 3.734921814319841e-05, + "loss": 0.955, + "step": 1820 + }, + { + "epoch": 1.01, + "learning_rate": 3.728621587983828e-05, + "loss": 0.9467, + "step": 1825 + }, + { + "epoch": 1.01, + "learning_rate": 3.722311057354067e-05, + "loss": 0.9816, + "step": 1830 + }, + { + "epoch": 1.01, + "learning_rate": 3.715990275356178e-05, + "loss": 0.9727, + "step": 1835 + }, + { + "epoch": 1.02, + "learning_rate": 3.7096592950017617e-05, + "loss": 0.9822, + "step": 1840 + }, + { + "epoch": 1.02, + "learning_rate": 3.703318169387947e-05, + "loss": 0.942, + "step": 1845 + }, + { + "epoch": 1.02, + "learning_rate": 3.696966951696952e-05, + "loss": 0.9306, + "step": 1850 + }, + { + "epoch": 1.03, + "learning_rate": 3.690605695195637e-05, + "loss": 0.9654, + "step": 1855 + }, + { + "epoch": 1.03, + "learning_rate": 3.684234453235054e-05, + "loss": 0.9592, + "step": 1860 + }, + { + "epoch": 1.03, + "learning_rate": 3.677853279250003e-05, + "loss": 0.988, + "step": 1865 + }, + { + "epoch": 1.03, + "learning_rate": 3.671462226758583e-05, + "loss": 0.9462, + "step": 1870 + }, + { + "epoch": 1.04, + "learning_rate": 3.665061349361742e-05, + "loss": 0.9685, + "step": 1875 + }, + { + "epoch": 1.04, + "learning_rate": 3.658650700742828e-05, + "loss": 0.9772, + "step": 1880 + }, + { + "epoch": 1.04, + "learning_rate": 3.6522303346671404e-05, + "loss": 0.9482, + "step": 1885 + }, + { + "epoch": 1.05, + "learning_rate": 3.645800304981477e-05, + "loss": 1.0069, + "step": 1890 + }, + { + "epoch": 1.05, + "learning_rate": 3.639360665613683e-05, + "loss": 0.9003, + "step": 1895 + }, + { + "epoch": 1.05, + "learning_rate": 3.632911470572197e-05, + "loss": 0.9279, + "step": 1900 + }, + { + "epoch": 1.05, + "learning_rate": 3.626452773945603e-05, + "loss": 0.9237, + "step": 1905 + }, + { + "epoch": 1.06, + "learning_rate": 3.619984629902172e-05, + "loss": 1.0086, + "step": 1910 + }, + { + "epoch": 1.06, + "learning_rate": 3.613507092689409e-05, + "loss": 0.9625, + "step": 1915 + }, + { + "epoch": 1.06, + "learning_rate": 3.607020216633599e-05, + "loss": 0.9297, + "step": 1920 + }, + { + "epoch": 1.06, + "learning_rate": 3.60052405613935e-05, + "loss": 0.9718, + "step": 1925 + }, + { + "epoch": 1.07, + "learning_rate": 3.594018665689139e-05, + "loss": 0.9512, + "step": 1930 + }, + { + "epoch": 1.07, + "learning_rate": 3.5875040998428513e-05, + "loss": 0.9923, + "step": 1935 + }, + { + "epoch": 1.07, + "learning_rate": 3.5809804132373253e-05, + "loss": 0.9518, + "step": 1940 + }, + { + "epoch": 1.08, + "learning_rate": 3.574447660585897e-05, + "loss": 0.9324, + "step": 1945 + }, + { + "epoch": 1.08, + "learning_rate": 3.5679058966779344e-05, + "loss": 0.9724, + "step": 1950 + }, + { + "epoch": 1.08, + "learning_rate": 3.561355176378384e-05, + "loss": 0.978, + "step": 1955 + }, + { + "epoch": 1.08, + "learning_rate": 3.554795554627307e-05, + "loss": 0.9893, + "step": 1960 + }, + { + "epoch": 1.09, + "learning_rate": 3.548227086439422e-05, + "loss": 0.967, + "step": 1965 + }, + { + "epoch": 1.09, + "learning_rate": 3.541649826903639e-05, + "loss": 0.9529, + "step": 1970 + }, + { + "epoch": 1.09, + "learning_rate": 3.535063831182602e-05, + "loss": 0.9506, + "step": 1975 + }, + { + "epoch": 1.09, + "learning_rate": 3.528469154512224e-05, + "loss": 0.9525, + "step": 1980 + }, + { + "epoch": 1.1, + "learning_rate": 3.521865852201223e-05, + "loss": 0.9258, + "step": 1985 + }, + { + "epoch": 1.1, + "learning_rate": 3.5152539796306596e-05, + "loss": 0.9417, + "step": 1990 + }, + { + "epoch": 1.1, + "learning_rate": 3.508633592253472e-05, + "loss": 0.9465, + "step": 1995 + }, + { + "epoch": 1.11, + "learning_rate": 3.502004745594011e-05, + "loss": 0.9494, + "step": 2000 + } + ], + "max_steps": 5424, + "num_train_epochs": 3, + "total_flos": 6.499203130431898e+17, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-2000/training_args.bin b/checkpoint-2000/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..77433459daca6603950afc463ff989e817f81ac3 --- /dev/null +++ b/checkpoint-2000/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0f6a8853599a52b1f15984d75ce4b6434c335fdc67d4509d0b2ca0d8c613590a +size 3362 diff --git a/checkpoint-2100/README.md b/checkpoint-2100/README.md new file mode 100644 index 0000000000000000000000000000000000000000..53c0fe6b436905ad7a2c060e98fe1ebdeac1a2fe --- /dev/null +++ b/checkpoint-2100/README.md @@ -0,0 +1,15 @@ +--- +library_name: peft +--- +## Training procedure + + +The following `bitsandbytes` quantization config was used during training: +- load_in_8bit: True +- llm_int8_threshold: 6.0 +- llm_int8_skip_modules: None +- llm_int8_enable_fp32_cpu_offload: False +### Framework versions + + +- PEFT 0.5.0 diff --git a/checkpoint-2100/adapter_config.json b/checkpoint-2100/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..514ae99ac7471cc26016f7575e44dd8304f29b07 --- /dev/null +++ b/checkpoint-2100/adapter_config.json @@ -0,0 +1,21 @@ +{ + "auto_mapping": null, + "base_model_name_or_path": "meta-llama/Llama-2-7b-chat-hf", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "q_proj", + "v_proj" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-2100/adapter_model.bin b/checkpoint-2100/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..4278fad2f58919d7033797ca158166ec15796db2 --- /dev/null +++ b/checkpoint-2100/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d33a37d9cc712714783a7c834c74665693843e99dd50a852a7727586658d87cc +size 16821197 diff --git a/checkpoint-2100/finetuning_args.json b/checkpoint-2100/finetuning_args.json new file mode 100644 index 0000000000000000000000000000000000000000..d01efc206b59c6f88548e8f3940579f2ed2af33b --- /dev/null +++ b/checkpoint-2100/finetuning_args.json @@ -0,0 +1,16 @@ +{ + "dpo_beta": 0.1, + "finetuning_type": "lora", + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "lora_rank": 8, + "lora_target": [ + "q_proj", + "v_proj" + ], + "name_module_trainable": "mlp", + "num_hidden_layers": 32, + "num_layer_trainable": 3, + "ppo_score_norm": false, + "resume_lora_training": true +} diff --git a/checkpoint-2100/optimizer.pt b/checkpoint-2100/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..358d3a25bbe13edb4d2c24c4e24c8e72a785c740 --- /dev/null +++ b/checkpoint-2100/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b721f850eeb074e88148592e3f91c40ab7fdb3bf9ff6153a30b1797f82d86f86 +size 33629893 diff --git a/checkpoint-2100/rng_state.pth b/checkpoint-2100/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..35556b3b3d592d487f892662de3fee7e28762b47 --- /dev/null +++ b/checkpoint-2100/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:47938d7d7e05e552401fa261aa12e7f1b5b272039f7c1f4da8e7f90782fcb5d6 +size 14575 diff --git a/checkpoint-2100/scaler.pt b/checkpoint-2100/scaler.pt new file mode 100644 index 0000000000000000000000000000000000000000..b19fddadfca880dd42a9a106f08d64d61255f37e --- /dev/null +++ b/checkpoint-2100/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cb3d96e72039fcffe3e936751e5f9bf95804f00efdb11e53bb1f33cb88fe4634 +size 557 diff --git a/checkpoint-2100/scheduler.pt b/checkpoint-2100/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..d857ce8d50e3084e5205e1f02137c346d6c63d06 --- /dev/null +++ b/checkpoint-2100/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e5bddf5dd85e9719e200de1930c760710ee2ce9995459598969ceb948c1d3e15 +size 627 diff --git a/checkpoint-2100/trainer_state.json b/checkpoint-2100/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..eab828b29bf300e0cdcfe532ff4b0122fe8f2cd3 --- /dev/null +++ b/checkpoint-2100/trainer_state.json @@ -0,0 +1,2536 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.1611833010782417, + "global_step": 2100, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.9999895164082156e-05, + "loss": 1.6349, + "step": 5 + }, + { + "epoch": 0.01, + "learning_rate": 4.999958065720787e-05, + "loss": 1.6199, + "step": 10 + }, + { + "epoch": 0.01, + "learning_rate": 4.999905648201487e-05, + "loss": 1.4834, + "step": 15 + }, + { + "epoch": 0.01, + "learning_rate": 4.999832264289934e-05, + "loss": 1.3882, + "step": 20 + }, + { + "epoch": 0.01, + "learning_rate": 4.999737914601591e-05, + "loss": 1.3679, + "step": 25 + }, + { + "epoch": 0.02, + "learning_rate": 4.999622599927756e-05, + "loss": 1.2396, + "step": 30 + }, + { + "epoch": 0.02, + "learning_rate": 4.999486321235559e-05, + "loss": 1.321, + "step": 35 + }, + { + "epoch": 0.02, + "learning_rate": 4.9993290796679516e-05, + "loss": 1.2874, + "step": 40 + }, + { + "epoch": 0.02, + "learning_rate": 4.999150876543699e-05, + "loss": 1.2607, + "step": 45 + }, + { + "epoch": 0.03, + "learning_rate": 4.9989517133573694e-05, + "loss": 1.2454, + "step": 50 + }, + { + "epoch": 0.03, + "learning_rate": 4.9987315917793174e-05, + "loss": 1.2799, + "step": 55 + }, + { + "epoch": 0.03, + "learning_rate": 4.998490513655676e-05, + "loss": 1.2575, + "step": 60 + }, + { + "epoch": 0.04, + "learning_rate": 4.998228481008337e-05, + "loss": 1.2404, + "step": 65 + }, + { + "epoch": 0.04, + "learning_rate": 4.997945496034934e-05, + "loss": 1.2219, + "step": 70 + }, + { + "epoch": 0.04, + "learning_rate": 4.9976415611088267e-05, + "loss": 1.2241, + "step": 75 + }, + { + "epoch": 0.04, + "learning_rate": 4.997316678779079e-05, + "loss": 1.1716, + "step": 80 + }, + { + "epoch": 0.05, + "learning_rate": 4.996970851770438e-05, + "loss": 1.1883, + "step": 85 + }, + { + "epoch": 0.05, + "learning_rate": 4.9966040829833115e-05, + "loss": 1.205, + "step": 90 + }, + { + "epoch": 0.05, + "learning_rate": 4.9962163754937426e-05, + "loss": 1.1246, + "step": 95 + }, + { + "epoch": 0.06, + "learning_rate": 4.995807732553384e-05, + "loss": 1.1636, + "step": 100 + }, + { + "epoch": 0.06, + "learning_rate": 4.9953781575894723e-05, + "loss": 1.158, + "step": 105 + }, + { + "epoch": 0.06, + "learning_rate": 4.9949276542048e-05, + "loss": 1.1477, + "step": 110 + }, + { + "epoch": 0.06, + "learning_rate": 4.9944562261776805e-05, + "loss": 1.1678, + "step": 115 + }, + { + "epoch": 0.07, + "learning_rate": 4.9939638774619216e-05, + "loss": 1.1501, + "step": 120 + }, + { + "epoch": 0.07, + "learning_rate": 4.99345061218679e-05, + "loss": 1.1955, + "step": 125 + }, + { + "epoch": 0.07, + "learning_rate": 4.9929164346569756e-05, + "loss": 1.1724, + "step": 130 + }, + { + "epoch": 0.07, + "learning_rate": 4.9923613493525576e-05, + "loss": 1.177, + "step": 135 + }, + { + "epoch": 0.08, + "learning_rate": 4.991785360928968e-05, + "loss": 1.1418, + "step": 140 + }, + { + "epoch": 0.08, + "learning_rate": 4.991188474216947e-05, + "loss": 1.1898, + "step": 145 + }, + { + "epoch": 0.08, + "learning_rate": 4.9905706942225094e-05, + "loss": 1.1479, + "step": 150 + }, + { + "epoch": 0.09, + "learning_rate": 4.9899320261268966e-05, + "loss": 1.1356, + "step": 155 + }, + { + "epoch": 0.09, + "learning_rate": 4.989272475286537e-05, + "loss": 1.1397, + "step": 160 + }, + { + "epoch": 0.09, + "learning_rate": 4.9885920472330004e-05, + "loss": 1.1215, + "step": 165 + }, + { + "epoch": 0.09, + "learning_rate": 4.9878907476729516e-05, + "loss": 1.167, + "step": 170 + }, + { + "epoch": 0.1, + "learning_rate": 4.9871685824881e-05, + "loss": 1.1219, + "step": 175 + }, + { + "epoch": 0.1, + "learning_rate": 4.9864255577351534e-05, + "loss": 1.0835, + "step": 180 + }, + { + "epoch": 0.1, + "learning_rate": 4.985661679645769e-05, + "loss": 1.0721, + "step": 185 + }, + { + "epoch": 0.11, + "learning_rate": 4.9848769546264915e-05, + "loss": 1.0692, + "step": 190 + }, + { + "epoch": 0.11, + "learning_rate": 4.9840713892587146e-05, + "loss": 1.0488, + "step": 195 + }, + { + "epoch": 0.11, + "learning_rate": 4.983244990298609e-05, + "loss": 1.1285, + "step": 200 + }, + { + "epoch": 0.11, + "learning_rate": 4.982397764677081e-05, + "loss": 1.0832, + "step": 205 + }, + { + "epoch": 0.12, + "learning_rate": 4.981529719499704e-05, + "loss": 1.0652, + "step": 210 + }, + { + "epoch": 0.12, + "learning_rate": 4.980640862046663e-05, + "loss": 1.1043, + "step": 215 + }, + { + "epoch": 0.12, + "learning_rate": 4.979731199772693e-05, + "loss": 1.112, + "step": 220 + }, + { + "epoch": 0.12, + "learning_rate": 4.9788007403070146e-05, + "loss": 1.1029, + "step": 225 + }, + { + "epoch": 0.13, + "learning_rate": 4.977849491453277e-05, + "loss": 1.0869, + "step": 230 + }, + { + "epoch": 0.13, + "learning_rate": 4.976877461189481e-05, + "loss": 1.0843, + "step": 235 + }, + { + "epoch": 0.13, + "learning_rate": 4.975884657667922e-05, + "loss": 1.0789, + "step": 240 + }, + { + "epoch": 0.14, + "learning_rate": 4.974871089215118e-05, + "loss": 1.0449, + "step": 245 + }, + { + "epoch": 0.14, + "learning_rate": 4.9738367643317405e-05, + "loss": 1.1053, + "step": 250 + }, + { + "epoch": 0.14, + "learning_rate": 4.9727816916925395e-05, + "loss": 1.0651, + "step": 255 + }, + { + "epoch": 0.14, + "learning_rate": 4.971705880146276e-05, + "loss": 1.0828, + "step": 260 + }, + { + "epoch": 0.15, + "learning_rate": 4.970609338715646e-05, + "loss": 1.0932, + "step": 265 + }, + { + "epoch": 0.15, + "learning_rate": 4.969492076597203e-05, + "loss": 1.0648, + "step": 270 + }, + { + "epoch": 0.15, + "learning_rate": 4.968354103161283e-05, + "loss": 1.0948, + "step": 275 + }, + { + "epoch": 0.15, + "learning_rate": 4.967195427951926e-05, + "loss": 1.0721, + "step": 280 + }, + { + "epoch": 0.16, + "learning_rate": 4.9660160606867936e-05, + "loss": 1.124, + "step": 285 + }, + { + "epoch": 0.16, + "learning_rate": 4.9648160112570896e-05, + "loss": 1.0963, + "step": 290 + }, + { + "epoch": 0.16, + "learning_rate": 4.9635952897274773e-05, + "loss": 1.1078, + "step": 295 + }, + { + "epoch": 0.17, + "learning_rate": 4.9623539063359925e-05, + "loss": 1.1059, + "step": 300 + }, + { + "epoch": 0.17, + "learning_rate": 4.961091871493962e-05, + "loss": 1.1032, + "step": 305 + }, + { + "epoch": 0.17, + "learning_rate": 4.959809195785912e-05, + "loss": 1.0595, + "step": 310 + }, + { + "epoch": 0.17, + "learning_rate": 4.958505889969481e-05, + "loss": 1.1096, + "step": 315 + }, + { + "epoch": 0.18, + "learning_rate": 4.957181964975329e-05, + "loss": 1.0589, + "step": 320 + }, + { + "epoch": 0.18, + "learning_rate": 4.955837431907049e-05, + "loss": 1.0608, + "step": 325 + }, + { + "epoch": 0.18, + "learning_rate": 4.954472302041069e-05, + "loss": 1.0819, + "step": 330 + }, + { + "epoch": 0.19, + "learning_rate": 4.9530865868265605e-05, + "loss": 1.0759, + "step": 335 + }, + { + "epoch": 0.19, + "learning_rate": 4.951680297885342e-05, + "loss": 1.0515, + "step": 340 + }, + { + "epoch": 0.19, + "learning_rate": 4.950253447011779e-05, + "loss": 1.0371, + "step": 345 + }, + { + "epoch": 0.19, + "learning_rate": 4.948806046172691e-05, + "loss": 1.0619, + "step": 350 + }, + { + "epoch": 0.2, + "learning_rate": 4.947338107507245e-05, + "loss": 1.0757, + "step": 355 + }, + { + "epoch": 0.2, + "learning_rate": 4.945849643326857e-05, + "loss": 1.0686, + "step": 360 + }, + { + "epoch": 0.2, + "learning_rate": 4.9443406661150874e-05, + "loss": 1.0809, + "step": 365 + }, + { + "epoch": 0.2, + "learning_rate": 4.942811188527537e-05, + "loss": 1.0704, + "step": 370 + }, + { + "epoch": 0.21, + "learning_rate": 4.941261223391742e-05, + "loss": 1.0655, + "step": 375 + }, + { + "epoch": 0.21, + "learning_rate": 4.939690783707063e-05, + "loss": 1.1182, + "step": 380 + }, + { + "epoch": 0.21, + "learning_rate": 4.938099882644578e-05, + "loss": 1.081, + "step": 385 + }, + { + "epoch": 0.22, + "learning_rate": 4.9364885335469734e-05, + "loss": 1.0792, + "step": 390 + }, + { + "epoch": 0.22, + "learning_rate": 4.93485674992843e-05, + "loss": 1.0244, + "step": 395 + }, + { + "epoch": 0.22, + "learning_rate": 4.933204545474511e-05, + "loss": 1.0531, + "step": 400 + }, + { + "epoch": 0.22, + "learning_rate": 4.9315319340420465e-05, + "loss": 1.048, + "step": 405 + }, + { + "epoch": 0.23, + "learning_rate": 4.929838929659015e-05, + "loss": 1.04, + "step": 410 + }, + { + "epoch": 0.23, + "learning_rate": 4.9281255465244314e-05, + "loss": 1.0686, + "step": 415 + }, + { + "epoch": 0.23, + "learning_rate": 4.926391799008223e-05, + "loss": 1.0933, + "step": 420 + }, + { + "epoch": 0.24, + "learning_rate": 4.924637701651111e-05, + "loss": 1.0313, + "step": 425 + }, + { + "epoch": 0.24, + "learning_rate": 4.9228632691644874e-05, + "loss": 1.0811, + "step": 430 + }, + { + "epoch": 0.24, + "learning_rate": 4.921068516430293e-05, + "loss": 1.0617, + "step": 435 + }, + { + "epoch": 0.24, + "learning_rate": 4.919253458500892e-05, + "loss": 1.0482, + "step": 440 + }, + { + "epoch": 0.25, + "learning_rate": 4.9174181105989445e-05, + "loss": 1.0681, + "step": 445 + }, + { + "epoch": 0.25, + "learning_rate": 4.9155624881172834e-05, + "loss": 1.0476, + "step": 450 + }, + { + "epoch": 0.25, + "learning_rate": 4.913686606618777e-05, + "loss": 1.0463, + "step": 455 + }, + { + "epoch": 0.25, + "learning_rate": 4.911790481836208e-05, + "loss": 1.0213, + "step": 460 + }, + { + "epoch": 0.26, + "learning_rate": 4.909874129672133e-05, + "loss": 0.9855, + "step": 465 + }, + { + "epoch": 0.26, + "learning_rate": 4.907937566198757e-05, + "loss": 1.045, + "step": 470 + }, + { + "epoch": 0.26, + "learning_rate": 4.9059808076577914e-05, + "loss": 1.0442, + "step": 475 + }, + { + "epoch": 0.27, + "learning_rate": 4.904003870460323e-05, + "loss": 1.0354, + "step": 480 + }, + { + "epoch": 0.27, + "learning_rate": 4.9020067711866735e-05, + "loss": 1.0331, + "step": 485 + }, + { + "epoch": 0.27, + "learning_rate": 4.899989526586261e-05, + "loss": 1.0585, + "step": 490 + }, + { + "epoch": 0.27, + "learning_rate": 4.8979521535774636e-05, + "loss": 1.0223, + "step": 495 + }, + { + "epoch": 0.28, + "learning_rate": 4.895894669247468e-05, + "loss": 1.0118, + "step": 500 + }, + { + "epoch": 0.28, + "learning_rate": 4.8938170908521356e-05, + "loss": 1.0508, + "step": 505 + }, + { + "epoch": 0.28, + "learning_rate": 4.8917194358158534e-05, + "loss": 1.0656, + "step": 510 + }, + { + "epoch": 0.28, + "learning_rate": 4.8896017217313886e-05, + "loss": 1.0655, + "step": 515 + }, + { + "epoch": 0.29, + "learning_rate": 4.887463966359741e-05, + "loss": 1.0833, + "step": 520 + }, + { + "epoch": 0.29, + "learning_rate": 4.8853061876299956e-05, + "loss": 1.068, + "step": 525 + }, + { + "epoch": 0.29, + "learning_rate": 4.8831284036391684e-05, + "loss": 1.0487, + "step": 530 + }, + { + "epoch": 0.3, + "learning_rate": 4.880930632652058e-05, + "loss": 1.059, + "step": 535 + }, + { + "epoch": 0.3, + "learning_rate": 4.878712893101092e-05, + "loss": 1.0408, + "step": 540 + }, + { + "epoch": 0.3, + "learning_rate": 4.876475203586171e-05, + "loss": 0.9976, + "step": 545 + }, + { + "epoch": 0.3, + "learning_rate": 4.874217582874514e-05, + "loss": 1.0358, + "step": 550 + }, + { + "epoch": 0.31, + "learning_rate": 4.8719400499005e-05, + "loss": 1.0073, + "step": 555 + }, + { + "epoch": 0.31, + "learning_rate": 4.869642623765509e-05, + "loss": 1.0742, + "step": 560 + }, + { + "epoch": 0.31, + "learning_rate": 4.867325323737765e-05, + "loss": 1.0463, + "step": 565 + }, + { + "epoch": 0.32, + "learning_rate": 4.864988169252168e-05, + "loss": 1.0242, + "step": 570 + }, + { + "epoch": 0.32, + "learning_rate": 4.8626311799101375e-05, + "loss": 1.0773, + "step": 575 + }, + { + "epoch": 0.32, + "learning_rate": 4.860254375479446e-05, + "loss": 1.0371, + "step": 580 + }, + { + "epoch": 0.32, + "learning_rate": 4.8578577758940504e-05, + "loss": 1.0399, + "step": 585 + }, + { + "epoch": 0.33, + "learning_rate": 4.855441401253928e-05, + "loss": 1.0411, + "step": 590 + }, + { + "epoch": 0.33, + "learning_rate": 4.8530052718249076e-05, + "loss": 1.0476, + "step": 595 + }, + { + "epoch": 0.33, + "learning_rate": 4.850549408038498e-05, + "loss": 1.0075, + "step": 600 + }, + { + "epoch": 0.33, + "learning_rate": 4.848073830491717e-05, + "loss": 1.0045, + "step": 605 + }, + { + "epoch": 0.34, + "learning_rate": 4.845578559946923e-05, + "loss": 1.0368, + "step": 610 + }, + { + "epoch": 0.34, + "learning_rate": 4.8430636173316306e-05, + "loss": 1.0156, + "step": 615 + }, + { + "epoch": 0.34, + "learning_rate": 4.840529023738348e-05, + "loss": 1.0334, + "step": 620 + }, + { + "epoch": 0.35, + "learning_rate": 4.837974800424389e-05, + "loss": 0.9994, + "step": 625 + }, + { + "epoch": 0.35, + "learning_rate": 4.8354009688117026e-05, + "loss": 1.0467, + "step": 630 + }, + { + "epoch": 0.35, + "learning_rate": 4.8328075504866874e-05, + "loss": 0.9968, + "step": 635 + }, + { + "epoch": 0.35, + "learning_rate": 4.8301945672000164e-05, + "loss": 1.041, + "step": 640 + }, + { + "epoch": 0.36, + "learning_rate": 4.8275620408664487e-05, + "loss": 0.9811, + "step": 645 + }, + { + "epoch": 0.36, + "learning_rate": 4.8249099935646494e-05, + "loss": 1.0235, + "step": 650 + }, + { + "epoch": 0.36, + "learning_rate": 4.822238447537003e-05, + "loss": 1.0152, + "step": 655 + }, + { + "epoch": 0.36, + "learning_rate": 4.819547425189429e-05, + "loss": 1.038, + "step": 660 + }, + { + "epoch": 0.37, + "learning_rate": 4.81683694909119e-05, + "loss": 1.0387, + "step": 665 + }, + { + "epoch": 0.37, + "learning_rate": 4.814107041974707e-05, + "loss": 1.019, + "step": 670 + }, + { + "epoch": 0.37, + "learning_rate": 4.811357726735366e-05, + "loss": 1.0403, + "step": 675 + }, + { + "epoch": 0.38, + "learning_rate": 4.808589026431324e-05, + "loss": 1.0828, + "step": 680 + }, + { + "epoch": 0.38, + "learning_rate": 4.805800964283322e-05, + "loss": 1.0128, + "step": 685 + }, + { + "epoch": 0.38, + "learning_rate": 4.802993563674483e-05, + "loss": 1.0502, + "step": 690 + }, + { + "epoch": 0.38, + "learning_rate": 4.80016684815012e-05, + "loss": 1.0169, + "step": 695 + }, + { + "epoch": 0.39, + "learning_rate": 4.7973208414175406e-05, + "loss": 1.0316, + "step": 700 + }, + { + "epoch": 0.39, + "learning_rate": 4.794455567345842e-05, + "loss": 1.0263, + "step": 705 + }, + { + "epoch": 0.39, + "learning_rate": 4.791571049965714e-05, + "loss": 1.0801, + "step": 710 + }, + { + "epoch": 0.4, + "learning_rate": 4.7886673134692404e-05, + "loss": 1.0575, + "step": 715 + }, + { + "epoch": 0.4, + "learning_rate": 4.7857443822096905e-05, + "loss": 0.9767, + "step": 720 + }, + { + "epoch": 0.4, + "learning_rate": 4.782802280701319e-05, + "loss": 0.9666, + "step": 725 + }, + { + "epoch": 0.4, + "learning_rate": 4.77984103361916e-05, + "loss": 0.9983, + "step": 730 + }, + { + "epoch": 0.41, + "learning_rate": 4.776860665798816e-05, + "loss": 1.0219, + "step": 735 + }, + { + "epoch": 0.41, + "learning_rate": 4.773861202236257e-05, + "loss": 0.9963, + "step": 740 + }, + { + "epoch": 0.41, + "learning_rate": 4.770842668087602e-05, + "loss": 1.0219, + "step": 745 + }, + { + "epoch": 0.41, + "learning_rate": 4.767805088668916e-05, + "loss": 1.0029, + "step": 750 + }, + { + "epoch": 0.42, + "learning_rate": 4.7647484894559936e-05, + "loss": 1.0077, + "step": 755 + }, + { + "epoch": 0.42, + "learning_rate": 4.7616728960841444e-05, + "loss": 0.9908, + "step": 760 + }, + { + "epoch": 0.42, + "learning_rate": 4.758578334347981e-05, + "loss": 1.0092, + "step": 765 + }, + { + "epoch": 0.43, + "learning_rate": 4.7554648302012015e-05, + "loss": 0.9874, + "step": 770 + }, + { + "epoch": 0.43, + "learning_rate": 4.7523324097563706e-05, + "loss": 1.0185, + "step": 775 + }, + { + "epoch": 0.43, + "learning_rate": 4.749181099284703e-05, + "loss": 1.0533, + "step": 780 + }, + { + "epoch": 0.43, + "learning_rate": 4.746010925215839e-05, + "loss": 1.0083, + "step": 785 + }, + { + "epoch": 0.44, + "learning_rate": 4.74282191413763e-05, + "loss": 1.0191, + "step": 790 + }, + { + "epoch": 0.44, + "learning_rate": 4.7396140927959045e-05, + "loss": 0.9786, + "step": 795 + }, + { + "epoch": 0.44, + "learning_rate": 4.7363874880942574e-05, + "loss": 1.0197, + "step": 800 + }, + { + "epoch": 0.45, + "learning_rate": 4.733142127093813e-05, + "loss": 1.0471, + "step": 805 + }, + { + "epoch": 0.45, + "learning_rate": 4.7298780370130014e-05, + "loss": 1.0134, + "step": 810 + }, + { + "epoch": 0.45, + "learning_rate": 4.726595245227336e-05, + "loss": 0.9801, + "step": 815 + }, + { + "epoch": 0.45, + "learning_rate": 4.723293779269173e-05, + "loss": 0.9944, + "step": 820 + }, + { + "epoch": 0.46, + "learning_rate": 4.7199736668274924e-05, + "loss": 1.0358, + "step": 825 + }, + { + "epoch": 0.46, + "learning_rate": 4.716634935747655e-05, + "loss": 0.9907, + "step": 830 + }, + { + "epoch": 0.46, + "learning_rate": 4.713277614031177e-05, + "loss": 1.0698, + "step": 835 + }, + { + "epoch": 0.46, + "learning_rate": 4.70990172983549e-05, + "loss": 1.0466, + "step": 840 + }, + { + "epoch": 0.47, + "learning_rate": 4.706507311473707e-05, + "loss": 0.9489, + "step": 845 + }, + { + "epoch": 0.47, + "learning_rate": 4.703094387414385e-05, + "loss": 0.9936, + "step": 850 + }, + { + "epoch": 0.47, + "learning_rate": 4.699662986281288e-05, + "loss": 0.9672, + "step": 855 + }, + { + "epoch": 0.48, + "learning_rate": 4.696213136853141e-05, + "loss": 1.0164, + "step": 860 + }, + { + "epoch": 0.48, + "learning_rate": 4.6927448680633954e-05, + "loss": 0.9817, + "step": 865 + }, + { + "epoch": 0.48, + "learning_rate": 4.689258208999983e-05, + "loss": 1.028, + "step": 870 + }, + { + "epoch": 0.48, + "learning_rate": 4.6857531889050716e-05, + "loss": 1.0239, + "step": 875 + }, + { + "epoch": 0.49, + "learning_rate": 4.682229837174821e-05, + "loss": 1.0378, + "step": 880 + }, + { + "epoch": 0.49, + "learning_rate": 4.678688183359135e-05, + "loss": 0.9908, + "step": 885 + }, + { + "epoch": 0.49, + "learning_rate": 4.675128257161418e-05, + "loss": 0.9384, + "step": 890 + }, + { + "epoch": 0.49, + "learning_rate": 4.671550088438319e-05, + "loss": 0.9481, + "step": 895 + }, + { + "epoch": 0.5, + "learning_rate": 4.6679537071994874e-05, + "loss": 0.9541, + "step": 900 + }, + { + "epoch": 0.5, + "learning_rate": 4.6643391436073165e-05, + "loss": 0.9787, + "step": 905 + }, + { + "epoch": 0.5, + "learning_rate": 4.660706427976693e-05, + "loss": 0.9897, + "step": 910 + }, + { + "epoch": 0.51, + "learning_rate": 4.657055590774745e-05, + "loss": 0.9947, + "step": 915 + }, + { + "epoch": 0.51, + "learning_rate": 4.6533866626205805e-05, + "loss": 0.9614, + "step": 920 + }, + { + "epoch": 0.51, + "learning_rate": 4.649699674285036e-05, + "loss": 1.0323, + "step": 925 + }, + { + "epoch": 0.51, + "learning_rate": 4.645994656690417e-05, + "loss": 1.0345, + "step": 930 + }, + { + "epoch": 0.52, + "learning_rate": 4.642271640910235e-05, + "loss": 1.0432, + "step": 935 + }, + { + "epoch": 0.52, + "learning_rate": 4.638530658168954e-05, + "loss": 1.0102, + "step": 940 + }, + { + "epoch": 0.52, + "learning_rate": 4.6347717398417203e-05, + "loss": 1.0361, + "step": 945 + }, + { + "epoch": 0.53, + "learning_rate": 4.6309949174541096e-05, + "loss": 0.9818, + "step": 950 + }, + { + "epoch": 0.53, + "learning_rate": 4.627200222681851e-05, + "loss": 1.0293, + "step": 955 + }, + { + "epoch": 0.53, + "learning_rate": 4.6233876873505694e-05, + "loss": 1.0125, + "step": 960 + }, + { + "epoch": 0.53, + "learning_rate": 4.619557343435516e-05, + "loss": 1.0039, + "step": 965 + }, + { + "epoch": 0.54, + "learning_rate": 4.615709223061302e-05, + "loss": 1.0146, + "step": 970 + }, + { + "epoch": 0.54, + "learning_rate": 4.611843358501624e-05, + "loss": 1.0352, + "step": 975 + }, + { + "epoch": 0.54, + "learning_rate": 4.6079597821789993e-05, + "loss": 1.0405, + "step": 980 + }, + { + "epoch": 0.54, + "learning_rate": 4.604058526664491e-05, + "loss": 0.9834, + "step": 985 + }, + { + "epoch": 0.55, + "learning_rate": 4.600139624677436e-05, + "loss": 1.0252, + "step": 990 + }, + { + "epoch": 0.55, + "learning_rate": 4.596203109085168e-05, + "loss": 1.0516, + "step": 995 + }, + { + "epoch": 0.55, + "learning_rate": 4.5922490129027464e-05, + "loss": 0.9762, + "step": 1000 + }, + { + "epoch": 0.56, + "learning_rate": 4.588277369292674e-05, + "loss": 0.9801, + "step": 1005 + }, + { + "epoch": 0.56, + "learning_rate": 4.5842882115646234e-05, + "loss": 1.0127, + "step": 1010 + }, + { + "epoch": 0.56, + "learning_rate": 4.580281573175157e-05, + "loss": 1.0415, + "step": 1015 + }, + { + "epoch": 0.56, + "learning_rate": 4.576257487727442e-05, + "loss": 0.9974, + "step": 1020 + }, + { + "epoch": 0.57, + "learning_rate": 4.572215988970974e-05, + "loss": 1.0097, + "step": 1025 + }, + { + "epoch": 0.57, + "learning_rate": 4.568157110801293e-05, + "loss": 1.0004, + "step": 1030 + }, + { + "epoch": 0.57, + "learning_rate": 4.5640808872596944e-05, + "loss": 0.9932, + "step": 1035 + }, + { + "epoch": 0.58, + "learning_rate": 4.5599873525329505e-05, + "loss": 0.9437, + "step": 1040 + }, + { + "epoch": 0.58, + "learning_rate": 4.555876540953019e-05, + "loss": 1.0004, + "step": 1045 + }, + { + "epoch": 0.58, + "learning_rate": 4.551748486996755e-05, + "loss": 0.9743, + "step": 1050 + }, + { + "epoch": 0.58, + "learning_rate": 4.547603225285626e-05, + "loss": 1.0303, + "step": 1055 + }, + { + "epoch": 0.59, + "learning_rate": 4.543440790585417e-05, + "loss": 1.0225, + "step": 1060 + }, + { + "epoch": 0.59, + "learning_rate": 4.539261217805939e-05, + "loss": 1.0102, + "step": 1065 + }, + { + "epoch": 0.59, + "learning_rate": 4.535064542000743e-05, + "loss": 1.0087, + "step": 1070 + }, + { + "epoch": 0.59, + "learning_rate": 4.5308507983668165e-05, + "loss": 0.952, + "step": 1075 + }, + { + "epoch": 0.6, + "learning_rate": 4.526620022244293e-05, + "loss": 0.9767, + "step": 1080 + }, + { + "epoch": 0.6, + "learning_rate": 4.522372249116158e-05, + "loss": 1.0049, + "step": 1085 + }, + { + "epoch": 0.6, + "learning_rate": 4.5181075146079456e-05, + "loss": 1.0397, + "step": 1090 + }, + { + "epoch": 0.61, + "learning_rate": 4.5138258544874455e-05, + "loss": 1.0167, + "step": 1095 + }, + { + "epoch": 0.61, + "learning_rate": 4.5095273046643985e-05, + "loss": 0.9966, + "step": 1100 + }, + { + "epoch": 0.61, + "learning_rate": 4.5052119011901986e-05, + "loss": 0.983, + "step": 1105 + }, + { + "epoch": 0.61, + "learning_rate": 4.500879680257587e-05, + "loss": 1.0301, + "step": 1110 + }, + { + "epoch": 0.62, + "learning_rate": 4.4965306782003535e-05, + "loss": 1.0371, + "step": 1115 + }, + { + "epoch": 0.62, + "learning_rate": 4.492164931493028e-05, + "loss": 1.0083, + "step": 1120 + }, + { + "epoch": 0.62, + "learning_rate": 4.487782476750575e-05, + "loss": 0.9728, + "step": 1125 + }, + { + "epoch": 0.62, + "learning_rate": 4.4833833507280884e-05, + "loss": 0.9892, + "step": 1130 + }, + { + "epoch": 0.63, + "learning_rate": 4.4789675903204805e-05, + "loss": 0.9847, + "step": 1135 + }, + { + "epoch": 0.63, + "learning_rate": 4.474535232562176e-05, + "loss": 1.0545, + "step": 1140 + }, + { + "epoch": 0.63, + "learning_rate": 4.470086314626797e-05, + "loss": 1.0204, + "step": 1145 + }, + { + "epoch": 0.64, + "learning_rate": 4.465620873826856e-05, + "loss": 0.9648, + "step": 1150 + }, + { + "epoch": 0.64, + "learning_rate": 4.46113894761344e-05, + "loss": 0.9545, + "step": 1155 + }, + { + "epoch": 0.64, + "learning_rate": 4.456640573575896e-05, + "loss": 1.0256, + "step": 1160 + }, + { + "epoch": 0.64, + "learning_rate": 4.4521257894415183e-05, + "loss": 0.99, + "step": 1165 + }, + { + "epoch": 0.65, + "learning_rate": 4.44759463307523e-05, + "loss": 0.9906, + "step": 1170 + }, + { + "epoch": 0.65, + "learning_rate": 4.443047142479266e-05, + "loss": 0.9876, + "step": 1175 + }, + { + "epoch": 0.65, + "learning_rate": 4.4384833557928553e-05, + "loss": 1.0495, + "step": 1180 + }, + { + "epoch": 0.66, + "learning_rate": 4.4339033112918966e-05, + "loss": 0.9869, + "step": 1185 + }, + { + "epoch": 0.66, + "learning_rate": 4.4293070473886456e-05, + "loss": 1.0299, + "step": 1190 + }, + { + "epoch": 0.66, + "learning_rate": 4.424694602631385e-05, + "loss": 1.0073, + "step": 1195 + }, + { + "epoch": 0.66, + "learning_rate": 4.420066015704105e-05, + "loss": 1.0023, + "step": 1200 + }, + { + "epoch": 0.67, + "learning_rate": 4.41542132542618e-05, + "loss": 0.9462, + "step": 1205 + }, + { + "epoch": 0.67, + "learning_rate": 4.410760570752037e-05, + "loss": 1.0116, + "step": 1210 + }, + { + "epoch": 0.67, + "learning_rate": 4.4060837907708375e-05, + "loss": 0.9652, + "step": 1215 + }, + { + "epoch": 0.67, + "learning_rate": 4.401391024706142e-05, + "loss": 1.0411, + "step": 1220 + }, + { + "epoch": 0.68, + "learning_rate": 4.396682311915586e-05, + "loss": 0.9691, + "step": 1225 + }, + { + "epoch": 0.68, + "learning_rate": 4.3919576918905495e-05, + "loss": 0.961, + "step": 1230 + }, + { + "epoch": 0.68, + "learning_rate": 4.387217204255819e-05, + "loss": 0.9602, + "step": 1235 + }, + { + "epoch": 0.69, + "learning_rate": 4.3824608887692666e-05, + "loss": 1.0197, + "step": 1240 + }, + { + "epoch": 0.69, + "learning_rate": 4.377688785321507e-05, + "loss": 0.9982, + "step": 1245 + }, + { + "epoch": 0.69, + "learning_rate": 4.372900933935569e-05, + "loss": 0.986, + "step": 1250 + }, + { + "epoch": 0.69, + "learning_rate": 4.368097374766556e-05, + "loss": 0.9744, + "step": 1255 + }, + { + "epoch": 0.7, + "learning_rate": 4.3632781481013105e-05, + "loss": 1.0078, + "step": 1260 + }, + { + "epoch": 0.7, + "learning_rate": 4.358443294358077e-05, + "loss": 0.9865, + "step": 1265 + }, + { + "epoch": 0.7, + "learning_rate": 4.35359285408616e-05, + "loss": 0.976, + "step": 1270 + }, + { + "epoch": 0.71, + "learning_rate": 4.348726867965591e-05, + "loss": 1.0141, + "step": 1275 + }, + { + "epoch": 0.71, + "learning_rate": 4.343845376806777e-05, + "loss": 0.952, + "step": 1280 + }, + { + "epoch": 0.71, + "learning_rate": 4.338948421550169e-05, + "loss": 1.0239, + "step": 1285 + }, + { + "epoch": 0.71, + "learning_rate": 4.334036043265909e-05, + "loss": 0.994, + "step": 1290 + }, + { + "epoch": 0.72, + "learning_rate": 4.329108283153492e-05, + "loss": 1.0505, + "step": 1295 + }, + { + "epoch": 0.72, + "learning_rate": 4.3241651825414195e-05, + "loss": 0.9919, + "step": 1300 + }, + { + "epoch": 0.72, + "learning_rate": 4.31920678288685e-05, + "loss": 1.0169, + "step": 1305 + }, + { + "epoch": 0.72, + "learning_rate": 4.3142331257752546e-05, + "loss": 0.9938, + "step": 1310 + }, + { + "epoch": 0.73, + "learning_rate": 4.309244252920064e-05, + "loss": 0.92, + "step": 1315 + }, + { + "epoch": 0.73, + "learning_rate": 4.304240206162326e-05, + "loss": 0.9537, + "step": 1320 + }, + { + "epoch": 0.73, + "learning_rate": 4.299221027470345e-05, + "loss": 0.996, + "step": 1325 + }, + { + "epoch": 0.74, + "learning_rate": 4.294186758939339e-05, + "loss": 0.9652, + "step": 1330 + }, + { + "epoch": 0.74, + "learning_rate": 4.2891374427910795e-05, + "loss": 1.0138, + "step": 1335 + }, + { + "epoch": 0.74, + "learning_rate": 4.284073121373544e-05, + "loss": 0.9267, + "step": 1340 + }, + { + "epoch": 0.74, + "learning_rate": 4.278993837160553e-05, + "loss": 0.9899, + "step": 1345 + }, + { + "epoch": 0.75, + "learning_rate": 4.273899632751422e-05, + "loss": 0.963, + "step": 1350 + }, + { + "epoch": 0.75, + "learning_rate": 4.2687905508705974e-05, + "loss": 1.0066, + "step": 1355 + }, + { + "epoch": 0.75, + "learning_rate": 4.263666634367303e-05, + "loss": 0.9939, + "step": 1360 + }, + { + "epoch": 0.75, + "learning_rate": 4.258527926215178e-05, + "loss": 0.9667, + "step": 1365 + }, + { + "epoch": 0.76, + "learning_rate": 4.253374469511917e-05, + "loss": 0.9999, + "step": 1370 + }, + { + "epoch": 0.76, + "learning_rate": 4.248206307478909e-05, + "loss": 0.9738, + "step": 1375 + }, + { + "epoch": 0.76, + "learning_rate": 4.243023483460875e-05, + "loss": 0.9829, + "step": 1380 + }, + { + "epoch": 0.77, + "learning_rate": 4.237826040925503e-05, + "loss": 1.0107, + "step": 1385 + }, + { + "epoch": 0.77, + "learning_rate": 4.232614023463088e-05, + "loss": 0.9992, + "step": 1390 + }, + { + "epoch": 0.77, + "learning_rate": 4.227387474786159e-05, + "loss": 0.9566, + "step": 1395 + }, + { + "epoch": 0.77, + "learning_rate": 4.222146438729119e-05, + "loss": 0.9722, + "step": 1400 + }, + { + "epoch": 0.78, + "learning_rate": 4.216890959247873e-05, + "loss": 0.988, + "step": 1405 + }, + { + "epoch": 0.78, + "learning_rate": 4.211621080419463e-05, + "loss": 1.0103, + "step": 1410 + }, + { + "epoch": 0.78, + "learning_rate": 4.206336846441695e-05, + "loss": 0.9805, + "step": 1415 + }, + { + "epoch": 0.79, + "learning_rate": 4.201038301632772e-05, + "loss": 1.0177, + "step": 1420 + }, + { + "epoch": 0.79, + "learning_rate": 4.195725490430917e-05, + "loss": 0.9938, + "step": 1425 + }, + { + "epoch": 0.79, + "learning_rate": 4.190398457394007e-05, + "loss": 1.0276, + "step": 1430 + }, + { + "epoch": 0.79, + "learning_rate": 4.1850572471991924e-05, + "loss": 0.9909, + "step": 1435 + }, + { + "epoch": 0.8, + "learning_rate": 4.1797019046425264e-05, + "loss": 0.9794, + "step": 1440 + }, + { + "epoch": 0.8, + "learning_rate": 4.1743324746385914e-05, + "loss": 1.0073, + "step": 1445 + }, + { + "epoch": 0.8, + "learning_rate": 4.1689490022201154e-05, + "loss": 1.0106, + "step": 1450 + }, + { + "epoch": 0.8, + "learning_rate": 4.163551532537601e-05, + "loss": 1.0241, + "step": 1455 + }, + { + "epoch": 0.81, + "learning_rate": 4.1581401108589425e-05, + "loss": 1.0178, + "step": 1460 + }, + { + "epoch": 0.81, + "learning_rate": 4.1527147825690495e-05, + "loss": 0.9426, + "step": 1465 + }, + { + "epoch": 0.81, + "learning_rate": 4.1472755931694626e-05, + "loss": 0.9642, + "step": 1470 + }, + { + "epoch": 0.82, + "learning_rate": 4.141822588277976e-05, + "loss": 0.9678, + "step": 1475 + }, + { + "epoch": 0.82, + "learning_rate": 4.136355813628251e-05, + "loss": 0.9654, + "step": 1480 + }, + { + "epoch": 0.82, + "learning_rate": 4.130875315069435e-05, + "loss": 0.9748, + "step": 1485 + }, + { + "epoch": 0.82, + "learning_rate": 4.125381138565775e-05, + "loss": 0.9321, + "step": 1490 + }, + { + "epoch": 0.83, + "learning_rate": 4.1198733301962346e-05, + "loss": 0.9885, + "step": 1495 + }, + { + "epoch": 0.83, + "learning_rate": 4.114351936154105e-05, + "loss": 0.9385, + "step": 1500 + }, + { + "epoch": 0.83, + "learning_rate": 4.108817002746619e-05, + "loss": 0.9362, + "step": 1505 + }, + { + "epoch": 0.83, + "learning_rate": 4.1032685763945625e-05, + "loss": 0.9764, + "step": 1510 + }, + { + "epoch": 0.84, + "learning_rate": 4.097706703631886e-05, + "loss": 0.961, + "step": 1515 + }, + { + "epoch": 0.84, + "learning_rate": 4.092131431105312e-05, + "loss": 0.9818, + "step": 1520 + }, + { + "epoch": 0.84, + "learning_rate": 4.086542805573945e-05, + "loss": 0.9855, + "step": 1525 + }, + { + "epoch": 0.85, + "learning_rate": 4.080940873908881e-05, + "loss": 0.9618, + "step": 1530 + }, + { + "epoch": 0.85, + "learning_rate": 4.07532568309281e-05, + "loss": 1.0102, + "step": 1535 + }, + { + "epoch": 0.85, + "learning_rate": 4.069697280219628e-05, + "loss": 0.9773, + "step": 1540 + }, + { + "epoch": 0.85, + "learning_rate": 4.0640557124940376e-05, + "loss": 0.9687, + "step": 1545 + }, + { + "epoch": 0.86, + "learning_rate": 4.058401027231152e-05, + "loss": 0.9818, + "step": 1550 + }, + { + "epoch": 0.86, + "learning_rate": 4.052733271856103e-05, + "loss": 0.9911, + "step": 1555 + }, + { + "epoch": 0.86, + "learning_rate": 4.0470524939036355e-05, + "loss": 0.9694, + "step": 1560 + }, + { + "epoch": 0.87, + "learning_rate": 4.0413587410177155e-05, + "loss": 0.9374, + "step": 1565 + }, + { + "epoch": 0.87, + "learning_rate": 4.035652060951128e-05, + "loss": 0.956, + "step": 1570 + }, + { + "epoch": 0.87, + "learning_rate": 4.0299325015650774e-05, + "loss": 0.9491, + "step": 1575 + }, + { + "epoch": 0.87, + "learning_rate": 4.024200110828783e-05, + "loss": 0.9569, + "step": 1580 + }, + { + "epoch": 0.88, + "learning_rate": 4.018454936819082e-05, + "loss": 0.9627, + "step": 1585 + }, + { + "epoch": 0.88, + "learning_rate": 4.012697027720018e-05, + "loss": 0.9922, + "step": 1590 + }, + { + "epoch": 0.88, + "learning_rate": 4.0069264318224506e-05, + "loss": 0.9703, + "step": 1595 + }, + { + "epoch": 0.88, + "learning_rate": 4.0011431975236337e-05, + "loss": 0.9436, + "step": 1600 + }, + { + "epoch": 0.89, + "learning_rate": 3.995347373326822e-05, + "loss": 0.961, + "step": 1605 + }, + { + "epoch": 0.89, + "learning_rate": 3.989539007840861e-05, + "loss": 0.9247, + "step": 1610 + }, + { + "epoch": 0.89, + "learning_rate": 3.983718149779775e-05, + "loss": 0.9537, + "step": 1615 + }, + { + "epoch": 0.9, + "learning_rate": 3.9778848479623656e-05, + "loss": 0.9415, + "step": 1620 + }, + { + "epoch": 0.9, + "learning_rate": 3.972039151311795e-05, + "loss": 0.9954, + "step": 1625 + }, + { + "epoch": 0.9, + "learning_rate": 3.966181108855183e-05, + "loss": 0.9451, + "step": 1630 + }, + { + "epoch": 0.9, + "learning_rate": 3.960310769723189e-05, + "loss": 0.9977, + "step": 1635 + }, + { + "epoch": 0.91, + "learning_rate": 3.9544281831496034e-05, + "loss": 0.9806, + "step": 1640 + }, + { + "epoch": 0.91, + "learning_rate": 3.9485333984709374e-05, + "loss": 0.9851, + "step": 1645 + }, + { + "epoch": 0.91, + "learning_rate": 3.942626465126001e-05, + "loss": 1.0089, + "step": 1650 + }, + { + "epoch": 0.92, + "learning_rate": 3.9367074326555e-05, + "loss": 0.9562, + "step": 1655 + }, + { + "epoch": 0.92, + "learning_rate": 3.930776350701609e-05, + "loss": 0.9892, + "step": 1660 + }, + { + "epoch": 0.92, + "learning_rate": 3.92483326900756e-05, + "loss": 0.9875, + "step": 1665 + }, + { + "epoch": 0.92, + "learning_rate": 3.91887823741723e-05, + "loss": 1.0012, + "step": 1670 + }, + { + "epoch": 0.93, + "learning_rate": 3.9129113058747136e-05, + "loss": 0.97, + "step": 1675 + }, + { + "epoch": 0.93, + "learning_rate": 3.9069325244239095e-05, + "loss": 0.9793, + "step": 1680 + }, + { + "epoch": 0.93, + "learning_rate": 3.900941943208103e-05, + "loss": 0.9864, + "step": 1685 + }, + { + "epoch": 0.93, + "learning_rate": 3.894939612469539e-05, + "loss": 0.9596, + "step": 1690 + }, + { + "epoch": 0.94, + "learning_rate": 3.888925582549006e-05, + "loss": 0.9679, + "step": 1695 + }, + { + "epoch": 0.94, + "learning_rate": 3.882899903885412e-05, + "loss": 0.9395, + "step": 1700 + }, + { + "epoch": 0.94, + "learning_rate": 3.876862627015361e-05, + "loss": 0.9774, + "step": 1705 + }, + { + "epoch": 0.95, + "learning_rate": 3.87081380257273e-05, + "loss": 0.9686, + "step": 1710 + }, + { + "epoch": 0.95, + "learning_rate": 3.864753481288244e-05, + "loss": 0.953, + "step": 1715 + }, + { + "epoch": 0.95, + "learning_rate": 3.8586817139890515e-05, + "loss": 0.9719, + "step": 1720 + }, + { + "epoch": 0.95, + "learning_rate": 3.852598551598294e-05, + "loss": 0.9748, + "step": 1725 + }, + { + "epoch": 0.96, + "learning_rate": 3.8465040451346874e-05, + "loss": 0.9741, + "step": 1730 + }, + { + "epoch": 0.96, + "learning_rate": 3.8403982457120836e-05, + "loss": 0.9747, + "step": 1735 + }, + { + "epoch": 0.96, + "learning_rate": 3.834281204539051e-05, + "loss": 0.9791, + "step": 1740 + }, + { + "epoch": 0.96, + "learning_rate": 3.828152972918438e-05, + "loss": 0.9704, + "step": 1745 + }, + { + "epoch": 0.97, + "learning_rate": 3.82201360224695e-05, + "loss": 0.9556, + "step": 1750 + }, + { + "epoch": 0.97, + "learning_rate": 3.815863144014711e-05, + "loss": 0.9552, + "step": 1755 + }, + { + "epoch": 0.97, + "learning_rate": 3.809701649804834e-05, + "loss": 0.9594, + "step": 1760 + }, + { + "epoch": 0.98, + "learning_rate": 3.8035291712929926e-05, + "loss": 0.959, + "step": 1765 + }, + { + "epoch": 0.98, + "learning_rate": 3.797345760246982e-05, + "loss": 1.0254, + "step": 1770 + }, + { + "epoch": 0.98, + "learning_rate": 3.791151468526289e-05, + "loss": 0.9925, + "step": 1775 + }, + { + "epoch": 0.98, + "learning_rate": 3.784946348081654e-05, + "loss": 0.9516, + "step": 1780 + }, + { + "epoch": 0.99, + "learning_rate": 3.7787304509546365e-05, + "loss": 0.954, + "step": 1785 + }, + { + "epoch": 0.99, + "learning_rate": 3.7725038292771774e-05, + "loss": 0.9465, + "step": 1790 + }, + { + "epoch": 0.99, + "learning_rate": 3.766266535271167e-05, + "loss": 0.9792, + "step": 1795 + }, + { + "epoch": 1.0, + "learning_rate": 3.760018621248e-05, + "loss": 0.964, + "step": 1800 + }, + { + "epoch": 1.0, + "learning_rate": 3.75376013960814e-05, + "loss": 0.9419, + "step": 1805 + }, + { + "epoch": 1.0, + "learning_rate": 3.747491142840681e-05, + "loss": 0.9818, + "step": 1810 + }, + { + "epoch": 1.0, + "learning_rate": 3.741211683522904e-05, + "loss": 0.9153, + "step": 1815 + }, + { + "epoch": 1.01, + "learning_rate": 3.734921814319841e-05, + "loss": 0.955, + "step": 1820 + }, + { + "epoch": 1.01, + "learning_rate": 3.728621587983828e-05, + "loss": 0.9467, + "step": 1825 + }, + { + "epoch": 1.01, + "learning_rate": 3.722311057354067e-05, + "loss": 0.9816, + "step": 1830 + }, + { + "epoch": 1.01, + "learning_rate": 3.715990275356178e-05, + "loss": 0.9727, + "step": 1835 + }, + { + "epoch": 1.02, + "learning_rate": 3.7096592950017617e-05, + "loss": 0.9822, + "step": 1840 + }, + { + "epoch": 1.02, + "learning_rate": 3.703318169387947e-05, + "loss": 0.942, + "step": 1845 + }, + { + "epoch": 1.02, + "learning_rate": 3.696966951696952e-05, + "loss": 0.9306, + "step": 1850 + }, + { + "epoch": 1.03, + "learning_rate": 3.690605695195637e-05, + "loss": 0.9654, + "step": 1855 + }, + { + "epoch": 1.03, + "learning_rate": 3.684234453235054e-05, + "loss": 0.9592, + "step": 1860 + }, + { + "epoch": 1.03, + "learning_rate": 3.677853279250003e-05, + "loss": 0.988, + "step": 1865 + }, + { + "epoch": 1.03, + "learning_rate": 3.671462226758583e-05, + "loss": 0.9462, + "step": 1870 + }, + { + "epoch": 1.04, + "learning_rate": 3.665061349361742e-05, + "loss": 0.9685, + "step": 1875 + }, + { + "epoch": 1.04, + "learning_rate": 3.658650700742828e-05, + "loss": 0.9772, + "step": 1880 + }, + { + "epoch": 1.04, + "learning_rate": 3.6522303346671404e-05, + "loss": 0.9482, + "step": 1885 + }, + { + "epoch": 1.05, + "learning_rate": 3.645800304981477e-05, + "loss": 1.0069, + "step": 1890 + }, + { + "epoch": 1.05, + "learning_rate": 3.639360665613683e-05, + "loss": 0.9003, + "step": 1895 + }, + { + "epoch": 1.05, + "learning_rate": 3.632911470572197e-05, + "loss": 0.9279, + "step": 1900 + }, + { + "epoch": 1.05, + "learning_rate": 3.626452773945603e-05, + "loss": 0.9237, + "step": 1905 + }, + { + "epoch": 1.06, + "learning_rate": 3.619984629902172e-05, + "loss": 1.0086, + "step": 1910 + }, + { + "epoch": 1.06, + "learning_rate": 3.613507092689409e-05, + "loss": 0.9625, + "step": 1915 + }, + { + "epoch": 1.06, + "learning_rate": 3.607020216633599e-05, + "loss": 0.9297, + "step": 1920 + }, + { + "epoch": 1.06, + "learning_rate": 3.60052405613935e-05, + "loss": 0.9718, + "step": 1925 + }, + { + "epoch": 1.07, + "learning_rate": 3.594018665689139e-05, + "loss": 0.9512, + "step": 1930 + }, + { + "epoch": 1.07, + "learning_rate": 3.5875040998428513e-05, + "loss": 0.9923, + "step": 1935 + }, + { + "epoch": 1.07, + "learning_rate": 3.5809804132373253e-05, + "loss": 0.9518, + "step": 1940 + }, + { + "epoch": 1.08, + "learning_rate": 3.574447660585897e-05, + "loss": 0.9324, + "step": 1945 + }, + { + "epoch": 1.08, + "learning_rate": 3.5679058966779344e-05, + "loss": 0.9724, + "step": 1950 + }, + { + "epoch": 1.08, + "learning_rate": 3.561355176378384e-05, + "loss": 0.978, + "step": 1955 + }, + { + "epoch": 1.08, + "learning_rate": 3.554795554627307e-05, + "loss": 0.9893, + "step": 1960 + }, + { + "epoch": 1.09, + "learning_rate": 3.548227086439422e-05, + "loss": 0.967, + "step": 1965 + }, + { + "epoch": 1.09, + "learning_rate": 3.541649826903639e-05, + "loss": 0.9529, + "step": 1970 + }, + { + "epoch": 1.09, + "learning_rate": 3.535063831182602e-05, + "loss": 0.9506, + "step": 1975 + }, + { + "epoch": 1.09, + "learning_rate": 3.528469154512224e-05, + "loss": 0.9525, + "step": 1980 + }, + { + "epoch": 1.1, + "learning_rate": 3.521865852201223e-05, + "loss": 0.9258, + "step": 1985 + }, + { + "epoch": 1.1, + "learning_rate": 3.5152539796306596e-05, + "loss": 0.9417, + "step": 1990 + }, + { + "epoch": 1.1, + "learning_rate": 3.508633592253472e-05, + "loss": 0.9465, + "step": 1995 + }, + { + "epoch": 1.11, + "learning_rate": 3.502004745594011e-05, + "loss": 0.9494, + "step": 2000 + }, + { + "epoch": 1.11, + "learning_rate": 3.4953674952475755e-05, + "loss": 0.9709, + "step": 2005 + }, + { + "epoch": 1.11, + "learning_rate": 3.488721896879943e-05, + "loss": 0.9581, + "step": 2010 + }, + { + "epoch": 1.11, + "learning_rate": 3.4820680062269074e-05, + "loss": 0.974, + "step": 2015 + }, + { + "epoch": 1.12, + "learning_rate": 3.4754058790938046e-05, + "loss": 0.9768, + "step": 2020 + }, + { + "epoch": 1.12, + "learning_rate": 3.468735571355055e-05, + "loss": 0.982, + "step": 2025 + }, + { + "epoch": 1.12, + "learning_rate": 3.4620571389536825e-05, + "loss": 0.984, + "step": 2030 + }, + { + "epoch": 1.13, + "learning_rate": 3.455370637900856e-05, + "loss": 0.9604, + "step": 2035 + }, + { + "epoch": 1.13, + "learning_rate": 3.448676124275414e-05, + "loss": 0.897, + "step": 2040 + }, + { + "epoch": 1.13, + "learning_rate": 3.4419736542233925e-05, + "loss": 0.9968, + "step": 2045 + }, + { + "epoch": 1.13, + "learning_rate": 3.4352632839575616e-05, + "loss": 0.9479, + "step": 2050 + }, + { + "epoch": 1.14, + "learning_rate": 3.428545069756946e-05, + "loss": 0.9724, + "step": 2055 + }, + { + "epoch": 1.14, + "learning_rate": 3.42181906796636e-05, + "loss": 0.9493, + "step": 2060 + }, + { + "epoch": 1.14, + "learning_rate": 3.415085334995927e-05, + "loss": 0.9348, + "step": 2065 + }, + { + "epoch": 1.14, + "learning_rate": 3.408343927320613e-05, + "loss": 0.9702, + "step": 2070 + }, + { + "epoch": 1.15, + "learning_rate": 3.401594901479753e-05, + "loss": 0.9089, + "step": 2075 + }, + { + "epoch": 1.15, + "learning_rate": 3.394838314076572e-05, + "loss": 0.9606, + "step": 2080 + }, + { + "epoch": 1.15, + "learning_rate": 3.3880742217777115e-05, + "loss": 0.9743, + "step": 2085 + }, + { + "epoch": 1.16, + "learning_rate": 3.381302681312759e-05, + "loss": 0.9469, + "step": 2090 + }, + { + "epoch": 1.16, + "learning_rate": 3.374523749473767e-05, + "loss": 0.949, + "step": 2095 + }, + { + "epoch": 1.16, + "learning_rate": 3.367737483114779e-05, + "loss": 0.9567, + "step": 2100 + } + ], + "max_steps": 5424, + "num_train_epochs": 3, + "total_flos": 6.824173442275738e+17, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-2100/training_args.bin b/checkpoint-2100/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..77433459daca6603950afc463ff989e817f81ac3 --- /dev/null +++ b/checkpoint-2100/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0f6a8853599a52b1f15984d75ce4b6434c335fdc67d4509d0b2ca0d8c613590a +size 3362 diff --git a/checkpoint-2200/README.md b/checkpoint-2200/README.md new file mode 100644 index 0000000000000000000000000000000000000000..53c0fe6b436905ad7a2c060e98fe1ebdeac1a2fe --- /dev/null +++ b/checkpoint-2200/README.md @@ -0,0 +1,15 @@ +--- +library_name: peft +--- +## Training procedure + + +The following `bitsandbytes` quantization config was used during training: +- load_in_8bit: True +- llm_int8_threshold: 6.0 +- llm_int8_skip_modules: None +- llm_int8_enable_fp32_cpu_offload: False +### Framework versions + + +- PEFT 0.5.0 diff --git a/checkpoint-2200/adapter_config.json b/checkpoint-2200/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..514ae99ac7471cc26016f7575e44dd8304f29b07 --- /dev/null +++ b/checkpoint-2200/adapter_config.json @@ -0,0 +1,21 @@ +{ + "auto_mapping": null, + "base_model_name_or_path": "meta-llama/Llama-2-7b-chat-hf", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "q_proj", + "v_proj" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-2200/adapter_model.bin b/checkpoint-2200/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..eef80dd46843995a2383ba3d29c269e23e628351 --- /dev/null +++ b/checkpoint-2200/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2666fbd03841e70fa6b1da71fe4a9f5012fb196696637b56e135129adb596568 +size 16821197 diff --git a/checkpoint-2200/finetuning_args.json b/checkpoint-2200/finetuning_args.json new file mode 100644 index 0000000000000000000000000000000000000000..d01efc206b59c6f88548e8f3940579f2ed2af33b --- /dev/null +++ b/checkpoint-2200/finetuning_args.json @@ -0,0 +1,16 @@ +{ + "dpo_beta": 0.1, + "finetuning_type": "lora", + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "lora_rank": 8, + "lora_target": [ + "q_proj", + "v_proj" + ], + "name_module_trainable": "mlp", + "num_hidden_layers": 32, + "num_layer_trainable": 3, + "ppo_score_norm": false, + "resume_lora_training": true +} diff --git a/checkpoint-2200/optimizer.pt b/checkpoint-2200/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..097d3c9eee4319ede536b4f29d2427a87b58da9a --- /dev/null +++ b/checkpoint-2200/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f504a458e598aa619f61de44bc1f04d329401d6ceb8705a78a0645aa19029071 +size 33629893 diff --git a/checkpoint-2200/rng_state.pth b/checkpoint-2200/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..32e02ca81655b2c88f303723ae12348f72b289b1 --- /dev/null +++ b/checkpoint-2200/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:665e43c3a028bbcd0603a75849ce9bad582f9f47653c55c4aa8e683df9536078 +size 14575 diff --git a/checkpoint-2200/scaler.pt b/checkpoint-2200/scaler.pt new file mode 100644 index 0000000000000000000000000000000000000000..3579f609fe4606fd7c25c8c2c351d87605820edc --- /dev/null +++ b/checkpoint-2200/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9012ab256b2231d7b45a67199edd8e08e510d79d3be9148a71f7add9e50e9c77 +size 557 diff --git a/checkpoint-2200/scheduler.pt b/checkpoint-2200/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..aefd5859fe919447bcde596280957df243eecd5f --- /dev/null +++ b/checkpoint-2200/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7940ceda380efa700f5abb85afaef4017903c3265f39e25795b4d3008a743a73 +size 627 diff --git a/checkpoint-2200/trainer_state.json b/checkpoint-2200/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..4af802e7a5ca9b70ac0b84939fbbb241c051faa5 --- /dev/null +++ b/checkpoint-2200/trainer_state.json @@ -0,0 +1,2656 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.2164777439867294, + "global_step": 2200, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.9999895164082156e-05, + "loss": 1.6349, + "step": 5 + }, + { + "epoch": 0.01, + "learning_rate": 4.999958065720787e-05, + "loss": 1.6199, + "step": 10 + }, + { + "epoch": 0.01, + "learning_rate": 4.999905648201487e-05, + "loss": 1.4834, + "step": 15 + }, + { + "epoch": 0.01, + "learning_rate": 4.999832264289934e-05, + "loss": 1.3882, + "step": 20 + }, + { + "epoch": 0.01, + "learning_rate": 4.999737914601591e-05, + "loss": 1.3679, + "step": 25 + }, + { + "epoch": 0.02, + "learning_rate": 4.999622599927756e-05, + "loss": 1.2396, + "step": 30 + }, + { + "epoch": 0.02, + "learning_rate": 4.999486321235559e-05, + "loss": 1.321, + "step": 35 + }, + { + "epoch": 0.02, + "learning_rate": 4.9993290796679516e-05, + "loss": 1.2874, + "step": 40 + }, + { + "epoch": 0.02, + "learning_rate": 4.999150876543699e-05, + "loss": 1.2607, + "step": 45 + }, + { + "epoch": 0.03, + "learning_rate": 4.9989517133573694e-05, + "loss": 1.2454, + "step": 50 + }, + { + "epoch": 0.03, + "learning_rate": 4.9987315917793174e-05, + "loss": 1.2799, + "step": 55 + }, + { + "epoch": 0.03, + "learning_rate": 4.998490513655676e-05, + "loss": 1.2575, + "step": 60 + }, + { + "epoch": 0.04, + "learning_rate": 4.998228481008337e-05, + "loss": 1.2404, + "step": 65 + }, + { + "epoch": 0.04, + "learning_rate": 4.997945496034934e-05, + "loss": 1.2219, + "step": 70 + }, + { + "epoch": 0.04, + "learning_rate": 4.9976415611088267e-05, + "loss": 1.2241, + "step": 75 + }, + { + "epoch": 0.04, + "learning_rate": 4.997316678779079e-05, + "loss": 1.1716, + "step": 80 + }, + { + "epoch": 0.05, + "learning_rate": 4.996970851770438e-05, + "loss": 1.1883, + "step": 85 + }, + { + "epoch": 0.05, + "learning_rate": 4.9966040829833115e-05, + "loss": 1.205, + "step": 90 + }, + { + "epoch": 0.05, + "learning_rate": 4.9962163754937426e-05, + "loss": 1.1246, + "step": 95 + }, + { + "epoch": 0.06, + "learning_rate": 4.995807732553384e-05, + "loss": 1.1636, + "step": 100 + }, + { + "epoch": 0.06, + "learning_rate": 4.9953781575894723e-05, + "loss": 1.158, + "step": 105 + }, + { + "epoch": 0.06, + "learning_rate": 4.9949276542048e-05, + "loss": 1.1477, + "step": 110 + }, + { + "epoch": 0.06, + "learning_rate": 4.9944562261776805e-05, + "loss": 1.1678, + "step": 115 + }, + { + "epoch": 0.07, + "learning_rate": 4.9939638774619216e-05, + "loss": 1.1501, + "step": 120 + }, + { + "epoch": 0.07, + "learning_rate": 4.99345061218679e-05, + "loss": 1.1955, + "step": 125 + }, + { + "epoch": 0.07, + "learning_rate": 4.9929164346569756e-05, + "loss": 1.1724, + "step": 130 + }, + { + "epoch": 0.07, + "learning_rate": 4.9923613493525576e-05, + "loss": 1.177, + "step": 135 + }, + { + "epoch": 0.08, + "learning_rate": 4.991785360928968e-05, + "loss": 1.1418, + "step": 140 + }, + { + "epoch": 0.08, + "learning_rate": 4.991188474216947e-05, + "loss": 1.1898, + "step": 145 + }, + { + "epoch": 0.08, + "learning_rate": 4.9905706942225094e-05, + "loss": 1.1479, + "step": 150 + }, + { + "epoch": 0.09, + "learning_rate": 4.9899320261268966e-05, + "loss": 1.1356, + "step": 155 + }, + { + "epoch": 0.09, + "learning_rate": 4.989272475286537e-05, + "loss": 1.1397, + "step": 160 + }, + { + "epoch": 0.09, + "learning_rate": 4.9885920472330004e-05, + "loss": 1.1215, + "step": 165 + }, + { + "epoch": 0.09, + "learning_rate": 4.9878907476729516e-05, + "loss": 1.167, + "step": 170 + }, + { + "epoch": 0.1, + "learning_rate": 4.9871685824881e-05, + "loss": 1.1219, + "step": 175 + }, + { + "epoch": 0.1, + "learning_rate": 4.9864255577351534e-05, + "loss": 1.0835, + "step": 180 + }, + { + "epoch": 0.1, + "learning_rate": 4.985661679645769e-05, + "loss": 1.0721, + "step": 185 + }, + { + "epoch": 0.11, + "learning_rate": 4.9848769546264915e-05, + "loss": 1.0692, + "step": 190 + }, + { + "epoch": 0.11, + "learning_rate": 4.9840713892587146e-05, + "loss": 1.0488, + "step": 195 + }, + { + "epoch": 0.11, + "learning_rate": 4.983244990298609e-05, + "loss": 1.1285, + "step": 200 + }, + { + "epoch": 0.11, + "learning_rate": 4.982397764677081e-05, + "loss": 1.0832, + "step": 205 + }, + { + "epoch": 0.12, + "learning_rate": 4.981529719499704e-05, + "loss": 1.0652, + "step": 210 + }, + { + "epoch": 0.12, + "learning_rate": 4.980640862046663e-05, + "loss": 1.1043, + "step": 215 + }, + { + "epoch": 0.12, + "learning_rate": 4.979731199772693e-05, + "loss": 1.112, + "step": 220 + }, + { + "epoch": 0.12, + "learning_rate": 4.9788007403070146e-05, + "loss": 1.1029, + "step": 225 + }, + { + "epoch": 0.13, + "learning_rate": 4.977849491453277e-05, + "loss": 1.0869, + "step": 230 + }, + { + "epoch": 0.13, + "learning_rate": 4.976877461189481e-05, + "loss": 1.0843, + "step": 235 + }, + { + "epoch": 0.13, + "learning_rate": 4.975884657667922e-05, + "loss": 1.0789, + "step": 240 + }, + { + "epoch": 0.14, + "learning_rate": 4.974871089215118e-05, + "loss": 1.0449, + "step": 245 + }, + { + "epoch": 0.14, + "learning_rate": 4.9738367643317405e-05, + "loss": 1.1053, + "step": 250 + }, + { + "epoch": 0.14, + "learning_rate": 4.9727816916925395e-05, + "loss": 1.0651, + "step": 255 + }, + { + "epoch": 0.14, + "learning_rate": 4.971705880146276e-05, + "loss": 1.0828, + "step": 260 + }, + { + "epoch": 0.15, + "learning_rate": 4.970609338715646e-05, + "loss": 1.0932, + "step": 265 + }, + { + "epoch": 0.15, + "learning_rate": 4.969492076597203e-05, + "loss": 1.0648, + "step": 270 + }, + { + "epoch": 0.15, + "learning_rate": 4.968354103161283e-05, + "loss": 1.0948, + "step": 275 + }, + { + "epoch": 0.15, + "learning_rate": 4.967195427951926e-05, + "loss": 1.0721, + "step": 280 + }, + { + "epoch": 0.16, + "learning_rate": 4.9660160606867936e-05, + "loss": 1.124, + "step": 285 + }, + { + "epoch": 0.16, + "learning_rate": 4.9648160112570896e-05, + "loss": 1.0963, + "step": 290 + }, + { + "epoch": 0.16, + "learning_rate": 4.9635952897274773e-05, + "loss": 1.1078, + "step": 295 + }, + { + "epoch": 0.17, + "learning_rate": 4.9623539063359925e-05, + "loss": 1.1059, + "step": 300 + }, + { + "epoch": 0.17, + "learning_rate": 4.961091871493962e-05, + "loss": 1.1032, + "step": 305 + }, + { + "epoch": 0.17, + "learning_rate": 4.959809195785912e-05, + "loss": 1.0595, + "step": 310 + }, + { + "epoch": 0.17, + "learning_rate": 4.958505889969481e-05, + "loss": 1.1096, + "step": 315 + }, + { + "epoch": 0.18, + "learning_rate": 4.957181964975329e-05, + "loss": 1.0589, + "step": 320 + }, + { + "epoch": 0.18, + "learning_rate": 4.955837431907049e-05, + "loss": 1.0608, + "step": 325 + }, + { + "epoch": 0.18, + "learning_rate": 4.954472302041069e-05, + "loss": 1.0819, + "step": 330 + }, + { + "epoch": 0.19, + "learning_rate": 4.9530865868265605e-05, + "loss": 1.0759, + "step": 335 + }, + { + "epoch": 0.19, + "learning_rate": 4.951680297885342e-05, + "loss": 1.0515, + "step": 340 + }, + { + "epoch": 0.19, + "learning_rate": 4.950253447011779e-05, + "loss": 1.0371, + "step": 345 + }, + { + "epoch": 0.19, + "learning_rate": 4.948806046172691e-05, + "loss": 1.0619, + "step": 350 + }, + { + "epoch": 0.2, + "learning_rate": 4.947338107507245e-05, + "loss": 1.0757, + "step": 355 + }, + { + "epoch": 0.2, + "learning_rate": 4.945849643326857e-05, + "loss": 1.0686, + "step": 360 + }, + { + "epoch": 0.2, + "learning_rate": 4.9443406661150874e-05, + "loss": 1.0809, + "step": 365 + }, + { + "epoch": 0.2, + "learning_rate": 4.942811188527537e-05, + "loss": 1.0704, + "step": 370 + }, + { + "epoch": 0.21, + "learning_rate": 4.941261223391742e-05, + "loss": 1.0655, + "step": 375 + }, + { + "epoch": 0.21, + "learning_rate": 4.939690783707063e-05, + "loss": 1.1182, + "step": 380 + }, + { + "epoch": 0.21, + "learning_rate": 4.938099882644578e-05, + "loss": 1.081, + "step": 385 + }, + { + "epoch": 0.22, + "learning_rate": 4.9364885335469734e-05, + "loss": 1.0792, + "step": 390 + }, + { + "epoch": 0.22, + "learning_rate": 4.93485674992843e-05, + "loss": 1.0244, + "step": 395 + }, + { + "epoch": 0.22, + "learning_rate": 4.933204545474511e-05, + "loss": 1.0531, + "step": 400 + }, + { + "epoch": 0.22, + "learning_rate": 4.9315319340420465e-05, + "loss": 1.048, + "step": 405 + }, + { + "epoch": 0.23, + "learning_rate": 4.929838929659015e-05, + "loss": 1.04, + "step": 410 + }, + { + "epoch": 0.23, + "learning_rate": 4.9281255465244314e-05, + "loss": 1.0686, + "step": 415 + }, + { + "epoch": 0.23, + "learning_rate": 4.926391799008223e-05, + "loss": 1.0933, + "step": 420 + }, + { + "epoch": 0.24, + "learning_rate": 4.924637701651111e-05, + "loss": 1.0313, + "step": 425 + }, + { + "epoch": 0.24, + "learning_rate": 4.9228632691644874e-05, + "loss": 1.0811, + "step": 430 + }, + { + "epoch": 0.24, + "learning_rate": 4.921068516430293e-05, + "loss": 1.0617, + "step": 435 + }, + { + "epoch": 0.24, + "learning_rate": 4.919253458500892e-05, + "loss": 1.0482, + "step": 440 + }, + { + "epoch": 0.25, + "learning_rate": 4.9174181105989445e-05, + "loss": 1.0681, + "step": 445 + }, + { + "epoch": 0.25, + "learning_rate": 4.9155624881172834e-05, + "loss": 1.0476, + "step": 450 + }, + { + "epoch": 0.25, + "learning_rate": 4.913686606618777e-05, + "loss": 1.0463, + "step": 455 + }, + { + "epoch": 0.25, + "learning_rate": 4.911790481836208e-05, + "loss": 1.0213, + "step": 460 + }, + { + "epoch": 0.26, + "learning_rate": 4.909874129672133e-05, + "loss": 0.9855, + "step": 465 + }, + { + "epoch": 0.26, + "learning_rate": 4.907937566198757e-05, + "loss": 1.045, + "step": 470 + }, + { + "epoch": 0.26, + "learning_rate": 4.9059808076577914e-05, + "loss": 1.0442, + "step": 475 + }, + { + "epoch": 0.27, + "learning_rate": 4.904003870460323e-05, + "loss": 1.0354, + "step": 480 + }, + { + "epoch": 0.27, + "learning_rate": 4.9020067711866735e-05, + "loss": 1.0331, + "step": 485 + }, + { + "epoch": 0.27, + "learning_rate": 4.899989526586261e-05, + "loss": 1.0585, + "step": 490 + }, + { + "epoch": 0.27, + "learning_rate": 4.8979521535774636e-05, + "loss": 1.0223, + "step": 495 + }, + { + "epoch": 0.28, + "learning_rate": 4.895894669247468e-05, + "loss": 1.0118, + "step": 500 + }, + { + "epoch": 0.28, + "learning_rate": 4.8938170908521356e-05, + "loss": 1.0508, + "step": 505 + }, + { + "epoch": 0.28, + "learning_rate": 4.8917194358158534e-05, + "loss": 1.0656, + "step": 510 + }, + { + "epoch": 0.28, + "learning_rate": 4.8896017217313886e-05, + "loss": 1.0655, + "step": 515 + }, + { + "epoch": 0.29, + "learning_rate": 4.887463966359741e-05, + "loss": 1.0833, + "step": 520 + }, + { + "epoch": 0.29, + "learning_rate": 4.8853061876299956e-05, + "loss": 1.068, + "step": 525 + }, + { + "epoch": 0.29, + "learning_rate": 4.8831284036391684e-05, + "loss": 1.0487, + "step": 530 + }, + { + "epoch": 0.3, + "learning_rate": 4.880930632652058e-05, + "loss": 1.059, + "step": 535 + }, + { + "epoch": 0.3, + "learning_rate": 4.878712893101092e-05, + "loss": 1.0408, + "step": 540 + }, + { + "epoch": 0.3, + "learning_rate": 4.876475203586171e-05, + "loss": 0.9976, + "step": 545 + }, + { + "epoch": 0.3, + "learning_rate": 4.874217582874514e-05, + "loss": 1.0358, + "step": 550 + }, + { + "epoch": 0.31, + "learning_rate": 4.8719400499005e-05, + "loss": 1.0073, + "step": 555 + }, + { + "epoch": 0.31, + "learning_rate": 4.869642623765509e-05, + "loss": 1.0742, + "step": 560 + }, + { + "epoch": 0.31, + "learning_rate": 4.867325323737765e-05, + "loss": 1.0463, + "step": 565 + }, + { + "epoch": 0.32, + "learning_rate": 4.864988169252168e-05, + "loss": 1.0242, + "step": 570 + }, + { + "epoch": 0.32, + "learning_rate": 4.8626311799101375e-05, + "loss": 1.0773, + "step": 575 + }, + { + "epoch": 0.32, + "learning_rate": 4.860254375479446e-05, + "loss": 1.0371, + "step": 580 + }, + { + "epoch": 0.32, + "learning_rate": 4.8578577758940504e-05, + "loss": 1.0399, + "step": 585 + }, + { + "epoch": 0.33, + "learning_rate": 4.855441401253928e-05, + "loss": 1.0411, + "step": 590 + }, + { + "epoch": 0.33, + "learning_rate": 4.8530052718249076e-05, + "loss": 1.0476, + "step": 595 + }, + { + "epoch": 0.33, + "learning_rate": 4.850549408038498e-05, + "loss": 1.0075, + "step": 600 + }, + { + "epoch": 0.33, + "learning_rate": 4.848073830491717e-05, + "loss": 1.0045, + "step": 605 + }, + { + "epoch": 0.34, + "learning_rate": 4.845578559946923e-05, + "loss": 1.0368, + "step": 610 + }, + { + "epoch": 0.34, + "learning_rate": 4.8430636173316306e-05, + "loss": 1.0156, + "step": 615 + }, + { + "epoch": 0.34, + "learning_rate": 4.840529023738348e-05, + "loss": 1.0334, + "step": 620 + }, + { + "epoch": 0.35, + "learning_rate": 4.837974800424389e-05, + "loss": 0.9994, + "step": 625 + }, + { + "epoch": 0.35, + "learning_rate": 4.8354009688117026e-05, + "loss": 1.0467, + "step": 630 + }, + { + "epoch": 0.35, + "learning_rate": 4.8328075504866874e-05, + "loss": 0.9968, + "step": 635 + }, + { + "epoch": 0.35, + "learning_rate": 4.8301945672000164e-05, + "loss": 1.041, + "step": 640 + }, + { + "epoch": 0.36, + "learning_rate": 4.8275620408664487e-05, + "loss": 0.9811, + "step": 645 + }, + { + "epoch": 0.36, + "learning_rate": 4.8249099935646494e-05, + "loss": 1.0235, + "step": 650 + }, + { + "epoch": 0.36, + "learning_rate": 4.822238447537003e-05, + "loss": 1.0152, + "step": 655 + }, + { + "epoch": 0.36, + "learning_rate": 4.819547425189429e-05, + "loss": 1.038, + "step": 660 + }, + { + "epoch": 0.37, + "learning_rate": 4.81683694909119e-05, + "loss": 1.0387, + "step": 665 + }, + { + "epoch": 0.37, + "learning_rate": 4.814107041974707e-05, + "loss": 1.019, + "step": 670 + }, + { + "epoch": 0.37, + "learning_rate": 4.811357726735366e-05, + "loss": 1.0403, + "step": 675 + }, + { + "epoch": 0.38, + "learning_rate": 4.808589026431324e-05, + "loss": 1.0828, + "step": 680 + }, + { + "epoch": 0.38, + "learning_rate": 4.805800964283322e-05, + "loss": 1.0128, + "step": 685 + }, + { + "epoch": 0.38, + "learning_rate": 4.802993563674483e-05, + "loss": 1.0502, + "step": 690 + }, + { + "epoch": 0.38, + "learning_rate": 4.80016684815012e-05, + "loss": 1.0169, + "step": 695 + }, + { + "epoch": 0.39, + "learning_rate": 4.7973208414175406e-05, + "loss": 1.0316, + "step": 700 + }, + { + "epoch": 0.39, + "learning_rate": 4.794455567345842e-05, + "loss": 1.0263, + "step": 705 + }, + { + "epoch": 0.39, + "learning_rate": 4.791571049965714e-05, + "loss": 1.0801, + "step": 710 + }, + { + "epoch": 0.4, + "learning_rate": 4.7886673134692404e-05, + "loss": 1.0575, + "step": 715 + }, + { + "epoch": 0.4, + "learning_rate": 4.7857443822096905e-05, + "loss": 0.9767, + "step": 720 + }, + { + "epoch": 0.4, + "learning_rate": 4.782802280701319e-05, + "loss": 0.9666, + "step": 725 + }, + { + "epoch": 0.4, + "learning_rate": 4.77984103361916e-05, + "loss": 0.9983, + "step": 730 + }, + { + "epoch": 0.41, + "learning_rate": 4.776860665798816e-05, + "loss": 1.0219, + "step": 735 + }, + { + "epoch": 0.41, + "learning_rate": 4.773861202236257e-05, + "loss": 0.9963, + "step": 740 + }, + { + "epoch": 0.41, + "learning_rate": 4.770842668087602e-05, + "loss": 1.0219, + "step": 745 + }, + { + "epoch": 0.41, + "learning_rate": 4.767805088668916e-05, + "loss": 1.0029, + "step": 750 + }, + { + "epoch": 0.42, + "learning_rate": 4.7647484894559936e-05, + "loss": 1.0077, + "step": 755 + }, + { + "epoch": 0.42, + "learning_rate": 4.7616728960841444e-05, + "loss": 0.9908, + "step": 760 + }, + { + "epoch": 0.42, + "learning_rate": 4.758578334347981e-05, + "loss": 1.0092, + "step": 765 + }, + { + "epoch": 0.43, + "learning_rate": 4.7554648302012015e-05, + "loss": 0.9874, + "step": 770 + }, + { + "epoch": 0.43, + "learning_rate": 4.7523324097563706e-05, + "loss": 1.0185, + "step": 775 + }, + { + "epoch": 0.43, + "learning_rate": 4.749181099284703e-05, + "loss": 1.0533, + "step": 780 + }, + { + "epoch": 0.43, + "learning_rate": 4.746010925215839e-05, + "loss": 1.0083, + "step": 785 + }, + { + "epoch": 0.44, + "learning_rate": 4.74282191413763e-05, + "loss": 1.0191, + "step": 790 + }, + { + "epoch": 0.44, + "learning_rate": 4.7396140927959045e-05, + "loss": 0.9786, + "step": 795 + }, + { + "epoch": 0.44, + "learning_rate": 4.7363874880942574e-05, + "loss": 1.0197, + "step": 800 + }, + { + "epoch": 0.45, + "learning_rate": 4.733142127093813e-05, + "loss": 1.0471, + "step": 805 + }, + { + "epoch": 0.45, + "learning_rate": 4.7298780370130014e-05, + "loss": 1.0134, + "step": 810 + }, + { + "epoch": 0.45, + "learning_rate": 4.726595245227336e-05, + "loss": 0.9801, + "step": 815 + }, + { + "epoch": 0.45, + "learning_rate": 4.723293779269173e-05, + "loss": 0.9944, + "step": 820 + }, + { + "epoch": 0.46, + "learning_rate": 4.7199736668274924e-05, + "loss": 1.0358, + "step": 825 + }, + { + "epoch": 0.46, + "learning_rate": 4.716634935747655e-05, + "loss": 0.9907, + "step": 830 + }, + { + "epoch": 0.46, + "learning_rate": 4.713277614031177e-05, + "loss": 1.0698, + "step": 835 + }, + { + "epoch": 0.46, + "learning_rate": 4.70990172983549e-05, + "loss": 1.0466, + "step": 840 + }, + { + "epoch": 0.47, + "learning_rate": 4.706507311473707e-05, + "loss": 0.9489, + "step": 845 + }, + { + "epoch": 0.47, + "learning_rate": 4.703094387414385e-05, + "loss": 0.9936, + "step": 850 + }, + { + "epoch": 0.47, + "learning_rate": 4.699662986281288e-05, + "loss": 0.9672, + "step": 855 + }, + { + "epoch": 0.48, + "learning_rate": 4.696213136853141e-05, + "loss": 1.0164, + "step": 860 + }, + { + "epoch": 0.48, + "learning_rate": 4.6927448680633954e-05, + "loss": 0.9817, + "step": 865 + }, + { + "epoch": 0.48, + "learning_rate": 4.689258208999983e-05, + "loss": 1.028, + "step": 870 + }, + { + "epoch": 0.48, + "learning_rate": 4.6857531889050716e-05, + "loss": 1.0239, + "step": 875 + }, + { + "epoch": 0.49, + "learning_rate": 4.682229837174821e-05, + "loss": 1.0378, + "step": 880 + }, + { + "epoch": 0.49, + "learning_rate": 4.678688183359135e-05, + "loss": 0.9908, + "step": 885 + }, + { + "epoch": 0.49, + "learning_rate": 4.675128257161418e-05, + "loss": 0.9384, + "step": 890 + }, + { + "epoch": 0.49, + "learning_rate": 4.671550088438319e-05, + "loss": 0.9481, + "step": 895 + }, + { + "epoch": 0.5, + "learning_rate": 4.6679537071994874e-05, + "loss": 0.9541, + "step": 900 + }, + { + "epoch": 0.5, + "learning_rate": 4.6643391436073165e-05, + "loss": 0.9787, + "step": 905 + }, + { + "epoch": 0.5, + "learning_rate": 4.660706427976693e-05, + "loss": 0.9897, + "step": 910 + }, + { + "epoch": 0.51, + "learning_rate": 4.657055590774745e-05, + "loss": 0.9947, + "step": 915 + }, + { + "epoch": 0.51, + "learning_rate": 4.6533866626205805e-05, + "loss": 0.9614, + "step": 920 + }, + { + "epoch": 0.51, + "learning_rate": 4.649699674285036e-05, + "loss": 1.0323, + "step": 925 + }, + { + "epoch": 0.51, + "learning_rate": 4.645994656690417e-05, + "loss": 1.0345, + "step": 930 + }, + { + "epoch": 0.52, + "learning_rate": 4.642271640910235e-05, + "loss": 1.0432, + "step": 935 + }, + { + "epoch": 0.52, + "learning_rate": 4.638530658168954e-05, + "loss": 1.0102, + "step": 940 + }, + { + "epoch": 0.52, + "learning_rate": 4.6347717398417203e-05, + "loss": 1.0361, + "step": 945 + }, + { + "epoch": 0.53, + "learning_rate": 4.6309949174541096e-05, + "loss": 0.9818, + "step": 950 + }, + { + "epoch": 0.53, + "learning_rate": 4.627200222681851e-05, + "loss": 1.0293, + "step": 955 + }, + { + "epoch": 0.53, + "learning_rate": 4.6233876873505694e-05, + "loss": 1.0125, + "step": 960 + }, + { + "epoch": 0.53, + "learning_rate": 4.619557343435516e-05, + "loss": 1.0039, + "step": 965 + }, + { + "epoch": 0.54, + "learning_rate": 4.615709223061302e-05, + "loss": 1.0146, + "step": 970 + }, + { + "epoch": 0.54, + "learning_rate": 4.611843358501624e-05, + "loss": 1.0352, + "step": 975 + }, + { + "epoch": 0.54, + "learning_rate": 4.6079597821789993e-05, + "loss": 1.0405, + "step": 980 + }, + { + "epoch": 0.54, + "learning_rate": 4.604058526664491e-05, + "loss": 0.9834, + "step": 985 + }, + { + "epoch": 0.55, + "learning_rate": 4.600139624677436e-05, + "loss": 1.0252, + "step": 990 + }, + { + "epoch": 0.55, + "learning_rate": 4.596203109085168e-05, + "loss": 1.0516, + "step": 995 + }, + { + "epoch": 0.55, + "learning_rate": 4.5922490129027464e-05, + "loss": 0.9762, + "step": 1000 + }, + { + "epoch": 0.56, + "learning_rate": 4.588277369292674e-05, + "loss": 0.9801, + "step": 1005 + }, + { + "epoch": 0.56, + "learning_rate": 4.5842882115646234e-05, + "loss": 1.0127, + "step": 1010 + }, + { + "epoch": 0.56, + "learning_rate": 4.580281573175157e-05, + "loss": 1.0415, + "step": 1015 + }, + { + "epoch": 0.56, + "learning_rate": 4.576257487727442e-05, + "loss": 0.9974, + "step": 1020 + }, + { + "epoch": 0.57, + "learning_rate": 4.572215988970974e-05, + "loss": 1.0097, + "step": 1025 + }, + { + "epoch": 0.57, + "learning_rate": 4.568157110801293e-05, + "loss": 1.0004, + "step": 1030 + }, + { + "epoch": 0.57, + "learning_rate": 4.5640808872596944e-05, + "loss": 0.9932, + "step": 1035 + }, + { + "epoch": 0.58, + "learning_rate": 4.5599873525329505e-05, + "loss": 0.9437, + "step": 1040 + }, + { + "epoch": 0.58, + "learning_rate": 4.555876540953019e-05, + "loss": 1.0004, + "step": 1045 + }, + { + "epoch": 0.58, + "learning_rate": 4.551748486996755e-05, + "loss": 0.9743, + "step": 1050 + }, + { + "epoch": 0.58, + "learning_rate": 4.547603225285626e-05, + "loss": 1.0303, + "step": 1055 + }, + { + "epoch": 0.59, + "learning_rate": 4.543440790585417e-05, + "loss": 1.0225, + "step": 1060 + }, + { + "epoch": 0.59, + "learning_rate": 4.539261217805939e-05, + "loss": 1.0102, + "step": 1065 + }, + { + "epoch": 0.59, + "learning_rate": 4.535064542000743e-05, + "loss": 1.0087, + "step": 1070 + }, + { + "epoch": 0.59, + "learning_rate": 4.5308507983668165e-05, + "loss": 0.952, + "step": 1075 + }, + { + "epoch": 0.6, + "learning_rate": 4.526620022244293e-05, + "loss": 0.9767, + "step": 1080 + }, + { + "epoch": 0.6, + "learning_rate": 4.522372249116158e-05, + "loss": 1.0049, + "step": 1085 + }, + { + "epoch": 0.6, + "learning_rate": 4.5181075146079456e-05, + "loss": 1.0397, + "step": 1090 + }, + { + "epoch": 0.61, + "learning_rate": 4.5138258544874455e-05, + "loss": 1.0167, + "step": 1095 + }, + { + "epoch": 0.61, + "learning_rate": 4.5095273046643985e-05, + "loss": 0.9966, + "step": 1100 + }, + { + "epoch": 0.61, + "learning_rate": 4.5052119011901986e-05, + "loss": 0.983, + "step": 1105 + }, + { + "epoch": 0.61, + "learning_rate": 4.500879680257587e-05, + "loss": 1.0301, + "step": 1110 + }, + { + "epoch": 0.62, + "learning_rate": 4.4965306782003535e-05, + "loss": 1.0371, + "step": 1115 + }, + { + "epoch": 0.62, + "learning_rate": 4.492164931493028e-05, + "loss": 1.0083, + "step": 1120 + }, + { + "epoch": 0.62, + "learning_rate": 4.487782476750575e-05, + "loss": 0.9728, + "step": 1125 + }, + { + "epoch": 0.62, + "learning_rate": 4.4833833507280884e-05, + "loss": 0.9892, + "step": 1130 + }, + { + "epoch": 0.63, + "learning_rate": 4.4789675903204805e-05, + "loss": 0.9847, + "step": 1135 + }, + { + "epoch": 0.63, + "learning_rate": 4.474535232562176e-05, + "loss": 1.0545, + "step": 1140 + }, + { + "epoch": 0.63, + "learning_rate": 4.470086314626797e-05, + "loss": 1.0204, + "step": 1145 + }, + { + "epoch": 0.64, + "learning_rate": 4.465620873826856e-05, + "loss": 0.9648, + "step": 1150 + }, + { + "epoch": 0.64, + "learning_rate": 4.46113894761344e-05, + "loss": 0.9545, + "step": 1155 + }, + { + "epoch": 0.64, + "learning_rate": 4.456640573575896e-05, + "loss": 1.0256, + "step": 1160 + }, + { + "epoch": 0.64, + "learning_rate": 4.4521257894415183e-05, + "loss": 0.99, + "step": 1165 + }, + { + "epoch": 0.65, + "learning_rate": 4.44759463307523e-05, + "loss": 0.9906, + "step": 1170 + }, + { + "epoch": 0.65, + "learning_rate": 4.443047142479266e-05, + "loss": 0.9876, + "step": 1175 + }, + { + "epoch": 0.65, + "learning_rate": 4.4384833557928553e-05, + "loss": 1.0495, + "step": 1180 + }, + { + "epoch": 0.66, + "learning_rate": 4.4339033112918966e-05, + "loss": 0.9869, + "step": 1185 + }, + { + "epoch": 0.66, + "learning_rate": 4.4293070473886456e-05, + "loss": 1.0299, + "step": 1190 + }, + { + "epoch": 0.66, + "learning_rate": 4.424694602631385e-05, + "loss": 1.0073, + "step": 1195 + }, + { + "epoch": 0.66, + "learning_rate": 4.420066015704105e-05, + "loss": 1.0023, + "step": 1200 + }, + { + "epoch": 0.67, + "learning_rate": 4.41542132542618e-05, + "loss": 0.9462, + "step": 1205 + }, + { + "epoch": 0.67, + "learning_rate": 4.410760570752037e-05, + "loss": 1.0116, + "step": 1210 + }, + { + "epoch": 0.67, + "learning_rate": 4.4060837907708375e-05, + "loss": 0.9652, + "step": 1215 + }, + { + "epoch": 0.67, + "learning_rate": 4.401391024706142e-05, + "loss": 1.0411, + "step": 1220 + }, + { + "epoch": 0.68, + "learning_rate": 4.396682311915586e-05, + "loss": 0.9691, + "step": 1225 + }, + { + "epoch": 0.68, + "learning_rate": 4.3919576918905495e-05, + "loss": 0.961, + "step": 1230 + }, + { + "epoch": 0.68, + "learning_rate": 4.387217204255819e-05, + "loss": 0.9602, + "step": 1235 + }, + { + "epoch": 0.69, + "learning_rate": 4.3824608887692666e-05, + "loss": 1.0197, + "step": 1240 + }, + { + "epoch": 0.69, + "learning_rate": 4.377688785321507e-05, + "loss": 0.9982, + "step": 1245 + }, + { + "epoch": 0.69, + "learning_rate": 4.372900933935569e-05, + "loss": 0.986, + "step": 1250 + }, + { + "epoch": 0.69, + "learning_rate": 4.368097374766556e-05, + "loss": 0.9744, + "step": 1255 + }, + { + "epoch": 0.7, + "learning_rate": 4.3632781481013105e-05, + "loss": 1.0078, + "step": 1260 + }, + { + "epoch": 0.7, + "learning_rate": 4.358443294358077e-05, + "loss": 0.9865, + "step": 1265 + }, + { + "epoch": 0.7, + "learning_rate": 4.35359285408616e-05, + "loss": 0.976, + "step": 1270 + }, + { + "epoch": 0.71, + "learning_rate": 4.348726867965591e-05, + "loss": 1.0141, + "step": 1275 + }, + { + "epoch": 0.71, + "learning_rate": 4.343845376806777e-05, + "loss": 0.952, + "step": 1280 + }, + { + "epoch": 0.71, + "learning_rate": 4.338948421550169e-05, + "loss": 1.0239, + "step": 1285 + }, + { + "epoch": 0.71, + "learning_rate": 4.334036043265909e-05, + "loss": 0.994, + "step": 1290 + }, + { + "epoch": 0.72, + "learning_rate": 4.329108283153492e-05, + "loss": 1.0505, + "step": 1295 + }, + { + "epoch": 0.72, + "learning_rate": 4.3241651825414195e-05, + "loss": 0.9919, + "step": 1300 + }, + { + "epoch": 0.72, + "learning_rate": 4.31920678288685e-05, + "loss": 1.0169, + "step": 1305 + }, + { + "epoch": 0.72, + "learning_rate": 4.3142331257752546e-05, + "loss": 0.9938, + "step": 1310 + }, + { + "epoch": 0.73, + "learning_rate": 4.309244252920064e-05, + "loss": 0.92, + "step": 1315 + }, + { + "epoch": 0.73, + "learning_rate": 4.304240206162326e-05, + "loss": 0.9537, + "step": 1320 + }, + { + "epoch": 0.73, + "learning_rate": 4.299221027470345e-05, + "loss": 0.996, + "step": 1325 + }, + { + "epoch": 0.74, + "learning_rate": 4.294186758939339e-05, + "loss": 0.9652, + "step": 1330 + }, + { + "epoch": 0.74, + "learning_rate": 4.2891374427910795e-05, + "loss": 1.0138, + "step": 1335 + }, + { + "epoch": 0.74, + "learning_rate": 4.284073121373544e-05, + "loss": 0.9267, + "step": 1340 + }, + { + "epoch": 0.74, + "learning_rate": 4.278993837160553e-05, + "loss": 0.9899, + "step": 1345 + }, + { + "epoch": 0.75, + "learning_rate": 4.273899632751422e-05, + "loss": 0.963, + "step": 1350 + }, + { + "epoch": 0.75, + "learning_rate": 4.2687905508705974e-05, + "loss": 1.0066, + "step": 1355 + }, + { + "epoch": 0.75, + "learning_rate": 4.263666634367303e-05, + "loss": 0.9939, + "step": 1360 + }, + { + "epoch": 0.75, + "learning_rate": 4.258527926215178e-05, + "loss": 0.9667, + "step": 1365 + }, + { + "epoch": 0.76, + "learning_rate": 4.253374469511917e-05, + "loss": 0.9999, + "step": 1370 + }, + { + "epoch": 0.76, + "learning_rate": 4.248206307478909e-05, + "loss": 0.9738, + "step": 1375 + }, + { + "epoch": 0.76, + "learning_rate": 4.243023483460875e-05, + "loss": 0.9829, + "step": 1380 + }, + { + "epoch": 0.77, + "learning_rate": 4.237826040925503e-05, + "loss": 1.0107, + "step": 1385 + }, + { + "epoch": 0.77, + "learning_rate": 4.232614023463088e-05, + "loss": 0.9992, + "step": 1390 + }, + { + "epoch": 0.77, + "learning_rate": 4.227387474786159e-05, + "loss": 0.9566, + "step": 1395 + }, + { + "epoch": 0.77, + "learning_rate": 4.222146438729119e-05, + "loss": 0.9722, + "step": 1400 + }, + { + "epoch": 0.78, + "learning_rate": 4.216890959247873e-05, + "loss": 0.988, + "step": 1405 + }, + { + "epoch": 0.78, + "learning_rate": 4.211621080419463e-05, + "loss": 1.0103, + "step": 1410 + }, + { + "epoch": 0.78, + "learning_rate": 4.206336846441695e-05, + "loss": 0.9805, + "step": 1415 + }, + { + "epoch": 0.79, + "learning_rate": 4.201038301632772e-05, + "loss": 1.0177, + "step": 1420 + }, + { + "epoch": 0.79, + "learning_rate": 4.195725490430917e-05, + "loss": 0.9938, + "step": 1425 + }, + { + "epoch": 0.79, + "learning_rate": 4.190398457394007e-05, + "loss": 1.0276, + "step": 1430 + }, + { + "epoch": 0.79, + "learning_rate": 4.1850572471991924e-05, + "loss": 0.9909, + "step": 1435 + }, + { + "epoch": 0.8, + "learning_rate": 4.1797019046425264e-05, + "loss": 0.9794, + "step": 1440 + }, + { + "epoch": 0.8, + "learning_rate": 4.1743324746385914e-05, + "loss": 1.0073, + "step": 1445 + }, + { + "epoch": 0.8, + "learning_rate": 4.1689490022201154e-05, + "loss": 1.0106, + "step": 1450 + }, + { + "epoch": 0.8, + "learning_rate": 4.163551532537601e-05, + "loss": 1.0241, + "step": 1455 + }, + { + "epoch": 0.81, + "learning_rate": 4.1581401108589425e-05, + "loss": 1.0178, + "step": 1460 + }, + { + "epoch": 0.81, + "learning_rate": 4.1527147825690495e-05, + "loss": 0.9426, + "step": 1465 + }, + { + "epoch": 0.81, + "learning_rate": 4.1472755931694626e-05, + "loss": 0.9642, + "step": 1470 + }, + { + "epoch": 0.82, + "learning_rate": 4.141822588277976e-05, + "loss": 0.9678, + "step": 1475 + }, + { + "epoch": 0.82, + "learning_rate": 4.136355813628251e-05, + "loss": 0.9654, + "step": 1480 + }, + { + "epoch": 0.82, + "learning_rate": 4.130875315069435e-05, + "loss": 0.9748, + "step": 1485 + }, + { + "epoch": 0.82, + "learning_rate": 4.125381138565775e-05, + "loss": 0.9321, + "step": 1490 + }, + { + "epoch": 0.83, + "learning_rate": 4.1198733301962346e-05, + "loss": 0.9885, + "step": 1495 + }, + { + "epoch": 0.83, + "learning_rate": 4.114351936154105e-05, + "loss": 0.9385, + "step": 1500 + }, + { + "epoch": 0.83, + "learning_rate": 4.108817002746619e-05, + "loss": 0.9362, + "step": 1505 + }, + { + "epoch": 0.83, + "learning_rate": 4.1032685763945625e-05, + "loss": 0.9764, + "step": 1510 + }, + { + "epoch": 0.84, + "learning_rate": 4.097706703631886e-05, + "loss": 0.961, + "step": 1515 + }, + { + "epoch": 0.84, + "learning_rate": 4.092131431105312e-05, + "loss": 0.9818, + "step": 1520 + }, + { + "epoch": 0.84, + "learning_rate": 4.086542805573945e-05, + "loss": 0.9855, + "step": 1525 + }, + { + "epoch": 0.85, + "learning_rate": 4.080940873908881e-05, + "loss": 0.9618, + "step": 1530 + }, + { + "epoch": 0.85, + "learning_rate": 4.07532568309281e-05, + "loss": 1.0102, + "step": 1535 + }, + { + "epoch": 0.85, + "learning_rate": 4.069697280219628e-05, + "loss": 0.9773, + "step": 1540 + }, + { + "epoch": 0.85, + "learning_rate": 4.0640557124940376e-05, + "loss": 0.9687, + "step": 1545 + }, + { + "epoch": 0.86, + "learning_rate": 4.058401027231152e-05, + "loss": 0.9818, + "step": 1550 + }, + { + "epoch": 0.86, + "learning_rate": 4.052733271856103e-05, + "loss": 0.9911, + "step": 1555 + }, + { + "epoch": 0.86, + "learning_rate": 4.0470524939036355e-05, + "loss": 0.9694, + "step": 1560 + }, + { + "epoch": 0.87, + "learning_rate": 4.0413587410177155e-05, + "loss": 0.9374, + "step": 1565 + }, + { + "epoch": 0.87, + "learning_rate": 4.035652060951128e-05, + "loss": 0.956, + "step": 1570 + }, + { + "epoch": 0.87, + "learning_rate": 4.0299325015650774e-05, + "loss": 0.9491, + "step": 1575 + }, + { + "epoch": 0.87, + "learning_rate": 4.024200110828783e-05, + "loss": 0.9569, + "step": 1580 + }, + { + "epoch": 0.88, + "learning_rate": 4.018454936819082e-05, + "loss": 0.9627, + "step": 1585 + }, + { + "epoch": 0.88, + "learning_rate": 4.012697027720018e-05, + "loss": 0.9922, + "step": 1590 + }, + { + "epoch": 0.88, + "learning_rate": 4.0069264318224506e-05, + "loss": 0.9703, + "step": 1595 + }, + { + "epoch": 0.88, + "learning_rate": 4.0011431975236337e-05, + "loss": 0.9436, + "step": 1600 + }, + { + "epoch": 0.89, + "learning_rate": 3.995347373326822e-05, + "loss": 0.961, + "step": 1605 + }, + { + "epoch": 0.89, + "learning_rate": 3.989539007840861e-05, + "loss": 0.9247, + "step": 1610 + }, + { + "epoch": 0.89, + "learning_rate": 3.983718149779775e-05, + "loss": 0.9537, + "step": 1615 + }, + { + "epoch": 0.9, + "learning_rate": 3.9778848479623656e-05, + "loss": 0.9415, + "step": 1620 + }, + { + "epoch": 0.9, + "learning_rate": 3.972039151311795e-05, + "loss": 0.9954, + "step": 1625 + }, + { + "epoch": 0.9, + "learning_rate": 3.966181108855183e-05, + "loss": 0.9451, + "step": 1630 + }, + { + "epoch": 0.9, + "learning_rate": 3.960310769723189e-05, + "loss": 0.9977, + "step": 1635 + }, + { + "epoch": 0.91, + "learning_rate": 3.9544281831496034e-05, + "loss": 0.9806, + "step": 1640 + }, + { + "epoch": 0.91, + "learning_rate": 3.9485333984709374e-05, + "loss": 0.9851, + "step": 1645 + }, + { + "epoch": 0.91, + "learning_rate": 3.942626465126001e-05, + "loss": 1.0089, + "step": 1650 + }, + { + "epoch": 0.92, + "learning_rate": 3.9367074326555e-05, + "loss": 0.9562, + "step": 1655 + }, + { + "epoch": 0.92, + "learning_rate": 3.930776350701609e-05, + "loss": 0.9892, + "step": 1660 + }, + { + "epoch": 0.92, + "learning_rate": 3.92483326900756e-05, + "loss": 0.9875, + "step": 1665 + }, + { + "epoch": 0.92, + "learning_rate": 3.91887823741723e-05, + "loss": 1.0012, + "step": 1670 + }, + { + "epoch": 0.93, + "learning_rate": 3.9129113058747136e-05, + "loss": 0.97, + "step": 1675 + }, + { + "epoch": 0.93, + "learning_rate": 3.9069325244239095e-05, + "loss": 0.9793, + "step": 1680 + }, + { + "epoch": 0.93, + "learning_rate": 3.900941943208103e-05, + "loss": 0.9864, + "step": 1685 + }, + { + "epoch": 0.93, + "learning_rate": 3.894939612469539e-05, + "loss": 0.9596, + "step": 1690 + }, + { + "epoch": 0.94, + "learning_rate": 3.888925582549006e-05, + "loss": 0.9679, + "step": 1695 + }, + { + "epoch": 0.94, + "learning_rate": 3.882899903885412e-05, + "loss": 0.9395, + "step": 1700 + }, + { + "epoch": 0.94, + "learning_rate": 3.876862627015361e-05, + "loss": 0.9774, + "step": 1705 + }, + { + "epoch": 0.95, + "learning_rate": 3.87081380257273e-05, + "loss": 0.9686, + "step": 1710 + }, + { + "epoch": 0.95, + "learning_rate": 3.864753481288244e-05, + "loss": 0.953, + "step": 1715 + }, + { + "epoch": 0.95, + "learning_rate": 3.8586817139890515e-05, + "loss": 0.9719, + "step": 1720 + }, + { + "epoch": 0.95, + "learning_rate": 3.852598551598294e-05, + "loss": 0.9748, + "step": 1725 + }, + { + "epoch": 0.96, + "learning_rate": 3.8465040451346874e-05, + "loss": 0.9741, + "step": 1730 + }, + { + "epoch": 0.96, + "learning_rate": 3.8403982457120836e-05, + "loss": 0.9747, + "step": 1735 + }, + { + "epoch": 0.96, + "learning_rate": 3.834281204539051e-05, + "loss": 0.9791, + "step": 1740 + }, + { + "epoch": 0.96, + "learning_rate": 3.828152972918438e-05, + "loss": 0.9704, + "step": 1745 + }, + { + "epoch": 0.97, + "learning_rate": 3.82201360224695e-05, + "loss": 0.9556, + "step": 1750 + }, + { + "epoch": 0.97, + "learning_rate": 3.815863144014711e-05, + "loss": 0.9552, + "step": 1755 + }, + { + "epoch": 0.97, + "learning_rate": 3.809701649804834e-05, + "loss": 0.9594, + "step": 1760 + }, + { + "epoch": 0.98, + "learning_rate": 3.8035291712929926e-05, + "loss": 0.959, + "step": 1765 + }, + { + "epoch": 0.98, + "learning_rate": 3.797345760246982e-05, + "loss": 1.0254, + "step": 1770 + }, + { + "epoch": 0.98, + "learning_rate": 3.791151468526289e-05, + "loss": 0.9925, + "step": 1775 + }, + { + "epoch": 0.98, + "learning_rate": 3.784946348081654e-05, + "loss": 0.9516, + "step": 1780 + }, + { + "epoch": 0.99, + "learning_rate": 3.7787304509546365e-05, + "loss": 0.954, + "step": 1785 + }, + { + "epoch": 0.99, + "learning_rate": 3.7725038292771774e-05, + "loss": 0.9465, + "step": 1790 + }, + { + "epoch": 0.99, + "learning_rate": 3.766266535271167e-05, + "loss": 0.9792, + "step": 1795 + }, + { + "epoch": 1.0, + "learning_rate": 3.760018621248e-05, + "loss": 0.964, + "step": 1800 + }, + { + "epoch": 1.0, + "learning_rate": 3.75376013960814e-05, + "loss": 0.9419, + "step": 1805 + }, + { + "epoch": 1.0, + "learning_rate": 3.747491142840681e-05, + "loss": 0.9818, + "step": 1810 + }, + { + "epoch": 1.0, + "learning_rate": 3.741211683522904e-05, + "loss": 0.9153, + "step": 1815 + }, + { + "epoch": 1.01, + "learning_rate": 3.734921814319841e-05, + "loss": 0.955, + "step": 1820 + }, + { + "epoch": 1.01, + "learning_rate": 3.728621587983828e-05, + "loss": 0.9467, + "step": 1825 + }, + { + "epoch": 1.01, + "learning_rate": 3.722311057354067e-05, + "loss": 0.9816, + "step": 1830 + }, + { + "epoch": 1.01, + "learning_rate": 3.715990275356178e-05, + "loss": 0.9727, + "step": 1835 + }, + { + "epoch": 1.02, + "learning_rate": 3.7096592950017617e-05, + "loss": 0.9822, + "step": 1840 + }, + { + "epoch": 1.02, + "learning_rate": 3.703318169387947e-05, + "loss": 0.942, + "step": 1845 + }, + { + "epoch": 1.02, + "learning_rate": 3.696966951696952e-05, + "loss": 0.9306, + "step": 1850 + }, + { + "epoch": 1.03, + "learning_rate": 3.690605695195637e-05, + "loss": 0.9654, + "step": 1855 + }, + { + "epoch": 1.03, + "learning_rate": 3.684234453235054e-05, + "loss": 0.9592, + "step": 1860 + }, + { + "epoch": 1.03, + "learning_rate": 3.677853279250003e-05, + "loss": 0.988, + "step": 1865 + }, + { + "epoch": 1.03, + "learning_rate": 3.671462226758583e-05, + "loss": 0.9462, + "step": 1870 + }, + { + "epoch": 1.04, + "learning_rate": 3.665061349361742e-05, + "loss": 0.9685, + "step": 1875 + }, + { + "epoch": 1.04, + "learning_rate": 3.658650700742828e-05, + "loss": 0.9772, + "step": 1880 + }, + { + "epoch": 1.04, + "learning_rate": 3.6522303346671404e-05, + "loss": 0.9482, + "step": 1885 + }, + { + "epoch": 1.05, + "learning_rate": 3.645800304981477e-05, + "loss": 1.0069, + "step": 1890 + }, + { + "epoch": 1.05, + "learning_rate": 3.639360665613683e-05, + "loss": 0.9003, + "step": 1895 + }, + { + "epoch": 1.05, + "learning_rate": 3.632911470572197e-05, + "loss": 0.9279, + "step": 1900 + }, + { + "epoch": 1.05, + "learning_rate": 3.626452773945603e-05, + "loss": 0.9237, + "step": 1905 + }, + { + "epoch": 1.06, + "learning_rate": 3.619984629902172e-05, + "loss": 1.0086, + "step": 1910 + }, + { + "epoch": 1.06, + "learning_rate": 3.613507092689409e-05, + "loss": 0.9625, + "step": 1915 + }, + { + "epoch": 1.06, + "learning_rate": 3.607020216633599e-05, + "loss": 0.9297, + "step": 1920 + }, + { + "epoch": 1.06, + "learning_rate": 3.60052405613935e-05, + "loss": 0.9718, + "step": 1925 + }, + { + "epoch": 1.07, + "learning_rate": 3.594018665689139e-05, + "loss": 0.9512, + "step": 1930 + }, + { + "epoch": 1.07, + "learning_rate": 3.5875040998428513e-05, + "loss": 0.9923, + "step": 1935 + }, + { + "epoch": 1.07, + "learning_rate": 3.5809804132373253e-05, + "loss": 0.9518, + "step": 1940 + }, + { + "epoch": 1.08, + "learning_rate": 3.574447660585897e-05, + "loss": 0.9324, + "step": 1945 + }, + { + "epoch": 1.08, + "learning_rate": 3.5679058966779344e-05, + "loss": 0.9724, + "step": 1950 + }, + { + "epoch": 1.08, + "learning_rate": 3.561355176378384e-05, + "loss": 0.978, + "step": 1955 + }, + { + "epoch": 1.08, + "learning_rate": 3.554795554627307e-05, + "loss": 0.9893, + "step": 1960 + }, + { + "epoch": 1.09, + "learning_rate": 3.548227086439422e-05, + "loss": 0.967, + "step": 1965 + }, + { + "epoch": 1.09, + "learning_rate": 3.541649826903639e-05, + "loss": 0.9529, + "step": 1970 + }, + { + "epoch": 1.09, + "learning_rate": 3.535063831182602e-05, + "loss": 0.9506, + "step": 1975 + }, + { + "epoch": 1.09, + "learning_rate": 3.528469154512224e-05, + "loss": 0.9525, + "step": 1980 + }, + { + "epoch": 1.1, + "learning_rate": 3.521865852201223e-05, + "loss": 0.9258, + "step": 1985 + }, + { + "epoch": 1.1, + "learning_rate": 3.5152539796306596e-05, + "loss": 0.9417, + "step": 1990 + }, + { + "epoch": 1.1, + "learning_rate": 3.508633592253472e-05, + "loss": 0.9465, + "step": 1995 + }, + { + "epoch": 1.11, + "learning_rate": 3.502004745594011e-05, + "loss": 0.9494, + "step": 2000 + }, + { + "epoch": 1.11, + "learning_rate": 3.4953674952475755e-05, + "loss": 0.9709, + "step": 2005 + }, + { + "epoch": 1.11, + "learning_rate": 3.488721896879943e-05, + "loss": 0.9581, + "step": 2010 + }, + { + "epoch": 1.11, + "learning_rate": 3.4820680062269074e-05, + "loss": 0.974, + "step": 2015 + }, + { + "epoch": 1.12, + "learning_rate": 3.4754058790938046e-05, + "loss": 0.9768, + "step": 2020 + }, + { + "epoch": 1.12, + "learning_rate": 3.468735571355055e-05, + "loss": 0.982, + "step": 2025 + }, + { + "epoch": 1.12, + "learning_rate": 3.4620571389536825e-05, + "loss": 0.984, + "step": 2030 + }, + { + "epoch": 1.13, + "learning_rate": 3.455370637900856e-05, + "loss": 0.9604, + "step": 2035 + }, + { + "epoch": 1.13, + "learning_rate": 3.448676124275414e-05, + "loss": 0.897, + "step": 2040 + }, + { + "epoch": 1.13, + "learning_rate": 3.4419736542233925e-05, + "loss": 0.9968, + "step": 2045 + }, + { + "epoch": 1.13, + "learning_rate": 3.4352632839575616e-05, + "loss": 0.9479, + "step": 2050 + }, + { + "epoch": 1.14, + "learning_rate": 3.428545069756946e-05, + "loss": 0.9724, + "step": 2055 + }, + { + "epoch": 1.14, + "learning_rate": 3.42181906796636e-05, + "loss": 0.9493, + "step": 2060 + }, + { + "epoch": 1.14, + "learning_rate": 3.415085334995927e-05, + "loss": 0.9348, + "step": 2065 + }, + { + "epoch": 1.14, + "learning_rate": 3.408343927320613e-05, + "loss": 0.9702, + "step": 2070 + }, + { + "epoch": 1.15, + "learning_rate": 3.401594901479753e-05, + "loss": 0.9089, + "step": 2075 + }, + { + "epoch": 1.15, + "learning_rate": 3.394838314076572e-05, + "loss": 0.9606, + "step": 2080 + }, + { + "epoch": 1.15, + "learning_rate": 3.3880742217777115e-05, + "loss": 0.9743, + "step": 2085 + }, + { + "epoch": 1.16, + "learning_rate": 3.381302681312759e-05, + "loss": 0.9469, + "step": 2090 + }, + { + "epoch": 1.16, + "learning_rate": 3.374523749473767e-05, + "loss": 0.949, + "step": 2095 + }, + { + "epoch": 1.16, + "learning_rate": 3.367737483114779e-05, + "loss": 0.9567, + "step": 2100 + }, + { + "epoch": 1.16, + "learning_rate": 3.360943939151351e-05, + "loss": 0.9718, + "step": 2105 + }, + { + "epoch": 1.17, + "learning_rate": 3.354143174560078e-05, + "loss": 0.9626, + "step": 2110 + }, + { + "epoch": 1.17, + "learning_rate": 3.3473352463781105e-05, + "loss": 0.9346, + "step": 2115 + }, + { + "epoch": 1.17, + "learning_rate": 3.340520211702681e-05, + "loss": 0.9208, + "step": 2120 + }, + { + "epoch": 1.18, + "learning_rate": 3.333698127690623e-05, + "loss": 0.9856, + "step": 2125 + }, + { + "epoch": 1.18, + "learning_rate": 3.326869051557891e-05, + "loss": 0.9049, + "step": 2130 + }, + { + "epoch": 1.18, + "learning_rate": 3.320033040579082e-05, + "loss": 0.9222, + "step": 2135 + }, + { + "epoch": 1.18, + "learning_rate": 3.3131901520869565e-05, + "loss": 0.9648, + "step": 2140 + }, + { + "epoch": 1.19, + "learning_rate": 3.306340443471951e-05, + "loss": 0.9538, + "step": 2145 + }, + { + "epoch": 1.19, + "learning_rate": 3.299483972181708e-05, + "loss": 0.9314, + "step": 2150 + }, + { + "epoch": 1.19, + "learning_rate": 3.292620795720583e-05, + "loss": 0.9576, + "step": 2155 + }, + { + "epoch": 1.19, + "learning_rate": 3.285750971649167e-05, + "loss": 0.9499, + "step": 2160 + }, + { + "epoch": 1.2, + "learning_rate": 3.278874557583807e-05, + "loss": 0.9568, + "step": 2165 + }, + { + "epoch": 1.2, + "learning_rate": 3.271991611196117e-05, + "loss": 0.9642, + "step": 2170 + }, + { + "epoch": 1.2, + "learning_rate": 3.265102190212497e-05, + "loss": 0.9526, + "step": 2175 + }, + { + "epoch": 1.21, + "learning_rate": 3.258206352413648e-05, + "loss": 0.933, + "step": 2180 + }, + { + "epoch": 1.21, + "learning_rate": 3.2513041556340887e-05, + "loss": 0.9683, + "step": 2185 + }, + { + "epoch": 1.21, + "learning_rate": 3.244395657761671e-05, + "loss": 0.9155, + "step": 2190 + }, + { + "epoch": 1.21, + "learning_rate": 3.2374809167370924e-05, + "loss": 0.9262, + "step": 2195 + }, + { + "epoch": 1.22, + "learning_rate": 3.230559990553409e-05, + "loss": 0.9778, + "step": 2200 + } + ], + "max_steps": 5424, + "num_train_epochs": 3, + "total_flos": 7.149143754119578e+17, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-2200/training_args.bin b/checkpoint-2200/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..77433459daca6603950afc463ff989e817f81ac3 --- /dev/null +++ b/checkpoint-2200/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0f6a8853599a52b1f15984d75ce4b6434c335fdc67d4509d0b2ca0d8c613590a +size 3362 diff --git a/checkpoint-2300/README.md b/checkpoint-2300/README.md new file mode 100644 index 0000000000000000000000000000000000000000..53c0fe6b436905ad7a2c060e98fe1ebdeac1a2fe --- /dev/null +++ b/checkpoint-2300/README.md @@ -0,0 +1,15 @@ +--- +library_name: peft +--- +## Training procedure + + +The following `bitsandbytes` quantization config was used during training: +- load_in_8bit: True +- llm_int8_threshold: 6.0 +- llm_int8_skip_modules: None +- llm_int8_enable_fp32_cpu_offload: False +### Framework versions + + +- PEFT 0.5.0 diff --git a/checkpoint-2300/adapter_config.json b/checkpoint-2300/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..514ae99ac7471cc26016f7575e44dd8304f29b07 --- /dev/null +++ b/checkpoint-2300/adapter_config.json @@ -0,0 +1,21 @@ +{ + "auto_mapping": null, + "base_model_name_or_path": "meta-llama/Llama-2-7b-chat-hf", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "q_proj", + "v_proj" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-2300/adapter_model.bin b/checkpoint-2300/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..3065e1f1559de76190f2aac7c97f6ff414963508 --- /dev/null +++ b/checkpoint-2300/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:526a5b0aaf09783875dbf30dd5b35fcb570dfb78639c7f20a47b3b1c47a7be9b +size 16821197 diff --git a/checkpoint-2300/finetuning_args.json b/checkpoint-2300/finetuning_args.json new file mode 100644 index 0000000000000000000000000000000000000000..d01efc206b59c6f88548e8f3940579f2ed2af33b --- /dev/null +++ b/checkpoint-2300/finetuning_args.json @@ -0,0 +1,16 @@ +{ + "dpo_beta": 0.1, + "finetuning_type": "lora", + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "lora_rank": 8, + "lora_target": [ + "q_proj", + "v_proj" + ], + "name_module_trainable": "mlp", + "num_hidden_layers": 32, + "num_layer_trainable": 3, + "ppo_score_norm": false, + "resume_lora_training": true +} diff --git a/checkpoint-2300/optimizer.pt b/checkpoint-2300/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..23a812b3b7e7e9b9d3c8a803aa87422d9f115d90 --- /dev/null +++ b/checkpoint-2300/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fc833a74ac6b3f19f5522643f7b5456762fd386d3a7b9dd78a7a9b21be19ade9 +size 33629893 diff --git a/checkpoint-2300/rng_state.pth b/checkpoint-2300/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..7c4117693ed2831ad36816bfa619b3d68ac3d634 --- /dev/null +++ b/checkpoint-2300/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cfb8a0cda32c82e5f1af5ba90128d875eab80f28a9a2be59bd8629779d020e15 +size 14575 diff --git a/checkpoint-2300/scaler.pt b/checkpoint-2300/scaler.pt new file mode 100644 index 0000000000000000000000000000000000000000..1af110a3c24e2664cf1d694d79f220d2c5f1aba6 --- /dev/null +++ b/checkpoint-2300/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a3f1f4327a1208bf521efff1b8d6cfb810622b15a0ec09d2d4ac68bc0d0ff3ae +size 557 diff --git a/checkpoint-2300/scheduler.pt b/checkpoint-2300/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..0b486c1e93036d295a8d5c6a46aaaf3eb2efef89 --- /dev/null +++ b/checkpoint-2300/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a38ae932a17737114cff5619c9924b526d24c405625daeb000b402ac4b748a9c +size 627 diff --git a/checkpoint-2300/trainer_state.json b/checkpoint-2300/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..77abe399440d238ff89184a424c159abe7c97555 --- /dev/null +++ b/checkpoint-2300/trainer_state.json @@ -0,0 +1,2776 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.271772186895217, + "global_step": 2300, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.9999895164082156e-05, + "loss": 1.6349, + "step": 5 + }, + { + "epoch": 0.01, + "learning_rate": 4.999958065720787e-05, + "loss": 1.6199, + "step": 10 + }, + { + "epoch": 0.01, + "learning_rate": 4.999905648201487e-05, + "loss": 1.4834, + "step": 15 + }, + { + "epoch": 0.01, + "learning_rate": 4.999832264289934e-05, + "loss": 1.3882, + "step": 20 + }, + { + "epoch": 0.01, + "learning_rate": 4.999737914601591e-05, + "loss": 1.3679, + "step": 25 + }, + { + "epoch": 0.02, + "learning_rate": 4.999622599927756e-05, + "loss": 1.2396, + "step": 30 + }, + { + "epoch": 0.02, + "learning_rate": 4.999486321235559e-05, + "loss": 1.321, + "step": 35 + }, + { + "epoch": 0.02, + "learning_rate": 4.9993290796679516e-05, + "loss": 1.2874, + "step": 40 + }, + { + "epoch": 0.02, + "learning_rate": 4.999150876543699e-05, + "loss": 1.2607, + "step": 45 + }, + { + "epoch": 0.03, + "learning_rate": 4.9989517133573694e-05, + "loss": 1.2454, + "step": 50 + }, + { + "epoch": 0.03, + "learning_rate": 4.9987315917793174e-05, + "loss": 1.2799, + "step": 55 + }, + { + "epoch": 0.03, + "learning_rate": 4.998490513655676e-05, + "loss": 1.2575, + "step": 60 + }, + { + "epoch": 0.04, + "learning_rate": 4.998228481008337e-05, + "loss": 1.2404, + "step": 65 + }, + { + "epoch": 0.04, + "learning_rate": 4.997945496034934e-05, + "loss": 1.2219, + "step": 70 + }, + { + "epoch": 0.04, + "learning_rate": 4.9976415611088267e-05, + "loss": 1.2241, + "step": 75 + }, + { + "epoch": 0.04, + "learning_rate": 4.997316678779079e-05, + "loss": 1.1716, + "step": 80 + }, + { + "epoch": 0.05, + "learning_rate": 4.996970851770438e-05, + "loss": 1.1883, + "step": 85 + }, + { + "epoch": 0.05, + "learning_rate": 4.9966040829833115e-05, + "loss": 1.205, + "step": 90 + }, + { + "epoch": 0.05, + "learning_rate": 4.9962163754937426e-05, + "loss": 1.1246, + "step": 95 + }, + { + "epoch": 0.06, + "learning_rate": 4.995807732553384e-05, + "loss": 1.1636, + "step": 100 + }, + { + "epoch": 0.06, + "learning_rate": 4.9953781575894723e-05, + "loss": 1.158, + "step": 105 + }, + { + "epoch": 0.06, + "learning_rate": 4.9949276542048e-05, + "loss": 1.1477, + "step": 110 + }, + { + "epoch": 0.06, + "learning_rate": 4.9944562261776805e-05, + "loss": 1.1678, + "step": 115 + }, + { + "epoch": 0.07, + "learning_rate": 4.9939638774619216e-05, + "loss": 1.1501, + "step": 120 + }, + { + "epoch": 0.07, + "learning_rate": 4.99345061218679e-05, + "loss": 1.1955, + "step": 125 + }, + { + "epoch": 0.07, + "learning_rate": 4.9929164346569756e-05, + "loss": 1.1724, + "step": 130 + }, + { + "epoch": 0.07, + "learning_rate": 4.9923613493525576e-05, + "loss": 1.177, + "step": 135 + }, + { + "epoch": 0.08, + "learning_rate": 4.991785360928968e-05, + "loss": 1.1418, + "step": 140 + }, + { + "epoch": 0.08, + "learning_rate": 4.991188474216947e-05, + "loss": 1.1898, + "step": 145 + }, + { + "epoch": 0.08, + "learning_rate": 4.9905706942225094e-05, + "loss": 1.1479, + "step": 150 + }, + { + "epoch": 0.09, + "learning_rate": 4.9899320261268966e-05, + "loss": 1.1356, + "step": 155 + }, + { + "epoch": 0.09, + "learning_rate": 4.989272475286537e-05, + "loss": 1.1397, + "step": 160 + }, + { + "epoch": 0.09, + "learning_rate": 4.9885920472330004e-05, + "loss": 1.1215, + "step": 165 + }, + { + "epoch": 0.09, + "learning_rate": 4.9878907476729516e-05, + "loss": 1.167, + "step": 170 + }, + { + "epoch": 0.1, + "learning_rate": 4.9871685824881e-05, + "loss": 1.1219, + "step": 175 + }, + { + "epoch": 0.1, + "learning_rate": 4.9864255577351534e-05, + "loss": 1.0835, + "step": 180 + }, + { + "epoch": 0.1, + "learning_rate": 4.985661679645769e-05, + "loss": 1.0721, + "step": 185 + }, + { + "epoch": 0.11, + "learning_rate": 4.9848769546264915e-05, + "loss": 1.0692, + "step": 190 + }, + { + "epoch": 0.11, + "learning_rate": 4.9840713892587146e-05, + "loss": 1.0488, + "step": 195 + }, + { + "epoch": 0.11, + "learning_rate": 4.983244990298609e-05, + "loss": 1.1285, + "step": 200 + }, + { + "epoch": 0.11, + "learning_rate": 4.982397764677081e-05, + "loss": 1.0832, + "step": 205 + }, + { + "epoch": 0.12, + "learning_rate": 4.981529719499704e-05, + "loss": 1.0652, + "step": 210 + }, + { + "epoch": 0.12, + "learning_rate": 4.980640862046663e-05, + "loss": 1.1043, + "step": 215 + }, + { + "epoch": 0.12, + "learning_rate": 4.979731199772693e-05, + "loss": 1.112, + "step": 220 + }, + { + "epoch": 0.12, + "learning_rate": 4.9788007403070146e-05, + "loss": 1.1029, + "step": 225 + }, + { + "epoch": 0.13, + "learning_rate": 4.977849491453277e-05, + "loss": 1.0869, + "step": 230 + }, + { + "epoch": 0.13, + "learning_rate": 4.976877461189481e-05, + "loss": 1.0843, + "step": 235 + }, + { + "epoch": 0.13, + "learning_rate": 4.975884657667922e-05, + "loss": 1.0789, + "step": 240 + }, + { + "epoch": 0.14, + "learning_rate": 4.974871089215118e-05, + "loss": 1.0449, + "step": 245 + }, + { + "epoch": 0.14, + "learning_rate": 4.9738367643317405e-05, + "loss": 1.1053, + "step": 250 + }, + { + "epoch": 0.14, + "learning_rate": 4.9727816916925395e-05, + "loss": 1.0651, + "step": 255 + }, + { + "epoch": 0.14, + "learning_rate": 4.971705880146276e-05, + "loss": 1.0828, + "step": 260 + }, + { + "epoch": 0.15, + "learning_rate": 4.970609338715646e-05, + "loss": 1.0932, + "step": 265 + }, + { + "epoch": 0.15, + "learning_rate": 4.969492076597203e-05, + "loss": 1.0648, + "step": 270 + }, + { + "epoch": 0.15, + "learning_rate": 4.968354103161283e-05, + "loss": 1.0948, + "step": 275 + }, + { + "epoch": 0.15, + "learning_rate": 4.967195427951926e-05, + "loss": 1.0721, + "step": 280 + }, + { + "epoch": 0.16, + "learning_rate": 4.9660160606867936e-05, + "loss": 1.124, + "step": 285 + }, + { + "epoch": 0.16, + "learning_rate": 4.9648160112570896e-05, + "loss": 1.0963, + "step": 290 + }, + { + "epoch": 0.16, + "learning_rate": 4.9635952897274773e-05, + "loss": 1.1078, + "step": 295 + }, + { + "epoch": 0.17, + "learning_rate": 4.9623539063359925e-05, + "loss": 1.1059, + "step": 300 + }, + { + "epoch": 0.17, + "learning_rate": 4.961091871493962e-05, + "loss": 1.1032, + "step": 305 + }, + { + "epoch": 0.17, + "learning_rate": 4.959809195785912e-05, + "loss": 1.0595, + "step": 310 + }, + { + "epoch": 0.17, + "learning_rate": 4.958505889969481e-05, + "loss": 1.1096, + "step": 315 + }, + { + "epoch": 0.18, + "learning_rate": 4.957181964975329e-05, + "loss": 1.0589, + "step": 320 + }, + { + "epoch": 0.18, + "learning_rate": 4.955837431907049e-05, + "loss": 1.0608, + "step": 325 + }, + { + "epoch": 0.18, + "learning_rate": 4.954472302041069e-05, + "loss": 1.0819, + "step": 330 + }, + { + "epoch": 0.19, + "learning_rate": 4.9530865868265605e-05, + "loss": 1.0759, + "step": 335 + }, + { + "epoch": 0.19, + "learning_rate": 4.951680297885342e-05, + "loss": 1.0515, + "step": 340 + }, + { + "epoch": 0.19, + "learning_rate": 4.950253447011779e-05, + "loss": 1.0371, + "step": 345 + }, + { + "epoch": 0.19, + "learning_rate": 4.948806046172691e-05, + "loss": 1.0619, + "step": 350 + }, + { + "epoch": 0.2, + "learning_rate": 4.947338107507245e-05, + "loss": 1.0757, + "step": 355 + }, + { + "epoch": 0.2, + "learning_rate": 4.945849643326857e-05, + "loss": 1.0686, + "step": 360 + }, + { + "epoch": 0.2, + "learning_rate": 4.9443406661150874e-05, + "loss": 1.0809, + "step": 365 + }, + { + "epoch": 0.2, + "learning_rate": 4.942811188527537e-05, + "loss": 1.0704, + "step": 370 + }, + { + "epoch": 0.21, + "learning_rate": 4.941261223391742e-05, + "loss": 1.0655, + "step": 375 + }, + { + "epoch": 0.21, + "learning_rate": 4.939690783707063e-05, + "loss": 1.1182, + "step": 380 + }, + { + "epoch": 0.21, + "learning_rate": 4.938099882644578e-05, + "loss": 1.081, + "step": 385 + }, + { + "epoch": 0.22, + "learning_rate": 4.9364885335469734e-05, + "loss": 1.0792, + "step": 390 + }, + { + "epoch": 0.22, + "learning_rate": 4.93485674992843e-05, + "loss": 1.0244, + "step": 395 + }, + { + "epoch": 0.22, + "learning_rate": 4.933204545474511e-05, + "loss": 1.0531, + "step": 400 + }, + { + "epoch": 0.22, + "learning_rate": 4.9315319340420465e-05, + "loss": 1.048, + "step": 405 + }, + { + "epoch": 0.23, + "learning_rate": 4.929838929659015e-05, + "loss": 1.04, + "step": 410 + }, + { + "epoch": 0.23, + "learning_rate": 4.9281255465244314e-05, + "loss": 1.0686, + "step": 415 + }, + { + "epoch": 0.23, + "learning_rate": 4.926391799008223e-05, + "loss": 1.0933, + "step": 420 + }, + { + "epoch": 0.24, + "learning_rate": 4.924637701651111e-05, + "loss": 1.0313, + "step": 425 + }, + { + "epoch": 0.24, + "learning_rate": 4.9228632691644874e-05, + "loss": 1.0811, + "step": 430 + }, + { + "epoch": 0.24, + "learning_rate": 4.921068516430293e-05, + "loss": 1.0617, + "step": 435 + }, + { + "epoch": 0.24, + "learning_rate": 4.919253458500892e-05, + "loss": 1.0482, + "step": 440 + }, + { + "epoch": 0.25, + "learning_rate": 4.9174181105989445e-05, + "loss": 1.0681, + "step": 445 + }, + { + "epoch": 0.25, + "learning_rate": 4.9155624881172834e-05, + "loss": 1.0476, + "step": 450 + }, + { + "epoch": 0.25, + "learning_rate": 4.913686606618777e-05, + "loss": 1.0463, + "step": 455 + }, + { + "epoch": 0.25, + "learning_rate": 4.911790481836208e-05, + "loss": 1.0213, + "step": 460 + }, + { + "epoch": 0.26, + "learning_rate": 4.909874129672133e-05, + "loss": 0.9855, + "step": 465 + }, + { + "epoch": 0.26, + "learning_rate": 4.907937566198757e-05, + "loss": 1.045, + "step": 470 + }, + { + "epoch": 0.26, + "learning_rate": 4.9059808076577914e-05, + "loss": 1.0442, + "step": 475 + }, + { + "epoch": 0.27, + "learning_rate": 4.904003870460323e-05, + "loss": 1.0354, + "step": 480 + }, + { + "epoch": 0.27, + "learning_rate": 4.9020067711866735e-05, + "loss": 1.0331, + "step": 485 + }, + { + "epoch": 0.27, + "learning_rate": 4.899989526586261e-05, + "loss": 1.0585, + "step": 490 + }, + { + "epoch": 0.27, + "learning_rate": 4.8979521535774636e-05, + "loss": 1.0223, + "step": 495 + }, + { + "epoch": 0.28, + "learning_rate": 4.895894669247468e-05, + "loss": 1.0118, + "step": 500 + }, + { + "epoch": 0.28, + "learning_rate": 4.8938170908521356e-05, + "loss": 1.0508, + "step": 505 + }, + { + "epoch": 0.28, + "learning_rate": 4.8917194358158534e-05, + "loss": 1.0656, + "step": 510 + }, + { + "epoch": 0.28, + "learning_rate": 4.8896017217313886e-05, + "loss": 1.0655, + "step": 515 + }, + { + "epoch": 0.29, + "learning_rate": 4.887463966359741e-05, + "loss": 1.0833, + "step": 520 + }, + { + "epoch": 0.29, + "learning_rate": 4.8853061876299956e-05, + "loss": 1.068, + "step": 525 + }, + { + "epoch": 0.29, + "learning_rate": 4.8831284036391684e-05, + "loss": 1.0487, + "step": 530 + }, + { + "epoch": 0.3, + "learning_rate": 4.880930632652058e-05, + "loss": 1.059, + "step": 535 + }, + { + "epoch": 0.3, + "learning_rate": 4.878712893101092e-05, + "loss": 1.0408, + "step": 540 + }, + { + "epoch": 0.3, + "learning_rate": 4.876475203586171e-05, + "loss": 0.9976, + "step": 545 + }, + { + "epoch": 0.3, + "learning_rate": 4.874217582874514e-05, + "loss": 1.0358, + "step": 550 + }, + { + "epoch": 0.31, + "learning_rate": 4.8719400499005e-05, + "loss": 1.0073, + "step": 555 + }, + { + "epoch": 0.31, + "learning_rate": 4.869642623765509e-05, + "loss": 1.0742, + "step": 560 + }, + { + "epoch": 0.31, + "learning_rate": 4.867325323737765e-05, + "loss": 1.0463, + "step": 565 + }, + { + "epoch": 0.32, + "learning_rate": 4.864988169252168e-05, + "loss": 1.0242, + "step": 570 + }, + { + "epoch": 0.32, + "learning_rate": 4.8626311799101375e-05, + "loss": 1.0773, + "step": 575 + }, + { + "epoch": 0.32, + "learning_rate": 4.860254375479446e-05, + "loss": 1.0371, + "step": 580 + }, + { + "epoch": 0.32, + "learning_rate": 4.8578577758940504e-05, + "loss": 1.0399, + "step": 585 + }, + { + "epoch": 0.33, + "learning_rate": 4.855441401253928e-05, + "loss": 1.0411, + "step": 590 + }, + { + "epoch": 0.33, + "learning_rate": 4.8530052718249076e-05, + "loss": 1.0476, + "step": 595 + }, + { + "epoch": 0.33, + "learning_rate": 4.850549408038498e-05, + "loss": 1.0075, + "step": 600 + }, + { + "epoch": 0.33, + "learning_rate": 4.848073830491717e-05, + "loss": 1.0045, + "step": 605 + }, + { + "epoch": 0.34, + "learning_rate": 4.845578559946923e-05, + "loss": 1.0368, + "step": 610 + }, + { + "epoch": 0.34, + "learning_rate": 4.8430636173316306e-05, + "loss": 1.0156, + "step": 615 + }, + { + "epoch": 0.34, + "learning_rate": 4.840529023738348e-05, + "loss": 1.0334, + "step": 620 + }, + { + "epoch": 0.35, + "learning_rate": 4.837974800424389e-05, + "loss": 0.9994, + "step": 625 + }, + { + "epoch": 0.35, + "learning_rate": 4.8354009688117026e-05, + "loss": 1.0467, + "step": 630 + }, + { + "epoch": 0.35, + "learning_rate": 4.8328075504866874e-05, + "loss": 0.9968, + "step": 635 + }, + { + "epoch": 0.35, + "learning_rate": 4.8301945672000164e-05, + "loss": 1.041, + "step": 640 + }, + { + "epoch": 0.36, + "learning_rate": 4.8275620408664487e-05, + "loss": 0.9811, + "step": 645 + }, + { + "epoch": 0.36, + "learning_rate": 4.8249099935646494e-05, + "loss": 1.0235, + "step": 650 + }, + { + "epoch": 0.36, + "learning_rate": 4.822238447537003e-05, + "loss": 1.0152, + "step": 655 + }, + { + "epoch": 0.36, + "learning_rate": 4.819547425189429e-05, + "loss": 1.038, + "step": 660 + }, + { + "epoch": 0.37, + "learning_rate": 4.81683694909119e-05, + "loss": 1.0387, + "step": 665 + }, + { + "epoch": 0.37, + "learning_rate": 4.814107041974707e-05, + "loss": 1.019, + "step": 670 + }, + { + "epoch": 0.37, + "learning_rate": 4.811357726735366e-05, + "loss": 1.0403, + "step": 675 + }, + { + "epoch": 0.38, + "learning_rate": 4.808589026431324e-05, + "loss": 1.0828, + "step": 680 + }, + { + "epoch": 0.38, + "learning_rate": 4.805800964283322e-05, + "loss": 1.0128, + "step": 685 + }, + { + "epoch": 0.38, + "learning_rate": 4.802993563674483e-05, + "loss": 1.0502, + "step": 690 + }, + { + "epoch": 0.38, + "learning_rate": 4.80016684815012e-05, + "loss": 1.0169, + "step": 695 + }, + { + "epoch": 0.39, + "learning_rate": 4.7973208414175406e-05, + "loss": 1.0316, + "step": 700 + }, + { + "epoch": 0.39, + "learning_rate": 4.794455567345842e-05, + "loss": 1.0263, + "step": 705 + }, + { + "epoch": 0.39, + "learning_rate": 4.791571049965714e-05, + "loss": 1.0801, + "step": 710 + }, + { + "epoch": 0.4, + "learning_rate": 4.7886673134692404e-05, + "loss": 1.0575, + "step": 715 + }, + { + "epoch": 0.4, + "learning_rate": 4.7857443822096905e-05, + "loss": 0.9767, + "step": 720 + }, + { + "epoch": 0.4, + "learning_rate": 4.782802280701319e-05, + "loss": 0.9666, + "step": 725 + }, + { + "epoch": 0.4, + "learning_rate": 4.77984103361916e-05, + "loss": 0.9983, + "step": 730 + }, + { + "epoch": 0.41, + "learning_rate": 4.776860665798816e-05, + "loss": 1.0219, + "step": 735 + }, + { + "epoch": 0.41, + "learning_rate": 4.773861202236257e-05, + "loss": 0.9963, + "step": 740 + }, + { + "epoch": 0.41, + "learning_rate": 4.770842668087602e-05, + "loss": 1.0219, + "step": 745 + }, + { + "epoch": 0.41, + "learning_rate": 4.767805088668916e-05, + "loss": 1.0029, + "step": 750 + }, + { + "epoch": 0.42, + "learning_rate": 4.7647484894559936e-05, + "loss": 1.0077, + "step": 755 + }, + { + "epoch": 0.42, + "learning_rate": 4.7616728960841444e-05, + "loss": 0.9908, + "step": 760 + }, + { + "epoch": 0.42, + "learning_rate": 4.758578334347981e-05, + "loss": 1.0092, + "step": 765 + }, + { + "epoch": 0.43, + "learning_rate": 4.7554648302012015e-05, + "loss": 0.9874, + "step": 770 + }, + { + "epoch": 0.43, + "learning_rate": 4.7523324097563706e-05, + "loss": 1.0185, + "step": 775 + }, + { + "epoch": 0.43, + "learning_rate": 4.749181099284703e-05, + "loss": 1.0533, + "step": 780 + }, + { + "epoch": 0.43, + "learning_rate": 4.746010925215839e-05, + "loss": 1.0083, + "step": 785 + }, + { + "epoch": 0.44, + "learning_rate": 4.74282191413763e-05, + "loss": 1.0191, + "step": 790 + }, + { + "epoch": 0.44, + "learning_rate": 4.7396140927959045e-05, + "loss": 0.9786, + "step": 795 + }, + { + "epoch": 0.44, + "learning_rate": 4.7363874880942574e-05, + "loss": 1.0197, + "step": 800 + }, + { + "epoch": 0.45, + "learning_rate": 4.733142127093813e-05, + "loss": 1.0471, + "step": 805 + }, + { + "epoch": 0.45, + "learning_rate": 4.7298780370130014e-05, + "loss": 1.0134, + "step": 810 + }, + { + "epoch": 0.45, + "learning_rate": 4.726595245227336e-05, + "loss": 0.9801, + "step": 815 + }, + { + "epoch": 0.45, + "learning_rate": 4.723293779269173e-05, + "loss": 0.9944, + "step": 820 + }, + { + "epoch": 0.46, + "learning_rate": 4.7199736668274924e-05, + "loss": 1.0358, + "step": 825 + }, + { + "epoch": 0.46, + "learning_rate": 4.716634935747655e-05, + "loss": 0.9907, + "step": 830 + }, + { + "epoch": 0.46, + "learning_rate": 4.713277614031177e-05, + "loss": 1.0698, + "step": 835 + }, + { + "epoch": 0.46, + "learning_rate": 4.70990172983549e-05, + "loss": 1.0466, + "step": 840 + }, + { + "epoch": 0.47, + "learning_rate": 4.706507311473707e-05, + "loss": 0.9489, + "step": 845 + }, + { + "epoch": 0.47, + "learning_rate": 4.703094387414385e-05, + "loss": 0.9936, + "step": 850 + }, + { + "epoch": 0.47, + "learning_rate": 4.699662986281288e-05, + "loss": 0.9672, + "step": 855 + }, + { + "epoch": 0.48, + "learning_rate": 4.696213136853141e-05, + "loss": 1.0164, + "step": 860 + }, + { + "epoch": 0.48, + "learning_rate": 4.6927448680633954e-05, + "loss": 0.9817, + "step": 865 + }, + { + "epoch": 0.48, + "learning_rate": 4.689258208999983e-05, + "loss": 1.028, + "step": 870 + }, + { + "epoch": 0.48, + "learning_rate": 4.6857531889050716e-05, + "loss": 1.0239, + "step": 875 + }, + { + "epoch": 0.49, + "learning_rate": 4.682229837174821e-05, + "loss": 1.0378, + "step": 880 + }, + { + "epoch": 0.49, + "learning_rate": 4.678688183359135e-05, + "loss": 0.9908, + "step": 885 + }, + { + "epoch": 0.49, + "learning_rate": 4.675128257161418e-05, + "loss": 0.9384, + "step": 890 + }, + { + "epoch": 0.49, + "learning_rate": 4.671550088438319e-05, + "loss": 0.9481, + "step": 895 + }, + { + "epoch": 0.5, + "learning_rate": 4.6679537071994874e-05, + "loss": 0.9541, + "step": 900 + }, + { + "epoch": 0.5, + "learning_rate": 4.6643391436073165e-05, + "loss": 0.9787, + "step": 905 + }, + { + "epoch": 0.5, + "learning_rate": 4.660706427976693e-05, + "loss": 0.9897, + "step": 910 + }, + { + "epoch": 0.51, + "learning_rate": 4.657055590774745e-05, + "loss": 0.9947, + "step": 915 + }, + { + "epoch": 0.51, + "learning_rate": 4.6533866626205805e-05, + "loss": 0.9614, + "step": 920 + }, + { + "epoch": 0.51, + "learning_rate": 4.649699674285036e-05, + "loss": 1.0323, + "step": 925 + }, + { + "epoch": 0.51, + "learning_rate": 4.645994656690417e-05, + "loss": 1.0345, + "step": 930 + }, + { + "epoch": 0.52, + "learning_rate": 4.642271640910235e-05, + "loss": 1.0432, + "step": 935 + }, + { + "epoch": 0.52, + "learning_rate": 4.638530658168954e-05, + "loss": 1.0102, + "step": 940 + }, + { + "epoch": 0.52, + "learning_rate": 4.6347717398417203e-05, + "loss": 1.0361, + "step": 945 + }, + { + "epoch": 0.53, + "learning_rate": 4.6309949174541096e-05, + "loss": 0.9818, + "step": 950 + }, + { + "epoch": 0.53, + "learning_rate": 4.627200222681851e-05, + "loss": 1.0293, + "step": 955 + }, + { + "epoch": 0.53, + "learning_rate": 4.6233876873505694e-05, + "loss": 1.0125, + "step": 960 + }, + { + "epoch": 0.53, + "learning_rate": 4.619557343435516e-05, + "loss": 1.0039, + "step": 965 + }, + { + "epoch": 0.54, + "learning_rate": 4.615709223061302e-05, + "loss": 1.0146, + "step": 970 + }, + { + "epoch": 0.54, + "learning_rate": 4.611843358501624e-05, + "loss": 1.0352, + "step": 975 + }, + { + "epoch": 0.54, + "learning_rate": 4.6079597821789993e-05, + "loss": 1.0405, + "step": 980 + }, + { + "epoch": 0.54, + "learning_rate": 4.604058526664491e-05, + "loss": 0.9834, + "step": 985 + }, + { + "epoch": 0.55, + "learning_rate": 4.600139624677436e-05, + "loss": 1.0252, + "step": 990 + }, + { + "epoch": 0.55, + "learning_rate": 4.596203109085168e-05, + "loss": 1.0516, + "step": 995 + }, + { + "epoch": 0.55, + "learning_rate": 4.5922490129027464e-05, + "loss": 0.9762, + "step": 1000 + }, + { + "epoch": 0.56, + "learning_rate": 4.588277369292674e-05, + "loss": 0.9801, + "step": 1005 + }, + { + "epoch": 0.56, + "learning_rate": 4.5842882115646234e-05, + "loss": 1.0127, + "step": 1010 + }, + { + "epoch": 0.56, + "learning_rate": 4.580281573175157e-05, + "loss": 1.0415, + "step": 1015 + }, + { + "epoch": 0.56, + "learning_rate": 4.576257487727442e-05, + "loss": 0.9974, + "step": 1020 + }, + { + "epoch": 0.57, + "learning_rate": 4.572215988970974e-05, + "loss": 1.0097, + "step": 1025 + }, + { + "epoch": 0.57, + "learning_rate": 4.568157110801293e-05, + "loss": 1.0004, + "step": 1030 + }, + { + "epoch": 0.57, + "learning_rate": 4.5640808872596944e-05, + "loss": 0.9932, + "step": 1035 + }, + { + "epoch": 0.58, + "learning_rate": 4.5599873525329505e-05, + "loss": 0.9437, + "step": 1040 + }, + { + "epoch": 0.58, + "learning_rate": 4.555876540953019e-05, + "loss": 1.0004, + "step": 1045 + }, + { + "epoch": 0.58, + "learning_rate": 4.551748486996755e-05, + "loss": 0.9743, + "step": 1050 + }, + { + "epoch": 0.58, + "learning_rate": 4.547603225285626e-05, + "loss": 1.0303, + "step": 1055 + }, + { + "epoch": 0.59, + "learning_rate": 4.543440790585417e-05, + "loss": 1.0225, + "step": 1060 + }, + { + "epoch": 0.59, + "learning_rate": 4.539261217805939e-05, + "loss": 1.0102, + "step": 1065 + }, + { + "epoch": 0.59, + "learning_rate": 4.535064542000743e-05, + "loss": 1.0087, + "step": 1070 + }, + { + "epoch": 0.59, + "learning_rate": 4.5308507983668165e-05, + "loss": 0.952, + "step": 1075 + }, + { + "epoch": 0.6, + "learning_rate": 4.526620022244293e-05, + "loss": 0.9767, + "step": 1080 + }, + { + "epoch": 0.6, + "learning_rate": 4.522372249116158e-05, + "loss": 1.0049, + "step": 1085 + }, + { + "epoch": 0.6, + "learning_rate": 4.5181075146079456e-05, + "loss": 1.0397, + "step": 1090 + }, + { + "epoch": 0.61, + "learning_rate": 4.5138258544874455e-05, + "loss": 1.0167, + "step": 1095 + }, + { + "epoch": 0.61, + "learning_rate": 4.5095273046643985e-05, + "loss": 0.9966, + "step": 1100 + }, + { + "epoch": 0.61, + "learning_rate": 4.5052119011901986e-05, + "loss": 0.983, + "step": 1105 + }, + { + "epoch": 0.61, + "learning_rate": 4.500879680257587e-05, + "loss": 1.0301, + "step": 1110 + }, + { + "epoch": 0.62, + "learning_rate": 4.4965306782003535e-05, + "loss": 1.0371, + "step": 1115 + }, + { + "epoch": 0.62, + "learning_rate": 4.492164931493028e-05, + "loss": 1.0083, + "step": 1120 + }, + { + "epoch": 0.62, + "learning_rate": 4.487782476750575e-05, + "loss": 0.9728, + "step": 1125 + }, + { + "epoch": 0.62, + "learning_rate": 4.4833833507280884e-05, + "loss": 0.9892, + "step": 1130 + }, + { + "epoch": 0.63, + "learning_rate": 4.4789675903204805e-05, + "loss": 0.9847, + "step": 1135 + }, + { + "epoch": 0.63, + "learning_rate": 4.474535232562176e-05, + "loss": 1.0545, + "step": 1140 + }, + { + "epoch": 0.63, + "learning_rate": 4.470086314626797e-05, + "loss": 1.0204, + "step": 1145 + }, + { + "epoch": 0.64, + "learning_rate": 4.465620873826856e-05, + "loss": 0.9648, + "step": 1150 + }, + { + "epoch": 0.64, + "learning_rate": 4.46113894761344e-05, + "loss": 0.9545, + "step": 1155 + }, + { + "epoch": 0.64, + "learning_rate": 4.456640573575896e-05, + "loss": 1.0256, + "step": 1160 + }, + { + "epoch": 0.64, + "learning_rate": 4.4521257894415183e-05, + "loss": 0.99, + "step": 1165 + }, + { + "epoch": 0.65, + "learning_rate": 4.44759463307523e-05, + "loss": 0.9906, + "step": 1170 + }, + { + "epoch": 0.65, + "learning_rate": 4.443047142479266e-05, + "loss": 0.9876, + "step": 1175 + }, + { + "epoch": 0.65, + "learning_rate": 4.4384833557928553e-05, + "loss": 1.0495, + "step": 1180 + }, + { + "epoch": 0.66, + "learning_rate": 4.4339033112918966e-05, + "loss": 0.9869, + "step": 1185 + }, + { + "epoch": 0.66, + "learning_rate": 4.4293070473886456e-05, + "loss": 1.0299, + "step": 1190 + }, + { + "epoch": 0.66, + "learning_rate": 4.424694602631385e-05, + "loss": 1.0073, + "step": 1195 + }, + { + "epoch": 0.66, + "learning_rate": 4.420066015704105e-05, + "loss": 1.0023, + "step": 1200 + }, + { + "epoch": 0.67, + "learning_rate": 4.41542132542618e-05, + "loss": 0.9462, + "step": 1205 + }, + { + "epoch": 0.67, + "learning_rate": 4.410760570752037e-05, + "loss": 1.0116, + "step": 1210 + }, + { + "epoch": 0.67, + "learning_rate": 4.4060837907708375e-05, + "loss": 0.9652, + "step": 1215 + }, + { + "epoch": 0.67, + "learning_rate": 4.401391024706142e-05, + "loss": 1.0411, + "step": 1220 + }, + { + "epoch": 0.68, + "learning_rate": 4.396682311915586e-05, + "loss": 0.9691, + "step": 1225 + }, + { + "epoch": 0.68, + "learning_rate": 4.3919576918905495e-05, + "loss": 0.961, + "step": 1230 + }, + { + "epoch": 0.68, + "learning_rate": 4.387217204255819e-05, + "loss": 0.9602, + "step": 1235 + }, + { + "epoch": 0.69, + "learning_rate": 4.3824608887692666e-05, + "loss": 1.0197, + "step": 1240 + }, + { + "epoch": 0.69, + "learning_rate": 4.377688785321507e-05, + "loss": 0.9982, + "step": 1245 + }, + { + "epoch": 0.69, + "learning_rate": 4.372900933935569e-05, + "loss": 0.986, + "step": 1250 + }, + { + "epoch": 0.69, + "learning_rate": 4.368097374766556e-05, + "loss": 0.9744, + "step": 1255 + }, + { + "epoch": 0.7, + "learning_rate": 4.3632781481013105e-05, + "loss": 1.0078, + "step": 1260 + }, + { + "epoch": 0.7, + "learning_rate": 4.358443294358077e-05, + "loss": 0.9865, + "step": 1265 + }, + { + "epoch": 0.7, + "learning_rate": 4.35359285408616e-05, + "loss": 0.976, + "step": 1270 + }, + { + "epoch": 0.71, + "learning_rate": 4.348726867965591e-05, + "loss": 1.0141, + "step": 1275 + }, + { + "epoch": 0.71, + "learning_rate": 4.343845376806777e-05, + "loss": 0.952, + "step": 1280 + }, + { + "epoch": 0.71, + "learning_rate": 4.338948421550169e-05, + "loss": 1.0239, + "step": 1285 + }, + { + "epoch": 0.71, + "learning_rate": 4.334036043265909e-05, + "loss": 0.994, + "step": 1290 + }, + { + "epoch": 0.72, + "learning_rate": 4.329108283153492e-05, + "loss": 1.0505, + "step": 1295 + }, + { + "epoch": 0.72, + "learning_rate": 4.3241651825414195e-05, + "loss": 0.9919, + "step": 1300 + }, + { + "epoch": 0.72, + "learning_rate": 4.31920678288685e-05, + "loss": 1.0169, + "step": 1305 + }, + { + "epoch": 0.72, + "learning_rate": 4.3142331257752546e-05, + "loss": 0.9938, + "step": 1310 + }, + { + "epoch": 0.73, + "learning_rate": 4.309244252920064e-05, + "loss": 0.92, + "step": 1315 + }, + { + "epoch": 0.73, + "learning_rate": 4.304240206162326e-05, + "loss": 0.9537, + "step": 1320 + }, + { + "epoch": 0.73, + "learning_rate": 4.299221027470345e-05, + "loss": 0.996, + "step": 1325 + }, + { + "epoch": 0.74, + "learning_rate": 4.294186758939339e-05, + "loss": 0.9652, + "step": 1330 + }, + { + "epoch": 0.74, + "learning_rate": 4.2891374427910795e-05, + "loss": 1.0138, + "step": 1335 + }, + { + "epoch": 0.74, + "learning_rate": 4.284073121373544e-05, + "loss": 0.9267, + "step": 1340 + }, + { + "epoch": 0.74, + "learning_rate": 4.278993837160553e-05, + "loss": 0.9899, + "step": 1345 + }, + { + "epoch": 0.75, + "learning_rate": 4.273899632751422e-05, + "loss": 0.963, + "step": 1350 + }, + { + "epoch": 0.75, + "learning_rate": 4.2687905508705974e-05, + "loss": 1.0066, + "step": 1355 + }, + { + "epoch": 0.75, + "learning_rate": 4.263666634367303e-05, + "loss": 0.9939, + "step": 1360 + }, + { + "epoch": 0.75, + "learning_rate": 4.258527926215178e-05, + "loss": 0.9667, + "step": 1365 + }, + { + "epoch": 0.76, + "learning_rate": 4.253374469511917e-05, + "loss": 0.9999, + "step": 1370 + }, + { + "epoch": 0.76, + "learning_rate": 4.248206307478909e-05, + "loss": 0.9738, + "step": 1375 + }, + { + "epoch": 0.76, + "learning_rate": 4.243023483460875e-05, + "loss": 0.9829, + "step": 1380 + }, + { + "epoch": 0.77, + "learning_rate": 4.237826040925503e-05, + "loss": 1.0107, + "step": 1385 + }, + { + "epoch": 0.77, + "learning_rate": 4.232614023463088e-05, + "loss": 0.9992, + "step": 1390 + }, + { + "epoch": 0.77, + "learning_rate": 4.227387474786159e-05, + "loss": 0.9566, + "step": 1395 + }, + { + "epoch": 0.77, + "learning_rate": 4.222146438729119e-05, + "loss": 0.9722, + "step": 1400 + }, + { + "epoch": 0.78, + "learning_rate": 4.216890959247873e-05, + "loss": 0.988, + "step": 1405 + }, + { + "epoch": 0.78, + "learning_rate": 4.211621080419463e-05, + "loss": 1.0103, + "step": 1410 + }, + { + "epoch": 0.78, + "learning_rate": 4.206336846441695e-05, + "loss": 0.9805, + "step": 1415 + }, + { + "epoch": 0.79, + "learning_rate": 4.201038301632772e-05, + "loss": 1.0177, + "step": 1420 + }, + { + "epoch": 0.79, + "learning_rate": 4.195725490430917e-05, + "loss": 0.9938, + "step": 1425 + }, + { + "epoch": 0.79, + "learning_rate": 4.190398457394007e-05, + "loss": 1.0276, + "step": 1430 + }, + { + "epoch": 0.79, + "learning_rate": 4.1850572471991924e-05, + "loss": 0.9909, + "step": 1435 + }, + { + "epoch": 0.8, + "learning_rate": 4.1797019046425264e-05, + "loss": 0.9794, + "step": 1440 + }, + { + "epoch": 0.8, + "learning_rate": 4.1743324746385914e-05, + "loss": 1.0073, + "step": 1445 + }, + { + "epoch": 0.8, + "learning_rate": 4.1689490022201154e-05, + "loss": 1.0106, + "step": 1450 + }, + { + "epoch": 0.8, + "learning_rate": 4.163551532537601e-05, + "loss": 1.0241, + "step": 1455 + }, + { + "epoch": 0.81, + "learning_rate": 4.1581401108589425e-05, + "loss": 1.0178, + "step": 1460 + }, + { + "epoch": 0.81, + "learning_rate": 4.1527147825690495e-05, + "loss": 0.9426, + "step": 1465 + }, + { + "epoch": 0.81, + "learning_rate": 4.1472755931694626e-05, + "loss": 0.9642, + "step": 1470 + }, + { + "epoch": 0.82, + "learning_rate": 4.141822588277976e-05, + "loss": 0.9678, + "step": 1475 + }, + { + "epoch": 0.82, + "learning_rate": 4.136355813628251e-05, + "loss": 0.9654, + "step": 1480 + }, + { + "epoch": 0.82, + "learning_rate": 4.130875315069435e-05, + "loss": 0.9748, + "step": 1485 + }, + { + "epoch": 0.82, + "learning_rate": 4.125381138565775e-05, + "loss": 0.9321, + "step": 1490 + }, + { + "epoch": 0.83, + "learning_rate": 4.1198733301962346e-05, + "loss": 0.9885, + "step": 1495 + }, + { + "epoch": 0.83, + "learning_rate": 4.114351936154105e-05, + "loss": 0.9385, + "step": 1500 + }, + { + "epoch": 0.83, + "learning_rate": 4.108817002746619e-05, + "loss": 0.9362, + "step": 1505 + }, + { + "epoch": 0.83, + "learning_rate": 4.1032685763945625e-05, + "loss": 0.9764, + "step": 1510 + }, + { + "epoch": 0.84, + "learning_rate": 4.097706703631886e-05, + "loss": 0.961, + "step": 1515 + }, + { + "epoch": 0.84, + "learning_rate": 4.092131431105312e-05, + "loss": 0.9818, + "step": 1520 + }, + { + "epoch": 0.84, + "learning_rate": 4.086542805573945e-05, + "loss": 0.9855, + "step": 1525 + }, + { + "epoch": 0.85, + "learning_rate": 4.080940873908881e-05, + "loss": 0.9618, + "step": 1530 + }, + { + "epoch": 0.85, + "learning_rate": 4.07532568309281e-05, + "loss": 1.0102, + "step": 1535 + }, + { + "epoch": 0.85, + "learning_rate": 4.069697280219628e-05, + "loss": 0.9773, + "step": 1540 + }, + { + "epoch": 0.85, + "learning_rate": 4.0640557124940376e-05, + "loss": 0.9687, + "step": 1545 + }, + { + "epoch": 0.86, + "learning_rate": 4.058401027231152e-05, + "loss": 0.9818, + "step": 1550 + }, + { + "epoch": 0.86, + "learning_rate": 4.052733271856103e-05, + "loss": 0.9911, + "step": 1555 + }, + { + "epoch": 0.86, + "learning_rate": 4.0470524939036355e-05, + "loss": 0.9694, + "step": 1560 + }, + { + "epoch": 0.87, + "learning_rate": 4.0413587410177155e-05, + "loss": 0.9374, + "step": 1565 + }, + { + "epoch": 0.87, + "learning_rate": 4.035652060951128e-05, + "loss": 0.956, + "step": 1570 + }, + { + "epoch": 0.87, + "learning_rate": 4.0299325015650774e-05, + "loss": 0.9491, + "step": 1575 + }, + { + "epoch": 0.87, + "learning_rate": 4.024200110828783e-05, + "loss": 0.9569, + "step": 1580 + }, + { + "epoch": 0.88, + "learning_rate": 4.018454936819082e-05, + "loss": 0.9627, + "step": 1585 + }, + { + "epoch": 0.88, + "learning_rate": 4.012697027720018e-05, + "loss": 0.9922, + "step": 1590 + }, + { + "epoch": 0.88, + "learning_rate": 4.0069264318224506e-05, + "loss": 0.9703, + "step": 1595 + }, + { + "epoch": 0.88, + "learning_rate": 4.0011431975236337e-05, + "loss": 0.9436, + "step": 1600 + }, + { + "epoch": 0.89, + "learning_rate": 3.995347373326822e-05, + "loss": 0.961, + "step": 1605 + }, + { + "epoch": 0.89, + "learning_rate": 3.989539007840861e-05, + "loss": 0.9247, + "step": 1610 + }, + { + "epoch": 0.89, + "learning_rate": 3.983718149779775e-05, + "loss": 0.9537, + "step": 1615 + }, + { + "epoch": 0.9, + "learning_rate": 3.9778848479623656e-05, + "loss": 0.9415, + "step": 1620 + }, + { + "epoch": 0.9, + "learning_rate": 3.972039151311795e-05, + "loss": 0.9954, + "step": 1625 + }, + { + "epoch": 0.9, + "learning_rate": 3.966181108855183e-05, + "loss": 0.9451, + "step": 1630 + }, + { + "epoch": 0.9, + "learning_rate": 3.960310769723189e-05, + "loss": 0.9977, + "step": 1635 + }, + { + "epoch": 0.91, + "learning_rate": 3.9544281831496034e-05, + "loss": 0.9806, + "step": 1640 + }, + { + "epoch": 0.91, + "learning_rate": 3.9485333984709374e-05, + "loss": 0.9851, + "step": 1645 + }, + { + "epoch": 0.91, + "learning_rate": 3.942626465126001e-05, + "loss": 1.0089, + "step": 1650 + }, + { + "epoch": 0.92, + "learning_rate": 3.9367074326555e-05, + "loss": 0.9562, + "step": 1655 + }, + { + "epoch": 0.92, + "learning_rate": 3.930776350701609e-05, + "loss": 0.9892, + "step": 1660 + }, + { + "epoch": 0.92, + "learning_rate": 3.92483326900756e-05, + "loss": 0.9875, + "step": 1665 + }, + { + "epoch": 0.92, + "learning_rate": 3.91887823741723e-05, + "loss": 1.0012, + "step": 1670 + }, + { + "epoch": 0.93, + "learning_rate": 3.9129113058747136e-05, + "loss": 0.97, + "step": 1675 + }, + { + "epoch": 0.93, + "learning_rate": 3.9069325244239095e-05, + "loss": 0.9793, + "step": 1680 + }, + { + "epoch": 0.93, + "learning_rate": 3.900941943208103e-05, + "loss": 0.9864, + "step": 1685 + }, + { + "epoch": 0.93, + "learning_rate": 3.894939612469539e-05, + "loss": 0.9596, + "step": 1690 + }, + { + "epoch": 0.94, + "learning_rate": 3.888925582549006e-05, + "loss": 0.9679, + "step": 1695 + }, + { + "epoch": 0.94, + "learning_rate": 3.882899903885412e-05, + "loss": 0.9395, + "step": 1700 + }, + { + "epoch": 0.94, + "learning_rate": 3.876862627015361e-05, + "loss": 0.9774, + "step": 1705 + }, + { + "epoch": 0.95, + "learning_rate": 3.87081380257273e-05, + "loss": 0.9686, + "step": 1710 + }, + { + "epoch": 0.95, + "learning_rate": 3.864753481288244e-05, + "loss": 0.953, + "step": 1715 + }, + { + "epoch": 0.95, + "learning_rate": 3.8586817139890515e-05, + "loss": 0.9719, + "step": 1720 + }, + { + "epoch": 0.95, + "learning_rate": 3.852598551598294e-05, + "loss": 0.9748, + "step": 1725 + }, + { + "epoch": 0.96, + "learning_rate": 3.8465040451346874e-05, + "loss": 0.9741, + "step": 1730 + }, + { + "epoch": 0.96, + "learning_rate": 3.8403982457120836e-05, + "loss": 0.9747, + "step": 1735 + }, + { + "epoch": 0.96, + "learning_rate": 3.834281204539051e-05, + "loss": 0.9791, + "step": 1740 + }, + { + "epoch": 0.96, + "learning_rate": 3.828152972918438e-05, + "loss": 0.9704, + "step": 1745 + }, + { + "epoch": 0.97, + "learning_rate": 3.82201360224695e-05, + "loss": 0.9556, + "step": 1750 + }, + { + "epoch": 0.97, + "learning_rate": 3.815863144014711e-05, + "loss": 0.9552, + "step": 1755 + }, + { + "epoch": 0.97, + "learning_rate": 3.809701649804834e-05, + "loss": 0.9594, + "step": 1760 + }, + { + "epoch": 0.98, + "learning_rate": 3.8035291712929926e-05, + "loss": 0.959, + "step": 1765 + }, + { + "epoch": 0.98, + "learning_rate": 3.797345760246982e-05, + "loss": 1.0254, + "step": 1770 + }, + { + "epoch": 0.98, + "learning_rate": 3.791151468526289e-05, + "loss": 0.9925, + "step": 1775 + }, + { + "epoch": 0.98, + "learning_rate": 3.784946348081654e-05, + "loss": 0.9516, + "step": 1780 + }, + { + "epoch": 0.99, + "learning_rate": 3.7787304509546365e-05, + "loss": 0.954, + "step": 1785 + }, + { + "epoch": 0.99, + "learning_rate": 3.7725038292771774e-05, + "loss": 0.9465, + "step": 1790 + }, + { + "epoch": 0.99, + "learning_rate": 3.766266535271167e-05, + "loss": 0.9792, + "step": 1795 + }, + { + "epoch": 1.0, + "learning_rate": 3.760018621248e-05, + "loss": 0.964, + "step": 1800 + }, + { + "epoch": 1.0, + "learning_rate": 3.75376013960814e-05, + "loss": 0.9419, + "step": 1805 + }, + { + "epoch": 1.0, + "learning_rate": 3.747491142840681e-05, + "loss": 0.9818, + "step": 1810 + }, + { + "epoch": 1.0, + "learning_rate": 3.741211683522904e-05, + "loss": 0.9153, + "step": 1815 + }, + { + "epoch": 1.01, + "learning_rate": 3.734921814319841e-05, + "loss": 0.955, + "step": 1820 + }, + { + "epoch": 1.01, + "learning_rate": 3.728621587983828e-05, + "loss": 0.9467, + "step": 1825 + }, + { + "epoch": 1.01, + "learning_rate": 3.722311057354067e-05, + "loss": 0.9816, + "step": 1830 + }, + { + "epoch": 1.01, + "learning_rate": 3.715990275356178e-05, + "loss": 0.9727, + "step": 1835 + }, + { + "epoch": 1.02, + "learning_rate": 3.7096592950017617e-05, + "loss": 0.9822, + "step": 1840 + }, + { + "epoch": 1.02, + "learning_rate": 3.703318169387947e-05, + "loss": 0.942, + "step": 1845 + }, + { + "epoch": 1.02, + "learning_rate": 3.696966951696952e-05, + "loss": 0.9306, + "step": 1850 + }, + { + "epoch": 1.03, + "learning_rate": 3.690605695195637e-05, + "loss": 0.9654, + "step": 1855 + }, + { + "epoch": 1.03, + "learning_rate": 3.684234453235054e-05, + "loss": 0.9592, + "step": 1860 + }, + { + "epoch": 1.03, + "learning_rate": 3.677853279250003e-05, + "loss": 0.988, + "step": 1865 + }, + { + "epoch": 1.03, + "learning_rate": 3.671462226758583e-05, + "loss": 0.9462, + "step": 1870 + }, + { + "epoch": 1.04, + "learning_rate": 3.665061349361742e-05, + "loss": 0.9685, + "step": 1875 + }, + { + "epoch": 1.04, + "learning_rate": 3.658650700742828e-05, + "loss": 0.9772, + "step": 1880 + }, + { + "epoch": 1.04, + "learning_rate": 3.6522303346671404e-05, + "loss": 0.9482, + "step": 1885 + }, + { + "epoch": 1.05, + "learning_rate": 3.645800304981477e-05, + "loss": 1.0069, + "step": 1890 + }, + { + "epoch": 1.05, + "learning_rate": 3.639360665613683e-05, + "loss": 0.9003, + "step": 1895 + }, + { + "epoch": 1.05, + "learning_rate": 3.632911470572197e-05, + "loss": 0.9279, + "step": 1900 + }, + { + "epoch": 1.05, + "learning_rate": 3.626452773945603e-05, + "loss": 0.9237, + "step": 1905 + }, + { + "epoch": 1.06, + "learning_rate": 3.619984629902172e-05, + "loss": 1.0086, + "step": 1910 + }, + { + "epoch": 1.06, + "learning_rate": 3.613507092689409e-05, + "loss": 0.9625, + "step": 1915 + }, + { + "epoch": 1.06, + "learning_rate": 3.607020216633599e-05, + "loss": 0.9297, + "step": 1920 + }, + { + "epoch": 1.06, + "learning_rate": 3.60052405613935e-05, + "loss": 0.9718, + "step": 1925 + }, + { + "epoch": 1.07, + "learning_rate": 3.594018665689139e-05, + "loss": 0.9512, + "step": 1930 + }, + { + "epoch": 1.07, + "learning_rate": 3.5875040998428513e-05, + "loss": 0.9923, + "step": 1935 + }, + { + "epoch": 1.07, + "learning_rate": 3.5809804132373253e-05, + "loss": 0.9518, + "step": 1940 + }, + { + "epoch": 1.08, + "learning_rate": 3.574447660585897e-05, + "loss": 0.9324, + "step": 1945 + }, + { + "epoch": 1.08, + "learning_rate": 3.5679058966779344e-05, + "loss": 0.9724, + "step": 1950 + }, + { + "epoch": 1.08, + "learning_rate": 3.561355176378384e-05, + "loss": 0.978, + "step": 1955 + }, + { + "epoch": 1.08, + "learning_rate": 3.554795554627307e-05, + "loss": 0.9893, + "step": 1960 + }, + { + "epoch": 1.09, + "learning_rate": 3.548227086439422e-05, + "loss": 0.967, + "step": 1965 + }, + { + "epoch": 1.09, + "learning_rate": 3.541649826903639e-05, + "loss": 0.9529, + "step": 1970 + }, + { + "epoch": 1.09, + "learning_rate": 3.535063831182602e-05, + "loss": 0.9506, + "step": 1975 + }, + { + "epoch": 1.09, + "learning_rate": 3.528469154512224e-05, + "loss": 0.9525, + "step": 1980 + }, + { + "epoch": 1.1, + "learning_rate": 3.521865852201223e-05, + "loss": 0.9258, + "step": 1985 + }, + { + "epoch": 1.1, + "learning_rate": 3.5152539796306596e-05, + "loss": 0.9417, + "step": 1990 + }, + { + "epoch": 1.1, + "learning_rate": 3.508633592253472e-05, + "loss": 0.9465, + "step": 1995 + }, + { + "epoch": 1.11, + "learning_rate": 3.502004745594011e-05, + "loss": 0.9494, + "step": 2000 + }, + { + "epoch": 1.11, + "learning_rate": 3.4953674952475755e-05, + "loss": 0.9709, + "step": 2005 + }, + { + "epoch": 1.11, + "learning_rate": 3.488721896879943e-05, + "loss": 0.9581, + "step": 2010 + }, + { + "epoch": 1.11, + "learning_rate": 3.4820680062269074e-05, + "loss": 0.974, + "step": 2015 + }, + { + "epoch": 1.12, + "learning_rate": 3.4754058790938046e-05, + "loss": 0.9768, + "step": 2020 + }, + { + "epoch": 1.12, + "learning_rate": 3.468735571355055e-05, + "loss": 0.982, + "step": 2025 + }, + { + "epoch": 1.12, + "learning_rate": 3.4620571389536825e-05, + "loss": 0.984, + "step": 2030 + }, + { + "epoch": 1.13, + "learning_rate": 3.455370637900856e-05, + "loss": 0.9604, + "step": 2035 + }, + { + "epoch": 1.13, + "learning_rate": 3.448676124275414e-05, + "loss": 0.897, + "step": 2040 + }, + { + "epoch": 1.13, + "learning_rate": 3.4419736542233925e-05, + "loss": 0.9968, + "step": 2045 + }, + { + "epoch": 1.13, + "learning_rate": 3.4352632839575616e-05, + "loss": 0.9479, + "step": 2050 + }, + { + "epoch": 1.14, + "learning_rate": 3.428545069756946e-05, + "loss": 0.9724, + "step": 2055 + }, + { + "epoch": 1.14, + "learning_rate": 3.42181906796636e-05, + "loss": 0.9493, + "step": 2060 + }, + { + "epoch": 1.14, + "learning_rate": 3.415085334995927e-05, + "loss": 0.9348, + "step": 2065 + }, + { + "epoch": 1.14, + "learning_rate": 3.408343927320613e-05, + "loss": 0.9702, + "step": 2070 + }, + { + "epoch": 1.15, + "learning_rate": 3.401594901479753e-05, + "loss": 0.9089, + "step": 2075 + }, + { + "epoch": 1.15, + "learning_rate": 3.394838314076572e-05, + "loss": 0.9606, + "step": 2080 + }, + { + "epoch": 1.15, + "learning_rate": 3.3880742217777115e-05, + "loss": 0.9743, + "step": 2085 + }, + { + "epoch": 1.16, + "learning_rate": 3.381302681312759e-05, + "loss": 0.9469, + "step": 2090 + }, + { + "epoch": 1.16, + "learning_rate": 3.374523749473767e-05, + "loss": 0.949, + "step": 2095 + }, + { + "epoch": 1.16, + "learning_rate": 3.367737483114779e-05, + "loss": 0.9567, + "step": 2100 + }, + { + "epoch": 1.16, + "learning_rate": 3.360943939151351e-05, + "loss": 0.9718, + "step": 2105 + }, + { + "epoch": 1.17, + "learning_rate": 3.354143174560078e-05, + "loss": 0.9626, + "step": 2110 + }, + { + "epoch": 1.17, + "learning_rate": 3.3473352463781105e-05, + "loss": 0.9346, + "step": 2115 + }, + { + "epoch": 1.17, + "learning_rate": 3.340520211702681e-05, + "loss": 0.9208, + "step": 2120 + }, + { + "epoch": 1.18, + "learning_rate": 3.333698127690623e-05, + "loss": 0.9856, + "step": 2125 + }, + { + "epoch": 1.18, + "learning_rate": 3.326869051557891e-05, + "loss": 0.9049, + "step": 2130 + }, + { + "epoch": 1.18, + "learning_rate": 3.320033040579082e-05, + "loss": 0.9222, + "step": 2135 + }, + { + "epoch": 1.18, + "learning_rate": 3.3131901520869565e-05, + "loss": 0.9648, + "step": 2140 + }, + { + "epoch": 1.19, + "learning_rate": 3.306340443471951e-05, + "loss": 0.9538, + "step": 2145 + }, + { + "epoch": 1.19, + "learning_rate": 3.299483972181708e-05, + "loss": 0.9314, + "step": 2150 + }, + { + "epoch": 1.19, + "learning_rate": 3.292620795720583e-05, + "loss": 0.9576, + "step": 2155 + }, + { + "epoch": 1.19, + "learning_rate": 3.285750971649167e-05, + "loss": 0.9499, + "step": 2160 + }, + { + "epoch": 1.2, + "learning_rate": 3.278874557583807e-05, + "loss": 0.9568, + "step": 2165 + }, + { + "epoch": 1.2, + "learning_rate": 3.271991611196117e-05, + "loss": 0.9642, + "step": 2170 + }, + { + "epoch": 1.2, + "learning_rate": 3.265102190212497e-05, + "loss": 0.9526, + "step": 2175 + }, + { + "epoch": 1.21, + "learning_rate": 3.258206352413648e-05, + "loss": 0.933, + "step": 2180 + }, + { + "epoch": 1.21, + "learning_rate": 3.2513041556340887e-05, + "loss": 0.9683, + "step": 2185 + }, + { + "epoch": 1.21, + "learning_rate": 3.244395657761671e-05, + "loss": 0.9155, + "step": 2190 + }, + { + "epoch": 1.21, + "learning_rate": 3.2374809167370924e-05, + "loss": 0.9262, + "step": 2195 + }, + { + "epoch": 1.22, + "learning_rate": 3.230559990553409e-05, + "loss": 0.9778, + "step": 2200 + }, + { + "epoch": 1.22, + "learning_rate": 3.2236329372555544e-05, + "loss": 0.9577, + "step": 2205 + }, + { + "epoch": 1.22, + "learning_rate": 3.2166998149398465e-05, + "loss": 0.9286, + "step": 2210 + }, + { + "epoch": 1.22, + "learning_rate": 3.209760681753505e-05, + "loss": 0.9634, + "step": 2215 + }, + { + "epoch": 1.23, + "learning_rate": 3.2028155958941615e-05, + "loss": 0.9467, + "step": 2220 + }, + { + "epoch": 1.23, + "learning_rate": 3.195864615609373e-05, + "loss": 0.9543, + "step": 2225 + }, + { + "epoch": 1.23, + "learning_rate": 3.1889077991961304e-05, + "loss": 0.9914, + "step": 2230 + }, + { + "epoch": 1.24, + "learning_rate": 3.181945205000373e-05, + "loss": 0.9309, + "step": 2235 + }, + { + "epoch": 1.24, + "learning_rate": 3.1749768914164955e-05, + "loss": 0.9299, + "step": 2240 + }, + { + "epoch": 1.24, + "learning_rate": 3.168002916886864e-05, + "loss": 0.9462, + "step": 2245 + }, + { + "epoch": 1.24, + "learning_rate": 3.1610233399013194e-05, + "loss": 0.948, + "step": 2250 + }, + { + "epoch": 1.25, + "learning_rate": 3.15403821899669e-05, + "loss": 0.9335, + "step": 2255 + }, + { + "epoch": 1.25, + "learning_rate": 3.147047612756302e-05, + "loss": 0.952, + "step": 2260 + }, + { + "epoch": 1.25, + "learning_rate": 3.140051579809484e-05, + "loss": 0.9532, + "step": 2265 + }, + { + "epoch": 1.26, + "learning_rate": 3.133050178831079e-05, + "loss": 0.9853, + "step": 2270 + }, + { + "epoch": 1.26, + "learning_rate": 3.12604346854095e-05, + "loss": 0.8925, + "step": 2275 + }, + { + "epoch": 1.26, + "learning_rate": 3.119031507703491e-05, + "loss": 0.942, + "step": 2280 + }, + { + "epoch": 1.26, + "learning_rate": 3.112014355127129e-05, + "loss": 0.9132, + "step": 2285 + }, + { + "epoch": 1.27, + "learning_rate": 3.104992069663835e-05, + "loss": 0.9335, + "step": 2290 + }, + { + "epoch": 1.27, + "learning_rate": 3.0979647102086273e-05, + "loss": 0.9403, + "step": 2295 + }, + { + "epoch": 1.27, + "learning_rate": 3.090932335699081e-05, + "loss": 0.9246, + "step": 2300 + } + ], + "max_steps": 5424, + "num_train_epochs": 3, + "total_flos": 7.474114065963418e+17, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-2300/training_args.bin b/checkpoint-2300/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..77433459daca6603950afc463ff989e817f81ac3 --- /dev/null +++ b/checkpoint-2300/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0f6a8853599a52b1f15984d75ce4b6434c335fdc67d4509d0b2ca0d8c613590a +size 3362 diff --git a/checkpoint-2400/README.md b/checkpoint-2400/README.md new file mode 100644 index 0000000000000000000000000000000000000000..53c0fe6b436905ad7a2c060e98fe1ebdeac1a2fe --- /dev/null +++ b/checkpoint-2400/README.md @@ -0,0 +1,15 @@ +--- +library_name: peft +--- +## Training procedure + + +The following `bitsandbytes` quantization config was used during training: +- load_in_8bit: True +- llm_int8_threshold: 6.0 +- llm_int8_skip_modules: None +- llm_int8_enable_fp32_cpu_offload: False +### Framework versions + + +- PEFT 0.5.0 diff --git a/checkpoint-2400/adapter_config.json b/checkpoint-2400/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..514ae99ac7471cc26016f7575e44dd8304f29b07 --- /dev/null +++ b/checkpoint-2400/adapter_config.json @@ -0,0 +1,21 @@ +{ + "auto_mapping": null, + "base_model_name_or_path": "meta-llama/Llama-2-7b-chat-hf", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "q_proj", + "v_proj" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-2400/adapter_model.bin b/checkpoint-2400/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..a342cccef9dabab79ceb7772e9c4cffe7a1590bb --- /dev/null +++ b/checkpoint-2400/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:388875a628eb6e6476ad9f53c3be64d1a15c7e37e0a1634f963c3b8f341592e9 +size 16821197 diff --git a/checkpoint-2400/finetuning_args.json b/checkpoint-2400/finetuning_args.json new file mode 100644 index 0000000000000000000000000000000000000000..d01efc206b59c6f88548e8f3940579f2ed2af33b --- /dev/null +++ b/checkpoint-2400/finetuning_args.json @@ -0,0 +1,16 @@ +{ + "dpo_beta": 0.1, + "finetuning_type": "lora", + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "lora_rank": 8, + "lora_target": [ + "q_proj", + "v_proj" + ], + "name_module_trainable": "mlp", + "num_hidden_layers": 32, + "num_layer_trainable": 3, + "ppo_score_norm": false, + "resume_lora_training": true +} diff --git a/checkpoint-2400/optimizer.pt b/checkpoint-2400/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..418034e1f5f5ce5bc2bd7c8c2fb49f2c907f6092 --- /dev/null +++ b/checkpoint-2400/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:462d2f76cf4d5a32de4e792146972493aec5db696831e612f3182bd6139a97e8 +size 33629893 diff --git a/checkpoint-2400/rng_state.pth b/checkpoint-2400/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..52b95b3f8db2caff009138e0590ef5b8fccdd8cd --- /dev/null +++ b/checkpoint-2400/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:09a6c07ecf4844d8b23487b2f3e0c59fb06689b37bed94e7bc34b387bd0c2e6f +size 14575 diff --git a/checkpoint-2400/scaler.pt b/checkpoint-2400/scaler.pt new file mode 100644 index 0000000000000000000000000000000000000000..05c991fe0d31f36923f65fa09bb9d7c1bde541bd --- /dev/null +++ b/checkpoint-2400/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e3394d96409ceafdf6f72a31a1eab4e95f434c26b3e6eb0029414a0b03634c63 +size 557 diff --git a/checkpoint-2400/scheduler.pt b/checkpoint-2400/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..5f207920313304c1745298423730fe834bb62ced --- /dev/null +++ b/checkpoint-2400/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:28b0da3b14ee975aa1b58eb33dee90aafeb02380723700235cb34656afcb0253 +size 627 diff --git a/checkpoint-2400/trainer_state.json b/checkpoint-2400/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..7689d7ca1da23cbe3414b03d1648d983760b545e --- /dev/null +++ b/checkpoint-2400/trainer_state.json @@ -0,0 +1,2896 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.3270666298037046, + "global_step": 2400, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.9999895164082156e-05, + "loss": 1.6349, + "step": 5 + }, + { + "epoch": 0.01, + "learning_rate": 4.999958065720787e-05, + "loss": 1.6199, + "step": 10 + }, + { + "epoch": 0.01, + "learning_rate": 4.999905648201487e-05, + "loss": 1.4834, + "step": 15 + }, + { + "epoch": 0.01, + "learning_rate": 4.999832264289934e-05, + "loss": 1.3882, + "step": 20 + }, + { + "epoch": 0.01, + "learning_rate": 4.999737914601591e-05, + "loss": 1.3679, + "step": 25 + }, + { + "epoch": 0.02, + "learning_rate": 4.999622599927756e-05, + "loss": 1.2396, + "step": 30 + }, + { + "epoch": 0.02, + "learning_rate": 4.999486321235559e-05, + "loss": 1.321, + "step": 35 + }, + { + "epoch": 0.02, + "learning_rate": 4.9993290796679516e-05, + "loss": 1.2874, + "step": 40 + }, + { + "epoch": 0.02, + "learning_rate": 4.999150876543699e-05, + "loss": 1.2607, + "step": 45 + }, + { + "epoch": 0.03, + "learning_rate": 4.9989517133573694e-05, + "loss": 1.2454, + "step": 50 + }, + { + "epoch": 0.03, + "learning_rate": 4.9987315917793174e-05, + "loss": 1.2799, + "step": 55 + }, + { + "epoch": 0.03, + "learning_rate": 4.998490513655676e-05, + "loss": 1.2575, + "step": 60 + }, + { + "epoch": 0.04, + "learning_rate": 4.998228481008337e-05, + "loss": 1.2404, + "step": 65 + }, + { + "epoch": 0.04, + "learning_rate": 4.997945496034934e-05, + "loss": 1.2219, + "step": 70 + }, + { + "epoch": 0.04, + "learning_rate": 4.9976415611088267e-05, + "loss": 1.2241, + "step": 75 + }, + { + "epoch": 0.04, + "learning_rate": 4.997316678779079e-05, + "loss": 1.1716, + "step": 80 + }, + { + "epoch": 0.05, + "learning_rate": 4.996970851770438e-05, + "loss": 1.1883, + "step": 85 + }, + { + "epoch": 0.05, + "learning_rate": 4.9966040829833115e-05, + "loss": 1.205, + "step": 90 + }, + { + "epoch": 0.05, + "learning_rate": 4.9962163754937426e-05, + "loss": 1.1246, + "step": 95 + }, + { + "epoch": 0.06, + "learning_rate": 4.995807732553384e-05, + "loss": 1.1636, + "step": 100 + }, + { + "epoch": 0.06, + "learning_rate": 4.9953781575894723e-05, + "loss": 1.158, + "step": 105 + }, + { + "epoch": 0.06, + "learning_rate": 4.9949276542048e-05, + "loss": 1.1477, + "step": 110 + }, + { + "epoch": 0.06, + "learning_rate": 4.9944562261776805e-05, + "loss": 1.1678, + "step": 115 + }, + { + "epoch": 0.07, + "learning_rate": 4.9939638774619216e-05, + "loss": 1.1501, + "step": 120 + }, + { + "epoch": 0.07, + "learning_rate": 4.99345061218679e-05, + "loss": 1.1955, + "step": 125 + }, + { + "epoch": 0.07, + "learning_rate": 4.9929164346569756e-05, + "loss": 1.1724, + "step": 130 + }, + { + "epoch": 0.07, + "learning_rate": 4.9923613493525576e-05, + "loss": 1.177, + "step": 135 + }, + { + "epoch": 0.08, + "learning_rate": 4.991785360928968e-05, + "loss": 1.1418, + "step": 140 + }, + { + "epoch": 0.08, + "learning_rate": 4.991188474216947e-05, + "loss": 1.1898, + "step": 145 + }, + { + "epoch": 0.08, + "learning_rate": 4.9905706942225094e-05, + "loss": 1.1479, + "step": 150 + }, + { + "epoch": 0.09, + "learning_rate": 4.9899320261268966e-05, + "loss": 1.1356, + "step": 155 + }, + { + "epoch": 0.09, + "learning_rate": 4.989272475286537e-05, + "loss": 1.1397, + "step": 160 + }, + { + "epoch": 0.09, + "learning_rate": 4.9885920472330004e-05, + "loss": 1.1215, + "step": 165 + }, + { + "epoch": 0.09, + "learning_rate": 4.9878907476729516e-05, + "loss": 1.167, + "step": 170 + }, + { + "epoch": 0.1, + "learning_rate": 4.9871685824881e-05, + "loss": 1.1219, + "step": 175 + }, + { + "epoch": 0.1, + "learning_rate": 4.9864255577351534e-05, + "loss": 1.0835, + "step": 180 + }, + { + "epoch": 0.1, + "learning_rate": 4.985661679645769e-05, + "loss": 1.0721, + "step": 185 + }, + { + "epoch": 0.11, + "learning_rate": 4.9848769546264915e-05, + "loss": 1.0692, + "step": 190 + }, + { + "epoch": 0.11, + "learning_rate": 4.9840713892587146e-05, + "loss": 1.0488, + "step": 195 + }, + { + "epoch": 0.11, + "learning_rate": 4.983244990298609e-05, + "loss": 1.1285, + "step": 200 + }, + { + "epoch": 0.11, + "learning_rate": 4.982397764677081e-05, + "loss": 1.0832, + "step": 205 + }, + { + "epoch": 0.12, + "learning_rate": 4.981529719499704e-05, + "loss": 1.0652, + "step": 210 + }, + { + "epoch": 0.12, + "learning_rate": 4.980640862046663e-05, + "loss": 1.1043, + "step": 215 + }, + { + "epoch": 0.12, + "learning_rate": 4.979731199772693e-05, + "loss": 1.112, + "step": 220 + }, + { + "epoch": 0.12, + "learning_rate": 4.9788007403070146e-05, + "loss": 1.1029, + "step": 225 + }, + { + "epoch": 0.13, + "learning_rate": 4.977849491453277e-05, + "loss": 1.0869, + "step": 230 + }, + { + "epoch": 0.13, + "learning_rate": 4.976877461189481e-05, + "loss": 1.0843, + "step": 235 + }, + { + "epoch": 0.13, + "learning_rate": 4.975884657667922e-05, + "loss": 1.0789, + "step": 240 + }, + { + "epoch": 0.14, + "learning_rate": 4.974871089215118e-05, + "loss": 1.0449, + "step": 245 + }, + { + "epoch": 0.14, + "learning_rate": 4.9738367643317405e-05, + "loss": 1.1053, + "step": 250 + }, + { + "epoch": 0.14, + "learning_rate": 4.9727816916925395e-05, + "loss": 1.0651, + "step": 255 + }, + { + "epoch": 0.14, + "learning_rate": 4.971705880146276e-05, + "loss": 1.0828, + "step": 260 + }, + { + "epoch": 0.15, + "learning_rate": 4.970609338715646e-05, + "loss": 1.0932, + "step": 265 + }, + { + "epoch": 0.15, + "learning_rate": 4.969492076597203e-05, + "loss": 1.0648, + "step": 270 + }, + { + "epoch": 0.15, + "learning_rate": 4.968354103161283e-05, + "loss": 1.0948, + "step": 275 + }, + { + "epoch": 0.15, + "learning_rate": 4.967195427951926e-05, + "loss": 1.0721, + "step": 280 + }, + { + "epoch": 0.16, + "learning_rate": 4.9660160606867936e-05, + "loss": 1.124, + "step": 285 + }, + { + "epoch": 0.16, + "learning_rate": 4.9648160112570896e-05, + "loss": 1.0963, + "step": 290 + }, + { + "epoch": 0.16, + "learning_rate": 4.9635952897274773e-05, + "loss": 1.1078, + "step": 295 + }, + { + "epoch": 0.17, + "learning_rate": 4.9623539063359925e-05, + "loss": 1.1059, + "step": 300 + }, + { + "epoch": 0.17, + "learning_rate": 4.961091871493962e-05, + "loss": 1.1032, + "step": 305 + }, + { + "epoch": 0.17, + "learning_rate": 4.959809195785912e-05, + "loss": 1.0595, + "step": 310 + }, + { + "epoch": 0.17, + "learning_rate": 4.958505889969481e-05, + "loss": 1.1096, + "step": 315 + }, + { + "epoch": 0.18, + "learning_rate": 4.957181964975329e-05, + "loss": 1.0589, + "step": 320 + }, + { + "epoch": 0.18, + "learning_rate": 4.955837431907049e-05, + "loss": 1.0608, + "step": 325 + }, + { + "epoch": 0.18, + "learning_rate": 4.954472302041069e-05, + "loss": 1.0819, + "step": 330 + }, + { + "epoch": 0.19, + "learning_rate": 4.9530865868265605e-05, + "loss": 1.0759, + "step": 335 + }, + { + "epoch": 0.19, + "learning_rate": 4.951680297885342e-05, + "loss": 1.0515, + "step": 340 + }, + { + "epoch": 0.19, + "learning_rate": 4.950253447011779e-05, + "loss": 1.0371, + "step": 345 + }, + { + "epoch": 0.19, + "learning_rate": 4.948806046172691e-05, + "loss": 1.0619, + "step": 350 + }, + { + "epoch": 0.2, + "learning_rate": 4.947338107507245e-05, + "loss": 1.0757, + "step": 355 + }, + { + "epoch": 0.2, + "learning_rate": 4.945849643326857e-05, + "loss": 1.0686, + "step": 360 + }, + { + "epoch": 0.2, + "learning_rate": 4.9443406661150874e-05, + "loss": 1.0809, + "step": 365 + }, + { + "epoch": 0.2, + "learning_rate": 4.942811188527537e-05, + "loss": 1.0704, + "step": 370 + }, + { + "epoch": 0.21, + "learning_rate": 4.941261223391742e-05, + "loss": 1.0655, + "step": 375 + }, + { + "epoch": 0.21, + "learning_rate": 4.939690783707063e-05, + "loss": 1.1182, + "step": 380 + }, + { + "epoch": 0.21, + "learning_rate": 4.938099882644578e-05, + "loss": 1.081, + "step": 385 + }, + { + "epoch": 0.22, + "learning_rate": 4.9364885335469734e-05, + "loss": 1.0792, + "step": 390 + }, + { + "epoch": 0.22, + "learning_rate": 4.93485674992843e-05, + "loss": 1.0244, + "step": 395 + }, + { + "epoch": 0.22, + "learning_rate": 4.933204545474511e-05, + "loss": 1.0531, + "step": 400 + }, + { + "epoch": 0.22, + "learning_rate": 4.9315319340420465e-05, + "loss": 1.048, + "step": 405 + }, + { + "epoch": 0.23, + "learning_rate": 4.929838929659015e-05, + "loss": 1.04, + "step": 410 + }, + { + "epoch": 0.23, + "learning_rate": 4.9281255465244314e-05, + "loss": 1.0686, + "step": 415 + }, + { + "epoch": 0.23, + "learning_rate": 4.926391799008223e-05, + "loss": 1.0933, + "step": 420 + }, + { + "epoch": 0.24, + "learning_rate": 4.924637701651111e-05, + "loss": 1.0313, + "step": 425 + }, + { + "epoch": 0.24, + "learning_rate": 4.9228632691644874e-05, + "loss": 1.0811, + "step": 430 + }, + { + "epoch": 0.24, + "learning_rate": 4.921068516430293e-05, + "loss": 1.0617, + "step": 435 + }, + { + "epoch": 0.24, + "learning_rate": 4.919253458500892e-05, + "loss": 1.0482, + "step": 440 + }, + { + "epoch": 0.25, + "learning_rate": 4.9174181105989445e-05, + "loss": 1.0681, + "step": 445 + }, + { + "epoch": 0.25, + "learning_rate": 4.9155624881172834e-05, + "loss": 1.0476, + "step": 450 + }, + { + "epoch": 0.25, + "learning_rate": 4.913686606618777e-05, + "loss": 1.0463, + "step": 455 + }, + { + "epoch": 0.25, + "learning_rate": 4.911790481836208e-05, + "loss": 1.0213, + "step": 460 + }, + { + "epoch": 0.26, + "learning_rate": 4.909874129672133e-05, + "loss": 0.9855, + "step": 465 + }, + { + "epoch": 0.26, + "learning_rate": 4.907937566198757e-05, + "loss": 1.045, + "step": 470 + }, + { + "epoch": 0.26, + "learning_rate": 4.9059808076577914e-05, + "loss": 1.0442, + "step": 475 + }, + { + "epoch": 0.27, + "learning_rate": 4.904003870460323e-05, + "loss": 1.0354, + "step": 480 + }, + { + "epoch": 0.27, + "learning_rate": 4.9020067711866735e-05, + "loss": 1.0331, + "step": 485 + }, + { + "epoch": 0.27, + "learning_rate": 4.899989526586261e-05, + "loss": 1.0585, + "step": 490 + }, + { + "epoch": 0.27, + "learning_rate": 4.8979521535774636e-05, + "loss": 1.0223, + "step": 495 + }, + { + "epoch": 0.28, + "learning_rate": 4.895894669247468e-05, + "loss": 1.0118, + "step": 500 + }, + { + "epoch": 0.28, + "learning_rate": 4.8938170908521356e-05, + "loss": 1.0508, + "step": 505 + }, + { + "epoch": 0.28, + "learning_rate": 4.8917194358158534e-05, + "loss": 1.0656, + "step": 510 + }, + { + "epoch": 0.28, + "learning_rate": 4.8896017217313886e-05, + "loss": 1.0655, + "step": 515 + }, + { + "epoch": 0.29, + "learning_rate": 4.887463966359741e-05, + "loss": 1.0833, + "step": 520 + }, + { + "epoch": 0.29, + "learning_rate": 4.8853061876299956e-05, + "loss": 1.068, + "step": 525 + }, + { + "epoch": 0.29, + "learning_rate": 4.8831284036391684e-05, + "loss": 1.0487, + "step": 530 + }, + { + "epoch": 0.3, + "learning_rate": 4.880930632652058e-05, + "loss": 1.059, + "step": 535 + }, + { + "epoch": 0.3, + "learning_rate": 4.878712893101092e-05, + "loss": 1.0408, + "step": 540 + }, + { + "epoch": 0.3, + "learning_rate": 4.876475203586171e-05, + "loss": 0.9976, + "step": 545 + }, + { + "epoch": 0.3, + "learning_rate": 4.874217582874514e-05, + "loss": 1.0358, + "step": 550 + }, + { + "epoch": 0.31, + "learning_rate": 4.8719400499005e-05, + "loss": 1.0073, + "step": 555 + }, + { + "epoch": 0.31, + "learning_rate": 4.869642623765509e-05, + "loss": 1.0742, + "step": 560 + }, + { + "epoch": 0.31, + "learning_rate": 4.867325323737765e-05, + "loss": 1.0463, + "step": 565 + }, + { + "epoch": 0.32, + "learning_rate": 4.864988169252168e-05, + "loss": 1.0242, + "step": 570 + }, + { + "epoch": 0.32, + "learning_rate": 4.8626311799101375e-05, + "loss": 1.0773, + "step": 575 + }, + { + "epoch": 0.32, + "learning_rate": 4.860254375479446e-05, + "loss": 1.0371, + "step": 580 + }, + { + "epoch": 0.32, + "learning_rate": 4.8578577758940504e-05, + "loss": 1.0399, + "step": 585 + }, + { + "epoch": 0.33, + "learning_rate": 4.855441401253928e-05, + "loss": 1.0411, + "step": 590 + }, + { + "epoch": 0.33, + "learning_rate": 4.8530052718249076e-05, + "loss": 1.0476, + "step": 595 + }, + { + "epoch": 0.33, + "learning_rate": 4.850549408038498e-05, + "loss": 1.0075, + "step": 600 + }, + { + "epoch": 0.33, + "learning_rate": 4.848073830491717e-05, + "loss": 1.0045, + "step": 605 + }, + { + "epoch": 0.34, + "learning_rate": 4.845578559946923e-05, + "loss": 1.0368, + "step": 610 + }, + { + "epoch": 0.34, + "learning_rate": 4.8430636173316306e-05, + "loss": 1.0156, + "step": 615 + }, + { + "epoch": 0.34, + "learning_rate": 4.840529023738348e-05, + "loss": 1.0334, + "step": 620 + }, + { + "epoch": 0.35, + "learning_rate": 4.837974800424389e-05, + "loss": 0.9994, + "step": 625 + }, + { + "epoch": 0.35, + "learning_rate": 4.8354009688117026e-05, + "loss": 1.0467, + "step": 630 + }, + { + "epoch": 0.35, + "learning_rate": 4.8328075504866874e-05, + "loss": 0.9968, + "step": 635 + }, + { + "epoch": 0.35, + "learning_rate": 4.8301945672000164e-05, + "loss": 1.041, + "step": 640 + }, + { + "epoch": 0.36, + "learning_rate": 4.8275620408664487e-05, + "loss": 0.9811, + "step": 645 + }, + { + "epoch": 0.36, + "learning_rate": 4.8249099935646494e-05, + "loss": 1.0235, + "step": 650 + }, + { + "epoch": 0.36, + "learning_rate": 4.822238447537003e-05, + "loss": 1.0152, + "step": 655 + }, + { + "epoch": 0.36, + "learning_rate": 4.819547425189429e-05, + "loss": 1.038, + "step": 660 + }, + { + "epoch": 0.37, + "learning_rate": 4.81683694909119e-05, + "loss": 1.0387, + "step": 665 + }, + { + "epoch": 0.37, + "learning_rate": 4.814107041974707e-05, + "loss": 1.019, + "step": 670 + }, + { + "epoch": 0.37, + "learning_rate": 4.811357726735366e-05, + "loss": 1.0403, + "step": 675 + }, + { + "epoch": 0.38, + "learning_rate": 4.808589026431324e-05, + "loss": 1.0828, + "step": 680 + }, + { + "epoch": 0.38, + "learning_rate": 4.805800964283322e-05, + "loss": 1.0128, + "step": 685 + }, + { + "epoch": 0.38, + "learning_rate": 4.802993563674483e-05, + "loss": 1.0502, + "step": 690 + }, + { + "epoch": 0.38, + "learning_rate": 4.80016684815012e-05, + "loss": 1.0169, + "step": 695 + }, + { + "epoch": 0.39, + "learning_rate": 4.7973208414175406e-05, + "loss": 1.0316, + "step": 700 + }, + { + "epoch": 0.39, + "learning_rate": 4.794455567345842e-05, + "loss": 1.0263, + "step": 705 + }, + { + "epoch": 0.39, + "learning_rate": 4.791571049965714e-05, + "loss": 1.0801, + "step": 710 + }, + { + "epoch": 0.4, + "learning_rate": 4.7886673134692404e-05, + "loss": 1.0575, + "step": 715 + }, + { + "epoch": 0.4, + "learning_rate": 4.7857443822096905e-05, + "loss": 0.9767, + "step": 720 + }, + { + "epoch": 0.4, + "learning_rate": 4.782802280701319e-05, + "loss": 0.9666, + "step": 725 + }, + { + "epoch": 0.4, + "learning_rate": 4.77984103361916e-05, + "loss": 0.9983, + "step": 730 + }, + { + "epoch": 0.41, + "learning_rate": 4.776860665798816e-05, + "loss": 1.0219, + "step": 735 + }, + { + "epoch": 0.41, + "learning_rate": 4.773861202236257e-05, + "loss": 0.9963, + "step": 740 + }, + { + "epoch": 0.41, + "learning_rate": 4.770842668087602e-05, + "loss": 1.0219, + "step": 745 + }, + { + "epoch": 0.41, + "learning_rate": 4.767805088668916e-05, + "loss": 1.0029, + "step": 750 + }, + { + "epoch": 0.42, + "learning_rate": 4.7647484894559936e-05, + "loss": 1.0077, + "step": 755 + }, + { + "epoch": 0.42, + "learning_rate": 4.7616728960841444e-05, + "loss": 0.9908, + "step": 760 + }, + { + "epoch": 0.42, + "learning_rate": 4.758578334347981e-05, + "loss": 1.0092, + "step": 765 + }, + { + "epoch": 0.43, + "learning_rate": 4.7554648302012015e-05, + "loss": 0.9874, + "step": 770 + }, + { + "epoch": 0.43, + "learning_rate": 4.7523324097563706e-05, + "loss": 1.0185, + "step": 775 + }, + { + "epoch": 0.43, + "learning_rate": 4.749181099284703e-05, + "loss": 1.0533, + "step": 780 + }, + { + "epoch": 0.43, + "learning_rate": 4.746010925215839e-05, + "loss": 1.0083, + "step": 785 + }, + { + "epoch": 0.44, + "learning_rate": 4.74282191413763e-05, + "loss": 1.0191, + "step": 790 + }, + { + "epoch": 0.44, + "learning_rate": 4.7396140927959045e-05, + "loss": 0.9786, + "step": 795 + }, + { + "epoch": 0.44, + "learning_rate": 4.7363874880942574e-05, + "loss": 1.0197, + "step": 800 + }, + { + "epoch": 0.45, + "learning_rate": 4.733142127093813e-05, + "loss": 1.0471, + "step": 805 + }, + { + "epoch": 0.45, + "learning_rate": 4.7298780370130014e-05, + "loss": 1.0134, + "step": 810 + }, + { + "epoch": 0.45, + "learning_rate": 4.726595245227336e-05, + "loss": 0.9801, + "step": 815 + }, + { + "epoch": 0.45, + "learning_rate": 4.723293779269173e-05, + "loss": 0.9944, + "step": 820 + }, + { + "epoch": 0.46, + "learning_rate": 4.7199736668274924e-05, + "loss": 1.0358, + "step": 825 + }, + { + "epoch": 0.46, + "learning_rate": 4.716634935747655e-05, + "loss": 0.9907, + "step": 830 + }, + { + "epoch": 0.46, + "learning_rate": 4.713277614031177e-05, + "loss": 1.0698, + "step": 835 + }, + { + "epoch": 0.46, + "learning_rate": 4.70990172983549e-05, + "loss": 1.0466, + "step": 840 + }, + { + "epoch": 0.47, + "learning_rate": 4.706507311473707e-05, + "loss": 0.9489, + "step": 845 + }, + { + "epoch": 0.47, + "learning_rate": 4.703094387414385e-05, + "loss": 0.9936, + "step": 850 + }, + { + "epoch": 0.47, + "learning_rate": 4.699662986281288e-05, + "loss": 0.9672, + "step": 855 + }, + { + "epoch": 0.48, + "learning_rate": 4.696213136853141e-05, + "loss": 1.0164, + "step": 860 + }, + { + "epoch": 0.48, + "learning_rate": 4.6927448680633954e-05, + "loss": 0.9817, + "step": 865 + }, + { + "epoch": 0.48, + "learning_rate": 4.689258208999983e-05, + "loss": 1.028, + "step": 870 + }, + { + "epoch": 0.48, + "learning_rate": 4.6857531889050716e-05, + "loss": 1.0239, + "step": 875 + }, + { + "epoch": 0.49, + "learning_rate": 4.682229837174821e-05, + "loss": 1.0378, + "step": 880 + }, + { + "epoch": 0.49, + "learning_rate": 4.678688183359135e-05, + "loss": 0.9908, + "step": 885 + }, + { + "epoch": 0.49, + "learning_rate": 4.675128257161418e-05, + "loss": 0.9384, + "step": 890 + }, + { + "epoch": 0.49, + "learning_rate": 4.671550088438319e-05, + "loss": 0.9481, + "step": 895 + }, + { + "epoch": 0.5, + "learning_rate": 4.6679537071994874e-05, + "loss": 0.9541, + "step": 900 + }, + { + "epoch": 0.5, + "learning_rate": 4.6643391436073165e-05, + "loss": 0.9787, + "step": 905 + }, + { + "epoch": 0.5, + "learning_rate": 4.660706427976693e-05, + "loss": 0.9897, + "step": 910 + }, + { + "epoch": 0.51, + "learning_rate": 4.657055590774745e-05, + "loss": 0.9947, + "step": 915 + }, + { + "epoch": 0.51, + "learning_rate": 4.6533866626205805e-05, + "loss": 0.9614, + "step": 920 + }, + { + "epoch": 0.51, + "learning_rate": 4.649699674285036e-05, + "loss": 1.0323, + "step": 925 + }, + { + "epoch": 0.51, + "learning_rate": 4.645994656690417e-05, + "loss": 1.0345, + "step": 930 + }, + { + "epoch": 0.52, + "learning_rate": 4.642271640910235e-05, + "loss": 1.0432, + "step": 935 + }, + { + "epoch": 0.52, + "learning_rate": 4.638530658168954e-05, + "loss": 1.0102, + "step": 940 + }, + { + "epoch": 0.52, + "learning_rate": 4.6347717398417203e-05, + "loss": 1.0361, + "step": 945 + }, + { + "epoch": 0.53, + "learning_rate": 4.6309949174541096e-05, + "loss": 0.9818, + "step": 950 + }, + { + "epoch": 0.53, + "learning_rate": 4.627200222681851e-05, + "loss": 1.0293, + "step": 955 + }, + { + "epoch": 0.53, + "learning_rate": 4.6233876873505694e-05, + "loss": 1.0125, + "step": 960 + }, + { + "epoch": 0.53, + "learning_rate": 4.619557343435516e-05, + "loss": 1.0039, + "step": 965 + }, + { + "epoch": 0.54, + "learning_rate": 4.615709223061302e-05, + "loss": 1.0146, + "step": 970 + }, + { + "epoch": 0.54, + "learning_rate": 4.611843358501624e-05, + "loss": 1.0352, + "step": 975 + }, + { + "epoch": 0.54, + "learning_rate": 4.6079597821789993e-05, + "loss": 1.0405, + "step": 980 + }, + { + "epoch": 0.54, + "learning_rate": 4.604058526664491e-05, + "loss": 0.9834, + "step": 985 + }, + { + "epoch": 0.55, + "learning_rate": 4.600139624677436e-05, + "loss": 1.0252, + "step": 990 + }, + { + "epoch": 0.55, + "learning_rate": 4.596203109085168e-05, + "loss": 1.0516, + "step": 995 + }, + { + "epoch": 0.55, + "learning_rate": 4.5922490129027464e-05, + "loss": 0.9762, + "step": 1000 + }, + { + "epoch": 0.56, + "learning_rate": 4.588277369292674e-05, + "loss": 0.9801, + "step": 1005 + }, + { + "epoch": 0.56, + "learning_rate": 4.5842882115646234e-05, + "loss": 1.0127, + "step": 1010 + }, + { + "epoch": 0.56, + "learning_rate": 4.580281573175157e-05, + "loss": 1.0415, + "step": 1015 + }, + { + "epoch": 0.56, + "learning_rate": 4.576257487727442e-05, + "loss": 0.9974, + "step": 1020 + }, + { + "epoch": 0.57, + "learning_rate": 4.572215988970974e-05, + "loss": 1.0097, + "step": 1025 + }, + { + "epoch": 0.57, + "learning_rate": 4.568157110801293e-05, + "loss": 1.0004, + "step": 1030 + }, + { + "epoch": 0.57, + "learning_rate": 4.5640808872596944e-05, + "loss": 0.9932, + "step": 1035 + }, + { + "epoch": 0.58, + "learning_rate": 4.5599873525329505e-05, + "loss": 0.9437, + "step": 1040 + }, + { + "epoch": 0.58, + "learning_rate": 4.555876540953019e-05, + "loss": 1.0004, + "step": 1045 + }, + { + "epoch": 0.58, + "learning_rate": 4.551748486996755e-05, + "loss": 0.9743, + "step": 1050 + }, + { + "epoch": 0.58, + "learning_rate": 4.547603225285626e-05, + "loss": 1.0303, + "step": 1055 + }, + { + "epoch": 0.59, + "learning_rate": 4.543440790585417e-05, + "loss": 1.0225, + "step": 1060 + }, + { + "epoch": 0.59, + "learning_rate": 4.539261217805939e-05, + "loss": 1.0102, + "step": 1065 + }, + { + "epoch": 0.59, + "learning_rate": 4.535064542000743e-05, + "loss": 1.0087, + "step": 1070 + }, + { + "epoch": 0.59, + "learning_rate": 4.5308507983668165e-05, + "loss": 0.952, + "step": 1075 + }, + { + "epoch": 0.6, + "learning_rate": 4.526620022244293e-05, + "loss": 0.9767, + "step": 1080 + }, + { + "epoch": 0.6, + "learning_rate": 4.522372249116158e-05, + "loss": 1.0049, + "step": 1085 + }, + { + "epoch": 0.6, + "learning_rate": 4.5181075146079456e-05, + "loss": 1.0397, + "step": 1090 + }, + { + "epoch": 0.61, + "learning_rate": 4.5138258544874455e-05, + "loss": 1.0167, + "step": 1095 + }, + { + "epoch": 0.61, + "learning_rate": 4.5095273046643985e-05, + "loss": 0.9966, + "step": 1100 + }, + { + "epoch": 0.61, + "learning_rate": 4.5052119011901986e-05, + "loss": 0.983, + "step": 1105 + }, + { + "epoch": 0.61, + "learning_rate": 4.500879680257587e-05, + "loss": 1.0301, + "step": 1110 + }, + { + "epoch": 0.62, + "learning_rate": 4.4965306782003535e-05, + "loss": 1.0371, + "step": 1115 + }, + { + "epoch": 0.62, + "learning_rate": 4.492164931493028e-05, + "loss": 1.0083, + "step": 1120 + }, + { + "epoch": 0.62, + "learning_rate": 4.487782476750575e-05, + "loss": 0.9728, + "step": 1125 + }, + { + "epoch": 0.62, + "learning_rate": 4.4833833507280884e-05, + "loss": 0.9892, + "step": 1130 + }, + { + "epoch": 0.63, + "learning_rate": 4.4789675903204805e-05, + "loss": 0.9847, + "step": 1135 + }, + { + "epoch": 0.63, + "learning_rate": 4.474535232562176e-05, + "loss": 1.0545, + "step": 1140 + }, + { + "epoch": 0.63, + "learning_rate": 4.470086314626797e-05, + "loss": 1.0204, + "step": 1145 + }, + { + "epoch": 0.64, + "learning_rate": 4.465620873826856e-05, + "loss": 0.9648, + "step": 1150 + }, + { + "epoch": 0.64, + "learning_rate": 4.46113894761344e-05, + "loss": 0.9545, + "step": 1155 + }, + { + "epoch": 0.64, + "learning_rate": 4.456640573575896e-05, + "loss": 1.0256, + "step": 1160 + }, + { + "epoch": 0.64, + "learning_rate": 4.4521257894415183e-05, + "loss": 0.99, + "step": 1165 + }, + { + "epoch": 0.65, + "learning_rate": 4.44759463307523e-05, + "loss": 0.9906, + "step": 1170 + }, + { + "epoch": 0.65, + "learning_rate": 4.443047142479266e-05, + "loss": 0.9876, + "step": 1175 + }, + { + "epoch": 0.65, + "learning_rate": 4.4384833557928553e-05, + "loss": 1.0495, + "step": 1180 + }, + { + "epoch": 0.66, + "learning_rate": 4.4339033112918966e-05, + "loss": 0.9869, + "step": 1185 + }, + { + "epoch": 0.66, + "learning_rate": 4.4293070473886456e-05, + "loss": 1.0299, + "step": 1190 + }, + { + "epoch": 0.66, + "learning_rate": 4.424694602631385e-05, + "loss": 1.0073, + "step": 1195 + }, + { + "epoch": 0.66, + "learning_rate": 4.420066015704105e-05, + "loss": 1.0023, + "step": 1200 + }, + { + "epoch": 0.67, + "learning_rate": 4.41542132542618e-05, + "loss": 0.9462, + "step": 1205 + }, + { + "epoch": 0.67, + "learning_rate": 4.410760570752037e-05, + "loss": 1.0116, + "step": 1210 + }, + { + "epoch": 0.67, + "learning_rate": 4.4060837907708375e-05, + "loss": 0.9652, + "step": 1215 + }, + { + "epoch": 0.67, + "learning_rate": 4.401391024706142e-05, + "loss": 1.0411, + "step": 1220 + }, + { + "epoch": 0.68, + "learning_rate": 4.396682311915586e-05, + "loss": 0.9691, + "step": 1225 + }, + { + "epoch": 0.68, + "learning_rate": 4.3919576918905495e-05, + "loss": 0.961, + "step": 1230 + }, + { + "epoch": 0.68, + "learning_rate": 4.387217204255819e-05, + "loss": 0.9602, + "step": 1235 + }, + { + "epoch": 0.69, + "learning_rate": 4.3824608887692666e-05, + "loss": 1.0197, + "step": 1240 + }, + { + "epoch": 0.69, + "learning_rate": 4.377688785321507e-05, + "loss": 0.9982, + "step": 1245 + }, + { + "epoch": 0.69, + "learning_rate": 4.372900933935569e-05, + "loss": 0.986, + "step": 1250 + }, + { + "epoch": 0.69, + "learning_rate": 4.368097374766556e-05, + "loss": 0.9744, + "step": 1255 + }, + { + "epoch": 0.7, + "learning_rate": 4.3632781481013105e-05, + "loss": 1.0078, + "step": 1260 + }, + { + "epoch": 0.7, + "learning_rate": 4.358443294358077e-05, + "loss": 0.9865, + "step": 1265 + }, + { + "epoch": 0.7, + "learning_rate": 4.35359285408616e-05, + "loss": 0.976, + "step": 1270 + }, + { + "epoch": 0.71, + "learning_rate": 4.348726867965591e-05, + "loss": 1.0141, + "step": 1275 + }, + { + "epoch": 0.71, + "learning_rate": 4.343845376806777e-05, + "loss": 0.952, + "step": 1280 + }, + { + "epoch": 0.71, + "learning_rate": 4.338948421550169e-05, + "loss": 1.0239, + "step": 1285 + }, + { + "epoch": 0.71, + "learning_rate": 4.334036043265909e-05, + "loss": 0.994, + "step": 1290 + }, + { + "epoch": 0.72, + "learning_rate": 4.329108283153492e-05, + "loss": 1.0505, + "step": 1295 + }, + { + "epoch": 0.72, + "learning_rate": 4.3241651825414195e-05, + "loss": 0.9919, + "step": 1300 + }, + { + "epoch": 0.72, + "learning_rate": 4.31920678288685e-05, + "loss": 1.0169, + "step": 1305 + }, + { + "epoch": 0.72, + "learning_rate": 4.3142331257752546e-05, + "loss": 0.9938, + "step": 1310 + }, + { + "epoch": 0.73, + "learning_rate": 4.309244252920064e-05, + "loss": 0.92, + "step": 1315 + }, + { + "epoch": 0.73, + "learning_rate": 4.304240206162326e-05, + "loss": 0.9537, + "step": 1320 + }, + { + "epoch": 0.73, + "learning_rate": 4.299221027470345e-05, + "loss": 0.996, + "step": 1325 + }, + { + "epoch": 0.74, + "learning_rate": 4.294186758939339e-05, + "loss": 0.9652, + "step": 1330 + }, + { + "epoch": 0.74, + "learning_rate": 4.2891374427910795e-05, + "loss": 1.0138, + "step": 1335 + }, + { + "epoch": 0.74, + "learning_rate": 4.284073121373544e-05, + "loss": 0.9267, + "step": 1340 + }, + { + "epoch": 0.74, + "learning_rate": 4.278993837160553e-05, + "loss": 0.9899, + "step": 1345 + }, + { + "epoch": 0.75, + "learning_rate": 4.273899632751422e-05, + "loss": 0.963, + "step": 1350 + }, + { + "epoch": 0.75, + "learning_rate": 4.2687905508705974e-05, + "loss": 1.0066, + "step": 1355 + }, + { + "epoch": 0.75, + "learning_rate": 4.263666634367303e-05, + "loss": 0.9939, + "step": 1360 + }, + { + "epoch": 0.75, + "learning_rate": 4.258527926215178e-05, + "loss": 0.9667, + "step": 1365 + }, + { + "epoch": 0.76, + "learning_rate": 4.253374469511917e-05, + "loss": 0.9999, + "step": 1370 + }, + { + "epoch": 0.76, + "learning_rate": 4.248206307478909e-05, + "loss": 0.9738, + "step": 1375 + }, + { + "epoch": 0.76, + "learning_rate": 4.243023483460875e-05, + "loss": 0.9829, + "step": 1380 + }, + { + "epoch": 0.77, + "learning_rate": 4.237826040925503e-05, + "loss": 1.0107, + "step": 1385 + }, + { + "epoch": 0.77, + "learning_rate": 4.232614023463088e-05, + "loss": 0.9992, + "step": 1390 + }, + { + "epoch": 0.77, + "learning_rate": 4.227387474786159e-05, + "loss": 0.9566, + "step": 1395 + }, + { + "epoch": 0.77, + "learning_rate": 4.222146438729119e-05, + "loss": 0.9722, + "step": 1400 + }, + { + "epoch": 0.78, + "learning_rate": 4.216890959247873e-05, + "loss": 0.988, + "step": 1405 + }, + { + "epoch": 0.78, + "learning_rate": 4.211621080419463e-05, + "loss": 1.0103, + "step": 1410 + }, + { + "epoch": 0.78, + "learning_rate": 4.206336846441695e-05, + "loss": 0.9805, + "step": 1415 + }, + { + "epoch": 0.79, + "learning_rate": 4.201038301632772e-05, + "loss": 1.0177, + "step": 1420 + }, + { + "epoch": 0.79, + "learning_rate": 4.195725490430917e-05, + "loss": 0.9938, + "step": 1425 + }, + { + "epoch": 0.79, + "learning_rate": 4.190398457394007e-05, + "loss": 1.0276, + "step": 1430 + }, + { + "epoch": 0.79, + "learning_rate": 4.1850572471991924e-05, + "loss": 0.9909, + "step": 1435 + }, + { + "epoch": 0.8, + "learning_rate": 4.1797019046425264e-05, + "loss": 0.9794, + "step": 1440 + }, + { + "epoch": 0.8, + "learning_rate": 4.1743324746385914e-05, + "loss": 1.0073, + "step": 1445 + }, + { + "epoch": 0.8, + "learning_rate": 4.1689490022201154e-05, + "loss": 1.0106, + "step": 1450 + }, + { + "epoch": 0.8, + "learning_rate": 4.163551532537601e-05, + "loss": 1.0241, + "step": 1455 + }, + { + "epoch": 0.81, + "learning_rate": 4.1581401108589425e-05, + "loss": 1.0178, + "step": 1460 + }, + { + "epoch": 0.81, + "learning_rate": 4.1527147825690495e-05, + "loss": 0.9426, + "step": 1465 + }, + { + "epoch": 0.81, + "learning_rate": 4.1472755931694626e-05, + "loss": 0.9642, + "step": 1470 + }, + { + "epoch": 0.82, + "learning_rate": 4.141822588277976e-05, + "loss": 0.9678, + "step": 1475 + }, + { + "epoch": 0.82, + "learning_rate": 4.136355813628251e-05, + "loss": 0.9654, + "step": 1480 + }, + { + "epoch": 0.82, + "learning_rate": 4.130875315069435e-05, + "loss": 0.9748, + "step": 1485 + }, + { + "epoch": 0.82, + "learning_rate": 4.125381138565775e-05, + "loss": 0.9321, + "step": 1490 + }, + { + "epoch": 0.83, + "learning_rate": 4.1198733301962346e-05, + "loss": 0.9885, + "step": 1495 + }, + { + "epoch": 0.83, + "learning_rate": 4.114351936154105e-05, + "loss": 0.9385, + "step": 1500 + }, + { + "epoch": 0.83, + "learning_rate": 4.108817002746619e-05, + "loss": 0.9362, + "step": 1505 + }, + { + "epoch": 0.83, + "learning_rate": 4.1032685763945625e-05, + "loss": 0.9764, + "step": 1510 + }, + { + "epoch": 0.84, + "learning_rate": 4.097706703631886e-05, + "loss": 0.961, + "step": 1515 + }, + { + "epoch": 0.84, + "learning_rate": 4.092131431105312e-05, + "loss": 0.9818, + "step": 1520 + }, + { + "epoch": 0.84, + "learning_rate": 4.086542805573945e-05, + "loss": 0.9855, + "step": 1525 + }, + { + "epoch": 0.85, + "learning_rate": 4.080940873908881e-05, + "loss": 0.9618, + "step": 1530 + }, + { + "epoch": 0.85, + "learning_rate": 4.07532568309281e-05, + "loss": 1.0102, + "step": 1535 + }, + { + "epoch": 0.85, + "learning_rate": 4.069697280219628e-05, + "loss": 0.9773, + "step": 1540 + }, + { + "epoch": 0.85, + "learning_rate": 4.0640557124940376e-05, + "loss": 0.9687, + "step": 1545 + }, + { + "epoch": 0.86, + "learning_rate": 4.058401027231152e-05, + "loss": 0.9818, + "step": 1550 + }, + { + "epoch": 0.86, + "learning_rate": 4.052733271856103e-05, + "loss": 0.9911, + "step": 1555 + }, + { + "epoch": 0.86, + "learning_rate": 4.0470524939036355e-05, + "loss": 0.9694, + "step": 1560 + }, + { + "epoch": 0.87, + "learning_rate": 4.0413587410177155e-05, + "loss": 0.9374, + "step": 1565 + }, + { + "epoch": 0.87, + "learning_rate": 4.035652060951128e-05, + "loss": 0.956, + "step": 1570 + }, + { + "epoch": 0.87, + "learning_rate": 4.0299325015650774e-05, + "loss": 0.9491, + "step": 1575 + }, + { + "epoch": 0.87, + "learning_rate": 4.024200110828783e-05, + "loss": 0.9569, + "step": 1580 + }, + { + "epoch": 0.88, + "learning_rate": 4.018454936819082e-05, + "loss": 0.9627, + "step": 1585 + }, + { + "epoch": 0.88, + "learning_rate": 4.012697027720018e-05, + "loss": 0.9922, + "step": 1590 + }, + { + "epoch": 0.88, + "learning_rate": 4.0069264318224506e-05, + "loss": 0.9703, + "step": 1595 + }, + { + "epoch": 0.88, + "learning_rate": 4.0011431975236337e-05, + "loss": 0.9436, + "step": 1600 + }, + { + "epoch": 0.89, + "learning_rate": 3.995347373326822e-05, + "loss": 0.961, + "step": 1605 + }, + { + "epoch": 0.89, + "learning_rate": 3.989539007840861e-05, + "loss": 0.9247, + "step": 1610 + }, + { + "epoch": 0.89, + "learning_rate": 3.983718149779775e-05, + "loss": 0.9537, + "step": 1615 + }, + { + "epoch": 0.9, + "learning_rate": 3.9778848479623656e-05, + "loss": 0.9415, + "step": 1620 + }, + { + "epoch": 0.9, + "learning_rate": 3.972039151311795e-05, + "loss": 0.9954, + "step": 1625 + }, + { + "epoch": 0.9, + "learning_rate": 3.966181108855183e-05, + "loss": 0.9451, + "step": 1630 + }, + { + "epoch": 0.9, + "learning_rate": 3.960310769723189e-05, + "loss": 0.9977, + "step": 1635 + }, + { + "epoch": 0.91, + "learning_rate": 3.9544281831496034e-05, + "loss": 0.9806, + "step": 1640 + }, + { + "epoch": 0.91, + "learning_rate": 3.9485333984709374e-05, + "loss": 0.9851, + "step": 1645 + }, + { + "epoch": 0.91, + "learning_rate": 3.942626465126001e-05, + "loss": 1.0089, + "step": 1650 + }, + { + "epoch": 0.92, + "learning_rate": 3.9367074326555e-05, + "loss": 0.9562, + "step": 1655 + }, + { + "epoch": 0.92, + "learning_rate": 3.930776350701609e-05, + "loss": 0.9892, + "step": 1660 + }, + { + "epoch": 0.92, + "learning_rate": 3.92483326900756e-05, + "loss": 0.9875, + "step": 1665 + }, + { + "epoch": 0.92, + "learning_rate": 3.91887823741723e-05, + "loss": 1.0012, + "step": 1670 + }, + { + "epoch": 0.93, + "learning_rate": 3.9129113058747136e-05, + "loss": 0.97, + "step": 1675 + }, + { + "epoch": 0.93, + "learning_rate": 3.9069325244239095e-05, + "loss": 0.9793, + "step": 1680 + }, + { + "epoch": 0.93, + "learning_rate": 3.900941943208103e-05, + "loss": 0.9864, + "step": 1685 + }, + { + "epoch": 0.93, + "learning_rate": 3.894939612469539e-05, + "loss": 0.9596, + "step": 1690 + }, + { + "epoch": 0.94, + "learning_rate": 3.888925582549006e-05, + "loss": 0.9679, + "step": 1695 + }, + { + "epoch": 0.94, + "learning_rate": 3.882899903885412e-05, + "loss": 0.9395, + "step": 1700 + }, + { + "epoch": 0.94, + "learning_rate": 3.876862627015361e-05, + "loss": 0.9774, + "step": 1705 + }, + { + "epoch": 0.95, + "learning_rate": 3.87081380257273e-05, + "loss": 0.9686, + "step": 1710 + }, + { + "epoch": 0.95, + "learning_rate": 3.864753481288244e-05, + "loss": 0.953, + "step": 1715 + }, + { + "epoch": 0.95, + "learning_rate": 3.8586817139890515e-05, + "loss": 0.9719, + "step": 1720 + }, + { + "epoch": 0.95, + "learning_rate": 3.852598551598294e-05, + "loss": 0.9748, + "step": 1725 + }, + { + "epoch": 0.96, + "learning_rate": 3.8465040451346874e-05, + "loss": 0.9741, + "step": 1730 + }, + { + "epoch": 0.96, + "learning_rate": 3.8403982457120836e-05, + "loss": 0.9747, + "step": 1735 + }, + { + "epoch": 0.96, + "learning_rate": 3.834281204539051e-05, + "loss": 0.9791, + "step": 1740 + }, + { + "epoch": 0.96, + "learning_rate": 3.828152972918438e-05, + "loss": 0.9704, + "step": 1745 + }, + { + "epoch": 0.97, + "learning_rate": 3.82201360224695e-05, + "loss": 0.9556, + "step": 1750 + }, + { + "epoch": 0.97, + "learning_rate": 3.815863144014711e-05, + "loss": 0.9552, + "step": 1755 + }, + { + "epoch": 0.97, + "learning_rate": 3.809701649804834e-05, + "loss": 0.9594, + "step": 1760 + }, + { + "epoch": 0.98, + "learning_rate": 3.8035291712929926e-05, + "loss": 0.959, + "step": 1765 + }, + { + "epoch": 0.98, + "learning_rate": 3.797345760246982e-05, + "loss": 1.0254, + "step": 1770 + }, + { + "epoch": 0.98, + "learning_rate": 3.791151468526289e-05, + "loss": 0.9925, + "step": 1775 + }, + { + "epoch": 0.98, + "learning_rate": 3.784946348081654e-05, + "loss": 0.9516, + "step": 1780 + }, + { + "epoch": 0.99, + "learning_rate": 3.7787304509546365e-05, + "loss": 0.954, + "step": 1785 + }, + { + "epoch": 0.99, + "learning_rate": 3.7725038292771774e-05, + "loss": 0.9465, + "step": 1790 + }, + { + "epoch": 0.99, + "learning_rate": 3.766266535271167e-05, + "loss": 0.9792, + "step": 1795 + }, + { + "epoch": 1.0, + "learning_rate": 3.760018621248e-05, + "loss": 0.964, + "step": 1800 + }, + { + "epoch": 1.0, + "learning_rate": 3.75376013960814e-05, + "loss": 0.9419, + "step": 1805 + }, + { + "epoch": 1.0, + "learning_rate": 3.747491142840681e-05, + "loss": 0.9818, + "step": 1810 + }, + { + "epoch": 1.0, + "learning_rate": 3.741211683522904e-05, + "loss": 0.9153, + "step": 1815 + }, + { + "epoch": 1.01, + "learning_rate": 3.734921814319841e-05, + "loss": 0.955, + "step": 1820 + }, + { + "epoch": 1.01, + "learning_rate": 3.728621587983828e-05, + "loss": 0.9467, + "step": 1825 + }, + { + "epoch": 1.01, + "learning_rate": 3.722311057354067e-05, + "loss": 0.9816, + "step": 1830 + }, + { + "epoch": 1.01, + "learning_rate": 3.715990275356178e-05, + "loss": 0.9727, + "step": 1835 + }, + { + "epoch": 1.02, + "learning_rate": 3.7096592950017617e-05, + "loss": 0.9822, + "step": 1840 + }, + { + "epoch": 1.02, + "learning_rate": 3.703318169387947e-05, + "loss": 0.942, + "step": 1845 + }, + { + "epoch": 1.02, + "learning_rate": 3.696966951696952e-05, + "loss": 0.9306, + "step": 1850 + }, + { + "epoch": 1.03, + "learning_rate": 3.690605695195637e-05, + "loss": 0.9654, + "step": 1855 + }, + { + "epoch": 1.03, + "learning_rate": 3.684234453235054e-05, + "loss": 0.9592, + "step": 1860 + }, + { + "epoch": 1.03, + "learning_rate": 3.677853279250003e-05, + "loss": 0.988, + "step": 1865 + }, + { + "epoch": 1.03, + "learning_rate": 3.671462226758583e-05, + "loss": 0.9462, + "step": 1870 + }, + { + "epoch": 1.04, + "learning_rate": 3.665061349361742e-05, + "loss": 0.9685, + "step": 1875 + }, + { + "epoch": 1.04, + "learning_rate": 3.658650700742828e-05, + "loss": 0.9772, + "step": 1880 + }, + { + "epoch": 1.04, + "learning_rate": 3.6522303346671404e-05, + "loss": 0.9482, + "step": 1885 + }, + { + "epoch": 1.05, + "learning_rate": 3.645800304981477e-05, + "loss": 1.0069, + "step": 1890 + }, + { + "epoch": 1.05, + "learning_rate": 3.639360665613683e-05, + "loss": 0.9003, + "step": 1895 + }, + { + "epoch": 1.05, + "learning_rate": 3.632911470572197e-05, + "loss": 0.9279, + "step": 1900 + }, + { + "epoch": 1.05, + "learning_rate": 3.626452773945603e-05, + "loss": 0.9237, + "step": 1905 + }, + { + "epoch": 1.06, + "learning_rate": 3.619984629902172e-05, + "loss": 1.0086, + "step": 1910 + }, + { + "epoch": 1.06, + "learning_rate": 3.613507092689409e-05, + "loss": 0.9625, + "step": 1915 + }, + { + "epoch": 1.06, + "learning_rate": 3.607020216633599e-05, + "loss": 0.9297, + "step": 1920 + }, + { + "epoch": 1.06, + "learning_rate": 3.60052405613935e-05, + "loss": 0.9718, + "step": 1925 + }, + { + "epoch": 1.07, + "learning_rate": 3.594018665689139e-05, + "loss": 0.9512, + "step": 1930 + }, + { + "epoch": 1.07, + "learning_rate": 3.5875040998428513e-05, + "loss": 0.9923, + "step": 1935 + }, + { + "epoch": 1.07, + "learning_rate": 3.5809804132373253e-05, + "loss": 0.9518, + "step": 1940 + }, + { + "epoch": 1.08, + "learning_rate": 3.574447660585897e-05, + "loss": 0.9324, + "step": 1945 + }, + { + "epoch": 1.08, + "learning_rate": 3.5679058966779344e-05, + "loss": 0.9724, + "step": 1950 + }, + { + "epoch": 1.08, + "learning_rate": 3.561355176378384e-05, + "loss": 0.978, + "step": 1955 + }, + { + "epoch": 1.08, + "learning_rate": 3.554795554627307e-05, + "loss": 0.9893, + "step": 1960 + }, + { + "epoch": 1.09, + "learning_rate": 3.548227086439422e-05, + "loss": 0.967, + "step": 1965 + }, + { + "epoch": 1.09, + "learning_rate": 3.541649826903639e-05, + "loss": 0.9529, + "step": 1970 + }, + { + "epoch": 1.09, + "learning_rate": 3.535063831182602e-05, + "loss": 0.9506, + "step": 1975 + }, + { + "epoch": 1.09, + "learning_rate": 3.528469154512224e-05, + "loss": 0.9525, + "step": 1980 + }, + { + "epoch": 1.1, + "learning_rate": 3.521865852201223e-05, + "loss": 0.9258, + "step": 1985 + }, + { + "epoch": 1.1, + "learning_rate": 3.5152539796306596e-05, + "loss": 0.9417, + "step": 1990 + }, + { + "epoch": 1.1, + "learning_rate": 3.508633592253472e-05, + "loss": 0.9465, + "step": 1995 + }, + { + "epoch": 1.11, + "learning_rate": 3.502004745594011e-05, + "loss": 0.9494, + "step": 2000 + }, + { + "epoch": 1.11, + "learning_rate": 3.4953674952475755e-05, + "loss": 0.9709, + "step": 2005 + }, + { + "epoch": 1.11, + "learning_rate": 3.488721896879943e-05, + "loss": 0.9581, + "step": 2010 + }, + { + "epoch": 1.11, + "learning_rate": 3.4820680062269074e-05, + "loss": 0.974, + "step": 2015 + }, + { + "epoch": 1.12, + "learning_rate": 3.4754058790938046e-05, + "loss": 0.9768, + "step": 2020 + }, + { + "epoch": 1.12, + "learning_rate": 3.468735571355055e-05, + "loss": 0.982, + "step": 2025 + }, + { + "epoch": 1.12, + "learning_rate": 3.4620571389536825e-05, + "loss": 0.984, + "step": 2030 + }, + { + "epoch": 1.13, + "learning_rate": 3.455370637900856e-05, + "loss": 0.9604, + "step": 2035 + }, + { + "epoch": 1.13, + "learning_rate": 3.448676124275414e-05, + "loss": 0.897, + "step": 2040 + }, + { + "epoch": 1.13, + "learning_rate": 3.4419736542233925e-05, + "loss": 0.9968, + "step": 2045 + }, + { + "epoch": 1.13, + "learning_rate": 3.4352632839575616e-05, + "loss": 0.9479, + "step": 2050 + }, + { + "epoch": 1.14, + "learning_rate": 3.428545069756946e-05, + "loss": 0.9724, + "step": 2055 + }, + { + "epoch": 1.14, + "learning_rate": 3.42181906796636e-05, + "loss": 0.9493, + "step": 2060 + }, + { + "epoch": 1.14, + "learning_rate": 3.415085334995927e-05, + "loss": 0.9348, + "step": 2065 + }, + { + "epoch": 1.14, + "learning_rate": 3.408343927320613e-05, + "loss": 0.9702, + "step": 2070 + }, + { + "epoch": 1.15, + "learning_rate": 3.401594901479753e-05, + "loss": 0.9089, + "step": 2075 + }, + { + "epoch": 1.15, + "learning_rate": 3.394838314076572e-05, + "loss": 0.9606, + "step": 2080 + }, + { + "epoch": 1.15, + "learning_rate": 3.3880742217777115e-05, + "loss": 0.9743, + "step": 2085 + }, + { + "epoch": 1.16, + "learning_rate": 3.381302681312759e-05, + "loss": 0.9469, + "step": 2090 + }, + { + "epoch": 1.16, + "learning_rate": 3.374523749473767e-05, + "loss": 0.949, + "step": 2095 + }, + { + "epoch": 1.16, + "learning_rate": 3.367737483114779e-05, + "loss": 0.9567, + "step": 2100 + }, + { + "epoch": 1.16, + "learning_rate": 3.360943939151351e-05, + "loss": 0.9718, + "step": 2105 + }, + { + "epoch": 1.17, + "learning_rate": 3.354143174560078e-05, + "loss": 0.9626, + "step": 2110 + }, + { + "epoch": 1.17, + "learning_rate": 3.3473352463781105e-05, + "loss": 0.9346, + "step": 2115 + }, + { + "epoch": 1.17, + "learning_rate": 3.340520211702681e-05, + "loss": 0.9208, + "step": 2120 + }, + { + "epoch": 1.18, + "learning_rate": 3.333698127690623e-05, + "loss": 0.9856, + "step": 2125 + }, + { + "epoch": 1.18, + "learning_rate": 3.326869051557891e-05, + "loss": 0.9049, + "step": 2130 + }, + { + "epoch": 1.18, + "learning_rate": 3.320033040579082e-05, + "loss": 0.9222, + "step": 2135 + }, + { + "epoch": 1.18, + "learning_rate": 3.3131901520869565e-05, + "loss": 0.9648, + "step": 2140 + }, + { + "epoch": 1.19, + "learning_rate": 3.306340443471951e-05, + "loss": 0.9538, + "step": 2145 + }, + { + "epoch": 1.19, + "learning_rate": 3.299483972181708e-05, + "loss": 0.9314, + "step": 2150 + }, + { + "epoch": 1.19, + "learning_rate": 3.292620795720583e-05, + "loss": 0.9576, + "step": 2155 + }, + { + "epoch": 1.19, + "learning_rate": 3.285750971649167e-05, + "loss": 0.9499, + "step": 2160 + }, + { + "epoch": 1.2, + "learning_rate": 3.278874557583807e-05, + "loss": 0.9568, + "step": 2165 + }, + { + "epoch": 1.2, + "learning_rate": 3.271991611196117e-05, + "loss": 0.9642, + "step": 2170 + }, + { + "epoch": 1.2, + "learning_rate": 3.265102190212497e-05, + "loss": 0.9526, + "step": 2175 + }, + { + "epoch": 1.21, + "learning_rate": 3.258206352413648e-05, + "loss": 0.933, + "step": 2180 + }, + { + "epoch": 1.21, + "learning_rate": 3.2513041556340887e-05, + "loss": 0.9683, + "step": 2185 + }, + { + "epoch": 1.21, + "learning_rate": 3.244395657761671e-05, + "loss": 0.9155, + "step": 2190 + }, + { + "epoch": 1.21, + "learning_rate": 3.2374809167370924e-05, + "loss": 0.9262, + "step": 2195 + }, + { + "epoch": 1.22, + "learning_rate": 3.230559990553409e-05, + "loss": 0.9778, + "step": 2200 + }, + { + "epoch": 1.22, + "learning_rate": 3.2236329372555544e-05, + "loss": 0.9577, + "step": 2205 + }, + { + "epoch": 1.22, + "learning_rate": 3.2166998149398465e-05, + "loss": 0.9286, + "step": 2210 + }, + { + "epoch": 1.22, + "learning_rate": 3.209760681753505e-05, + "loss": 0.9634, + "step": 2215 + }, + { + "epoch": 1.23, + "learning_rate": 3.2028155958941615e-05, + "loss": 0.9467, + "step": 2220 + }, + { + "epoch": 1.23, + "learning_rate": 3.195864615609373e-05, + "loss": 0.9543, + "step": 2225 + }, + { + "epoch": 1.23, + "learning_rate": 3.1889077991961304e-05, + "loss": 0.9914, + "step": 2230 + }, + { + "epoch": 1.24, + "learning_rate": 3.181945205000373e-05, + "loss": 0.9309, + "step": 2235 + }, + { + "epoch": 1.24, + "learning_rate": 3.1749768914164955e-05, + "loss": 0.9299, + "step": 2240 + }, + { + "epoch": 1.24, + "learning_rate": 3.168002916886864e-05, + "loss": 0.9462, + "step": 2245 + }, + { + "epoch": 1.24, + "learning_rate": 3.1610233399013194e-05, + "loss": 0.948, + "step": 2250 + }, + { + "epoch": 1.25, + "learning_rate": 3.15403821899669e-05, + "loss": 0.9335, + "step": 2255 + }, + { + "epoch": 1.25, + "learning_rate": 3.147047612756302e-05, + "loss": 0.952, + "step": 2260 + }, + { + "epoch": 1.25, + "learning_rate": 3.140051579809484e-05, + "loss": 0.9532, + "step": 2265 + }, + { + "epoch": 1.26, + "learning_rate": 3.133050178831079e-05, + "loss": 0.9853, + "step": 2270 + }, + { + "epoch": 1.26, + "learning_rate": 3.12604346854095e-05, + "loss": 0.8925, + "step": 2275 + }, + { + "epoch": 1.26, + "learning_rate": 3.119031507703491e-05, + "loss": 0.942, + "step": 2280 + }, + { + "epoch": 1.26, + "learning_rate": 3.112014355127129e-05, + "loss": 0.9132, + "step": 2285 + }, + { + "epoch": 1.27, + "learning_rate": 3.104992069663835e-05, + "loss": 0.9335, + "step": 2290 + }, + { + "epoch": 1.27, + "learning_rate": 3.0979647102086273e-05, + "loss": 0.9403, + "step": 2295 + }, + { + "epoch": 1.27, + "learning_rate": 3.090932335699081e-05, + "loss": 0.9246, + "step": 2300 + }, + { + "epoch": 1.27, + "learning_rate": 3.083895005114831e-05, + "loss": 0.912, + "step": 2305 + }, + { + "epoch": 1.28, + "learning_rate": 3.076852777477079e-05, + "loss": 0.9334, + "step": 2310 + }, + { + "epoch": 1.28, + "learning_rate": 3.069805711848096e-05, + "loss": 0.933, + "step": 2315 + }, + { + "epoch": 1.28, + "learning_rate": 3.062753867330729e-05, + "loss": 0.9348, + "step": 2320 + }, + { + "epoch": 1.29, + "learning_rate": 3.055697303067905e-05, + "loss": 0.9997, + "step": 2325 + }, + { + "epoch": 1.29, + "learning_rate": 3.048636078242137e-05, + "loss": 0.9196, + "step": 2330 + }, + { + "epoch": 1.29, + "learning_rate": 3.0415702520750235e-05, + "loss": 0.9735, + "step": 2335 + }, + { + "epoch": 1.29, + "learning_rate": 3.0344998838267525e-05, + "loss": 0.9395, + "step": 2340 + }, + { + "epoch": 1.3, + "learning_rate": 3.0274250327956093e-05, + "loss": 0.9455, + "step": 2345 + }, + { + "epoch": 1.3, + "learning_rate": 3.020345758317474e-05, + "loss": 0.968, + "step": 2350 + }, + { + "epoch": 1.3, + "learning_rate": 3.0132621197653245e-05, + "loss": 0.9403, + "step": 2355 + }, + { + "epoch": 1.3, + "learning_rate": 3.0061741765487418e-05, + "loss": 0.9267, + "step": 2360 + }, + { + "epoch": 1.31, + "learning_rate": 2.9990819881134073e-05, + "loss": 0.9734, + "step": 2365 + }, + { + "epoch": 1.31, + "learning_rate": 2.9919856139406093e-05, + "loss": 0.949, + "step": 2370 + }, + { + "epoch": 1.31, + "learning_rate": 2.9848851135467386e-05, + "loss": 0.9464, + "step": 2375 + }, + { + "epoch": 1.32, + "learning_rate": 2.977780546482794e-05, + "loss": 0.9709, + "step": 2380 + }, + { + "epoch": 1.32, + "learning_rate": 2.9706719723338795e-05, + "loss": 0.9202, + "step": 2385 + }, + { + "epoch": 1.32, + "learning_rate": 2.9635594507187074e-05, + "loss": 0.9625, + "step": 2390 + }, + { + "epoch": 1.32, + "learning_rate": 2.956443041289096e-05, + "loss": 0.9307, + "step": 2395 + }, + { + "epoch": 1.33, + "learning_rate": 2.9493228037294702e-05, + "loss": 0.907, + "step": 2400 + } + ], + "max_steps": 5424, + "num_train_epochs": 3, + "total_flos": 7.799084377807258e+17, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-2400/training_args.bin b/checkpoint-2400/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..77433459daca6603950afc463ff989e817f81ac3 --- /dev/null +++ b/checkpoint-2400/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0f6a8853599a52b1f15984d75ce4b6434c335fdc67d4509d0b2ca0d8c613590a +size 3362 diff --git a/checkpoint-2500/README.md b/checkpoint-2500/README.md new file mode 100644 index 0000000000000000000000000000000000000000..53c0fe6b436905ad7a2c060e98fe1ebdeac1a2fe --- /dev/null +++ b/checkpoint-2500/README.md @@ -0,0 +1,15 @@ +--- +library_name: peft +--- +## Training procedure + + +The following `bitsandbytes` quantization config was used during training: +- load_in_8bit: True +- llm_int8_threshold: 6.0 +- llm_int8_skip_modules: None +- llm_int8_enable_fp32_cpu_offload: False +### Framework versions + + +- PEFT 0.5.0 diff --git a/checkpoint-2500/adapter_config.json b/checkpoint-2500/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..514ae99ac7471cc26016f7575e44dd8304f29b07 --- /dev/null +++ b/checkpoint-2500/adapter_config.json @@ -0,0 +1,21 @@ +{ + "auto_mapping": null, + "base_model_name_or_path": "meta-llama/Llama-2-7b-chat-hf", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "q_proj", + "v_proj" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-2500/adapter_model.bin b/checkpoint-2500/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..fbbf98c9ec6b7c857d12650792465dd6b7e20708 --- /dev/null +++ b/checkpoint-2500/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7b46a85c82fc36136516ed06f0948d46104285bdb1df71791e1ce2c2acd50415 +size 16821197 diff --git a/checkpoint-2500/finetuning_args.json b/checkpoint-2500/finetuning_args.json new file mode 100644 index 0000000000000000000000000000000000000000..d01efc206b59c6f88548e8f3940579f2ed2af33b --- /dev/null +++ b/checkpoint-2500/finetuning_args.json @@ -0,0 +1,16 @@ +{ + "dpo_beta": 0.1, + "finetuning_type": "lora", + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "lora_rank": 8, + "lora_target": [ + "q_proj", + "v_proj" + ], + "name_module_trainable": "mlp", + "num_hidden_layers": 32, + "num_layer_trainable": 3, + "ppo_score_norm": false, + "resume_lora_training": true +} diff --git a/checkpoint-2500/optimizer.pt b/checkpoint-2500/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..6239f5af2a423c9475333d94d45906323f22ea9b --- /dev/null +++ b/checkpoint-2500/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0e32dc47d69ed22b3e5062ab72329dae51414424f8c4272511ee20defe66be86 +size 33629893 diff --git a/checkpoint-2500/rng_state.pth b/checkpoint-2500/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..e904a7f6d702b422eef36280576f48b522e4ca1e --- /dev/null +++ b/checkpoint-2500/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0a3dda0f3ed1815b927cd2c99c70593f54eaa16620ca0ee9fb1fac7e05ec7e6c +size 14575 diff --git a/checkpoint-2500/scaler.pt b/checkpoint-2500/scaler.pt new file mode 100644 index 0000000000000000000000000000000000000000..9a3e6e139e26b18feddbe2f66ba98b73890dacfa --- /dev/null +++ b/checkpoint-2500/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0fbcebc8f5487b0c117b5dd47f2ea304af3eebf408d297118d9307e1223927e1 +size 557 diff --git a/checkpoint-2500/scheduler.pt b/checkpoint-2500/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..e39fd389240f8e57ff4e1247bbab3dc58b4ad5bd --- /dev/null +++ b/checkpoint-2500/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2724550786267747698f93728904594e4f541b8688189f0536d8de28de47e81b +size 627 diff --git a/checkpoint-2500/trainer_state.json b/checkpoint-2500/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..c5e285f8c4f2a0f3ebaf369756e12c07bf6fd142 --- /dev/null +++ b/checkpoint-2500/trainer_state.json @@ -0,0 +1,3016 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.3823610727121924, + "global_step": 2500, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.9999895164082156e-05, + "loss": 1.6349, + "step": 5 + }, + { + "epoch": 0.01, + "learning_rate": 4.999958065720787e-05, + "loss": 1.6199, + "step": 10 + }, + { + "epoch": 0.01, + "learning_rate": 4.999905648201487e-05, + "loss": 1.4834, + "step": 15 + }, + { + "epoch": 0.01, + "learning_rate": 4.999832264289934e-05, + "loss": 1.3882, + "step": 20 + }, + { + "epoch": 0.01, + "learning_rate": 4.999737914601591e-05, + "loss": 1.3679, + "step": 25 + }, + { + "epoch": 0.02, + "learning_rate": 4.999622599927756e-05, + "loss": 1.2396, + "step": 30 + }, + { + "epoch": 0.02, + "learning_rate": 4.999486321235559e-05, + "loss": 1.321, + "step": 35 + }, + { + "epoch": 0.02, + "learning_rate": 4.9993290796679516e-05, + "loss": 1.2874, + "step": 40 + }, + { + "epoch": 0.02, + "learning_rate": 4.999150876543699e-05, + "loss": 1.2607, + "step": 45 + }, + { + "epoch": 0.03, + "learning_rate": 4.9989517133573694e-05, + "loss": 1.2454, + "step": 50 + }, + { + "epoch": 0.03, + "learning_rate": 4.9987315917793174e-05, + "loss": 1.2799, + "step": 55 + }, + { + "epoch": 0.03, + "learning_rate": 4.998490513655676e-05, + "loss": 1.2575, + "step": 60 + }, + { + "epoch": 0.04, + "learning_rate": 4.998228481008337e-05, + "loss": 1.2404, + "step": 65 + }, + { + "epoch": 0.04, + "learning_rate": 4.997945496034934e-05, + "loss": 1.2219, + "step": 70 + }, + { + "epoch": 0.04, + "learning_rate": 4.9976415611088267e-05, + "loss": 1.2241, + "step": 75 + }, + { + "epoch": 0.04, + "learning_rate": 4.997316678779079e-05, + "loss": 1.1716, + "step": 80 + }, + { + "epoch": 0.05, + "learning_rate": 4.996970851770438e-05, + "loss": 1.1883, + "step": 85 + }, + { + "epoch": 0.05, + "learning_rate": 4.9966040829833115e-05, + "loss": 1.205, + "step": 90 + }, + { + "epoch": 0.05, + "learning_rate": 4.9962163754937426e-05, + "loss": 1.1246, + "step": 95 + }, + { + "epoch": 0.06, + "learning_rate": 4.995807732553384e-05, + "loss": 1.1636, + "step": 100 + }, + { + "epoch": 0.06, + "learning_rate": 4.9953781575894723e-05, + "loss": 1.158, + "step": 105 + }, + { + "epoch": 0.06, + "learning_rate": 4.9949276542048e-05, + "loss": 1.1477, + "step": 110 + }, + { + "epoch": 0.06, + "learning_rate": 4.9944562261776805e-05, + "loss": 1.1678, + "step": 115 + }, + { + "epoch": 0.07, + "learning_rate": 4.9939638774619216e-05, + "loss": 1.1501, + "step": 120 + }, + { + "epoch": 0.07, + "learning_rate": 4.99345061218679e-05, + "loss": 1.1955, + "step": 125 + }, + { + "epoch": 0.07, + "learning_rate": 4.9929164346569756e-05, + "loss": 1.1724, + "step": 130 + }, + { + "epoch": 0.07, + "learning_rate": 4.9923613493525576e-05, + "loss": 1.177, + "step": 135 + }, + { + "epoch": 0.08, + "learning_rate": 4.991785360928968e-05, + "loss": 1.1418, + "step": 140 + }, + { + "epoch": 0.08, + "learning_rate": 4.991188474216947e-05, + "loss": 1.1898, + "step": 145 + }, + { + "epoch": 0.08, + "learning_rate": 4.9905706942225094e-05, + "loss": 1.1479, + "step": 150 + }, + { + "epoch": 0.09, + "learning_rate": 4.9899320261268966e-05, + "loss": 1.1356, + "step": 155 + }, + { + "epoch": 0.09, + "learning_rate": 4.989272475286537e-05, + "loss": 1.1397, + "step": 160 + }, + { + "epoch": 0.09, + "learning_rate": 4.9885920472330004e-05, + "loss": 1.1215, + "step": 165 + }, + { + "epoch": 0.09, + "learning_rate": 4.9878907476729516e-05, + "loss": 1.167, + "step": 170 + }, + { + "epoch": 0.1, + "learning_rate": 4.9871685824881e-05, + "loss": 1.1219, + "step": 175 + }, + { + "epoch": 0.1, + "learning_rate": 4.9864255577351534e-05, + "loss": 1.0835, + "step": 180 + }, + { + "epoch": 0.1, + "learning_rate": 4.985661679645769e-05, + "loss": 1.0721, + "step": 185 + }, + { + "epoch": 0.11, + "learning_rate": 4.9848769546264915e-05, + "loss": 1.0692, + "step": 190 + }, + { + "epoch": 0.11, + "learning_rate": 4.9840713892587146e-05, + "loss": 1.0488, + "step": 195 + }, + { + "epoch": 0.11, + "learning_rate": 4.983244990298609e-05, + "loss": 1.1285, + "step": 200 + }, + { + "epoch": 0.11, + "learning_rate": 4.982397764677081e-05, + "loss": 1.0832, + "step": 205 + }, + { + "epoch": 0.12, + "learning_rate": 4.981529719499704e-05, + "loss": 1.0652, + "step": 210 + }, + { + "epoch": 0.12, + "learning_rate": 4.980640862046663e-05, + "loss": 1.1043, + "step": 215 + }, + { + "epoch": 0.12, + "learning_rate": 4.979731199772693e-05, + "loss": 1.112, + "step": 220 + }, + { + "epoch": 0.12, + "learning_rate": 4.9788007403070146e-05, + "loss": 1.1029, + "step": 225 + }, + { + "epoch": 0.13, + "learning_rate": 4.977849491453277e-05, + "loss": 1.0869, + "step": 230 + }, + { + "epoch": 0.13, + "learning_rate": 4.976877461189481e-05, + "loss": 1.0843, + "step": 235 + }, + { + "epoch": 0.13, + "learning_rate": 4.975884657667922e-05, + "loss": 1.0789, + "step": 240 + }, + { + "epoch": 0.14, + "learning_rate": 4.974871089215118e-05, + "loss": 1.0449, + "step": 245 + }, + { + "epoch": 0.14, + "learning_rate": 4.9738367643317405e-05, + "loss": 1.1053, + "step": 250 + }, + { + "epoch": 0.14, + "learning_rate": 4.9727816916925395e-05, + "loss": 1.0651, + "step": 255 + }, + { + "epoch": 0.14, + "learning_rate": 4.971705880146276e-05, + "loss": 1.0828, + "step": 260 + }, + { + "epoch": 0.15, + "learning_rate": 4.970609338715646e-05, + "loss": 1.0932, + "step": 265 + }, + { + "epoch": 0.15, + "learning_rate": 4.969492076597203e-05, + "loss": 1.0648, + "step": 270 + }, + { + "epoch": 0.15, + "learning_rate": 4.968354103161283e-05, + "loss": 1.0948, + "step": 275 + }, + { + "epoch": 0.15, + "learning_rate": 4.967195427951926e-05, + "loss": 1.0721, + "step": 280 + }, + { + "epoch": 0.16, + "learning_rate": 4.9660160606867936e-05, + "loss": 1.124, + "step": 285 + }, + { + "epoch": 0.16, + "learning_rate": 4.9648160112570896e-05, + "loss": 1.0963, + "step": 290 + }, + { + "epoch": 0.16, + "learning_rate": 4.9635952897274773e-05, + "loss": 1.1078, + "step": 295 + }, + { + "epoch": 0.17, + "learning_rate": 4.9623539063359925e-05, + "loss": 1.1059, + "step": 300 + }, + { + "epoch": 0.17, + "learning_rate": 4.961091871493962e-05, + "loss": 1.1032, + "step": 305 + }, + { + "epoch": 0.17, + "learning_rate": 4.959809195785912e-05, + "loss": 1.0595, + "step": 310 + }, + { + "epoch": 0.17, + "learning_rate": 4.958505889969481e-05, + "loss": 1.1096, + "step": 315 + }, + { + "epoch": 0.18, + "learning_rate": 4.957181964975329e-05, + "loss": 1.0589, + "step": 320 + }, + { + "epoch": 0.18, + "learning_rate": 4.955837431907049e-05, + "loss": 1.0608, + "step": 325 + }, + { + "epoch": 0.18, + "learning_rate": 4.954472302041069e-05, + "loss": 1.0819, + "step": 330 + }, + { + "epoch": 0.19, + "learning_rate": 4.9530865868265605e-05, + "loss": 1.0759, + "step": 335 + }, + { + "epoch": 0.19, + "learning_rate": 4.951680297885342e-05, + "loss": 1.0515, + "step": 340 + }, + { + "epoch": 0.19, + "learning_rate": 4.950253447011779e-05, + "loss": 1.0371, + "step": 345 + }, + { + "epoch": 0.19, + "learning_rate": 4.948806046172691e-05, + "loss": 1.0619, + "step": 350 + }, + { + "epoch": 0.2, + "learning_rate": 4.947338107507245e-05, + "loss": 1.0757, + "step": 355 + }, + { + "epoch": 0.2, + "learning_rate": 4.945849643326857e-05, + "loss": 1.0686, + "step": 360 + }, + { + "epoch": 0.2, + "learning_rate": 4.9443406661150874e-05, + "loss": 1.0809, + "step": 365 + }, + { + "epoch": 0.2, + "learning_rate": 4.942811188527537e-05, + "loss": 1.0704, + "step": 370 + }, + { + "epoch": 0.21, + "learning_rate": 4.941261223391742e-05, + "loss": 1.0655, + "step": 375 + }, + { + "epoch": 0.21, + "learning_rate": 4.939690783707063e-05, + "loss": 1.1182, + "step": 380 + }, + { + "epoch": 0.21, + "learning_rate": 4.938099882644578e-05, + "loss": 1.081, + "step": 385 + }, + { + "epoch": 0.22, + "learning_rate": 4.9364885335469734e-05, + "loss": 1.0792, + "step": 390 + }, + { + "epoch": 0.22, + "learning_rate": 4.93485674992843e-05, + "loss": 1.0244, + "step": 395 + }, + { + "epoch": 0.22, + "learning_rate": 4.933204545474511e-05, + "loss": 1.0531, + "step": 400 + }, + { + "epoch": 0.22, + "learning_rate": 4.9315319340420465e-05, + "loss": 1.048, + "step": 405 + }, + { + "epoch": 0.23, + "learning_rate": 4.929838929659015e-05, + "loss": 1.04, + "step": 410 + }, + { + "epoch": 0.23, + "learning_rate": 4.9281255465244314e-05, + "loss": 1.0686, + "step": 415 + }, + { + "epoch": 0.23, + "learning_rate": 4.926391799008223e-05, + "loss": 1.0933, + "step": 420 + }, + { + "epoch": 0.24, + "learning_rate": 4.924637701651111e-05, + "loss": 1.0313, + "step": 425 + }, + { + "epoch": 0.24, + "learning_rate": 4.9228632691644874e-05, + "loss": 1.0811, + "step": 430 + }, + { + "epoch": 0.24, + "learning_rate": 4.921068516430293e-05, + "loss": 1.0617, + "step": 435 + }, + { + "epoch": 0.24, + "learning_rate": 4.919253458500892e-05, + "loss": 1.0482, + "step": 440 + }, + { + "epoch": 0.25, + "learning_rate": 4.9174181105989445e-05, + "loss": 1.0681, + "step": 445 + }, + { + "epoch": 0.25, + "learning_rate": 4.9155624881172834e-05, + "loss": 1.0476, + "step": 450 + }, + { + "epoch": 0.25, + "learning_rate": 4.913686606618777e-05, + "loss": 1.0463, + "step": 455 + }, + { + "epoch": 0.25, + "learning_rate": 4.911790481836208e-05, + "loss": 1.0213, + "step": 460 + }, + { + "epoch": 0.26, + "learning_rate": 4.909874129672133e-05, + "loss": 0.9855, + "step": 465 + }, + { + "epoch": 0.26, + "learning_rate": 4.907937566198757e-05, + "loss": 1.045, + "step": 470 + }, + { + "epoch": 0.26, + "learning_rate": 4.9059808076577914e-05, + "loss": 1.0442, + "step": 475 + }, + { + "epoch": 0.27, + "learning_rate": 4.904003870460323e-05, + "loss": 1.0354, + "step": 480 + }, + { + "epoch": 0.27, + "learning_rate": 4.9020067711866735e-05, + "loss": 1.0331, + "step": 485 + }, + { + "epoch": 0.27, + "learning_rate": 4.899989526586261e-05, + "loss": 1.0585, + "step": 490 + }, + { + "epoch": 0.27, + "learning_rate": 4.8979521535774636e-05, + "loss": 1.0223, + "step": 495 + }, + { + "epoch": 0.28, + "learning_rate": 4.895894669247468e-05, + "loss": 1.0118, + "step": 500 + }, + { + "epoch": 0.28, + "learning_rate": 4.8938170908521356e-05, + "loss": 1.0508, + "step": 505 + }, + { + "epoch": 0.28, + "learning_rate": 4.8917194358158534e-05, + "loss": 1.0656, + "step": 510 + }, + { + "epoch": 0.28, + "learning_rate": 4.8896017217313886e-05, + "loss": 1.0655, + "step": 515 + }, + { + "epoch": 0.29, + "learning_rate": 4.887463966359741e-05, + "loss": 1.0833, + "step": 520 + }, + { + "epoch": 0.29, + "learning_rate": 4.8853061876299956e-05, + "loss": 1.068, + "step": 525 + }, + { + "epoch": 0.29, + "learning_rate": 4.8831284036391684e-05, + "loss": 1.0487, + "step": 530 + }, + { + "epoch": 0.3, + "learning_rate": 4.880930632652058e-05, + "loss": 1.059, + "step": 535 + }, + { + "epoch": 0.3, + "learning_rate": 4.878712893101092e-05, + "loss": 1.0408, + "step": 540 + }, + { + "epoch": 0.3, + "learning_rate": 4.876475203586171e-05, + "loss": 0.9976, + "step": 545 + }, + { + "epoch": 0.3, + "learning_rate": 4.874217582874514e-05, + "loss": 1.0358, + "step": 550 + }, + { + "epoch": 0.31, + "learning_rate": 4.8719400499005e-05, + "loss": 1.0073, + "step": 555 + }, + { + "epoch": 0.31, + "learning_rate": 4.869642623765509e-05, + "loss": 1.0742, + "step": 560 + }, + { + "epoch": 0.31, + "learning_rate": 4.867325323737765e-05, + "loss": 1.0463, + "step": 565 + }, + { + "epoch": 0.32, + "learning_rate": 4.864988169252168e-05, + "loss": 1.0242, + "step": 570 + }, + { + "epoch": 0.32, + "learning_rate": 4.8626311799101375e-05, + "loss": 1.0773, + "step": 575 + }, + { + "epoch": 0.32, + "learning_rate": 4.860254375479446e-05, + "loss": 1.0371, + "step": 580 + }, + { + "epoch": 0.32, + "learning_rate": 4.8578577758940504e-05, + "loss": 1.0399, + "step": 585 + }, + { + "epoch": 0.33, + "learning_rate": 4.855441401253928e-05, + "loss": 1.0411, + "step": 590 + }, + { + "epoch": 0.33, + "learning_rate": 4.8530052718249076e-05, + "loss": 1.0476, + "step": 595 + }, + { + "epoch": 0.33, + "learning_rate": 4.850549408038498e-05, + "loss": 1.0075, + "step": 600 + }, + { + "epoch": 0.33, + "learning_rate": 4.848073830491717e-05, + "loss": 1.0045, + "step": 605 + }, + { + "epoch": 0.34, + "learning_rate": 4.845578559946923e-05, + "loss": 1.0368, + "step": 610 + }, + { + "epoch": 0.34, + "learning_rate": 4.8430636173316306e-05, + "loss": 1.0156, + "step": 615 + }, + { + "epoch": 0.34, + "learning_rate": 4.840529023738348e-05, + "loss": 1.0334, + "step": 620 + }, + { + "epoch": 0.35, + "learning_rate": 4.837974800424389e-05, + "loss": 0.9994, + "step": 625 + }, + { + "epoch": 0.35, + "learning_rate": 4.8354009688117026e-05, + "loss": 1.0467, + "step": 630 + }, + { + "epoch": 0.35, + "learning_rate": 4.8328075504866874e-05, + "loss": 0.9968, + "step": 635 + }, + { + "epoch": 0.35, + "learning_rate": 4.8301945672000164e-05, + "loss": 1.041, + "step": 640 + }, + { + "epoch": 0.36, + "learning_rate": 4.8275620408664487e-05, + "loss": 0.9811, + "step": 645 + }, + { + "epoch": 0.36, + "learning_rate": 4.8249099935646494e-05, + "loss": 1.0235, + "step": 650 + }, + { + "epoch": 0.36, + "learning_rate": 4.822238447537003e-05, + "loss": 1.0152, + "step": 655 + }, + { + "epoch": 0.36, + "learning_rate": 4.819547425189429e-05, + "loss": 1.038, + "step": 660 + }, + { + "epoch": 0.37, + "learning_rate": 4.81683694909119e-05, + "loss": 1.0387, + "step": 665 + }, + { + "epoch": 0.37, + "learning_rate": 4.814107041974707e-05, + "loss": 1.019, + "step": 670 + }, + { + "epoch": 0.37, + "learning_rate": 4.811357726735366e-05, + "loss": 1.0403, + "step": 675 + }, + { + "epoch": 0.38, + "learning_rate": 4.808589026431324e-05, + "loss": 1.0828, + "step": 680 + }, + { + "epoch": 0.38, + "learning_rate": 4.805800964283322e-05, + "loss": 1.0128, + "step": 685 + }, + { + "epoch": 0.38, + "learning_rate": 4.802993563674483e-05, + "loss": 1.0502, + "step": 690 + }, + { + "epoch": 0.38, + "learning_rate": 4.80016684815012e-05, + "loss": 1.0169, + "step": 695 + }, + { + "epoch": 0.39, + "learning_rate": 4.7973208414175406e-05, + "loss": 1.0316, + "step": 700 + }, + { + "epoch": 0.39, + "learning_rate": 4.794455567345842e-05, + "loss": 1.0263, + "step": 705 + }, + { + "epoch": 0.39, + "learning_rate": 4.791571049965714e-05, + "loss": 1.0801, + "step": 710 + }, + { + "epoch": 0.4, + "learning_rate": 4.7886673134692404e-05, + "loss": 1.0575, + "step": 715 + }, + { + "epoch": 0.4, + "learning_rate": 4.7857443822096905e-05, + "loss": 0.9767, + "step": 720 + }, + { + "epoch": 0.4, + "learning_rate": 4.782802280701319e-05, + "loss": 0.9666, + "step": 725 + }, + { + "epoch": 0.4, + "learning_rate": 4.77984103361916e-05, + "loss": 0.9983, + "step": 730 + }, + { + "epoch": 0.41, + "learning_rate": 4.776860665798816e-05, + "loss": 1.0219, + "step": 735 + }, + { + "epoch": 0.41, + "learning_rate": 4.773861202236257e-05, + "loss": 0.9963, + "step": 740 + }, + { + "epoch": 0.41, + "learning_rate": 4.770842668087602e-05, + "loss": 1.0219, + "step": 745 + }, + { + "epoch": 0.41, + "learning_rate": 4.767805088668916e-05, + "loss": 1.0029, + "step": 750 + }, + { + "epoch": 0.42, + "learning_rate": 4.7647484894559936e-05, + "loss": 1.0077, + "step": 755 + }, + { + "epoch": 0.42, + "learning_rate": 4.7616728960841444e-05, + "loss": 0.9908, + "step": 760 + }, + { + "epoch": 0.42, + "learning_rate": 4.758578334347981e-05, + "loss": 1.0092, + "step": 765 + }, + { + "epoch": 0.43, + "learning_rate": 4.7554648302012015e-05, + "loss": 0.9874, + "step": 770 + }, + { + "epoch": 0.43, + "learning_rate": 4.7523324097563706e-05, + "loss": 1.0185, + "step": 775 + }, + { + "epoch": 0.43, + "learning_rate": 4.749181099284703e-05, + "loss": 1.0533, + "step": 780 + }, + { + "epoch": 0.43, + "learning_rate": 4.746010925215839e-05, + "loss": 1.0083, + "step": 785 + }, + { + "epoch": 0.44, + "learning_rate": 4.74282191413763e-05, + "loss": 1.0191, + "step": 790 + }, + { + "epoch": 0.44, + "learning_rate": 4.7396140927959045e-05, + "loss": 0.9786, + "step": 795 + }, + { + "epoch": 0.44, + "learning_rate": 4.7363874880942574e-05, + "loss": 1.0197, + "step": 800 + }, + { + "epoch": 0.45, + "learning_rate": 4.733142127093813e-05, + "loss": 1.0471, + "step": 805 + }, + { + "epoch": 0.45, + "learning_rate": 4.7298780370130014e-05, + "loss": 1.0134, + "step": 810 + }, + { + "epoch": 0.45, + "learning_rate": 4.726595245227336e-05, + "loss": 0.9801, + "step": 815 + }, + { + "epoch": 0.45, + "learning_rate": 4.723293779269173e-05, + "loss": 0.9944, + "step": 820 + }, + { + "epoch": 0.46, + "learning_rate": 4.7199736668274924e-05, + "loss": 1.0358, + "step": 825 + }, + { + "epoch": 0.46, + "learning_rate": 4.716634935747655e-05, + "loss": 0.9907, + "step": 830 + }, + { + "epoch": 0.46, + "learning_rate": 4.713277614031177e-05, + "loss": 1.0698, + "step": 835 + }, + { + "epoch": 0.46, + "learning_rate": 4.70990172983549e-05, + "loss": 1.0466, + "step": 840 + }, + { + "epoch": 0.47, + "learning_rate": 4.706507311473707e-05, + "loss": 0.9489, + "step": 845 + }, + { + "epoch": 0.47, + "learning_rate": 4.703094387414385e-05, + "loss": 0.9936, + "step": 850 + }, + { + "epoch": 0.47, + "learning_rate": 4.699662986281288e-05, + "loss": 0.9672, + "step": 855 + }, + { + "epoch": 0.48, + "learning_rate": 4.696213136853141e-05, + "loss": 1.0164, + "step": 860 + }, + { + "epoch": 0.48, + "learning_rate": 4.6927448680633954e-05, + "loss": 0.9817, + "step": 865 + }, + { + "epoch": 0.48, + "learning_rate": 4.689258208999983e-05, + "loss": 1.028, + "step": 870 + }, + { + "epoch": 0.48, + "learning_rate": 4.6857531889050716e-05, + "loss": 1.0239, + "step": 875 + }, + { + "epoch": 0.49, + "learning_rate": 4.682229837174821e-05, + "loss": 1.0378, + "step": 880 + }, + { + "epoch": 0.49, + "learning_rate": 4.678688183359135e-05, + "loss": 0.9908, + "step": 885 + }, + { + "epoch": 0.49, + "learning_rate": 4.675128257161418e-05, + "loss": 0.9384, + "step": 890 + }, + { + "epoch": 0.49, + "learning_rate": 4.671550088438319e-05, + "loss": 0.9481, + "step": 895 + }, + { + "epoch": 0.5, + "learning_rate": 4.6679537071994874e-05, + "loss": 0.9541, + "step": 900 + }, + { + "epoch": 0.5, + "learning_rate": 4.6643391436073165e-05, + "loss": 0.9787, + "step": 905 + }, + { + "epoch": 0.5, + "learning_rate": 4.660706427976693e-05, + "loss": 0.9897, + "step": 910 + }, + { + "epoch": 0.51, + "learning_rate": 4.657055590774745e-05, + "loss": 0.9947, + "step": 915 + }, + { + "epoch": 0.51, + "learning_rate": 4.6533866626205805e-05, + "loss": 0.9614, + "step": 920 + }, + { + "epoch": 0.51, + "learning_rate": 4.649699674285036e-05, + "loss": 1.0323, + "step": 925 + }, + { + "epoch": 0.51, + "learning_rate": 4.645994656690417e-05, + "loss": 1.0345, + "step": 930 + }, + { + "epoch": 0.52, + "learning_rate": 4.642271640910235e-05, + "loss": 1.0432, + "step": 935 + }, + { + "epoch": 0.52, + "learning_rate": 4.638530658168954e-05, + "loss": 1.0102, + "step": 940 + }, + { + "epoch": 0.52, + "learning_rate": 4.6347717398417203e-05, + "loss": 1.0361, + "step": 945 + }, + { + "epoch": 0.53, + "learning_rate": 4.6309949174541096e-05, + "loss": 0.9818, + "step": 950 + }, + { + "epoch": 0.53, + "learning_rate": 4.627200222681851e-05, + "loss": 1.0293, + "step": 955 + }, + { + "epoch": 0.53, + "learning_rate": 4.6233876873505694e-05, + "loss": 1.0125, + "step": 960 + }, + { + "epoch": 0.53, + "learning_rate": 4.619557343435516e-05, + "loss": 1.0039, + "step": 965 + }, + { + "epoch": 0.54, + "learning_rate": 4.615709223061302e-05, + "loss": 1.0146, + "step": 970 + }, + { + "epoch": 0.54, + "learning_rate": 4.611843358501624e-05, + "loss": 1.0352, + "step": 975 + }, + { + "epoch": 0.54, + "learning_rate": 4.6079597821789993e-05, + "loss": 1.0405, + "step": 980 + }, + { + "epoch": 0.54, + "learning_rate": 4.604058526664491e-05, + "loss": 0.9834, + "step": 985 + }, + { + "epoch": 0.55, + "learning_rate": 4.600139624677436e-05, + "loss": 1.0252, + "step": 990 + }, + { + "epoch": 0.55, + "learning_rate": 4.596203109085168e-05, + "loss": 1.0516, + "step": 995 + }, + { + "epoch": 0.55, + "learning_rate": 4.5922490129027464e-05, + "loss": 0.9762, + "step": 1000 + }, + { + "epoch": 0.56, + "learning_rate": 4.588277369292674e-05, + "loss": 0.9801, + "step": 1005 + }, + { + "epoch": 0.56, + "learning_rate": 4.5842882115646234e-05, + "loss": 1.0127, + "step": 1010 + }, + { + "epoch": 0.56, + "learning_rate": 4.580281573175157e-05, + "loss": 1.0415, + "step": 1015 + }, + { + "epoch": 0.56, + "learning_rate": 4.576257487727442e-05, + "loss": 0.9974, + "step": 1020 + }, + { + "epoch": 0.57, + "learning_rate": 4.572215988970974e-05, + "loss": 1.0097, + "step": 1025 + }, + { + "epoch": 0.57, + "learning_rate": 4.568157110801293e-05, + "loss": 1.0004, + "step": 1030 + }, + { + "epoch": 0.57, + "learning_rate": 4.5640808872596944e-05, + "loss": 0.9932, + "step": 1035 + }, + { + "epoch": 0.58, + "learning_rate": 4.5599873525329505e-05, + "loss": 0.9437, + "step": 1040 + }, + { + "epoch": 0.58, + "learning_rate": 4.555876540953019e-05, + "loss": 1.0004, + "step": 1045 + }, + { + "epoch": 0.58, + "learning_rate": 4.551748486996755e-05, + "loss": 0.9743, + "step": 1050 + }, + { + "epoch": 0.58, + "learning_rate": 4.547603225285626e-05, + "loss": 1.0303, + "step": 1055 + }, + { + "epoch": 0.59, + "learning_rate": 4.543440790585417e-05, + "loss": 1.0225, + "step": 1060 + }, + { + "epoch": 0.59, + "learning_rate": 4.539261217805939e-05, + "loss": 1.0102, + "step": 1065 + }, + { + "epoch": 0.59, + "learning_rate": 4.535064542000743e-05, + "loss": 1.0087, + "step": 1070 + }, + { + "epoch": 0.59, + "learning_rate": 4.5308507983668165e-05, + "loss": 0.952, + "step": 1075 + }, + { + "epoch": 0.6, + "learning_rate": 4.526620022244293e-05, + "loss": 0.9767, + "step": 1080 + }, + { + "epoch": 0.6, + "learning_rate": 4.522372249116158e-05, + "loss": 1.0049, + "step": 1085 + }, + { + "epoch": 0.6, + "learning_rate": 4.5181075146079456e-05, + "loss": 1.0397, + "step": 1090 + }, + { + "epoch": 0.61, + "learning_rate": 4.5138258544874455e-05, + "loss": 1.0167, + "step": 1095 + }, + { + "epoch": 0.61, + "learning_rate": 4.5095273046643985e-05, + "loss": 0.9966, + "step": 1100 + }, + { + "epoch": 0.61, + "learning_rate": 4.5052119011901986e-05, + "loss": 0.983, + "step": 1105 + }, + { + "epoch": 0.61, + "learning_rate": 4.500879680257587e-05, + "loss": 1.0301, + "step": 1110 + }, + { + "epoch": 0.62, + "learning_rate": 4.4965306782003535e-05, + "loss": 1.0371, + "step": 1115 + }, + { + "epoch": 0.62, + "learning_rate": 4.492164931493028e-05, + "loss": 1.0083, + "step": 1120 + }, + { + "epoch": 0.62, + "learning_rate": 4.487782476750575e-05, + "loss": 0.9728, + "step": 1125 + }, + { + "epoch": 0.62, + "learning_rate": 4.4833833507280884e-05, + "loss": 0.9892, + "step": 1130 + }, + { + "epoch": 0.63, + "learning_rate": 4.4789675903204805e-05, + "loss": 0.9847, + "step": 1135 + }, + { + "epoch": 0.63, + "learning_rate": 4.474535232562176e-05, + "loss": 1.0545, + "step": 1140 + }, + { + "epoch": 0.63, + "learning_rate": 4.470086314626797e-05, + "loss": 1.0204, + "step": 1145 + }, + { + "epoch": 0.64, + "learning_rate": 4.465620873826856e-05, + "loss": 0.9648, + "step": 1150 + }, + { + "epoch": 0.64, + "learning_rate": 4.46113894761344e-05, + "loss": 0.9545, + "step": 1155 + }, + { + "epoch": 0.64, + "learning_rate": 4.456640573575896e-05, + "loss": 1.0256, + "step": 1160 + }, + { + "epoch": 0.64, + "learning_rate": 4.4521257894415183e-05, + "loss": 0.99, + "step": 1165 + }, + { + "epoch": 0.65, + "learning_rate": 4.44759463307523e-05, + "loss": 0.9906, + "step": 1170 + }, + { + "epoch": 0.65, + "learning_rate": 4.443047142479266e-05, + "loss": 0.9876, + "step": 1175 + }, + { + "epoch": 0.65, + "learning_rate": 4.4384833557928553e-05, + "loss": 1.0495, + "step": 1180 + }, + { + "epoch": 0.66, + "learning_rate": 4.4339033112918966e-05, + "loss": 0.9869, + "step": 1185 + }, + { + "epoch": 0.66, + "learning_rate": 4.4293070473886456e-05, + "loss": 1.0299, + "step": 1190 + }, + { + "epoch": 0.66, + "learning_rate": 4.424694602631385e-05, + "loss": 1.0073, + "step": 1195 + }, + { + "epoch": 0.66, + "learning_rate": 4.420066015704105e-05, + "loss": 1.0023, + "step": 1200 + }, + { + "epoch": 0.67, + "learning_rate": 4.41542132542618e-05, + "loss": 0.9462, + "step": 1205 + }, + { + "epoch": 0.67, + "learning_rate": 4.410760570752037e-05, + "loss": 1.0116, + "step": 1210 + }, + { + "epoch": 0.67, + "learning_rate": 4.4060837907708375e-05, + "loss": 0.9652, + "step": 1215 + }, + { + "epoch": 0.67, + "learning_rate": 4.401391024706142e-05, + "loss": 1.0411, + "step": 1220 + }, + { + "epoch": 0.68, + "learning_rate": 4.396682311915586e-05, + "loss": 0.9691, + "step": 1225 + }, + { + "epoch": 0.68, + "learning_rate": 4.3919576918905495e-05, + "loss": 0.961, + "step": 1230 + }, + { + "epoch": 0.68, + "learning_rate": 4.387217204255819e-05, + "loss": 0.9602, + "step": 1235 + }, + { + "epoch": 0.69, + "learning_rate": 4.3824608887692666e-05, + "loss": 1.0197, + "step": 1240 + }, + { + "epoch": 0.69, + "learning_rate": 4.377688785321507e-05, + "loss": 0.9982, + "step": 1245 + }, + { + "epoch": 0.69, + "learning_rate": 4.372900933935569e-05, + "loss": 0.986, + "step": 1250 + }, + { + "epoch": 0.69, + "learning_rate": 4.368097374766556e-05, + "loss": 0.9744, + "step": 1255 + }, + { + "epoch": 0.7, + "learning_rate": 4.3632781481013105e-05, + "loss": 1.0078, + "step": 1260 + }, + { + "epoch": 0.7, + "learning_rate": 4.358443294358077e-05, + "loss": 0.9865, + "step": 1265 + }, + { + "epoch": 0.7, + "learning_rate": 4.35359285408616e-05, + "loss": 0.976, + "step": 1270 + }, + { + "epoch": 0.71, + "learning_rate": 4.348726867965591e-05, + "loss": 1.0141, + "step": 1275 + }, + { + "epoch": 0.71, + "learning_rate": 4.343845376806777e-05, + "loss": 0.952, + "step": 1280 + }, + { + "epoch": 0.71, + "learning_rate": 4.338948421550169e-05, + "loss": 1.0239, + "step": 1285 + }, + { + "epoch": 0.71, + "learning_rate": 4.334036043265909e-05, + "loss": 0.994, + "step": 1290 + }, + { + "epoch": 0.72, + "learning_rate": 4.329108283153492e-05, + "loss": 1.0505, + "step": 1295 + }, + { + "epoch": 0.72, + "learning_rate": 4.3241651825414195e-05, + "loss": 0.9919, + "step": 1300 + }, + { + "epoch": 0.72, + "learning_rate": 4.31920678288685e-05, + "loss": 1.0169, + "step": 1305 + }, + { + "epoch": 0.72, + "learning_rate": 4.3142331257752546e-05, + "loss": 0.9938, + "step": 1310 + }, + { + "epoch": 0.73, + "learning_rate": 4.309244252920064e-05, + "loss": 0.92, + "step": 1315 + }, + { + "epoch": 0.73, + "learning_rate": 4.304240206162326e-05, + "loss": 0.9537, + "step": 1320 + }, + { + "epoch": 0.73, + "learning_rate": 4.299221027470345e-05, + "loss": 0.996, + "step": 1325 + }, + { + "epoch": 0.74, + "learning_rate": 4.294186758939339e-05, + "loss": 0.9652, + "step": 1330 + }, + { + "epoch": 0.74, + "learning_rate": 4.2891374427910795e-05, + "loss": 1.0138, + "step": 1335 + }, + { + "epoch": 0.74, + "learning_rate": 4.284073121373544e-05, + "loss": 0.9267, + "step": 1340 + }, + { + "epoch": 0.74, + "learning_rate": 4.278993837160553e-05, + "loss": 0.9899, + "step": 1345 + }, + { + "epoch": 0.75, + "learning_rate": 4.273899632751422e-05, + "loss": 0.963, + "step": 1350 + }, + { + "epoch": 0.75, + "learning_rate": 4.2687905508705974e-05, + "loss": 1.0066, + "step": 1355 + }, + { + "epoch": 0.75, + "learning_rate": 4.263666634367303e-05, + "loss": 0.9939, + "step": 1360 + }, + { + "epoch": 0.75, + "learning_rate": 4.258527926215178e-05, + "loss": 0.9667, + "step": 1365 + }, + { + "epoch": 0.76, + "learning_rate": 4.253374469511917e-05, + "loss": 0.9999, + "step": 1370 + }, + { + "epoch": 0.76, + "learning_rate": 4.248206307478909e-05, + "loss": 0.9738, + "step": 1375 + }, + { + "epoch": 0.76, + "learning_rate": 4.243023483460875e-05, + "loss": 0.9829, + "step": 1380 + }, + { + "epoch": 0.77, + "learning_rate": 4.237826040925503e-05, + "loss": 1.0107, + "step": 1385 + }, + { + "epoch": 0.77, + "learning_rate": 4.232614023463088e-05, + "loss": 0.9992, + "step": 1390 + }, + { + "epoch": 0.77, + "learning_rate": 4.227387474786159e-05, + "loss": 0.9566, + "step": 1395 + }, + { + "epoch": 0.77, + "learning_rate": 4.222146438729119e-05, + "loss": 0.9722, + "step": 1400 + }, + { + "epoch": 0.78, + "learning_rate": 4.216890959247873e-05, + "loss": 0.988, + "step": 1405 + }, + { + "epoch": 0.78, + "learning_rate": 4.211621080419463e-05, + "loss": 1.0103, + "step": 1410 + }, + { + "epoch": 0.78, + "learning_rate": 4.206336846441695e-05, + "loss": 0.9805, + "step": 1415 + }, + { + "epoch": 0.79, + "learning_rate": 4.201038301632772e-05, + "loss": 1.0177, + "step": 1420 + }, + { + "epoch": 0.79, + "learning_rate": 4.195725490430917e-05, + "loss": 0.9938, + "step": 1425 + }, + { + "epoch": 0.79, + "learning_rate": 4.190398457394007e-05, + "loss": 1.0276, + "step": 1430 + }, + { + "epoch": 0.79, + "learning_rate": 4.1850572471991924e-05, + "loss": 0.9909, + "step": 1435 + }, + { + "epoch": 0.8, + "learning_rate": 4.1797019046425264e-05, + "loss": 0.9794, + "step": 1440 + }, + { + "epoch": 0.8, + "learning_rate": 4.1743324746385914e-05, + "loss": 1.0073, + "step": 1445 + }, + { + "epoch": 0.8, + "learning_rate": 4.1689490022201154e-05, + "loss": 1.0106, + "step": 1450 + }, + { + "epoch": 0.8, + "learning_rate": 4.163551532537601e-05, + "loss": 1.0241, + "step": 1455 + }, + { + "epoch": 0.81, + "learning_rate": 4.1581401108589425e-05, + "loss": 1.0178, + "step": 1460 + }, + { + "epoch": 0.81, + "learning_rate": 4.1527147825690495e-05, + "loss": 0.9426, + "step": 1465 + }, + { + "epoch": 0.81, + "learning_rate": 4.1472755931694626e-05, + "loss": 0.9642, + "step": 1470 + }, + { + "epoch": 0.82, + "learning_rate": 4.141822588277976e-05, + "loss": 0.9678, + "step": 1475 + }, + { + "epoch": 0.82, + "learning_rate": 4.136355813628251e-05, + "loss": 0.9654, + "step": 1480 + }, + { + "epoch": 0.82, + "learning_rate": 4.130875315069435e-05, + "loss": 0.9748, + "step": 1485 + }, + { + "epoch": 0.82, + "learning_rate": 4.125381138565775e-05, + "loss": 0.9321, + "step": 1490 + }, + { + "epoch": 0.83, + "learning_rate": 4.1198733301962346e-05, + "loss": 0.9885, + "step": 1495 + }, + { + "epoch": 0.83, + "learning_rate": 4.114351936154105e-05, + "loss": 0.9385, + "step": 1500 + }, + { + "epoch": 0.83, + "learning_rate": 4.108817002746619e-05, + "loss": 0.9362, + "step": 1505 + }, + { + "epoch": 0.83, + "learning_rate": 4.1032685763945625e-05, + "loss": 0.9764, + "step": 1510 + }, + { + "epoch": 0.84, + "learning_rate": 4.097706703631886e-05, + "loss": 0.961, + "step": 1515 + }, + { + "epoch": 0.84, + "learning_rate": 4.092131431105312e-05, + "loss": 0.9818, + "step": 1520 + }, + { + "epoch": 0.84, + "learning_rate": 4.086542805573945e-05, + "loss": 0.9855, + "step": 1525 + }, + { + "epoch": 0.85, + "learning_rate": 4.080940873908881e-05, + "loss": 0.9618, + "step": 1530 + }, + { + "epoch": 0.85, + "learning_rate": 4.07532568309281e-05, + "loss": 1.0102, + "step": 1535 + }, + { + "epoch": 0.85, + "learning_rate": 4.069697280219628e-05, + "loss": 0.9773, + "step": 1540 + }, + { + "epoch": 0.85, + "learning_rate": 4.0640557124940376e-05, + "loss": 0.9687, + "step": 1545 + }, + { + "epoch": 0.86, + "learning_rate": 4.058401027231152e-05, + "loss": 0.9818, + "step": 1550 + }, + { + "epoch": 0.86, + "learning_rate": 4.052733271856103e-05, + "loss": 0.9911, + "step": 1555 + }, + { + "epoch": 0.86, + "learning_rate": 4.0470524939036355e-05, + "loss": 0.9694, + "step": 1560 + }, + { + "epoch": 0.87, + "learning_rate": 4.0413587410177155e-05, + "loss": 0.9374, + "step": 1565 + }, + { + "epoch": 0.87, + "learning_rate": 4.035652060951128e-05, + "loss": 0.956, + "step": 1570 + }, + { + "epoch": 0.87, + "learning_rate": 4.0299325015650774e-05, + "loss": 0.9491, + "step": 1575 + }, + { + "epoch": 0.87, + "learning_rate": 4.024200110828783e-05, + "loss": 0.9569, + "step": 1580 + }, + { + "epoch": 0.88, + "learning_rate": 4.018454936819082e-05, + "loss": 0.9627, + "step": 1585 + }, + { + "epoch": 0.88, + "learning_rate": 4.012697027720018e-05, + "loss": 0.9922, + "step": 1590 + }, + { + "epoch": 0.88, + "learning_rate": 4.0069264318224506e-05, + "loss": 0.9703, + "step": 1595 + }, + { + "epoch": 0.88, + "learning_rate": 4.0011431975236337e-05, + "loss": 0.9436, + "step": 1600 + }, + { + "epoch": 0.89, + "learning_rate": 3.995347373326822e-05, + "loss": 0.961, + "step": 1605 + }, + { + "epoch": 0.89, + "learning_rate": 3.989539007840861e-05, + "loss": 0.9247, + "step": 1610 + }, + { + "epoch": 0.89, + "learning_rate": 3.983718149779775e-05, + "loss": 0.9537, + "step": 1615 + }, + { + "epoch": 0.9, + "learning_rate": 3.9778848479623656e-05, + "loss": 0.9415, + "step": 1620 + }, + { + "epoch": 0.9, + "learning_rate": 3.972039151311795e-05, + "loss": 0.9954, + "step": 1625 + }, + { + "epoch": 0.9, + "learning_rate": 3.966181108855183e-05, + "loss": 0.9451, + "step": 1630 + }, + { + "epoch": 0.9, + "learning_rate": 3.960310769723189e-05, + "loss": 0.9977, + "step": 1635 + }, + { + "epoch": 0.91, + "learning_rate": 3.9544281831496034e-05, + "loss": 0.9806, + "step": 1640 + }, + { + "epoch": 0.91, + "learning_rate": 3.9485333984709374e-05, + "loss": 0.9851, + "step": 1645 + }, + { + "epoch": 0.91, + "learning_rate": 3.942626465126001e-05, + "loss": 1.0089, + "step": 1650 + }, + { + "epoch": 0.92, + "learning_rate": 3.9367074326555e-05, + "loss": 0.9562, + "step": 1655 + }, + { + "epoch": 0.92, + "learning_rate": 3.930776350701609e-05, + "loss": 0.9892, + "step": 1660 + }, + { + "epoch": 0.92, + "learning_rate": 3.92483326900756e-05, + "loss": 0.9875, + "step": 1665 + }, + { + "epoch": 0.92, + "learning_rate": 3.91887823741723e-05, + "loss": 1.0012, + "step": 1670 + }, + { + "epoch": 0.93, + "learning_rate": 3.9129113058747136e-05, + "loss": 0.97, + "step": 1675 + }, + { + "epoch": 0.93, + "learning_rate": 3.9069325244239095e-05, + "loss": 0.9793, + "step": 1680 + }, + { + "epoch": 0.93, + "learning_rate": 3.900941943208103e-05, + "loss": 0.9864, + "step": 1685 + }, + { + "epoch": 0.93, + "learning_rate": 3.894939612469539e-05, + "loss": 0.9596, + "step": 1690 + }, + { + "epoch": 0.94, + "learning_rate": 3.888925582549006e-05, + "loss": 0.9679, + "step": 1695 + }, + { + "epoch": 0.94, + "learning_rate": 3.882899903885412e-05, + "loss": 0.9395, + "step": 1700 + }, + { + "epoch": 0.94, + "learning_rate": 3.876862627015361e-05, + "loss": 0.9774, + "step": 1705 + }, + { + "epoch": 0.95, + "learning_rate": 3.87081380257273e-05, + "loss": 0.9686, + "step": 1710 + }, + { + "epoch": 0.95, + "learning_rate": 3.864753481288244e-05, + "loss": 0.953, + "step": 1715 + }, + { + "epoch": 0.95, + "learning_rate": 3.8586817139890515e-05, + "loss": 0.9719, + "step": 1720 + }, + { + "epoch": 0.95, + "learning_rate": 3.852598551598294e-05, + "loss": 0.9748, + "step": 1725 + }, + { + "epoch": 0.96, + "learning_rate": 3.8465040451346874e-05, + "loss": 0.9741, + "step": 1730 + }, + { + "epoch": 0.96, + "learning_rate": 3.8403982457120836e-05, + "loss": 0.9747, + "step": 1735 + }, + { + "epoch": 0.96, + "learning_rate": 3.834281204539051e-05, + "loss": 0.9791, + "step": 1740 + }, + { + "epoch": 0.96, + "learning_rate": 3.828152972918438e-05, + "loss": 0.9704, + "step": 1745 + }, + { + "epoch": 0.97, + "learning_rate": 3.82201360224695e-05, + "loss": 0.9556, + "step": 1750 + }, + { + "epoch": 0.97, + "learning_rate": 3.815863144014711e-05, + "loss": 0.9552, + "step": 1755 + }, + { + "epoch": 0.97, + "learning_rate": 3.809701649804834e-05, + "loss": 0.9594, + "step": 1760 + }, + { + "epoch": 0.98, + "learning_rate": 3.8035291712929926e-05, + "loss": 0.959, + "step": 1765 + }, + { + "epoch": 0.98, + "learning_rate": 3.797345760246982e-05, + "loss": 1.0254, + "step": 1770 + }, + { + "epoch": 0.98, + "learning_rate": 3.791151468526289e-05, + "loss": 0.9925, + "step": 1775 + }, + { + "epoch": 0.98, + "learning_rate": 3.784946348081654e-05, + "loss": 0.9516, + "step": 1780 + }, + { + "epoch": 0.99, + "learning_rate": 3.7787304509546365e-05, + "loss": 0.954, + "step": 1785 + }, + { + "epoch": 0.99, + "learning_rate": 3.7725038292771774e-05, + "loss": 0.9465, + "step": 1790 + }, + { + "epoch": 0.99, + "learning_rate": 3.766266535271167e-05, + "loss": 0.9792, + "step": 1795 + }, + { + "epoch": 1.0, + "learning_rate": 3.760018621248e-05, + "loss": 0.964, + "step": 1800 + }, + { + "epoch": 1.0, + "learning_rate": 3.75376013960814e-05, + "loss": 0.9419, + "step": 1805 + }, + { + "epoch": 1.0, + "learning_rate": 3.747491142840681e-05, + "loss": 0.9818, + "step": 1810 + }, + { + "epoch": 1.0, + "learning_rate": 3.741211683522904e-05, + "loss": 0.9153, + "step": 1815 + }, + { + "epoch": 1.01, + "learning_rate": 3.734921814319841e-05, + "loss": 0.955, + "step": 1820 + }, + { + "epoch": 1.01, + "learning_rate": 3.728621587983828e-05, + "loss": 0.9467, + "step": 1825 + }, + { + "epoch": 1.01, + "learning_rate": 3.722311057354067e-05, + "loss": 0.9816, + "step": 1830 + }, + { + "epoch": 1.01, + "learning_rate": 3.715990275356178e-05, + "loss": 0.9727, + "step": 1835 + }, + { + "epoch": 1.02, + "learning_rate": 3.7096592950017617e-05, + "loss": 0.9822, + "step": 1840 + }, + { + "epoch": 1.02, + "learning_rate": 3.703318169387947e-05, + "loss": 0.942, + "step": 1845 + }, + { + "epoch": 1.02, + "learning_rate": 3.696966951696952e-05, + "loss": 0.9306, + "step": 1850 + }, + { + "epoch": 1.03, + "learning_rate": 3.690605695195637e-05, + "loss": 0.9654, + "step": 1855 + }, + { + "epoch": 1.03, + "learning_rate": 3.684234453235054e-05, + "loss": 0.9592, + "step": 1860 + }, + { + "epoch": 1.03, + "learning_rate": 3.677853279250003e-05, + "loss": 0.988, + "step": 1865 + }, + { + "epoch": 1.03, + "learning_rate": 3.671462226758583e-05, + "loss": 0.9462, + "step": 1870 + }, + { + "epoch": 1.04, + "learning_rate": 3.665061349361742e-05, + "loss": 0.9685, + "step": 1875 + }, + { + "epoch": 1.04, + "learning_rate": 3.658650700742828e-05, + "loss": 0.9772, + "step": 1880 + }, + { + "epoch": 1.04, + "learning_rate": 3.6522303346671404e-05, + "loss": 0.9482, + "step": 1885 + }, + { + "epoch": 1.05, + "learning_rate": 3.645800304981477e-05, + "loss": 1.0069, + "step": 1890 + }, + { + "epoch": 1.05, + "learning_rate": 3.639360665613683e-05, + "loss": 0.9003, + "step": 1895 + }, + { + "epoch": 1.05, + "learning_rate": 3.632911470572197e-05, + "loss": 0.9279, + "step": 1900 + }, + { + "epoch": 1.05, + "learning_rate": 3.626452773945603e-05, + "loss": 0.9237, + "step": 1905 + }, + { + "epoch": 1.06, + "learning_rate": 3.619984629902172e-05, + "loss": 1.0086, + "step": 1910 + }, + { + "epoch": 1.06, + "learning_rate": 3.613507092689409e-05, + "loss": 0.9625, + "step": 1915 + }, + { + "epoch": 1.06, + "learning_rate": 3.607020216633599e-05, + "loss": 0.9297, + "step": 1920 + }, + { + "epoch": 1.06, + "learning_rate": 3.60052405613935e-05, + "loss": 0.9718, + "step": 1925 + }, + { + "epoch": 1.07, + "learning_rate": 3.594018665689139e-05, + "loss": 0.9512, + "step": 1930 + }, + { + "epoch": 1.07, + "learning_rate": 3.5875040998428513e-05, + "loss": 0.9923, + "step": 1935 + }, + { + "epoch": 1.07, + "learning_rate": 3.5809804132373253e-05, + "loss": 0.9518, + "step": 1940 + }, + { + "epoch": 1.08, + "learning_rate": 3.574447660585897e-05, + "loss": 0.9324, + "step": 1945 + }, + { + "epoch": 1.08, + "learning_rate": 3.5679058966779344e-05, + "loss": 0.9724, + "step": 1950 + }, + { + "epoch": 1.08, + "learning_rate": 3.561355176378384e-05, + "loss": 0.978, + "step": 1955 + }, + { + "epoch": 1.08, + "learning_rate": 3.554795554627307e-05, + "loss": 0.9893, + "step": 1960 + }, + { + "epoch": 1.09, + "learning_rate": 3.548227086439422e-05, + "loss": 0.967, + "step": 1965 + }, + { + "epoch": 1.09, + "learning_rate": 3.541649826903639e-05, + "loss": 0.9529, + "step": 1970 + }, + { + "epoch": 1.09, + "learning_rate": 3.535063831182602e-05, + "loss": 0.9506, + "step": 1975 + }, + { + "epoch": 1.09, + "learning_rate": 3.528469154512224e-05, + "loss": 0.9525, + "step": 1980 + }, + { + "epoch": 1.1, + "learning_rate": 3.521865852201223e-05, + "loss": 0.9258, + "step": 1985 + }, + { + "epoch": 1.1, + "learning_rate": 3.5152539796306596e-05, + "loss": 0.9417, + "step": 1990 + }, + { + "epoch": 1.1, + "learning_rate": 3.508633592253472e-05, + "loss": 0.9465, + "step": 1995 + }, + { + "epoch": 1.11, + "learning_rate": 3.502004745594011e-05, + "loss": 0.9494, + "step": 2000 + }, + { + "epoch": 1.11, + "learning_rate": 3.4953674952475755e-05, + "loss": 0.9709, + "step": 2005 + }, + { + "epoch": 1.11, + "learning_rate": 3.488721896879943e-05, + "loss": 0.9581, + "step": 2010 + }, + { + "epoch": 1.11, + "learning_rate": 3.4820680062269074e-05, + "loss": 0.974, + "step": 2015 + }, + { + "epoch": 1.12, + "learning_rate": 3.4754058790938046e-05, + "loss": 0.9768, + "step": 2020 + }, + { + "epoch": 1.12, + "learning_rate": 3.468735571355055e-05, + "loss": 0.982, + "step": 2025 + }, + { + "epoch": 1.12, + "learning_rate": 3.4620571389536825e-05, + "loss": 0.984, + "step": 2030 + }, + { + "epoch": 1.13, + "learning_rate": 3.455370637900856e-05, + "loss": 0.9604, + "step": 2035 + }, + { + "epoch": 1.13, + "learning_rate": 3.448676124275414e-05, + "loss": 0.897, + "step": 2040 + }, + { + "epoch": 1.13, + "learning_rate": 3.4419736542233925e-05, + "loss": 0.9968, + "step": 2045 + }, + { + "epoch": 1.13, + "learning_rate": 3.4352632839575616e-05, + "loss": 0.9479, + "step": 2050 + }, + { + "epoch": 1.14, + "learning_rate": 3.428545069756946e-05, + "loss": 0.9724, + "step": 2055 + }, + { + "epoch": 1.14, + "learning_rate": 3.42181906796636e-05, + "loss": 0.9493, + "step": 2060 + }, + { + "epoch": 1.14, + "learning_rate": 3.415085334995927e-05, + "loss": 0.9348, + "step": 2065 + }, + { + "epoch": 1.14, + "learning_rate": 3.408343927320613e-05, + "loss": 0.9702, + "step": 2070 + }, + { + "epoch": 1.15, + "learning_rate": 3.401594901479753e-05, + "loss": 0.9089, + "step": 2075 + }, + { + "epoch": 1.15, + "learning_rate": 3.394838314076572e-05, + "loss": 0.9606, + "step": 2080 + }, + { + "epoch": 1.15, + "learning_rate": 3.3880742217777115e-05, + "loss": 0.9743, + "step": 2085 + }, + { + "epoch": 1.16, + "learning_rate": 3.381302681312759e-05, + "loss": 0.9469, + "step": 2090 + }, + { + "epoch": 1.16, + "learning_rate": 3.374523749473767e-05, + "loss": 0.949, + "step": 2095 + }, + { + "epoch": 1.16, + "learning_rate": 3.367737483114779e-05, + "loss": 0.9567, + "step": 2100 + }, + { + "epoch": 1.16, + "learning_rate": 3.360943939151351e-05, + "loss": 0.9718, + "step": 2105 + }, + { + "epoch": 1.17, + "learning_rate": 3.354143174560078e-05, + "loss": 0.9626, + "step": 2110 + }, + { + "epoch": 1.17, + "learning_rate": 3.3473352463781105e-05, + "loss": 0.9346, + "step": 2115 + }, + { + "epoch": 1.17, + "learning_rate": 3.340520211702681e-05, + "loss": 0.9208, + "step": 2120 + }, + { + "epoch": 1.18, + "learning_rate": 3.333698127690623e-05, + "loss": 0.9856, + "step": 2125 + }, + { + "epoch": 1.18, + "learning_rate": 3.326869051557891e-05, + "loss": 0.9049, + "step": 2130 + }, + { + "epoch": 1.18, + "learning_rate": 3.320033040579082e-05, + "loss": 0.9222, + "step": 2135 + }, + { + "epoch": 1.18, + "learning_rate": 3.3131901520869565e-05, + "loss": 0.9648, + "step": 2140 + }, + { + "epoch": 1.19, + "learning_rate": 3.306340443471951e-05, + "loss": 0.9538, + "step": 2145 + }, + { + "epoch": 1.19, + "learning_rate": 3.299483972181708e-05, + "loss": 0.9314, + "step": 2150 + }, + { + "epoch": 1.19, + "learning_rate": 3.292620795720583e-05, + "loss": 0.9576, + "step": 2155 + }, + { + "epoch": 1.19, + "learning_rate": 3.285750971649167e-05, + "loss": 0.9499, + "step": 2160 + }, + { + "epoch": 1.2, + "learning_rate": 3.278874557583807e-05, + "loss": 0.9568, + "step": 2165 + }, + { + "epoch": 1.2, + "learning_rate": 3.271991611196117e-05, + "loss": 0.9642, + "step": 2170 + }, + { + "epoch": 1.2, + "learning_rate": 3.265102190212497e-05, + "loss": 0.9526, + "step": 2175 + }, + { + "epoch": 1.21, + "learning_rate": 3.258206352413648e-05, + "loss": 0.933, + "step": 2180 + }, + { + "epoch": 1.21, + "learning_rate": 3.2513041556340887e-05, + "loss": 0.9683, + "step": 2185 + }, + { + "epoch": 1.21, + "learning_rate": 3.244395657761671e-05, + "loss": 0.9155, + "step": 2190 + }, + { + "epoch": 1.21, + "learning_rate": 3.2374809167370924e-05, + "loss": 0.9262, + "step": 2195 + }, + { + "epoch": 1.22, + "learning_rate": 3.230559990553409e-05, + "loss": 0.9778, + "step": 2200 + }, + { + "epoch": 1.22, + "learning_rate": 3.2236329372555544e-05, + "loss": 0.9577, + "step": 2205 + }, + { + "epoch": 1.22, + "learning_rate": 3.2166998149398465e-05, + "loss": 0.9286, + "step": 2210 + }, + { + "epoch": 1.22, + "learning_rate": 3.209760681753505e-05, + "loss": 0.9634, + "step": 2215 + }, + { + "epoch": 1.23, + "learning_rate": 3.2028155958941615e-05, + "loss": 0.9467, + "step": 2220 + }, + { + "epoch": 1.23, + "learning_rate": 3.195864615609373e-05, + "loss": 0.9543, + "step": 2225 + }, + { + "epoch": 1.23, + "learning_rate": 3.1889077991961304e-05, + "loss": 0.9914, + "step": 2230 + }, + { + "epoch": 1.24, + "learning_rate": 3.181945205000373e-05, + "loss": 0.9309, + "step": 2235 + }, + { + "epoch": 1.24, + "learning_rate": 3.1749768914164955e-05, + "loss": 0.9299, + "step": 2240 + }, + { + "epoch": 1.24, + "learning_rate": 3.168002916886864e-05, + "loss": 0.9462, + "step": 2245 + }, + { + "epoch": 1.24, + "learning_rate": 3.1610233399013194e-05, + "loss": 0.948, + "step": 2250 + }, + { + "epoch": 1.25, + "learning_rate": 3.15403821899669e-05, + "loss": 0.9335, + "step": 2255 + }, + { + "epoch": 1.25, + "learning_rate": 3.147047612756302e-05, + "loss": 0.952, + "step": 2260 + }, + { + "epoch": 1.25, + "learning_rate": 3.140051579809484e-05, + "loss": 0.9532, + "step": 2265 + }, + { + "epoch": 1.26, + "learning_rate": 3.133050178831079e-05, + "loss": 0.9853, + "step": 2270 + }, + { + "epoch": 1.26, + "learning_rate": 3.12604346854095e-05, + "loss": 0.8925, + "step": 2275 + }, + { + "epoch": 1.26, + "learning_rate": 3.119031507703491e-05, + "loss": 0.942, + "step": 2280 + }, + { + "epoch": 1.26, + "learning_rate": 3.112014355127129e-05, + "loss": 0.9132, + "step": 2285 + }, + { + "epoch": 1.27, + "learning_rate": 3.104992069663835e-05, + "loss": 0.9335, + "step": 2290 + }, + { + "epoch": 1.27, + "learning_rate": 3.0979647102086273e-05, + "loss": 0.9403, + "step": 2295 + }, + { + "epoch": 1.27, + "learning_rate": 3.090932335699081e-05, + "loss": 0.9246, + "step": 2300 + }, + { + "epoch": 1.27, + "learning_rate": 3.083895005114831e-05, + "loss": 0.912, + "step": 2305 + }, + { + "epoch": 1.28, + "learning_rate": 3.076852777477079e-05, + "loss": 0.9334, + "step": 2310 + }, + { + "epoch": 1.28, + "learning_rate": 3.069805711848096e-05, + "loss": 0.933, + "step": 2315 + }, + { + "epoch": 1.28, + "learning_rate": 3.062753867330729e-05, + "loss": 0.9348, + "step": 2320 + }, + { + "epoch": 1.29, + "learning_rate": 3.055697303067905e-05, + "loss": 0.9997, + "step": 2325 + }, + { + "epoch": 1.29, + "learning_rate": 3.048636078242137e-05, + "loss": 0.9196, + "step": 2330 + }, + { + "epoch": 1.29, + "learning_rate": 3.0415702520750235e-05, + "loss": 0.9735, + "step": 2335 + }, + { + "epoch": 1.29, + "learning_rate": 3.0344998838267525e-05, + "loss": 0.9395, + "step": 2340 + }, + { + "epoch": 1.3, + "learning_rate": 3.0274250327956093e-05, + "loss": 0.9455, + "step": 2345 + }, + { + "epoch": 1.3, + "learning_rate": 3.020345758317474e-05, + "loss": 0.968, + "step": 2350 + }, + { + "epoch": 1.3, + "learning_rate": 3.0132621197653245e-05, + "loss": 0.9403, + "step": 2355 + }, + { + "epoch": 1.3, + "learning_rate": 3.0061741765487418e-05, + "loss": 0.9267, + "step": 2360 + }, + { + "epoch": 1.31, + "learning_rate": 2.9990819881134073e-05, + "loss": 0.9734, + "step": 2365 + }, + { + "epoch": 1.31, + "learning_rate": 2.9919856139406093e-05, + "loss": 0.949, + "step": 2370 + }, + { + "epoch": 1.31, + "learning_rate": 2.9848851135467386e-05, + "loss": 0.9464, + "step": 2375 + }, + { + "epoch": 1.32, + "learning_rate": 2.977780546482794e-05, + "loss": 0.9709, + "step": 2380 + }, + { + "epoch": 1.32, + "learning_rate": 2.9706719723338795e-05, + "loss": 0.9202, + "step": 2385 + }, + { + "epoch": 1.32, + "learning_rate": 2.9635594507187074e-05, + "loss": 0.9625, + "step": 2390 + }, + { + "epoch": 1.32, + "learning_rate": 2.956443041289096e-05, + "loss": 0.9307, + "step": 2395 + }, + { + "epoch": 1.33, + "learning_rate": 2.9493228037294702e-05, + "loss": 0.907, + "step": 2400 + }, + { + "epoch": 1.33, + "learning_rate": 2.9421987977563613e-05, + "loss": 0.9487, + "step": 2405 + }, + { + "epoch": 1.33, + "learning_rate": 2.935071083117907e-05, + "loss": 0.9259, + "step": 2410 + }, + { + "epoch": 1.34, + "learning_rate": 2.9279397195933457e-05, + "loss": 0.9357, + "step": 2415 + }, + { + "epoch": 1.34, + "learning_rate": 2.920804766992521e-05, + "loss": 0.9149, + "step": 2420 + }, + { + "epoch": 1.34, + "learning_rate": 2.9136662851553787e-05, + "loss": 0.9664, + "step": 2425 + }, + { + "epoch": 1.34, + "learning_rate": 2.906524333951461e-05, + "loss": 0.973, + "step": 2430 + }, + { + "epoch": 1.35, + "learning_rate": 2.899378973279409e-05, + "loss": 0.9797, + "step": 2435 + }, + { + "epoch": 1.35, + "learning_rate": 2.892230263066459e-05, + "loss": 0.9604, + "step": 2440 + }, + { + "epoch": 1.35, + "learning_rate": 2.885078263267938e-05, + "loss": 0.9147, + "step": 2445 + }, + { + "epoch": 1.35, + "learning_rate": 2.8779230338667634e-05, + "loss": 0.9827, + "step": 2450 + }, + { + "epoch": 1.36, + "learning_rate": 2.870764634872939e-05, + "loss": 0.9362, + "step": 2455 + }, + { + "epoch": 1.36, + "learning_rate": 2.86360312632305e-05, + "loss": 0.9373, + "step": 2460 + }, + { + "epoch": 1.36, + "learning_rate": 2.8564385682797622e-05, + "loss": 0.955, + "step": 2465 + }, + { + "epoch": 1.37, + "learning_rate": 2.8492710208313177e-05, + "loss": 0.9108, + "step": 2470 + }, + { + "epoch": 1.37, + "learning_rate": 2.8421005440910303e-05, + "loss": 0.9259, + "step": 2475 + }, + { + "epoch": 1.37, + "learning_rate": 2.8349271981967797e-05, + "loss": 0.9333, + "step": 2480 + }, + { + "epoch": 1.37, + "learning_rate": 2.8277510433105102e-05, + "loss": 0.9437, + "step": 2485 + }, + { + "epoch": 1.38, + "learning_rate": 2.820572139617725e-05, + "loss": 0.9189, + "step": 2490 + }, + { + "epoch": 1.38, + "learning_rate": 2.8133905473269802e-05, + "loss": 0.9522, + "step": 2495 + }, + { + "epoch": 1.38, + "learning_rate": 2.8062063266693818e-05, + "loss": 0.9352, + "step": 2500 + } + ], + "max_steps": 5424, + "num_train_epochs": 3, + "total_flos": 8.124054689651098e+17, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-2500/training_args.bin b/checkpoint-2500/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..77433459daca6603950afc463ff989e817f81ac3 --- /dev/null +++ b/checkpoint-2500/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0f6a8853599a52b1f15984d75ce4b6434c335fdc67d4509d0b2ca0d8c613590a +size 3362 diff --git a/checkpoint-2600/README.md b/checkpoint-2600/README.md new file mode 100644 index 0000000000000000000000000000000000000000..53c0fe6b436905ad7a2c060e98fe1ebdeac1a2fe --- /dev/null +++ b/checkpoint-2600/README.md @@ -0,0 +1,15 @@ +--- +library_name: peft +--- +## Training procedure + + +The following `bitsandbytes` quantization config was used during training: +- load_in_8bit: True +- llm_int8_threshold: 6.0 +- llm_int8_skip_modules: None +- llm_int8_enable_fp32_cpu_offload: False +### Framework versions + + +- PEFT 0.5.0 diff --git a/checkpoint-2600/adapter_config.json b/checkpoint-2600/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..514ae99ac7471cc26016f7575e44dd8304f29b07 --- /dev/null +++ b/checkpoint-2600/adapter_config.json @@ -0,0 +1,21 @@ +{ + "auto_mapping": null, + "base_model_name_or_path": "meta-llama/Llama-2-7b-chat-hf", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "q_proj", + "v_proj" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-2600/adapter_model.bin b/checkpoint-2600/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..eb534b102bfbe3cbded536492a2b9d1e1e96f51a --- /dev/null +++ b/checkpoint-2600/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dc30411b3468530b82ee6046dfeaa284fcf84d89769de04b5f961ae50f437a5f +size 16821197 diff --git a/checkpoint-2600/finetuning_args.json b/checkpoint-2600/finetuning_args.json new file mode 100644 index 0000000000000000000000000000000000000000..d01efc206b59c6f88548e8f3940579f2ed2af33b --- /dev/null +++ b/checkpoint-2600/finetuning_args.json @@ -0,0 +1,16 @@ +{ + "dpo_beta": 0.1, + "finetuning_type": "lora", + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "lora_rank": 8, + "lora_target": [ + "q_proj", + "v_proj" + ], + "name_module_trainable": "mlp", + "num_hidden_layers": 32, + "num_layer_trainable": 3, + "ppo_score_norm": false, + "resume_lora_training": true +} diff --git a/checkpoint-2600/optimizer.pt b/checkpoint-2600/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..a2c1552d2e73c36b28526b82630c3cbec97e1fc8 --- /dev/null +++ b/checkpoint-2600/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:450493077b415dbfa93d21f30bdd168b7e1df6b676a28f1a53bd7aa5d1ee0ac9 +size 33629893 diff --git a/checkpoint-2600/rng_state.pth b/checkpoint-2600/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..10f5345cdc19da44341de172f2a09d00eeb2c4dd --- /dev/null +++ b/checkpoint-2600/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d8221156c4eef718bbc61636e48e9c6d92cfef055dafd9d63f499636cb351498 +size 14575 diff --git a/checkpoint-2600/scaler.pt b/checkpoint-2600/scaler.pt new file mode 100644 index 0000000000000000000000000000000000000000..65894f4d214482c93818b7f185d1810082ab9e0b --- /dev/null +++ b/checkpoint-2600/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d5dc513ac70929e4303afe4c21d0bcbe3b91ca4fff6f6fae86bd776ec9758c08 +size 557 diff --git a/checkpoint-2600/scheduler.pt b/checkpoint-2600/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..a79a2e3d268e09fcb3da56593342c14b9f80da51 --- /dev/null +++ b/checkpoint-2600/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0493835445fef515796d2eb7f14f0f009e4ab046eea20f02d51061ebdae44e3d +size 627 diff --git a/checkpoint-2600/trainer_state.json b/checkpoint-2600/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..de56b2cdb7251d3deff6058a6c7325d0cdaa115b --- /dev/null +++ b/checkpoint-2600/trainer_state.json @@ -0,0 +1,3136 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.43765551562068, + "global_step": 2600, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.9999895164082156e-05, + "loss": 1.6349, + "step": 5 + }, + { + "epoch": 0.01, + "learning_rate": 4.999958065720787e-05, + "loss": 1.6199, + "step": 10 + }, + { + "epoch": 0.01, + "learning_rate": 4.999905648201487e-05, + "loss": 1.4834, + "step": 15 + }, + { + "epoch": 0.01, + "learning_rate": 4.999832264289934e-05, + "loss": 1.3882, + "step": 20 + }, + { + "epoch": 0.01, + "learning_rate": 4.999737914601591e-05, + "loss": 1.3679, + "step": 25 + }, + { + "epoch": 0.02, + "learning_rate": 4.999622599927756e-05, + "loss": 1.2396, + "step": 30 + }, + { + "epoch": 0.02, + "learning_rate": 4.999486321235559e-05, + "loss": 1.321, + "step": 35 + }, + { + "epoch": 0.02, + "learning_rate": 4.9993290796679516e-05, + "loss": 1.2874, + "step": 40 + }, + { + "epoch": 0.02, + "learning_rate": 4.999150876543699e-05, + "loss": 1.2607, + "step": 45 + }, + { + "epoch": 0.03, + "learning_rate": 4.9989517133573694e-05, + "loss": 1.2454, + "step": 50 + }, + { + "epoch": 0.03, + "learning_rate": 4.9987315917793174e-05, + "loss": 1.2799, + "step": 55 + }, + { + "epoch": 0.03, + "learning_rate": 4.998490513655676e-05, + "loss": 1.2575, + "step": 60 + }, + { + "epoch": 0.04, + "learning_rate": 4.998228481008337e-05, + "loss": 1.2404, + "step": 65 + }, + { + "epoch": 0.04, + "learning_rate": 4.997945496034934e-05, + "loss": 1.2219, + "step": 70 + }, + { + "epoch": 0.04, + "learning_rate": 4.9976415611088267e-05, + "loss": 1.2241, + "step": 75 + }, + { + "epoch": 0.04, + "learning_rate": 4.997316678779079e-05, + "loss": 1.1716, + "step": 80 + }, + { + "epoch": 0.05, + "learning_rate": 4.996970851770438e-05, + "loss": 1.1883, + "step": 85 + }, + { + "epoch": 0.05, + "learning_rate": 4.9966040829833115e-05, + "loss": 1.205, + "step": 90 + }, + { + "epoch": 0.05, + "learning_rate": 4.9962163754937426e-05, + "loss": 1.1246, + "step": 95 + }, + { + "epoch": 0.06, + "learning_rate": 4.995807732553384e-05, + "loss": 1.1636, + "step": 100 + }, + { + "epoch": 0.06, + "learning_rate": 4.9953781575894723e-05, + "loss": 1.158, + "step": 105 + }, + { + "epoch": 0.06, + "learning_rate": 4.9949276542048e-05, + "loss": 1.1477, + "step": 110 + }, + { + "epoch": 0.06, + "learning_rate": 4.9944562261776805e-05, + "loss": 1.1678, + "step": 115 + }, + { + "epoch": 0.07, + "learning_rate": 4.9939638774619216e-05, + "loss": 1.1501, + "step": 120 + }, + { + "epoch": 0.07, + "learning_rate": 4.99345061218679e-05, + "loss": 1.1955, + "step": 125 + }, + { + "epoch": 0.07, + "learning_rate": 4.9929164346569756e-05, + "loss": 1.1724, + "step": 130 + }, + { + "epoch": 0.07, + "learning_rate": 4.9923613493525576e-05, + "loss": 1.177, + "step": 135 + }, + { + "epoch": 0.08, + "learning_rate": 4.991785360928968e-05, + "loss": 1.1418, + "step": 140 + }, + { + "epoch": 0.08, + "learning_rate": 4.991188474216947e-05, + "loss": 1.1898, + "step": 145 + }, + { + "epoch": 0.08, + "learning_rate": 4.9905706942225094e-05, + "loss": 1.1479, + "step": 150 + }, + { + "epoch": 0.09, + "learning_rate": 4.9899320261268966e-05, + "loss": 1.1356, + "step": 155 + }, + { + "epoch": 0.09, + "learning_rate": 4.989272475286537e-05, + "loss": 1.1397, + "step": 160 + }, + { + "epoch": 0.09, + "learning_rate": 4.9885920472330004e-05, + "loss": 1.1215, + "step": 165 + }, + { + "epoch": 0.09, + "learning_rate": 4.9878907476729516e-05, + "loss": 1.167, + "step": 170 + }, + { + "epoch": 0.1, + "learning_rate": 4.9871685824881e-05, + "loss": 1.1219, + "step": 175 + }, + { + "epoch": 0.1, + "learning_rate": 4.9864255577351534e-05, + "loss": 1.0835, + "step": 180 + }, + { + "epoch": 0.1, + "learning_rate": 4.985661679645769e-05, + "loss": 1.0721, + "step": 185 + }, + { + "epoch": 0.11, + "learning_rate": 4.9848769546264915e-05, + "loss": 1.0692, + "step": 190 + }, + { + "epoch": 0.11, + "learning_rate": 4.9840713892587146e-05, + "loss": 1.0488, + "step": 195 + }, + { + "epoch": 0.11, + "learning_rate": 4.983244990298609e-05, + "loss": 1.1285, + "step": 200 + }, + { + "epoch": 0.11, + "learning_rate": 4.982397764677081e-05, + "loss": 1.0832, + "step": 205 + }, + { + "epoch": 0.12, + "learning_rate": 4.981529719499704e-05, + "loss": 1.0652, + "step": 210 + }, + { + "epoch": 0.12, + "learning_rate": 4.980640862046663e-05, + "loss": 1.1043, + "step": 215 + }, + { + "epoch": 0.12, + "learning_rate": 4.979731199772693e-05, + "loss": 1.112, + "step": 220 + }, + { + "epoch": 0.12, + "learning_rate": 4.9788007403070146e-05, + "loss": 1.1029, + "step": 225 + }, + { + "epoch": 0.13, + "learning_rate": 4.977849491453277e-05, + "loss": 1.0869, + "step": 230 + }, + { + "epoch": 0.13, + "learning_rate": 4.976877461189481e-05, + "loss": 1.0843, + "step": 235 + }, + { + "epoch": 0.13, + "learning_rate": 4.975884657667922e-05, + "loss": 1.0789, + "step": 240 + }, + { + "epoch": 0.14, + "learning_rate": 4.974871089215118e-05, + "loss": 1.0449, + "step": 245 + }, + { + "epoch": 0.14, + "learning_rate": 4.9738367643317405e-05, + "loss": 1.1053, + "step": 250 + }, + { + "epoch": 0.14, + "learning_rate": 4.9727816916925395e-05, + "loss": 1.0651, + "step": 255 + }, + { + "epoch": 0.14, + "learning_rate": 4.971705880146276e-05, + "loss": 1.0828, + "step": 260 + }, + { + "epoch": 0.15, + "learning_rate": 4.970609338715646e-05, + "loss": 1.0932, + "step": 265 + }, + { + "epoch": 0.15, + "learning_rate": 4.969492076597203e-05, + "loss": 1.0648, + "step": 270 + }, + { + "epoch": 0.15, + "learning_rate": 4.968354103161283e-05, + "loss": 1.0948, + "step": 275 + }, + { + "epoch": 0.15, + "learning_rate": 4.967195427951926e-05, + "loss": 1.0721, + "step": 280 + }, + { + "epoch": 0.16, + "learning_rate": 4.9660160606867936e-05, + "loss": 1.124, + "step": 285 + }, + { + "epoch": 0.16, + "learning_rate": 4.9648160112570896e-05, + "loss": 1.0963, + "step": 290 + }, + { + "epoch": 0.16, + "learning_rate": 4.9635952897274773e-05, + "loss": 1.1078, + "step": 295 + }, + { + "epoch": 0.17, + "learning_rate": 4.9623539063359925e-05, + "loss": 1.1059, + "step": 300 + }, + { + "epoch": 0.17, + "learning_rate": 4.961091871493962e-05, + "loss": 1.1032, + "step": 305 + }, + { + "epoch": 0.17, + "learning_rate": 4.959809195785912e-05, + "loss": 1.0595, + "step": 310 + }, + { + "epoch": 0.17, + "learning_rate": 4.958505889969481e-05, + "loss": 1.1096, + "step": 315 + }, + { + "epoch": 0.18, + "learning_rate": 4.957181964975329e-05, + "loss": 1.0589, + "step": 320 + }, + { + "epoch": 0.18, + "learning_rate": 4.955837431907049e-05, + "loss": 1.0608, + "step": 325 + }, + { + "epoch": 0.18, + "learning_rate": 4.954472302041069e-05, + "loss": 1.0819, + "step": 330 + }, + { + "epoch": 0.19, + "learning_rate": 4.9530865868265605e-05, + "loss": 1.0759, + "step": 335 + }, + { + "epoch": 0.19, + "learning_rate": 4.951680297885342e-05, + "loss": 1.0515, + "step": 340 + }, + { + "epoch": 0.19, + "learning_rate": 4.950253447011779e-05, + "loss": 1.0371, + "step": 345 + }, + { + "epoch": 0.19, + "learning_rate": 4.948806046172691e-05, + "loss": 1.0619, + "step": 350 + }, + { + "epoch": 0.2, + "learning_rate": 4.947338107507245e-05, + "loss": 1.0757, + "step": 355 + }, + { + "epoch": 0.2, + "learning_rate": 4.945849643326857e-05, + "loss": 1.0686, + "step": 360 + }, + { + "epoch": 0.2, + "learning_rate": 4.9443406661150874e-05, + "loss": 1.0809, + "step": 365 + }, + { + "epoch": 0.2, + "learning_rate": 4.942811188527537e-05, + "loss": 1.0704, + "step": 370 + }, + { + "epoch": 0.21, + "learning_rate": 4.941261223391742e-05, + "loss": 1.0655, + "step": 375 + }, + { + "epoch": 0.21, + "learning_rate": 4.939690783707063e-05, + "loss": 1.1182, + "step": 380 + }, + { + "epoch": 0.21, + "learning_rate": 4.938099882644578e-05, + "loss": 1.081, + "step": 385 + }, + { + "epoch": 0.22, + "learning_rate": 4.9364885335469734e-05, + "loss": 1.0792, + "step": 390 + }, + { + "epoch": 0.22, + "learning_rate": 4.93485674992843e-05, + "loss": 1.0244, + "step": 395 + }, + { + "epoch": 0.22, + "learning_rate": 4.933204545474511e-05, + "loss": 1.0531, + "step": 400 + }, + { + "epoch": 0.22, + "learning_rate": 4.9315319340420465e-05, + "loss": 1.048, + "step": 405 + }, + { + "epoch": 0.23, + "learning_rate": 4.929838929659015e-05, + "loss": 1.04, + "step": 410 + }, + { + "epoch": 0.23, + "learning_rate": 4.9281255465244314e-05, + "loss": 1.0686, + "step": 415 + }, + { + "epoch": 0.23, + "learning_rate": 4.926391799008223e-05, + "loss": 1.0933, + "step": 420 + }, + { + "epoch": 0.24, + "learning_rate": 4.924637701651111e-05, + "loss": 1.0313, + "step": 425 + }, + { + "epoch": 0.24, + "learning_rate": 4.9228632691644874e-05, + "loss": 1.0811, + "step": 430 + }, + { + "epoch": 0.24, + "learning_rate": 4.921068516430293e-05, + "loss": 1.0617, + "step": 435 + }, + { + "epoch": 0.24, + "learning_rate": 4.919253458500892e-05, + "loss": 1.0482, + "step": 440 + }, + { + "epoch": 0.25, + "learning_rate": 4.9174181105989445e-05, + "loss": 1.0681, + "step": 445 + }, + { + "epoch": 0.25, + "learning_rate": 4.9155624881172834e-05, + "loss": 1.0476, + "step": 450 + }, + { + "epoch": 0.25, + "learning_rate": 4.913686606618777e-05, + "loss": 1.0463, + "step": 455 + }, + { + "epoch": 0.25, + "learning_rate": 4.911790481836208e-05, + "loss": 1.0213, + "step": 460 + }, + { + "epoch": 0.26, + "learning_rate": 4.909874129672133e-05, + "loss": 0.9855, + "step": 465 + }, + { + "epoch": 0.26, + "learning_rate": 4.907937566198757e-05, + "loss": 1.045, + "step": 470 + }, + { + "epoch": 0.26, + "learning_rate": 4.9059808076577914e-05, + "loss": 1.0442, + "step": 475 + }, + { + "epoch": 0.27, + "learning_rate": 4.904003870460323e-05, + "loss": 1.0354, + "step": 480 + }, + { + "epoch": 0.27, + "learning_rate": 4.9020067711866735e-05, + "loss": 1.0331, + "step": 485 + }, + { + "epoch": 0.27, + "learning_rate": 4.899989526586261e-05, + "loss": 1.0585, + "step": 490 + }, + { + "epoch": 0.27, + "learning_rate": 4.8979521535774636e-05, + "loss": 1.0223, + "step": 495 + }, + { + "epoch": 0.28, + "learning_rate": 4.895894669247468e-05, + "loss": 1.0118, + "step": 500 + }, + { + "epoch": 0.28, + "learning_rate": 4.8938170908521356e-05, + "loss": 1.0508, + "step": 505 + }, + { + "epoch": 0.28, + "learning_rate": 4.8917194358158534e-05, + "loss": 1.0656, + "step": 510 + }, + { + "epoch": 0.28, + "learning_rate": 4.8896017217313886e-05, + "loss": 1.0655, + "step": 515 + }, + { + "epoch": 0.29, + "learning_rate": 4.887463966359741e-05, + "loss": 1.0833, + "step": 520 + }, + { + "epoch": 0.29, + "learning_rate": 4.8853061876299956e-05, + "loss": 1.068, + "step": 525 + }, + { + "epoch": 0.29, + "learning_rate": 4.8831284036391684e-05, + "loss": 1.0487, + "step": 530 + }, + { + "epoch": 0.3, + "learning_rate": 4.880930632652058e-05, + "loss": 1.059, + "step": 535 + }, + { + "epoch": 0.3, + "learning_rate": 4.878712893101092e-05, + "loss": 1.0408, + "step": 540 + }, + { + "epoch": 0.3, + "learning_rate": 4.876475203586171e-05, + "loss": 0.9976, + "step": 545 + }, + { + "epoch": 0.3, + "learning_rate": 4.874217582874514e-05, + "loss": 1.0358, + "step": 550 + }, + { + "epoch": 0.31, + "learning_rate": 4.8719400499005e-05, + "loss": 1.0073, + "step": 555 + }, + { + "epoch": 0.31, + "learning_rate": 4.869642623765509e-05, + "loss": 1.0742, + "step": 560 + }, + { + "epoch": 0.31, + "learning_rate": 4.867325323737765e-05, + "loss": 1.0463, + "step": 565 + }, + { + "epoch": 0.32, + "learning_rate": 4.864988169252168e-05, + "loss": 1.0242, + "step": 570 + }, + { + "epoch": 0.32, + "learning_rate": 4.8626311799101375e-05, + "loss": 1.0773, + "step": 575 + }, + { + "epoch": 0.32, + "learning_rate": 4.860254375479446e-05, + "loss": 1.0371, + "step": 580 + }, + { + "epoch": 0.32, + "learning_rate": 4.8578577758940504e-05, + "loss": 1.0399, + "step": 585 + }, + { + "epoch": 0.33, + "learning_rate": 4.855441401253928e-05, + "loss": 1.0411, + "step": 590 + }, + { + "epoch": 0.33, + "learning_rate": 4.8530052718249076e-05, + "loss": 1.0476, + "step": 595 + }, + { + "epoch": 0.33, + "learning_rate": 4.850549408038498e-05, + "loss": 1.0075, + "step": 600 + }, + { + "epoch": 0.33, + "learning_rate": 4.848073830491717e-05, + "loss": 1.0045, + "step": 605 + }, + { + "epoch": 0.34, + "learning_rate": 4.845578559946923e-05, + "loss": 1.0368, + "step": 610 + }, + { + "epoch": 0.34, + "learning_rate": 4.8430636173316306e-05, + "loss": 1.0156, + "step": 615 + }, + { + "epoch": 0.34, + "learning_rate": 4.840529023738348e-05, + "loss": 1.0334, + "step": 620 + }, + { + "epoch": 0.35, + "learning_rate": 4.837974800424389e-05, + "loss": 0.9994, + "step": 625 + }, + { + "epoch": 0.35, + "learning_rate": 4.8354009688117026e-05, + "loss": 1.0467, + "step": 630 + }, + { + "epoch": 0.35, + "learning_rate": 4.8328075504866874e-05, + "loss": 0.9968, + "step": 635 + }, + { + "epoch": 0.35, + "learning_rate": 4.8301945672000164e-05, + "loss": 1.041, + "step": 640 + }, + { + "epoch": 0.36, + "learning_rate": 4.8275620408664487e-05, + "loss": 0.9811, + "step": 645 + }, + { + "epoch": 0.36, + "learning_rate": 4.8249099935646494e-05, + "loss": 1.0235, + "step": 650 + }, + { + "epoch": 0.36, + "learning_rate": 4.822238447537003e-05, + "loss": 1.0152, + "step": 655 + }, + { + "epoch": 0.36, + "learning_rate": 4.819547425189429e-05, + "loss": 1.038, + "step": 660 + }, + { + "epoch": 0.37, + "learning_rate": 4.81683694909119e-05, + "loss": 1.0387, + "step": 665 + }, + { + "epoch": 0.37, + "learning_rate": 4.814107041974707e-05, + "loss": 1.019, + "step": 670 + }, + { + "epoch": 0.37, + "learning_rate": 4.811357726735366e-05, + "loss": 1.0403, + "step": 675 + }, + { + "epoch": 0.38, + "learning_rate": 4.808589026431324e-05, + "loss": 1.0828, + "step": 680 + }, + { + "epoch": 0.38, + "learning_rate": 4.805800964283322e-05, + "loss": 1.0128, + "step": 685 + }, + { + "epoch": 0.38, + "learning_rate": 4.802993563674483e-05, + "loss": 1.0502, + "step": 690 + }, + { + "epoch": 0.38, + "learning_rate": 4.80016684815012e-05, + "loss": 1.0169, + "step": 695 + }, + { + "epoch": 0.39, + "learning_rate": 4.7973208414175406e-05, + "loss": 1.0316, + "step": 700 + }, + { + "epoch": 0.39, + "learning_rate": 4.794455567345842e-05, + "loss": 1.0263, + "step": 705 + }, + { + "epoch": 0.39, + "learning_rate": 4.791571049965714e-05, + "loss": 1.0801, + "step": 710 + }, + { + "epoch": 0.4, + "learning_rate": 4.7886673134692404e-05, + "loss": 1.0575, + "step": 715 + }, + { + "epoch": 0.4, + "learning_rate": 4.7857443822096905e-05, + "loss": 0.9767, + "step": 720 + }, + { + "epoch": 0.4, + "learning_rate": 4.782802280701319e-05, + "loss": 0.9666, + "step": 725 + }, + { + "epoch": 0.4, + "learning_rate": 4.77984103361916e-05, + "loss": 0.9983, + "step": 730 + }, + { + "epoch": 0.41, + "learning_rate": 4.776860665798816e-05, + "loss": 1.0219, + "step": 735 + }, + { + "epoch": 0.41, + "learning_rate": 4.773861202236257e-05, + "loss": 0.9963, + "step": 740 + }, + { + "epoch": 0.41, + "learning_rate": 4.770842668087602e-05, + "loss": 1.0219, + "step": 745 + }, + { + "epoch": 0.41, + "learning_rate": 4.767805088668916e-05, + "loss": 1.0029, + "step": 750 + }, + { + "epoch": 0.42, + "learning_rate": 4.7647484894559936e-05, + "loss": 1.0077, + "step": 755 + }, + { + "epoch": 0.42, + "learning_rate": 4.7616728960841444e-05, + "loss": 0.9908, + "step": 760 + }, + { + "epoch": 0.42, + "learning_rate": 4.758578334347981e-05, + "loss": 1.0092, + "step": 765 + }, + { + "epoch": 0.43, + "learning_rate": 4.7554648302012015e-05, + "loss": 0.9874, + "step": 770 + }, + { + "epoch": 0.43, + "learning_rate": 4.7523324097563706e-05, + "loss": 1.0185, + "step": 775 + }, + { + "epoch": 0.43, + "learning_rate": 4.749181099284703e-05, + "loss": 1.0533, + "step": 780 + }, + { + "epoch": 0.43, + "learning_rate": 4.746010925215839e-05, + "loss": 1.0083, + "step": 785 + }, + { + "epoch": 0.44, + "learning_rate": 4.74282191413763e-05, + "loss": 1.0191, + "step": 790 + }, + { + "epoch": 0.44, + "learning_rate": 4.7396140927959045e-05, + "loss": 0.9786, + "step": 795 + }, + { + "epoch": 0.44, + "learning_rate": 4.7363874880942574e-05, + "loss": 1.0197, + "step": 800 + }, + { + "epoch": 0.45, + "learning_rate": 4.733142127093813e-05, + "loss": 1.0471, + "step": 805 + }, + { + "epoch": 0.45, + "learning_rate": 4.7298780370130014e-05, + "loss": 1.0134, + "step": 810 + }, + { + "epoch": 0.45, + "learning_rate": 4.726595245227336e-05, + "loss": 0.9801, + "step": 815 + }, + { + "epoch": 0.45, + "learning_rate": 4.723293779269173e-05, + "loss": 0.9944, + "step": 820 + }, + { + "epoch": 0.46, + "learning_rate": 4.7199736668274924e-05, + "loss": 1.0358, + "step": 825 + }, + { + "epoch": 0.46, + "learning_rate": 4.716634935747655e-05, + "loss": 0.9907, + "step": 830 + }, + { + "epoch": 0.46, + "learning_rate": 4.713277614031177e-05, + "loss": 1.0698, + "step": 835 + }, + { + "epoch": 0.46, + "learning_rate": 4.70990172983549e-05, + "loss": 1.0466, + "step": 840 + }, + { + "epoch": 0.47, + "learning_rate": 4.706507311473707e-05, + "loss": 0.9489, + "step": 845 + }, + { + "epoch": 0.47, + "learning_rate": 4.703094387414385e-05, + "loss": 0.9936, + "step": 850 + }, + { + "epoch": 0.47, + "learning_rate": 4.699662986281288e-05, + "loss": 0.9672, + "step": 855 + }, + { + "epoch": 0.48, + "learning_rate": 4.696213136853141e-05, + "loss": 1.0164, + "step": 860 + }, + { + "epoch": 0.48, + "learning_rate": 4.6927448680633954e-05, + "loss": 0.9817, + "step": 865 + }, + { + "epoch": 0.48, + "learning_rate": 4.689258208999983e-05, + "loss": 1.028, + "step": 870 + }, + { + "epoch": 0.48, + "learning_rate": 4.6857531889050716e-05, + "loss": 1.0239, + "step": 875 + }, + { + "epoch": 0.49, + "learning_rate": 4.682229837174821e-05, + "loss": 1.0378, + "step": 880 + }, + { + "epoch": 0.49, + "learning_rate": 4.678688183359135e-05, + "loss": 0.9908, + "step": 885 + }, + { + "epoch": 0.49, + "learning_rate": 4.675128257161418e-05, + "loss": 0.9384, + "step": 890 + }, + { + "epoch": 0.49, + "learning_rate": 4.671550088438319e-05, + "loss": 0.9481, + "step": 895 + }, + { + "epoch": 0.5, + "learning_rate": 4.6679537071994874e-05, + "loss": 0.9541, + "step": 900 + }, + { + "epoch": 0.5, + "learning_rate": 4.6643391436073165e-05, + "loss": 0.9787, + "step": 905 + }, + { + "epoch": 0.5, + "learning_rate": 4.660706427976693e-05, + "loss": 0.9897, + "step": 910 + }, + { + "epoch": 0.51, + "learning_rate": 4.657055590774745e-05, + "loss": 0.9947, + "step": 915 + }, + { + "epoch": 0.51, + "learning_rate": 4.6533866626205805e-05, + "loss": 0.9614, + "step": 920 + }, + { + "epoch": 0.51, + "learning_rate": 4.649699674285036e-05, + "loss": 1.0323, + "step": 925 + }, + { + "epoch": 0.51, + "learning_rate": 4.645994656690417e-05, + "loss": 1.0345, + "step": 930 + }, + { + "epoch": 0.52, + "learning_rate": 4.642271640910235e-05, + "loss": 1.0432, + "step": 935 + }, + { + "epoch": 0.52, + "learning_rate": 4.638530658168954e-05, + "loss": 1.0102, + "step": 940 + }, + { + "epoch": 0.52, + "learning_rate": 4.6347717398417203e-05, + "loss": 1.0361, + "step": 945 + }, + { + "epoch": 0.53, + "learning_rate": 4.6309949174541096e-05, + "loss": 0.9818, + "step": 950 + }, + { + "epoch": 0.53, + "learning_rate": 4.627200222681851e-05, + "loss": 1.0293, + "step": 955 + }, + { + "epoch": 0.53, + "learning_rate": 4.6233876873505694e-05, + "loss": 1.0125, + "step": 960 + }, + { + "epoch": 0.53, + "learning_rate": 4.619557343435516e-05, + "loss": 1.0039, + "step": 965 + }, + { + "epoch": 0.54, + "learning_rate": 4.615709223061302e-05, + "loss": 1.0146, + "step": 970 + }, + { + "epoch": 0.54, + "learning_rate": 4.611843358501624e-05, + "loss": 1.0352, + "step": 975 + }, + { + "epoch": 0.54, + "learning_rate": 4.6079597821789993e-05, + "loss": 1.0405, + "step": 980 + }, + { + "epoch": 0.54, + "learning_rate": 4.604058526664491e-05, + "loss": 0.9834, + "step": 985 + }, + { + "epoch": 0.55, + "learning_rate": 4.600139624677436e-05, + "loss": 1.0252, + "step": 990 + }, + { + "epoch": 0.55, + "learning_rate": 4.596203109085168e-05, + "loss": 1.0516, + "step": 995 + }, + { + "epoch": 0.55, + "learning_rate": 4.5922490129027464e-05, + "loss": 0.9762, + "step": 1000 + }, + { + "epoch": 0.56, + "learning_rate": 4.588277369292674e-05, + "loss": 0.9801, + "step": 1005 + }, + { + "epoch": 0.56, + "learning_rate": 4.5842882115646234e-05, + "loss": 1.0127, + "step": 1010 + }, + { + "epoch": 0.56, + "learning_rate": 4.580281573175157e-05, + "loss": 1.0415, + "step": 1015 + }, + { + "epoch": 0.56, + "learning_rate": 4.576257487727442e-05, + "loss": 0.9974, + "step": 1020 + }, + { + "epoch": 0.57, + "learning_rate": 4.572215988970974e-05, + "loss": 1.0097, + "step": 1025 + }, + { + "epoch": 0.57, + "learning_rate": 4.568157110801293e-05, + "loss": 1.0004, + "step": 1030 + }, + { + "epoch": 0.57, + "learning_rate": 4.5640808872596944e-05, + "loss": 0.9932, + "step": 1035 + }, + { + "epoch": 0.58, + "learning_rate": 4.5599873525329505e-05, + "loss": 0.9437, + "step": 1040 + }, + { + "epoch": 0.58, + "learning_rate": 4.555876540953019e-05, + "loss": 1.0004, + "step": 1045 + }, + { + "epoch": 0.58, + "learning_rate": 4.551748486996755e-05, + "loss": 0.9743, + "step": 1050 + }, + { + "epoch": 0.58, + "learning_rate": 4.547603225285626e-05, + "loss": 1.0303, + "step": 1055 + }, + { + "epoch": 0.59, + "learning_rate": 4.543440790585417e-05, + "loss": 1.0225, + "step": 1060 + }, + { + "epoch": 0.59, + "learning_rate": 4.539261217805939e-05, + "loss": 1.0102, + "step": 1065 + }, + { + "epoch": 0.59, + "learning_rate": 4.535064542000743e-05, + "loss": 1.0087, + "step": 1070 + }, + { + "epoch": 0.59, + "learning_rate": 4.5308507983668165e-05, + "loss": 0.952, + "step": 1075 + }, + { + "epoch": 0.6, + "learning_rate": 4.526620022244293e-05, + "loss": 0.9767, + "step": 1080 + }, + { + "epoch": 0.6, + "learning_rate": 4.522372249116158e-05, + "loss": 1.0049, + "step": 1085 + }, + { + "epoch": 0.6, + "learning_rate": 4.5181075146079456e-05, + "loss": 1.0397, + "step": 1090 + }, + { + "epoch": 0.61, + "learning_rate": 4.5138258544874455e-05, + "loss": 1.0167, + "step": 1095 + }, + { + "epoch": 0.61, + "learning_rate": 4.5095273046643985e-05, + "loss": 0.9966, + "step": 1100 + }, + { + "epoch": 0.61, + "learning_rate": 4.5052119011901986e-05, + "loss": 0.983, + "step": 1105 + }, + { + "epoch": 0.61, + "learning_rate": 4.500879680257587e-05, + "loss": 1.0301, + "step": 1110 + }, + { + "epoch": 0.62, + "learning_rate": 4.4965306782003535e-05, + "loss": 1.0371, + "step": 1115 + }, + { + "epoch": 0.62, + "learning_rate": 4.492164931493028e-05, + "loss": 1.0083, + "step": 1120 + }, + { + "epoch": 0.62, + "learning_rate": 4.487782476750575e-05, + "loss": 0.9728, + "step": 1125 + }, + { + "epoch": 0.62, + "learning_rate": 4.4833833507280884e-05, + "loss": 0.9892, + "step": 1130 + }, + { + "epoch": 0.63, + "learning_rate": 4.4789675903204805e-05, + "loss": 0.9847, + "step": 1135 + }, + { + "epoch": 0.63, + "learning_rate": 4.474535232562176e-05, + "loss": 1.0545, + "step": 1140 + }, + { + "epoch": 0.63, + "learning_rate": 4.470086314626797e-05, + "loss": 1.0204, + "step": 1145 + }, + { + "epoch": 0.64, + "learning_rate": 4.465620873826856e-05, + "loss": 0.9648, + "step": 1150 + }, + { + "epoch": 0.64, + "learning_rate": 4.46113894761344e-05, + "loss": 0.9545, + "step": 1155 + }, + { + "epoch": 0.64, + "learning_rate": 4.456640573575896e-05, + "loss": 1.0256, + "step": 1160 + }, + { + "epoch": 0.64, + "learning_rate": 4.4521257894415183e-05, + "loss": 0.99, + "step": 1165 + }, + { + "epoch": 0.65, + "learning_rate": 4.44759463307523e-05, + "loss": 0.9906, + "step": 1170 + }, + { + "epoch": 0.65, + "learning_rate": 4.443047142479266e-05, + "loss": 0.9876, + "step": 1175 + }, + { + "epoch": 0.65, + "learning_rate": 4.4384833557928553e-05, + "loss": 1.0495, + "step": 1180 + }, + { + "epoch": 0.66, + "learning_rate": 4.4339033112918966e-05, + "loss": 0.9869, + "step": 1185 + }, + { + "epoch": 0.66, + "learning_rate": 4.4293070473886456e-05, + "loss": 1.0299, + "step": 1190 + }, + { + "epoch": 0.66, + "learning_rate": 4.424694602631385e-05, + "loss": 1.0073, + "step": 1195 + }, + { + "epoch": 0.66, + "learning_rate": 4.420066015704105e-05, + "loss": 1.0023, + "step": 1200 + }, + { + "epoch": 0.67, + "learning_rate": 4.41542132542618e-05, + "loss": 0.9462, + "step": 1205 + }, + { + "epoch": 0.67, + "learning_rate": 4.410760570752037e-05, + "loss": 1.0116, + "step": 1210 + }, + { + "epoch": 0.67, + "learning_rate": 4.4060837907708375e-05, + "loss": 0.9652, + "step": 1215 + }, + { + "epoch": 0.67, + "learning_rate": 4.401391024706142e-05, + "loss": 1.0411, + "step": 1220 + }, + { + "epoch": 0.68, + "learning_rate": 4.396682311915586e-05, + "loss": 0.9691, + "step": 1225 + }, + { + "epoch": 0.68, + "learning_rate": 4.3919576918905495e-05, + "loss": 0.961, + "step": 1230 + }, + { + "epoch": 0.68, + "learning_rate": 4.387217204255819e-05, + "loss": 0.9602, + "step": 1235 + }, + { + "epoch": 0.69, + "learning_rate": 4.3824608887692666e-05, + "loss": 1.0197, + "step": 1240 + }, + { + "epoch": 0.69, + "learning_rate": 4.377688785321507e-05, + "loss": 0.9982, + "step": 1245 + }, + { + "epoch": 0.69, + "learning_rate": 4.372900933935569e-05, + "loss": 0.986, + "step": 1250 + }, + { + "epoch": 0.69, + "learning_rate": 4.368097374766556e-05, + "loss": 0.9744, + "step": 1255 + }, + { + "epoch": 0.7, + "learning_rate": 4.3632781481013105e-05, + "loss": 1.0078, + "step": 1260 + }, + { + "epoch": 0.7, + "learning_rate": 4.358443294358077e-05, + "loss": 0.9865, + "step": 1265 + }, + { + "epoch": 0.7, + "learning_rate": 4.35359285408616e-05, + "loss": 0.976, + "step": 1270 + }, + { + "epoch": 0.71, + "learning_rate": 4.348726867965591e-05, + "loss": 1.0141, + "step": 1275 + }, + { + "epoch": 0.71, + "learning_rate": 4.343845376806777e-05, + "loss": 0.952, + "step": 1280 + }, + { + "epoch": 0.71, + "learning_rate": 4.338948421550169e-05, + "loss": 1.0239, + "step": 1285 + }, + { + "epoch": 0.71, + "learning_rate": 4.334036043265909e-05, + "loss": 0.994, + "step": 1290 + }, + { + "epoch": 0.72, + "learning_rate": 4.329108283153492e-05, + "loss": 1.0505, + "step": 1295 + }, + { + "epoch": 0.72, + "learning_rate": 4.3241651825414195e-05, + "loss": 0.9919, + "step": 1300 + }, + { + "epoch": 0.72, + "learning_rate": 4.31920678288685e-05, + "loss": 1.0169, + "step": 1305 + }, + { + "epoch": 0.72, + "learning_rate": 4.3142331257752546e-05, + "loss": 0.9938, + "step": 1310 + }, + { + "epoch": 0.73, + "learning_rate": 4.309244252920064e-05, + "loss": 0.92, + "step": 1315 + }, + { + "epoch": 0.73, + "learning_rate": 4.304240206162326e-05, + "loss": 0.9537, + "step": 1320 + }, + { + "epoch": 0.73, + "learning_rate": 4.299221027470345e-05, + "loss": 0.996, + "step": 1325 + }, + { + "epoch": 0.74, + "learning_rate": 4.294186758939339e-05, + "loss": 0.9652, + "step": 1330 + }, + { + "epoch": 0.74, + "learning_rate": 4.2891374427910795e-05, + "loss": 1.0138, + "step": 1335 + }, + { + "epoch": 0.74, + "learning_rate": 4.284073121373544e-05, + "loss": 0.9267, + "step": 1340 + }, + { + "epoch": 0.74, + "learning_rate": 4.278993837160553e-05, + "loss": 0.9899, + "step": 1345 + }, + { + "epoch": 0.75, + "learning_rate": 4.273899632751422e-05, + "loss": 0.963, + "step": 1350 + }, + { + "epoch": 0.75, + "learning_rate": 4.2687905508705974e-05, + "loss": 1.0066, + "step": 1355 + }, + { + "epoch": 0.75, + "learning_rate": 4.263666634367303e-05, + "loss": 0.9939, + "step": 1360 + }, + { + "epoch": 0.75, + "learning_rate": 4.258527926215178e-05, + "loss": 0.9667, + "step": 1365 + }, + { + "epoch": 0.76, + "learning_rate": 4.253374469511917e-05, + "loss": 0.9999, + "step": 1370 + }, + { + "epoch": 0.76, + "learning_rate": 4.248206307478909e-05, + "loss": 0.9738, + "step": 1375 + }, + { + "epoch": 0.76, + "learning_rate": 4.243023483460875e-05, + "loss": 0.9829, + "step": 1380 + }, + { + "epoch": 0.77, + "learning_rate": 4.237826040925503e-05, + "loss": 1.0107, + "step": 1385 + }, + { + "epoch": 0.77, + "learning_rate": 4.232614023463088e-05, + "loss": 0.9992, + "step": 1390 + }, + { + "epoch": 0.77, + "learning_rate": 4.227387474786159e-05, + "loss": 0.9566, + "step": 1395 + }, + { + "epoch": 0.77, + "learning_rate": 4.222146438729119e-05, + "loss": 0.9722, + "step": 1400 + }, + { + "epoch": 0.78, + "learning_rate": 4.216890959247873e-05, + "loss": 0.988, + "step": 1405 + }, + { + "epoch": 0.78, + "learning_rate": 4.211621080419463e-05, + "loss": 1.0103, + "step": 1410 + }, + { + "epoch": 0.78, + "learning_rate": 4.206336846441695e-05, + "loss": 0.9805, + "step": 1415 + }, + { + "epoch": 0.79, + "learning_rate": 4.201038301632772e-05, + "loss": 1.0177, + "step": 1420 + }, + { + "epoch": 0.79, + "learning_rate": 4.195725490430917e-05, + "loss": 0.9938, + "step": 1425 + }, + { + "epoch": 0.79, + "learning_rate": 4.190398457394007e-05, + "loss": 1.0276, + "step": 1430 + }, + { + "epoch": 0.79, + "learning_rate": 4.1850572471991924e-05, + "loss": 0.9909, + "step": 1435 + }, + { + "epoch": 0.8, + "learning_rate": 4.1797019046425264e-05, + "loss": 0.9794, + "step": 1440 + }, + { + "epoch": 0.8, + "learning_rate": 4.1743324746385914e-05, + "loss": 1.0073, + "step": 1445 + }, + { + "epoch": 0.8, + "learning_rate": 4.1689490022201154e-05, + "loss": 1.0106, + "step": 1450 + }, + { + "epoch": 0.8, + "learning_rate": 4.163551532537601e-05, + "loss": 1.0241, + "step": 1455 + }, + { + "epoch": 0.81, + "learning_rate": 4.1581401108589425e-05, + "loss": 1.0178, + "step": 1460 + }, + { + "epoch": 0.81, + "learning_rate": 4.1527147825690495e-05, + "loss": 0.9426, + "step": 1465 + }, + { + "epoch": 0.81, + "learning_rate": 4.1472755931694626e-05, + "loss": 0.9642, + "step": 1470 + }, + { + "epoch": 0.82, + "learning_rate": 4.141822588277976e-05, + "loss": 0.9678, + "step": 1475 + }, + { + "epoch": 0.82, + "learning_rate": 4.136355813628251e-05, + "loss": 0.9654, + "step": 1480 + }, + { + "epoch": 0.82, + "learning_rate": 4.130875315069435e-05, + "loss": 0.9748, + "step": 1485 + }, + { + "epoch": 0.82, + "learning_rate": 4.125381138565775e-05, + "loss": 0.9321, + "step": 1490 + }, + { + "epoch": 0.83, + "learning_rate": 4.1198733301962346e-05, + "loss": 0.9885, + "step": 1495 + }, + { + "epoch": 0.83, + "learning_rate": 4.114351936154105e-05, + "loss": 0.9385, + "step": 1500 + }, + { + "epoch": 0.83, + "learning_rate": 4.108817002746619e-05, + "loss": 0.9362, + "step": 1505 + }, + { + "epoch": 0.83, + "learning_rate": 4.1032685763945625e-05, + "loss": 0.9764, + "step": 1510 + }, + { + "epoch": 0.84, + "learning_rate": 4.097706703631886e-05, + "loss": 0.961, + "step": 1515 + }, + { + "epoch": 0.84, + "learning_rate": 4.092131431105312e-05, + "loss": 0.9818, + "step": 1520 + }, + { + "epoch": 0.84, + "learning_rate": 4.086542805573945e-05, + "loss": 0.9855, + "step": 1525 + }, + { + "epoch": 0.85, + "learning_rate": 4.080940873908881e-05, + "loss": 0.9618, + "step": 1530 + }, + { + "epoch": 0.85, + "learning_rate": 4.07532568309281e-05, + "loss": 1.0102, + "step": 1535 + }, + { + "epoch": 0.85, + "learning_rate": 4.069697280219628e-05, + "loss": 0.9773, + "step": 1540 + }, + { + "epoch": 0.85, + "learning_rate": 4.0640557124940376e-05, + "loss": 0.9687, + "step": 1545 + }, + { + "epoch": 0.86, + "learning_rate": 4.058401027231152e-05, + "loss": 0.9818, + "step": 1550 + }, + { + "epoch": 0.86, + "learning_rate": 4.052733271856103e-05, + "loss": 0.9911, + "step": 1555 + }, + { + "epoch": 0.86, + "learning_rate": 4.0470524939036355e-05, + "loss": 0.9694, + "step": 1560 + }, + { + "epoch": 0.87, + "learning_rate": 4.0413587410177155e-05, + "loss": 0.9374, + "step": 1565 + }, + { + "epoch": 0.87, + "learning_rate": 4.035652060951128e-05, + "loss": 0.956, + "step": 1570 + }, + { + "epoch": 0.87, + "learning_rate": 4.0299325015650774e-05, + "loss": 0.9491, + "step": 1575 + }, + { + "epoch": 0.87, + "learning_rate": 4.024200110828783e-05, + "loss": 0.9569, + "step": 1580 + }, + { + "epoch": 0.88, + "learning_rate": 4.018454936819082e-05, + "loss": 0.9627, + "step": 1585 + }, + { + "epoch": 0.88, + "learning_rate": 4.012697027720018e-05, + "loss": 0.9922, + "step": 1590 + }, + { + "epoch": 0.88, + "learning_rate": 4.0069264318224506e-05, + "loss": 0.9703, + "step": 1595 + }, + { + "epoch": 0.88, + "learning_rate": 4.0011431975236337e-05, + "loss": 0.9436, + "step": 1600 + }, + { + "epoch": 0.89, + "learning_rate": 3.995347373326822e-05, + "loss": 0.961, + "step": 1605 + }, + { + "epoch": 0.89, + "learning_rate": 3.989539007840861e-05, + "loss": 0.9247, + "step": 1610 + }, + { + "epoch": 0.89, + "learning_rate": 3.983718149779775e-05, + "loss": 0.9537, + "step": 1615 + }, + { + "epoch": 0.9, + "learning_rate": 3.9778848479623656e-05, + "loss": 0.9415, + "step": 1620 + }, + { + "epoch": 0.9, + "learning_rate": 3.972039151311795e-05, + "loss": 0.9954, + "step": 1625 + }, + { + "epoch": 0.9, + "learning_rate": 3.966181108855183e-05, + "loss": 0.9451, + "step": 1630 + }, + { + "epoch": 0.9, + "learning_rate": 3.960310769723189e-05, + "loss": 0.9977, + "step": 1635 + }, + { + "epoch": 0.91, + "learning_rate": 3.9544281831496034e-05, + "loss": 0.9806, + "step": 1640 + }, + { + "epoch": 0.91, + "learning_rate": 3.9485333984709374e-05, + "loss": 0.9851, + "step": 1645 + }, + { + "epoch": 0.91, + "learning_rate": 3.942626465126001e-05, + "loss": 1.0089, + "step": 1650 + }, + { + "epoch": 0.92, + "learning_rate": 3.9367074326555e-05, + "loss": 0.9562, + "step": 1655 + }, + { + "epoch": 0.92, + "learning_rate": 3.930776350701609e-05, + "loss": 0.9892, + "step": 1660 + }, + { + "epoch": 0.92, + "learning_rate": 3.92483326900756e-05, + "loss": 0.9875, + "step": 1665 + }, + { + "epoch": 0.92, + "learning_rate": 3.91887823741723e-05, + "loss": 1.0012, + "step": 1670 + }, + { + "epoch": 0.93, + "learning_rate": 3.9129113058747136e-05, + "loss": 0.97, + "step": 1675 + }, + { + "epoch": 0.93, + "learning_rate": 3.9069325244239095e-05, + "loss": 0.9793, + "step": 1680 + }, + { + "epoch": 0.93, + "learning_rate": 3.900941943208103e-05, + "loss": 0.9864, + "step": 1685 + }, + { + "epoch": 0.93, + "learning_rate": 3.894939612469539e-05, + "loss": 0.9596, + "step": 1690 + }, + { + "epoch": 0.94, + "learning_rate": 3.888925582549006e-05, + "loss": 0.9679, + "step": 1695 + }, + { + "epoch": 0.94, + "learning_rate": 3.882899903885412e-05, + "loss": 0.9395, + "step": 1700 + }, + { + "epoch": 0.94, + "learning_rate": 3.876862627015361e-05, + "loss": 0.9774, + "step": 1705 + }, + { + "epoch": 0.95, + "learning_rate": 3.87081380257273e-05, + "loss": 0.9686, + "step": 1710 + }, + { + "epoch": 0.95, + "learning_rate": 3.864753481288244e-05, + "loss": 0.953, + "step": 1715 + }, + { + "epoch": 0.95, + "learning_rate": 3.8586817139890515e-05, + "loss": 0.9719, + "step": 1720 + }, + { + "epoch": 0.95, + "learning_rate": 3.852598551598294e-05, + "loss": 0.9748, + "step": 1725 + }, + { + "epoch": 0.96, + "learning_rate": 3.8465040451346874e-05, + "loss": 0.9741, + "step": 1730 + }, + { + "epoch": 0.96, + "learning_rate": 3.8403982457120836e-05, + "loss": 0.9747, + "step": 1735 + }, + { + "epoch": 0.96, + "learning_rate": 3.834281204539051e-05, + "loss": 0.9791, + "step": 1740 + }, + { + "epoch": 0.96, + "learning_rate": 3.828152972918438e-05, + "loss": 0.9704, + "step": 1745 + }, + { + "epoch": 0.97, + "learning_rate": 3.82201360224695e-05, + "loss": 0.9556, + "step": 1750 + }, + { + "epoch": 0.97, + "learning_rate": 3.815863144014711e-05, + "loss": 0.9552, + "step": 1755 + }, + { + "epoch": 0.97, + "learning_rate": 3.809701649804834e-05, + "loss": 0.9594, + "step": 1760 + }, + { + "epoch": 0.98, + "learning_rate": 3.8035291712929926e-05, + "loss": 0.959, + "step": 1765 + }, + { + "epoch": 0.98, + "learning_rate": 3.797345760246982e-05, + "loss": 1.0254, + "step": 1770 + }, + { + "epoch": 0.98, + "learning_rate": 3.791151468526289e-05, + "loss": 0.9925, + "step": 1775 + }, + { + "epoch": 0.98, + "learning_rate": 3.784946348081654e-05, + "loss": 0.9516, + "step": 1780 + }, + { + "epoch": 0.99, + "learning_rate": 3.7787304509546365e-05, + "loss": 0.954, + "step": 1785 + }, + { + "epoch": 0.99, + "learning_rate": 3.7725038292771774e-05, + "loss": 0.9465, + "step": 1790 + }, + { + "epoch": 0.99, + "learning_rate": 3.766266535271167e-05, + "loss": 0.9792, + "step": 1795 + }, + { + "epoch": 1.0, + "learning_rate": 3.760018621248e-05, + "loss": 0.964, + "step": 1800 + }, + { + "epoch": 1.0, + "learning_rate": 3.75376013960814e-05, + "loss": 0.9419, + "step": 1805 + }, + { + "epoch": 1.0, + "learning_rate": 3.747491142840681e-05, + "loss": 0.9818, + "step": 1810 + }, + { + "epoch": 1.0, + "learning_rate": 3.741211683522904e-05, + "loss": 0.9153, + "step": 1815 + }, + { + "epoch": 1.01, + "learning_rate": 3.734921814319841e-05, + "loss": 0.955, + "step": 1820 + }, + { + "epoch": 1.01, + "learning_rate": 3.728621587983828e-05, + "loss": 0.9467, + "step": 1825 + }, + { + "epoch": 1.01, + "learning_rate": 3.722311057354067e-05, + "loss": 0.9816, + "step": 1830 + }, + { + "epoch": 1.01, + "learning_rate": 3.715990275356178e-05, + "loss": 0.9727, + "step": 1835 + }, + { + "epoch": 1.02, + "learning_rate": 3.7096592950017617e-05, + "loss": 0.9822, + "step": 1840 + }, + { + "epoch": 1.02, + "learning_rate": 3.703318169387947e-05, + "loss": 0.942, + "step": 1845 + }, + { + "epoch": 1.02, + "learning_rate": 3.696966951696952e-05, + "loss": 0.9306, + "step": 1850 + }, + { + "epoch": 1.03, + "learning_rate": 3.690605695195637e-05, + "loss": 0.9654, + "step": 1855 + }, + { + "epoch": 1.03, + "learning_rate": 3.684234453235054e-05, + "loss": 0.9592, + "step": 1860 + }, + { + "epoch": 1.03, + "learning_rate": 3.677853279250003e-05, + "loss": 0.988, + "step": 1865 + }, + { + "epoch": 1.03, + "learning_rate": 3.671462226758583e-05, + "loss": 0.9462, + "step": 1870 + }, + { + "epoch": 1.04, + "learning_rate": 3.665061349361742e-05, + "loss": 0.9685, + "step": 1875 + }, + { + "epoch": 1.04, + "learning_rate": 3.658650700742828e-05, + "loss": 0.9772, + "step": 1880 + }, + { + "epoch": 1.04, + "learning_rate": 3.6522303346671404e-05, + "loss": 0.9482, + "step": 1885 + }, + { + "epoch": 1.05, + "learning_rate": 3.645800304981477e-05, + "loss": 1.0069, + "step": 1890 + }, + { + "epoch": 1.05, + "learning_rate": 3.639360665613683e-05, + "loss": 0.9003, + "step": 1895 + }, + { + "epoch": 1.05, + "learning_rate": 3.632911470572197e-05, + "loss": 0.9279, + "step": 1900 + }, + { + "epoch": 1.05, + "learning_rate": 3.626452773945603e-05, + "loss": 0.9237, + "step": 1905 + }, + { + "epoch": 1.06, + "learning_rate": 3.619984629902172e-05, + "loss": 1.0086, + "step": 1910 + }, + { + "epoch": 1.06, + "learning_rate": 3.613507092689409e-05, + "loss": 0.9625, + "step": 1915 + }, + { + "epoch": 1.06, + "learning_rate": 3.607020216633599e-05, + "loss": 0.9297, + "step": 1920 + }, + { + "epoch": 1.06, + "learning_rate": 3.60052405613935e-05, + "loss": 0.9718, + "step": 1925 + }, + { + "epoch": 1.07, + "learning_rate": 3.594018665689139e-05, + "loss": 0.9512, + "step": 1930 + }, + { + "epoch": 1.07, + "learning_rate": 3.5875040998428513e-05, + "loss": 0.9923, + "step": 1935 + }, + { + "epoch": 1.07, + "learning_rate": 3.5809804132373253e-05, + "loss": 0.9518, + "step": 1940 + }, + { + "epoch": 1.08, + "learning_rate": 3.574447660585897e-05, + "loss": 0.9324, + "step": 1945 + }, + { + "epoch": 1.08, + "learning_rate": 3.5679058966779344e-05, + "loss": 0.9724, + "step": 1950 + }, + { + "epoch": 1.08, + "learning_rate": 3.561355176378384e-05, + "loss": 0.978, + "step": 1955 + }, + { + "epoch": 1.08, + "learning_rate": 3.554795554627307e-05, + "loss": 0.9893, + "step": 1960 + }, + { + "epoch": 1.09, + "learning_rate": 3.548227086439422e-05, + "loss": 0.967, + "step": 1965 + }, + { + "epoch": 1.09, + "learning_rate": 3.541649826903639e-05, + "loss": 0.9529, + "step": 1970 + }, + { + "epoch": 1.09, + "learning_rate": 3.535063831182602e-05, + "loss": 0.9506, + "step": 1975 + }, + { + "epoch": 1.09, + "learning_rate": 3.528469154512224e-05, + "loss": 0.9525, + "step": 1980 + }, + { + "epoch": 1.1, + "learning_rate": 3.521865852201223e-05, + "loss": 0.9258, + "step": 1985 + }, + { + "epoch": 1.1, + "learning_rate": 3.5152539796306596e-05, + "loss": 0.9417, + "step": 1990 + }, + { + "epoch": 1.1, + "learning_rate": 3.508633592253472e-05, + "loss": 0.9465, + "step": 1995 + }, + { + "epoch": 1.11, + "learning_rate": 3.502004745594011e-05, + "loss": 0.9494, + "step": 2000 + }, + { + "epoch": 1.11, + "learning_rate": 3.4953674952475755e-05, + "loss": 0.9709, + "step": 2005 + }, + { + "epoch": 1.11, + "learning_rate": 3.488721896879943e-05, + "loss": 0.9581, + "step": 2010 + }, + { + "epoch": 1.11, + "learning_rate": 3.4820680062269074e-05, + "loss": 0.974, + "step": 2015 + }, + { + "epoch": 1.12, + "learning_rate": 3.4754058790938046e-05, + "loss": 0.9768, + "step": 2020 + }, + { + "epoch": 1.12, + "learning_rate": 3.468735571355055e-05, + "loss": 0.982, + "step": 2025 + }, + { + "epoch": 1.12, + "learning_rate": 3.4620571389536825e-05, + "loss": 0.984, + "step": 2030 + }, + { + "epoch": 1.13, + "learning_rate": 3.455370637900856e-05, + "loss": 0.9604, + "step": 2035 + }, + { + "epoch": 1.13, + "learning_rate": 3.448676124275414e-05, + "loss": 0.897, + "step": 2040 + }, + { + "epoch": 1.13, + "learning_rate": 3.4419736542233925e-05, + "loss": 0.9968, + "step": 2045 + }, + { + "epoch": 1.13, + "learning_rate": 3.4352632839575616e-05, + "loss": 0.9479, + "step": 2050 + }, + { + "epoch": 1.14, + "learning_rate": 3.428545069756946e-05, + "loss": 0.9724, + "step": 2055 + }, + { + "epoch": 1.14, + "learning_rate": 3.42181906796636e-05, + "loss": 0.9493, + "step": 2060 + }, + { + "epoch": 1.14, + "learning_rate": 3.415085334995927e-05, + "loss": 0.9348, + "step": 2065 + }, + { + "epoch": 1.14, + "learning_rate": 3.408343927320613e-05, + "loss": 0.9702, + "step": 2070 + }, + { + "epoch": 1.15, + "learning_rate": 3.401594901479753e-05, + "loss": 0.9089, + "step": 2075 + }, + { + "epoch": 1.15, + "learning_rate": 3.394838314076572e-05, + "loss": 0.9606, + "step": 2080 + }, + { + "epoch": 1.15, + "learning_rate": 3.3880742217777115e-05, + "loss": 0.9743, + "step": 2085 + }, + { + "epoch": 1.16, + "learning_rate": 3.381302681312759e-05, + "loss": 0.9469, + "step": 2090 + }, + { + "epoch": 1.16, + "learning_rate": 3.374523749473767e-05, + "loss": 0.949, + "step": 2095 + }, + { + "epoch": 1.16, + "learning_rate": 3.367737483114779e-05, + "loss": 0.9567, + "step": 2100 + }, + { + "epoch": 1.16, + "learning_rate": 3.360943939151351e-05, + "loss": 0.9718, + "step": 2105 + }, + { + "epoch": 1.17, + "learning_rate": 3.354143174560078e-05, + "loss": 0.9626, + "step": 2110 + }, + { + "epoch": 1.17, + "learning_rate": 3.3473352463781105e-05, + "loss": 0.9346, + "step": 2115 + }, + { + "epoch": 1.17, + "learning_rate": 3.340520211702681e-05, + "loss": 0.9208, + "step": 2120 + }, + { + "epoch": 1.18, + "learning_rate": 3.333698127690623e-05, + "loss": 0.9856, + "step": 2125 + }, + { + "epoch": 1.18, + "learning_rate": 3.326869051557891e-05, + "loss": 0.9049, + "step": 2130 + }, + { + "epoch": 1.18, + "learning_rate": 3.320033040579082e-05, + "loss": 0.9222, + "step": 2135 + }, + { + "epoch": 1.18, + "learning_rate": 3.3131901520869565e-05, + "loss": 0.9648, + "step": 2140 + }, + { + "epoch": 1.19, + "learning_rate": 3.306340443471951e-05, + "loss": 0.9538, + "step": 2145 + }, + { + "epoch": 1.19, + "learning_rate": 3.299483972181708e-05, + "loss": 0.9314, + "step": 2150 + }, + { + "epoch": 1.19, + "learning_rate": 3.292620795720583e-05, + "loss": 0.9576, + "step": 2155 + }, + { + "epoch": 1.19, + "learning_rate": 3.285750971649167e-05, + "loss": 0.9499, + "step": 2160 + }, + { + "epoch": 1.2, + "learning_rate": 3.278874557583807e-05, + "loss": 0.9568, + "step": 2165 + }, + { + "epoch": 1.2, + "learning_rate": 3.271991611196117e-05, + "loss": 0.9642, + "step": 2170 + }, + { + "epoch": 1.2, + "learning_rate": 3.265102190212497e-05, + "loss": 0.9526, + "step": 2175 + }, + { + "epoch": 1.21, + "learning_rate": 3.258206352413648e-05, + "loss": 0.933, + "step": 2180 + }, + { + "epoch": 1.21, + "learning_rate": 3.2513041556340887e-05, + "loss": 0.9683, + "step": 2185 + }, + { + "epoch": 1.21, + "learning_rate": 3.244395657761671e-05, + "loss": 0.9155, + "step": 2190 + }, + { + "epoch": 1.21, + "learning_rate": 3.2374809167370924e-05, + "loss": 0.9262, + "step": 2195 + }, + { + "epoch": 1.22, + "learning_rate": 3.230559990553409e-05, + "loss": 0.9778, + "step": 2200 + }, + { + "epoch": 1.22, + "learning_rate": 3.2236329372555544e-05, + "loss": 0.9577, + "step": 2205 + }, + { + "epoch": 1.22, + "learning_rate": 3.2166998149398465e-05, + "loss": 0.9286, + "step": 2210 + }, + { + "epoch": 1.22, + "learning_rate": 3.209760681753505e-05, + "loss": 0.9634, + "step": 2215 + }, + { + "epoch": 1.23, + "learning_rate": 3.2028155958941615e-05, + "loss": 0.9467, + "step": 2220 + }, + { + "epoch": 1.23, + "learning_rate": 3.195864615609373e-05, + "loss": 0.9543, + "step": 2225 + }, + { + "epoch": 1.23, + "learning_rate": 3.1889077991961304e-05, + "loss": 0.9914, + "step": 2230 + }, + { + "epoch": 1.24, + "learning_rate": 3.181945205000373e-05, + "loss": 0.9309, + "step": 2235 + }, + { + "epoch": 1.24, + "learning_rate": 3.1749768914164955e-05, + "loss": 0.9299, + "step": 2240 + }, + { + "epoch": 1.24, + "learning_rate": 3.168002916886864e-05, + "loss": 0.9462, + "step": 2245 + }, + { + "epoch": 1.24, + "learning_rate": 3.1610233399013194e-05, + "loss": 0.948, + "step": 2250 + }, + { + "epoch": 1.25, + "learning_rate": 3.15403821899669e-05, + "loss": 0.9335, + "step": 2255 + }, + { + "epoch": 1.25, + "learning_rate": 3.147047612756302e-05, + "loss": 0.952, + "step": 2260 + }, + { + "epoch": 1.25, + "learning_rate": 3.140051579809484e-05, + "loss": 0.9532, + "step": 2265 + }, + { + "epoch": 1.26, + "learning_rate": 3.133050178831079e-05, + "loss": 0.9853, + "step": 2270 + }, + { + "epoch": 1.26, + "learning_rate": 3.12604346854095e-05, + "loss": 0.8925, + "step": 2275 + }, + { + "epoch": 1.26, + "learning_rate": 3.119031507703491e-05, + "loss": 0.942, + "step": 2280 + }, + { + "epoch": 1.26, + "learning_rate": 3.112014355127129e-05, + "loss": 0.9132, + "step": 2285 + }, + { + "epoch": 1.27, + "learning_rate": 3.104992069663835e-05, + "loss": 0.9335, + "step": 2290 + }, + { + "epoch": 1.27, + "learning_rate": 3.0979647102086273e-05, + "loss": 0.9403, + "step": 2295 + }, + { + "epoch": 1.27, + "learning_rate": 3.090932335699081e-05, + "loss": 0.9246, + "step": 2300 + }, + { + "epoch": 1.27, + "learning_rate": 3.083895005114831e-05, + "loss": 0.912, + "step": 2305 + }, + { + "epoch": 1.28, + "learning_rate": 3.076852777477079e-05, + "loss": 0.9334, + "step": 2310 + }, + { + "epoch": 1.28, + "learning_rate": 3.069805711848096e-05, + "loss": 0.933, + "step": 2315 + }, + { + "epoch": 1.28, + "learning_rate": 3.062753867330729e-05, + "loss": 0.9348, + "step": 2320 + }, + { + "epoch": 1.29, + "learning_rate": 3.055697303067905e-05, + "loss": 0.9997, + "step": 2325 + }, + { + "epoch": 1.29, + "learning_rate": 3.048636078242137e-05, + "loss": 0.9196, + "step": 2330 + }, + { + "epoch": 1.29, + "learning_rate": 3.0415702520750235e-05, + "loss": 0.9735, + "step": 2335 + }, + { + "epoch": 1.29, + "learning_rate": 3.0344998838267525e-05, + "loss": 0.9395, + "step": 2340 + }, + { + "epoch": 1.3, + "learning_rate": 3.0274250327956093e-05, + "loss": 0.9455, + "step": 2345 + }, + { + "epoch": 1.3, + "learning_rate": 3.020345758317474e-05, + "loss": 0.968, + "step": 2350 + }, + { + "epoch": 1.3, + "learning_rate": 3.0132621197653245e-05, + "loss": 0.9403, + "step": 2355 + }, + { + "epoch": 1.3, + "learning_rate": 3.0061741765487418e-05, + "loss": 0.9267, + "step": 2360 + }, + { + "epoch": 1.31, + "learning_rate": 2.9990819881134073e-05, + "loss": 0.9734, + "step": 2365 + }, + { + "epoch": 1.31, + "learning_rate": 2.9919856139406093e-05, + "loss": 0.949, + "step": 2370 + }, + { + "epoch": 1.31, + "learning_rate": 2.9848851135467386e-05, + "loss": 0.9464, + "step": 2375 + }, + { + "epoch": 1.32, + "learning_rate": 2.977780546482794e-05, + "loss": 0.9709, + "step": 2380 + }, + { + "epoch": 1.32, + "learning_rate": 2.9706719723338795e-05, + "loss": 0.9202, + "step": 2385 + }, + { + "epoch": 1.32, + "learning_rate": 2.9635594507187074e-05, + "loss": 0.9625, + "step": 2390 + }, + { + "epoch": 1.32, + "learning_rate": 2.956443041289096e-05, + "loss": 0.9307, + "step": 2395 + }, + { + "epoch": 1.33, + "learning_rate": 2.9493228037294702e-05, + "loss": 0.907, + "step": 2400 + }, + { + "epoch": 1.33, + "learning_rate": 2.9421987977563613e-05, + "loss": 0.9487, + "step": 2405 + }, + { + "epoch": 1.33, + "learning_rate": 2.935071083117907e-05, + "loss": 0.9259, + "step": 2410 + }, + { + "epoch": 1.34, + "learning_rate": 2.9279397195933457e-05, + "loss": 0.9357, + "step": 2415 + }, + { + "epoch": 1.34, + "learning_rate": 2.920804766992521e-05, + "loss": 0.9149, + "step": 2420 + }, + { + "epoch": 1.34, + "learning_rate": 2.9136662851553787e-05, + "loss": 0.9664, + "step": 2425 + }, + { + "epoch": 1.34, + "learning_rate": 2.906524333951461e-05, + "loss": 0.973, + "step": 2430 + }, + { + "epoch": 1.35, + "learning_rate": 2.899378973279409e-05, + "loss": 0.9797, + "step": 2435 + }, + { + "epoch": 1.35, + "learning_rate": 2.892230263066459e-05, + "loss": 0.9604, + "step": 2440 + }, + { + "epoch": 1.35, + "learning_rate": 2.885078263267938e-05, + "loss": 0.9147, + "step": 2445 + }, + { + "epoch": 1.35, + "learning_rate": 2.8779230338667634e-05, + "loss": 0.9827, + "step": 2450 + }, + { + "epoch": 1.36, + "learning_rate": 2.870764634872939e-05, + "loss": 0.9362, + "step": 2455 + }, + { + "epoch": 1.36, + "learning_rate": 2.86360312632305e-05, + "loss": 0.9373, + "step": 2460 + }, + { + "epoch": 1.36, + "learning_rate": 2.8564385682797622e-05, + "loss": 0.955, + "step": 2465 + }, + { + "epoch": 1.37, + "learning_rate": 2.8492710208313177e-05, + "loss": 0.9108, + "step": 2470 + }, + { + "epoch": 1.37, + "learning_rate": 2.8421005440910303e-05, + "loss": 0.9259, + "step": 2475 + }, + { + "epoch": 1.37, + "learning_rate": 2.8349271981967797e-05, + "loss": 0.9333, + "step": 2480 + }, + { + "epoch": 1.37, + "learning_rate": 2.8277510433105102e-05, + "loss": 0.9437, + "step": 2485 + }, + { + "epoch": 1.38, + "learning_rate": 2.820572139617725e-05, + "loss": 0.9189, + "step": 2490 + }, + { + "epoch": 1.38, + "learning_rate": 2.8133905473269802e-05, + "loss": 0.9522, + "step": 2495 + }, + { + "epoch": 1.38, + "learning_rate": 2.8062063266693818e-05, + "loss": 0.9352, + "step": 2500 + }, + { + "epoch": 1.39, + "learning_rate": 2.7990195378980784e-05, + "loss": 0.9232, + "step": 2505 + }, + { + "epoch": 1.39, + "learning_rate": 2.7918302412877583e-05, + "loss": 0.8807, + "step": 2510 + }, + { + "epoch": 1.39, + "learning_rate": 2.7846384971341427e-05, + "loss": 0.9231, + "step": 2515 + }, + { + "epoch": 1.39, + "learning_rate": 2.7774443657534788e-05, + "loss": 0.9253, + "step": 2520 + }, + { + "epoch": 1.4, + "learning_rate": 2.770247907482036e-05, + "loss": 0.9646, + "step": 2525 + }, + { + "epoch": 1.4, + "learning_rate": 2.763049182675599e-05, + "loss": 0.9575, + "step": 2530 + }, + { + "epoch": 1.4, + "learning_rate": 2.7558482517089617e-05, + "loss": 0.9234, + "step": 2535 + }, + { + "epoch": 1.4, + "learning_rate": 2.748645174975421e-05, + "loss": 0.9215, + "step": 2540 + }, + { + "epoch": 1.41, + "learning_rate": 2.74144001288627e-05, + "loss": 0.959, + "step": 2545 + }, + { + "epoch": 1.41, + "learning_rate": 2.7342328258702894e-05, + "loss": 0.9509, + "step": 2550 + }, + { + "epoch": 1.41, + "learning_rate": 2.727023674373246e-05, + "loss": 0.8989, + "step": 2555 + }, + { + "epoch": 1.42, + "learning_rate": 2.7198126188573807e-05, + "loss": 0.9653, + "step": 2560 + }, + { + "epoch": 1.42, + "learning_rate": 2.7125997198009028e-05, + "loss": 0.9046, + "step": 2565 + }, + { + "epoch": 1.42, + "learning_rate": 2.7053850376974848e-05, + "loss": 0.9318, + "step": 2570 + }, + { + "epoch": 1.42, + "learning_rate": 2.6981686330557516e-05, + "loss": 0.9292, + "step": 2575 + }, + { + "epoch": 1.43, + "learning_rate": 2.6909505663987756e-05, + "loss": 0.99, + "step": 2580 + }, + { + "epoch": 1.43, + "learning_rate": 2.6837308982635678e-05, + "loss": 0.9737, + "step": 2585 + }, + { + "epoch": 1.43, + "learning_rate": 2.6765096892005726e-05, + "loss": 0.9649, + "step": 2590 + }, + { + "epoch": 1.43, + "learning_rate": 2.6692869997731545e-05, + "loss": 0.9687, + "step": 2595 + }, + { + "epoch": 1.44, + "learning_rate": 2.6620628905570964e-05, + "loss": 0.9708, + "step": 2600 + } + ], + "max_steps": 5424, + "num_train_epochs": 3, + "total_flos": 8.449025001494938e+17, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-2600/training_args.bin b/checkpoint-2600/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..77433459daca6603950afc463ff989e817f81ac3 --- /dev/null +++ b/checkpoint-2600/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0f6a8853599a52b1f15984d75ce4b6434c335fdc67d4509d0b2ca0d8c613590a +size 3362 diff --git a/checkpoint-2700/README.md b/checkpoint-2700/README.md new file mode 100644 index 0000000000000000000000000000000000000000..53c0fe6b436905ad7a2c060e98fe1ebdeac1a2fe --- /dev/null +++ b/checkpoint-2700/README.md @@ -0,0 +1,15 @@ +--- +library_name: peft +--- +## Training procedure + + +The following `bitsandbytes` quantization config was used during training: +- load_in_8bit: True +- llm_int8_threshold: 6.0 +- llm_int8_skip_modules: None +- llm_int8_enable_fp32_cpu_offload: False +### Framework versions + + +- PEFT 0.5.0 diff --git a/checkpoint-2700/adapter_config.json b/checkpoint-2700/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..514ae99ac7471cc26016f7575e44dd8304f29b07 --- /dev/null +++ b/checkpoint-2700/adapter_config.json @@ -0,0 +1,21 @@ +{ + "auto_mapping": null, + "base_model_name_or_path": "meta-llama/Llama-2-7b-chat-hf", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "q_proj", + "v_proj" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-2700/adapter_model.bin b/checkpoint-2700/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..d90e9c2be8fde3ae3221b538215d5d0ecdd35bbd --- /dev/null +++ b/checkpoint-2700/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a831c5cb16f0e2854358c22f358842c433030954afc5f5aecdde09bbc38f2598 +size 16821197 diff --git a/checkpoint-2700/finetuning_args.json b/checkpoint-2700/finetuning_args.json new file mode 100644 index 0000000000000000000000000000000000000000..d01efc206b59c6f88548e8f3940579f2ed2af33b --- /dev/null +++ b/checkpoint-2700/finetuning_args.json @@ -0,0 +1,16 @@ +{ + "dpo_beta": 0.1, + "finetuning_type": "lora", + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "lora_rank": 8, + "lora_target": [ + "q_proj", + "v_proj" + ], + "name_module_trainable": "mlp", + "num_hidden_layers": 32, + "num_layer_trainable": 3, + "ppo_score_norm": false, + "resume_lora_training": true +} diff --git a/checkpoint-2700/optimizer.pt b/checkpoint-2700/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..353984f70714b65e98c4ab5b43b8e95859747c1e --- /dev/null +++ b/checkpoint-2700/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:632bd28d9d114ff6bb225109ce01e9cde371014ab925252794ddb6ce4c2c8150 +size 33629893 diff --git a/checkpoint-2700/rng_state.pth b/checkpoint-2700/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..926f8eeb78f05c240a2ac7845a0d808c795e2a5b --- /dev/null +++ b/checkpoint-2700/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:291c83a81b90bfe8e6270548e1847b511281d8bce3ebc40e9cc6bb5cfe299a14 +size 14575 diff --git a/checkpoint-2700/scaler.pt b/checkpoint-2700/scaler.pt new file mode 100644 index 0000000000000000000000000000000000000000..b421e27b9ecd1bb64083ffb7d48481ab790f500d --- /dev/null +++ b/checkpoint-2700/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:611c318c30be80da7006a9277428a0d0b234fe54b9ebf6dd069d12b30b325e9b +size 557 diff --git a/checkpoint-2700/scheduler.pt b/checkpoint-2700/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..af9e2d9e1c5fa955284f7fd5390779d7fd6503cb --- /dev/null +++ b/checkpoint-2700/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e4348e0782bb14ba92684d243d8082365124b53e92c8ed3ca6c0851ef0b67b33 +size 627 diff --git a/checkpoint-2700/trainer_state.json b/checkpoint-2700/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..d944ea0fd7ef63df950f566c9e16fc2d072c2770 --- /dev/null +++ b/checkpoint-2700/trainer_state.json @@ -0,0 +1,3256 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.492949958529168, + "global_step": 2700, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.9999895164082156e-05, + "loss": 1.6349, + "step": 5 + }, + { + "epoch": 0.01, + "learning_rate": 4.999958065720787e-05, + "loss": 1.6199, + "step": 10 + }, + { + "epoch": 0.01, + "learning_rate": 4.999905648201487e-05, + "loss": 1.4834, + "step": 15 + }, + { + "epoch": 0.01, + "learning_rate": 4.999832264289934e-05, + "loss": 1.3882, + "step": 20 + }, + { + "epoch": 0.01, + "learning_rate": 4.999737914601591e-05, + "loss": 1.3679, + "step": 25 + }, + { + "epoch": 0.02, + "learning_rate": 4.999622599927756e-05, + "loss": 1.2396, + "step": 30 + }, + { + "epoch": 0.02, + "learning_rate": 4.999486321235559e-05, + "loss": 1.321, + "step": 35 + }, + { + "epoch": 0.02, + "learning_rate": 4.9993290796679516e-05, + "loss": 1.2874, + "step": 40 + }, + { + "epoch": 0.02, + "learning_rate": 4.999150876543699e-05, + "loss": 1.2607, + "step": 45 + }, + { + "epoch": 0.03, + "learning_rate": 4.9989517133573694e-05, + "loss": 1.2454, + "step": 50 + }, + { + "epoch": 0.03, + "learning_rate": 4.9987315917793174e-05, + "loss": 1.2799, + "step": 55 + }, + { + "epoch": 0.03, + "learning_rate": 4.998490513655676e-05, + "loss": 1.2575, + "step": 60 + }, + { + "epoch": 0.04, + "learning_rate": 4.998228481008337e-05, + "loss": 1.2404, + "step": 65 + }, + { + "epoch": 0.04, + "learning_rate": 4.997945496034934e-05, + "loss": 1.2219, + "step": 70 + }, + { + "epoch": 0.04, + "learning_rate": 4.9976415611088267e-05, + "loss": 1.2241, + "step": 75 + }, + { + "epoch": 0.04, + "learning_rate": 4.997316678779079e-05, + "loss": 1.1716, + "step": 80 + }, + { + "epoch": 0.05, + "learning_rate": 4.996970851770438e-05, + "loss": 1.1883, + "step": 85 + }, + { + "epoch": 0.05, + "learning_rate": 4.9966040829833115e-05, + "loss": 1.205, + "step": 90 + }, + { + "epoch": 0.05, + "learning_rate": 4.9962163754937426e-05, + "loss": 1.1246, + "step": 95 + }, + { + "epoch": 0.06, + "learning_rate": 4.995807732553384e-05, + "loss": 1.1636, + "step": 100 + }, + { + "epoch": 0.06, + "learning_rate": 4.9953781575894723e-05, + "loss": 1.158, + "step": 105 + }, + { + "epoch": 0.06, + "learning_rate": 4.9949276542048e-05, + "loss": 1.1477, + "step": 110 + }, + { + "epoch": 0.06, + "learning_rate": 4.9944562261776805e-05, + "loss": 1.1678, + "step": 115 + }, + { + "epoch": 0.07, + "learning_rate": 4.9939638774619216e-05, + "loss": 1.1501, + "step": 120 + }, + { + "epoch": 0.07, + "learning_rate": 4.99345061218679e-05, + "loss": 1.1955, + "step": 125 + }, + { + "epoch": 0.07, + "learning_rate": 4.9929164346569756e-05, + "loss": 1.1724, + "step": 130 + }, + { + "epoch": 0.07, + "learning_rate": 4.9923613493525576e-05, + "loss": 1.177, + "step": 135 + }, + { + "epoch": 0.08, + "learning_rate": 4.991785360928968e-05, + "loss": 1.1418, + "step": 140 + }, + { + "epoch": 0.08, + "learning_rate": 4.991188474216947e-05, + "loss": 1.1898, + "step": 145 + }, + { + "epoch": 0.08, + "learning_rate": 4.9905706942225094e-05, + "loss": 1.1479, + "step": 150 + }, + { + "epoch": 0.09, + "learning_rate": 4.9899320261268966e-05, + "loss": 1.1356, + "step": 155 + }, + { + "epoch": 0.09, + "learning_rate": 4.989272475286537e-05, + "loss": 1.1397, + "step": 160 + }, + { + "epoch": 0.09, + "learning_rate": 4.9885920472330004e-05, + "loss": 1.1215, + "step": 165 + }, + { + "epoch": 0.09, + "learning_rate": 4.9878907476729516e-05, + "loss": 1.167, + "step": 170 + }, + { + "epoch": 0.1, + "learning_rate": 4.9871685824881e-05, + "loss": 1.1219, + "step": 175 + }, + { + "epoch": 0.1, + "learning_rate": 4.9864255577351534e-05, + "loss": 1.0835, + "step": 180 + }, + { + "epoch": 0.1, + "learning_rate": 4.985661679645769e-05, + "loss": 1.0721, + "step": 185 + }, + { + "epoch": 0.11, + "learning_rate": 4.9848769546264915e-05, + "loss": 1.0692, + "step": 190 + }, + { + "epoch": 0.11, + "learning_rate": 4.9840713892587146e-05, + "loss": 1.0488, + "step": 195 + }, + { + "epoch": 0.11, + "learning_rate": 4.983244990298609e-05, + "loss": 1.1285, + "step": 200 + }, + { + "epoch": 0.11, + "learning_rate": 4.982397764677081e-05, + "loss": 1.0832, + "step": 205 + }, + { + "epoch": 0.12, + "learning_rate": 4.981529719499704e-05, + "loss": 1.0652, + "step": 210 + }, + { + "epoch": 0.12, + "learning_rate": 4.980640862046663e-05, + "loss": 1.1043, + "step": 215 + }, + { + "epoch": 0.12, + "learning_rate": 4.979731199772693e-05, + "loss": 1.112, + "step": 220 + }, + { + "epoch": 0.12, + "learning_rate": 4.9788007403070146e-05, + "loss": 1.1029, + "step": 225 + }, + { + "epoch": 0.13, + "learning_rate": 4.977849491453277e-05, + "loss": 1.0869, + "step": 230 + }, + { + "epoch": 0.13, + "learning_rate": 4.976877461189481e-05, + "loss": 1.0843, + "step": 235 + }, + { + "epoch": 0.13, + "learning_rate": 4.975884657667922e-05, + "loss": 1.0789, + "step": 240 + }, + { + "epoch": 0.14, + "learning_rate": 4.974871089215118e-05, + "loss": 1.0449, + "step": 245 + }, + { + "epoch": 0.14, + "learning_rate": 4.9738367643317405e-05, + "loss": 1.1053, + "step": 250 + }, + { + "epoch": 0.14, + "learning_rate": 4.9727816916925395e-05, + "loss": 1.0651, + "step": 255 + }, + { + "epoch": 0.14, + "learning_rate": 4.971705880146276e-05, + "loss": 1.0828, + "step": 260 + }, + { + "epoch": 0.15, + "learning_rate": 4.970609338715646e-05, + "loss": 1.0932, + "step": 265 + }, + { + "epoch": 0.15, + "learning_rate": 4.969492076597203e-05, + "loss": 1.0648, + "step": 270 + }, + { + "epoch": 0.15, + "learning_rate": 4.968354103161283e-05, + "loss": 1.0948, + "step": 275 + }, + { + "epoch": 0.15, + "learning_rate": 4.967195427951926e-05, + "loss": 1.0721, + "step": 280 + }, + { + "epoch": 0.16, + "learning_rate": 4.9660160606867936e-05, + "loss": 1.124, + "step": 285 + }, + { + "epoch": 0.16, + "learning_rate": 4.9648160112570896e-05, + "loss": 1.0963, + "step": 290 + }, + { + "epoch": 0.16, + "learning_rate": 4.9635952897274773e-05, + "loss": 1.1078, + "step": 295 + }, + { + "epoch": 0.17, + "learning_rate": 4.9623539063359925e-05, + "loss": 1.1059, + "step": 300 + }, + { + "epoch": 0.17, + "learning_rate": 4.961091871493962e-05, + "loss": 1.1032, + "step": 305 + }, + { + "epoch": 0.17, + "learning_rate": 4.959809195785912e-05, + "loss": 1.0595, + "step": 310 + }, + { + "epoch": 0.17, + "learning_rate": 4.958505889969481e-05, + "loss": 1.1096, + "step": 315 + }, + { + "epoch": 0.18, + "learning_rate": 4.957181964975329e-05, + "loss": 1.0589, + "step": 320 + }, + { + "epoch": 0.18, + "learning_rate": 4.955837431907049e-05, + "loss": 1.0608, + "step": 325 + }, + { + "epoch": 0.18, + "learning_rate": 4.954472302041069e-05, + "loss": 1.0819, + "step": 330 + }, + { + "epoch": 0.19, + "learning_rate": 4.9530865868265605e-05, + "loss": 1.0759, + "step": 335 + }, + { + "epoch": 0.19, + "learning_rate": 4.951680297885342e-05, + "loss": 1.0515, + "step": 340 + }, + { + "epoch": 0.19, + "learning_rate": 4.950253447011779e-05, + "loss": 1.0371, + "step": 345 + }, + { + "epoch": 0.19, + "learning_rate": 4.948806046172691e-05, + "loss": 1.0619, + "step": 350 + }, + { + "epoch": 0.2, + "learning_rate": 4.947338107507245e-05, + "loss": 1.0757, + "step": 355 + }, + { + "epoch": 0.2, + "learning_rate": 4.945849643326857e-05, + "loss": 1.0686, + "step": 360 + }, + { + "epoch": 0.2, + "learning_rate": 4.9443406661150874e-05, + "loss": 1.0809, + "step": 365 + }, + { + "epoch": 0.2, + "learning_rate": 4.942811188527537e-05, + "loss": 1.0704, + "step": 370 + }, + { + "epoch": 0.21, + "learning_rate": 4.941261223391742e-05, + "loss": 1.0655, + "step": 375 + }, + { + "epoch": 0.21, + "learning_rate": 4.939690783707063e-05, + "loss": 1.1182, + "step": 380 + }, + { + "epoch": 0.21, + "learning_rate": 4.938099882644578e-05, + "loss": 1.081, + "step": 385 + }, + { + "epoch": 0.22, + "learning_rate": 4.9364885335469734e-05, + "loss": 1.0792, + "step": 390 + }, + { + "epoch": 0.22, + "learning_rate": 4.93485674992843e-05, + "loss": 1.0244, + "step": 395 + }, + { + "epoch": 0.22, + "learning_rate": 4.933204545474511e-05, + "loss": 1.0531, + "step": 400 + }, + { + "epoch": 0.22, + "learning_rate": 4.9315319340420465e-05, + "loss": 1.048, + "step": 405 + }, + { + "epoch": 0.23, + "learning_rate": 4.929838929659015e-05, + "loss": 1.04, + "step": 410 + }, + { + "epoch": 0.23, + "learning_rate": 4.9281255465244314e-05, + "loss": 1.0686, + "step": 415 + }, + { + "epoch": 0.23, + "learning_rate": 4.926391799008223e-05, + "loss": 1.0933, + "step": 420 + }, + { + "epoch": 0.24, + "learning_rate": 4.924637701651111e-05, + "loss": 1.0313, + "step": 425 + }, + { + "epoch": 0.24, + "learning_rate": 4.9228632691644874e-05, + "loss": 1.0811, + "step": 430 + }, + { + "epoch": 0.24, + "learning_rate": 4.921068516430293e-05, + "loss": 1.0617, + "step": 435 + }, + { + "epoch": 0.24, + "learning_rate": 4.919253458500892e-05, + "loss": 1.0482, + "step": 440 + }, + { + "epoch": 0.25, + "learning_rate": 4.9174181105989445e-05, + "loss": 1.0681, + "step": 445 + }, + { + "epoch": 0.25, + "learning_rate": 4.9155624881172834e-05, + "loss": 1.0476, + "step": 450 + }, + { + "epoch": 0.25, + "learning_rate": 4.913686606618777e-05, + "loss": 1.0463, + "step": 455 + }, + { + "epoch": 0.25, + "learning_rate": 4.911790481836208e-05, + "loss": 1.0213, + "step": 460 + }, + { + "epoch": 0.26, + "learning_rate": 4.909874129672133e-05, + "loss": 0.9855, + "step": 465 + }, + { + "epoch": 0.26, + "learning_rate": 4.907937566198757e-05, + "loss": 1.045, + "step": 470 + }, + { + "epoch": 0.26, + "learning_rate": 4.9059808076577914e-05, + "loss": 1.0442, + "step": 475 + }, + { + "epoch": 0.27, + "learning_rate": 4.904003870460323e-05, + "loss": 1.0354, + "step": 480 + }, + { + "epoch": 0.27, + "learning_rate": 4.9020067711866735e-05, + "loss": 1.0331, + "step": 485 + }, + { + "epoch": 0.27, + "learning_rate": 4.899989526586261e-05, + "loss": 1.0585, + "step": 490 + }, + { + "epoch": 0.27, + "learning_rate": 4.8979521535774636e-05, + "loss": 1.0223, + "step": 495 + }, + { + "epoch": 0.28, + "learning_rate": 4.895894669247468e-05, + "loss": 1.0118, + "step": 500 + }, + { + "epoch": 0.28, + "learning_rate": 4.8938170908521356e-05, + "loss": 1.0508, + "step": 505 + }, + { + "epoch": 0.28, + "learning_rate": 4.8917194358158534e-05, + "loss": 1.0656, + "step": 510 + }, + { + "epoch": 0.28, + "learning_rate": 4.8896017217313886e-05, + "loss": 1.0655, + "step": 515 + }, + { + "epoch": 0.29, + "learning_rate": 4.887463966359741e-05, + "loss": 1.0833, + "step": 520 + }, + { + "epoch": 0.29, + "learning_rate": 4.8853061876299956e-05, + "loss": 1.068, + "step": 525 + }, + { + "epoch": 0.29, + "learning_rate": 4.8831284036391684e-05, + "loss": 1.0487, + "step": 530 + }, + { + "epoch": 0.3, + "learning_rate": 4.880930632652058e-05, + "loss": 1.059, + "step": 535 + }, + { + "epoch": 0.3, + "learning_rate": 4.878712893101092e-05, + "loss": 1.0408, + "step": 540 + }, + { + "epoch": 0.3, + "learning_rate": 4.876475203586171e-05, + "loss": 0.9976, + "step": 545 + }, + { + "epoch": 0.3, + "learning_rate": 4.874217582874514e-05, + "loss": 1.0358, + "step": 550 + }, + { + "epoch": 0.31, + "learning_rate": 4.8719400499005e-05, + "loss": 1.0073, + "step": 555 + }, + { + "epoch": 0.31, + "learning_rate": 4.869642623765509e-05, + "loss": 1.0742, + "step": 560 + }, + { + "epoch": 0.31, + "learning_rate": 4.867325323737765e-05, + "loss": 1.0463, + "step": 565 + }, + { + "epoch": 0.32, + "learning_rate": 4.864988169252168e-05, + "loss": 1.0242, + "step": 570 + }, + { + "epoch": 0.32, + "learning_rate": 4.8626311799101375e-05, + "loss": 1.0773, + "step": 575 + }, + { + "epoch": 0.32, + "learning_rate": 4.860254375479446e-05, + "loss": 1.0371, + "step": 580 + }, + { + "epoch": 0.32, + "learning_rate": 4.8578577758940504e-05, + "loss": 1.0399, + "step": 585 + }, + { + "epoch": 0.33, + "learning_rate": 4.855441401253928e-05, + "loss": 1.0411, + "step": 590 + }, + { + "epoch": 0.33, + "learning_rate": 4.8530052718249076e-05, + "loss": 1.0476, + "step": 595 + }, + { + "epoch": 0.33, + "learning_rate": 4.850549408038498e-05, + "loss": 1.0075, + "step": 600 + }, + { + "epoch": 0.33, + "learning_rate": 4.848073830491717e-05, + "loss": 1.0045, + "step": 605 + }, + { + "epoch": 0.34, + "learning_rate": 4.845578559946923e-05, + "loss": 1.0368, + "step": 610 + }, + { + "epoch": 0.34, + "learning_rate": 4.8430636173316306e-05, + "loss": 1.0156, + "step": 615 + }, + { + "epoch": 0.34, + "learning_rate": 4.840529023738348e-05, + "loss": 1.0334, + "step": 620 + }, + { + "epoch": 0.35, + "learning_rate": 4.837974800424389e-05, + "loss": 0.9994, + "step": 625 + }, + { + "epoch": 0.35, + "learning_rate": 4.8354009688117026e-05, + "loss": 1.0467, + "step": 630 + }, + { + "epoch": 0.35, + "learning_rate": 4.8328075504866874e-05, + "loss": 0.9968, + "step": 635 + }, + { + "epoch": 0.35, + "learning_rate": 4.8301945672000164e-05, + "loss": 1.041, + "step": 640 + }, + { + "epoch": 0.36, + "learning_rate": 4.8275620408664487e-05, + "loss": 0.9811, + "step": 645 + }, + { + "epoch": 0.36, + "learning_rate": 4.8249099935646494e-05, + "loss": 1.0235, + "step": 650 + }, + { + "epoch": 0.36, + "learning_rate": 4.822238447537003e-05, + "loss": 1.0152, + "step": 655 + }, + { + "epoch": 0.36, + "learning_rate": 4.819547425189429e-05, + "loss": 1.038, + "step": 660 + }, + { + "epoch": 0.37, + "learning_rate": 4.81683694909119e-05, + "loss": 1.0387, + "step": 665 + }, + { + "epoch": 0.37, + "learning_rate": 4.814107041974707e-05, + "loss": 1.019, + "step": 670 + }, + { + "epoch": 0.37, + "learning_rate": 4.811357726735366e-05, + "loss": 1.0403, + "step": 675 + }, + { + "epoch": 0.38, + "learning_rate": 4.808589026431324e-05, + "loss": 1.0828, + "step": 680 + }, + { + "epoch": 0.38, + "learning_rate": 4.805800964283322e-05, + "loss": 1.0128, + "step": 685 + }, + { + "epoch": 0.38, + "learning_rate": 4.802993563674483e-05, + "loss": 1.0502, + "step": 690 + }, + { + "epoch": 0.38, + "learning_rate": 4.80016684815012e-05, + "loss": 1.0169, + "step": 695 + }, + { + "epoch": 0.39, + "learning_rate": 4.7973208414175406e-05, + "loss": 1.0316, + "step": 700 + }, + { + "epoch": 0.39, + "learning_rate": 4.794455567345842e-05, + "loss": 1.0263, + "step": 705 + }, + { + "epoch": 0.39, + "learning_rate": 4.791571049965714e-05, + "loss": 1.0801, + "step": 710 + }, + { + "epoch": 0.4, + "learning_rate": 4.7886673134692404e-05, + "loss": 1.0575, + "step": 715 + }, + { + "epoch": 0.4, + "learning_rate": 4.7857443822096905e-05, + "loss": 0.9767, + "step": 720 + }, + { + "epoch": 0.4, + "learning_rate": 4.782802280701319e-05, + "loss": 0.9666, + "step": 725 + }, + { + "epoch": 0.4, + "learning_rate": 4.77984103361916e-05, + "loss": 0.9983, + "step": 730 + }, + { + "epoch": 0.41, + "learning_rate": 4.776860665798816e-05, + "loss": 1.0219, + "step": 735 + }, + { + "epoch": 0.41, + "learning_rate": 4.773861202236257e-05, + "loss": 0.9963, + "step": 740 + }, + { + "epoch": 0.41, + "learning_rate": 4.770842668087602e-05, + "loss": 1.0219, + "step": 745 + }, + { + "epoch": 0.41, + "learning_rate": 4.767805088668916e-05, + "loss": 1.0029, + "step": 750 + }, + { + "epoch": 0.42, + "learning_rate": 4.7647484894559936e-05, + "loss": 1.0077, + "step": 755 + }, + { + "epoch": 0.42, + "learning_rate": 4.7616728960841444e-05, + "loss": 0.9908, + "step": 760 + }, + { + "epoch": 0.42, + "learning_rate": 4.758578334347981e-05, + "loss": 1.0092, + "step": 765 + }, + { + "epoch": 0.43, + "learning_rate": 4.7554648302012015e-05, + "loss": 0.9874, + "step": 770 + }, + { + "epoch": 0.43, + "learning_rate": 4.7523324097563706e-05, + "loss": 1.0185, + "step": 775 + }, + { + "epoch": 0.43, + "learning_rate": 4.749181099284703e-05, + "loss": 1.0533, + "step": 780 + }, + { + "epoch": 0.43, + "learning_rate": 4.746010925215839e-05, + "loss": 1.0083, + "step": 785 + }, + { + "epoch": 0.44, + "learning_rate": 4.74282191413763e-05, + "loss": 1.0191, + "step": 790 + }, + { + "epoch": 0.44, + "learning_rate": 4.7396140927959045e-05, + "loss": 0.9786, + "step": 795 + }, + { + "epoch": 0.44, + "learning_rate": 4.7363874880942574e-05, + "loss": 1.0197, + "step": 800 + }, + { + "epoch": 0.45, + "learning_rate": 4.733142127093813e-05, + "loss": 1.0471, + "step": 805 + }, + { + "epoch": 0.45, + "learning_rate": 4.7298780370130014e-05, + "loss": 1.0134, + "step": 810 + }, + { + "epoch": 0.45, + "learning_rate": 4.726595245227336e-05, + "loss": 0.9801, + "step": 815 + }, + { + "epoch": 0.45, + "learning_rate": 4.723293779269173e-05, + "loss": 0.9944, + "step": 820 + }, + { + "epoch": 0.46, + "learning_rate": 4.7199736668274924e-05, + "loss": 1.0358, + "step": 825 + }, + { + "epoch": 0.46, + "learning_rate": 4.716634935747655e-05, + "loss": 0.9907, + "step": 830 + }, + { + "epoch": 0.46, + "learning_rate": 4.713277614031177e-05, + "loss": 1.0698, + "step": 835 + }, + { + "epoch": 0.46, + "learning_rate": 4.70990172983549e-05, + "loss": 1.0466, + "step": 840 + }, + { + "epoch": 0.47, + "learning_rate": 4.706507311473707e-05, + "loss": 0.9489, + "step": 845 + }, + { + "epoch": 0.47, + "learning_rate": 4.703094387414385e-05, + "loss": 0.9936, + "step": 850 + }, + { + "epoch": 0.47, + "learning_rate": 4.699662986281288e-05, + "loss": 0.9672, + "step": 855 + }, + { + "epoch": 0.48, + "learning_rate": 4.696213136853141e-05, + "loss": 1.0164, + "step": 860 + }, + { + "epoch": 0.48, + "learning_rate": 4.6927448680633954e-05, + "loss": 0.9817, + "step": 865 + }, + { + "epoch": 0.48, + "learning_rate": 4.689258208999983e-05, + "loss": 1.028, + "step": 870 + }, + { + "epoch": 0.48, + "learning_rate": 4.6857531889050716e-05, + "loss": 1.0239, + "step": 875 + }, + { + "epoch": 0.49, + "learning_rate": 4.682229837174821e-05, + "loss": 1.0378, + "step": 880 + }, + { + "epoch": 0.49, + "learning_rate": 4.678688183359135e-05, + "loss": 0.9908, + "step": 885 + }, + { + "epoch": 0.49, + "learning_rate": 4.675128257161418e-05, + "loss": 0.9384, + "step": 890 + }, + { + "epoch": 0.49, + "learning_rate": 4.671550088438319e-05, + "loss": 0.9481, + "step": 895 + }, + { + "epoch": 0.5, + "learning_rate": 4.6679537071994874e-05, + "loss": 0.9541, + "step": 900 + }, + { + "epoch": 0.5, + "learning_rate": 4.6643391436073165e-05, + "loss": 0.9787, + "step": 905 + }, + { + "epoch": 0.5, + "learning_rate": 4.660706427976693e-05, + "loss": 0.9897, + "step": 910 + }, + { + "epoch": 0.51, + "learning_rate": 4.657055590774745e-05, + "loss": 0.9947, + "step": 915 + }, + { + "epoch": 0.51, + "learning_rate": 4.6533866626205805e-05, + "loss": 0.9614, + "step": 920 + }, + { + "epoch": 0.51, + "learning_rate": 4.649699674285036e-05, + "loss": 1.0323, + "step": 925 + }, + { + "epoch": 0.51, + "learning_rate": 4.645994656690417e-05, + "loss": 1.0345, + "step": 930 + }, + { + "epoch": 0.52, + "learning_rate": 4.642271640910235e-05, + "loss": 1.0432, + "step": 935 + }, + { + "epoch": 0.52, + "learning_rate": 4.638530658168954e-05, + "loss": 1.0102, + "step": 940 + }, + { + "epoch": 0.52, + "learning_rate": 4.6347717398417203e-05, + "loss": 1.0361, + "step": 945 + }, + { + "epoch": 0.53, + "learning_rate": 4.6309949174541096e-05, + "loss": 0.9818, + "step": 950 + }, + { + "epoch": 0.53, + "learning_rate": 4.627200222681851e-05, + "loss": 1.0293, + "step": 955 + }, + { + "epoch": 0.53, + "learning_rate": 4.6233876873505694e-05, + "loss": 1.0125, + "step": 960 + }, + { + "epoch": 0.53, + "learning_rate": 4.619557343435516e-05, + "loss": 1.0039, + "step": 965 + }, + { + "epoch": 0.54, + "learning_rate": 4.615709223061302e-05, + "loss": 1.0146, + "step": 970 + }, + { + "epoch": 0.54, + "learning_rate": 4.611843358501624e-05, + "loss": 1.0352, + "step": 975 + }, + { + "epoch": 0.54, + "learning_rate": 4.6079597821789993e-05, + "loss": 1.0405, + "step": 980 + }, + { + "epoch": 0.54, + "learning_rate": 4.604058526664491e-05, + "loss": 0.9834, + "step": 985 + }, + { + "epoch": 0.55, + "learning_rate": 4.600139624677436e-05, + "loss": 1.0252, + "step": 990 + }, + { + "epoch": 0.55, + "learning_rate": 4.596203109085168e-05, + "loss": 1.0516, + "step": 995 + }, + { + "epoch": 0.55, + "learning_rate": 4.5922490129027464e-05, + "loss": 0.9762, + "step": 1000 + }, + { + "epoch": 0.56, + "learning_rate": 4.588277369292674e-05, + "loss": 0.9801, + "step": 1005 + }, + { + "epoch": 0.56, + "learning_rate": 4.5842882115646234e-05, + "loss": 1.0127, + "step": 1010 + }, + { + "epoch": 0.56, + "learning_rate": 4.580281573175157e-05, + "loss": 1.0415, + "step": 1015 + }, + { + "epoch": 0.56, + "learning_rate": 4.576257487727442e-05, + "loss": 0.9974, + "step": 1020 + }, + { + "epoch": 0.57, + "learning_rate": 4.572215988970974e-05, + "loss": 1.0097, + "step": 1025 + }, + { + "epoch": 0.57, + "learning_rate": 4.568157110801293e-05, + "loss": 1.0004, + "step": 1030 + }, + { + "epoch": 0.57, + "learning_rate": 4.5640808872596944e-05, + "loss": 0.9932, + "step": 1035 + }, + { + "epoch": 0.58, + "learning_rate": 4.5599873525329505e-05, + "loss": 0.9437, + "step": 1040 + }, + { + "epoch": 0.58, + "learning_rate": 4.555876540953019e-05, + "loss": 1.0004, + "step": 1045 + }, + { + "epoch": 0.58, + "learning_rate": 4.551748486996755e-05, + "loss": 0.9743, + "step": 1050 + }, + { + "epoch": 0.58, + "learning_rate": 4.547603225285626e-05, + "loss": 1.0303, + "step": 1055 + }, + { + "epoch": 0.59, + "learning_rate": 4.543440790585417e-05, + "loss": 1.0225, + "step": 1060 + }, + { + "epoch": 0.59, + "learning_rate": 4.539261217805939e-05, + "loss": 1.0102, + "step": 1065 + }, + { + "epoch": 0.59, + "learning_rate": 4.535064542000743e-05, + "loss": 1.0087, + "step": 1070 + }, + { + "epoch": 0.59, + "learning_rate": 4.5308507983668165e-05, + "loss": 0.952, + "step": 1075 + }, + { + "epoch": 0.6, + "learning_rate": 4.526620022244293e-05, + "loss": 0.9767, + "step": 1080 + }, + { + "epoch": 0.6, + "learning_rate": 4.522372249116158e-05, + "loss": 1.0049, + "step": 1085 + }, + { + "epoch": 0.6, + "learning_rate": 4.5181075146079456e-05, + "loss": 1.0397, + "step": 1090 + }, + { + "epoch": 0.61, + "learning_rate": 4.5138258544874455e-05, + "loss": 1.0167, + "step": 1095 + }, + { + "epoch": 0.61, + "learning_rate": 4.5095273046643985e-05, + "loss": 0.9966, + "step": 1100 + }, + { + "epoch": 0.61, + "learning_rate": 4.5052119011901986e-05, + "loss": 0.983, + "step": 1105 + }, + { + "epoch": 0.61, + "learning_rate": 4.500879680257587e-05, + "loss": 1.0301, + "step": 1110 + }, + { + "epoch": 0.62, + "learning_rate": 4.4965306782003535e-05, + "loss": 1.0371, + "step": 1115 + }, + { + "epoch": 0.62, + "learning_rate": 4.492164931493028e-05, + "loss": 1.0083, + "step": 1120 + }, + { + "epoch": 0.62, + "learning_rate": 4.487782476750575e-05, + "loss": 0.9728, + "step": 1125 + }, + { + "epoch": 0.62, + "learning_rate": 4.4833833507280884e-05, + "loss": 0.9892, + "step": 1130 + }, + { + "epoch": 0.63, + "learning_rate": 4.4789675903204805e-05, + "loss": 0.9847, + "step": 1135 + }, + { + "epoch": 0.63, + "learning_rate": 4.474535232562176e-05, + "loss": 1.0545, + "step": 1140 + }, + { + "epoch": 0.63, + "learning_rate": 4.470086314626797e-05, + "loss": 1.0204, + "step": 1145 + }, + { + "epoch": 0.64, + "learning_rate": 4.465620873826856e-05, + "loss": 0.9648, + "step": 1150 + }, + { + "epoch": 0.64, + "learning_rate": 4.46113894761344e-05, + "loss": 0.9545, + "step": 1155 + }, + { + "epoch": 0.64, + "learning_rate": 4.456640573575896e-05, + "loss": 1.0256, + "step": 1160 + }, + { + "epoch": 0.64, + "learning_rate": 4.4521257894415183e-05, + "loss": 0.99, + "step": 1165 + }, + { + "epoch": 0.65, + "learning_rate": 4.44759463307523e-05, + "loss": 0.9906, + "step": 1170 + }, + { + "epoch": 0.65, + "learning_rate": 4.443047142479266e-05, + "loss": 0.9876, + "step": 1175 + }, + { + "epoch": 0.65, + "learning_rate": 4.4384833557928553e-05, + "loss": 1.0495, + "step": 1180 + }, + { + "epoch": 0.66, + "learning_rate": 4.4339033112918966e-05, + "loss": 0.9869, + "step": 1185 + }, + { + "epoch": 0.66, + "learning_rate": 4.4293070473886456e-05, + "loss": 1.0299, + "step": 1190 + }, + { + "epoch": 0.66, + "learning_rate": 4.424694602631385e-05, + "loss": 1.0073, + "step": 1195 + }, + { + "epoch": 0.66, + "learning_rate": 4.420066015704105e-05, + "loss": 1.0023, + "step": 1200 + }, + { + "epoch": 0.67, + "learning_rate": 4.41542132542618e-05, + "loss": 0.9462, + "step": 1205 + }, + { + "epoch": 0.67, + "learning_rate": 4.410760570752037e-05, + "loss": 1.0116, + "step": 1210 + }, + { + "epoch": 0.67, + "learning_rate": 4.4060837907708375e-05, + "loss": 0.9652, + "step": 1215 + }, + { + "epoch": 0.67, + "learning_rate": 4.401391024706142e-05, + "loss": 1.0411, + "step": 1220 + }, + { + "epoch": 0.68, + "learning_rate": 4.396682311915586e-05, + "loss": 0.9691, + "step": 1225 + }, + { + "epoch": 0.68, + "learning_rate": 4.3919576918905495e-05, + "loss": 0.961, + "step": 1230 + }, + { + "epoch": 0.68, + "learning_rate": 4.387217204255819e-05, + "loss": 0.9602, + "step": 1235 + }, + { + "epoch": 0.69, + "learning_rate": 4.3824608887692666e-05, + "loss": 1.0197, + "step": 1240 + }, + { + "epoch": 0.69, + "learning_rate": 4.377688785321507e-05, + "loss": 0.9982, + "step": 1245 + }, + { + "epoch": 0.69, + "learning_rate": 4.372900933935569e-05, + "loss": 0.986, + "step": 1250 + }, + { + "epoch": 0.69, + "learning_rate": 4.368097374766556e-05, + "loss": 0.9744, + "step": 1255 + }, + { + "epoch": 0.7, + "learning_rate": 4.3632781481013105e-05, + "loss": 1.0078, + "step": 1260 + }, + { + "epoch": 0.7, + "learning_rate": 4.358443294358077e-05, + "loss": 0.9865, + "step": 1265 + }, + { + "epoch": 0.7, + "learning_rate": 4.35359285408616e-05, + "loss": 0.976, + "step": 1270 + }, + { + "epoch": 0.71, + "learning_rate": 4.348726867965591e-05, + "loss": 1.0141, + "step": 1275 + }, + { + "epoch": 0.71, + "learning_rate": 4.343845376806777e-05, + "loss": 0.952, + "step": 1280 + }, + { + "epoch": 0.71, + "learning_rate": 4.338948421550169e-05, + "loss": 1.0239, + "step": 1285 + }, + { + "epoch": 0.71, + "learning_rate": 4.334036043265909e-05, + "loss": 0.994, + "step": 1290 + }, + { + "epoch": 0.72, + "learning_rate": 4.329108283153492e-05, + "loss": 1.0505, + "step": 1295 + }, + { + "epoch": 0.72, + "learning_rate": 4.3241651825414195e-05, + "loss": 0.9919, + "step": 1300 + }, + { + "epoch": 0.72, + "learning_rate": 4.31920678288685e-05, + "loss": 1.0169, + "step": 1305 + }, + { + "epoch": 0.72, + "learning_rate": 4.3142331257752546e-05, + "loss": 0.9938, + "step": 1310 + }, + { + "epoch": 0.73, + "learning_rate": 4.309244252920064e-05, + "loss": 0.92, + "step": 1315 + }, + { + "epoch": 0.73, + "learning_rate": 4.304240206162326e-05, + "loss": 0.9537, + "step": 1320 + }, + { + "epoch": 0.73, + "learning_rate": 4.299221027470345e-05, + "loss": 0.996, + "step": 1325 + }, + { + "epoch": 0.74, + "learning_rate": 4.294186758939339e-05, + "loss": 0.9652, + "step": 1330 + }, + { + "epoch": 0.74, + "learning_rate": 4.2891374427910795e-05, + "loss": 1.0138, + "step": 1335 + }, + { + "epoch": 0.74, + "learning_rate": 4.284073121373544e-05, + "loss": 0.9267, + "step": 1340 + }, + { + "epoch": 0.74, + "learning_rate": 4.278993837160553e-05, + "loss": 0.9899, + "step": 1345 + }, + { + "epoch": 0.75, + "learning_rate": 4.273899632751422e-05, + "loss": 0.963, + "step": 1350 + }, + { + "epoch": 0.75, + "learning_rate": 4.2687905508705974e-05, + "loss": 1.0066, + "step": 1355 + }, + { + "epoch": 0.75, + "learning_rate": 4.263666634367303e-05, + "loss": 0.9939, + "step": 1360 + }, + { + "epoch": 0.75, + "learning_rate": 4.258527926215178e-05, + "loss": 0.9667, + "step": 1365 + }, + { + "epoch": 0.76, + "learning_rate": 4.253374469511917e-05, + "loss": 0.9999, + "step": 1370 + }, + { + "epoch": 0.76, + "learning_rate": 4.248206307478909e-05, + "loss": 0.9738, + "step": 1375 + }, + { + "epoch": 0.76, + "learning_rate": 4.243023483460875e-05, + "loss": 0.9829, + "step": 1380 + }, + { + "epoch": 0.77, + "learning_rate": 4.237826040925503e-05, + "loss": 1.0107, + "step": 1385 + }, + { + "epoch": 0.77, + "learning_rate": 4.232614023463088e-05, + "loss": 0.9992, + "step": 1390 + }, + { + "epoch": 0.77, + "learning_rate": 4.227387474786159e-05, + "loss": 0.9566, + "step": 1395 + }, + { + "epoch": 0.77, + "learning_rate": 4.222146438729119e-05, + "loss": 0.9722, + "step": 1400 + }, + { + "epoch": 0.78, + "learning_rate": 4.216890959247873e-05, + "loss": 0.988, + "step": 1405 + }, + { + "epoch": 0.78, + "learning_rate": 4.211621080419463e-05, + "loss": 1.0103, + "step": 1410 + }, + { + "epoch": 0.78, + "learning_rate": 4.206336846441695e-05, + "loss": 0.9805, + "step": 1415 + }, + { + "epoch": 0.79, + "learning_rate": 4.201038301632772e-05, + "loss": 1.0177, + "step": 1420 + }, + { + "epoch": 0.79, + "learning_rate": 4.195725490430917e-05, + "loss": 0.9938, + "step": 1425 + }, + { + "epoch": 0.79, + "learning_rate": 4.190398457394007e-05, + "loss": 1.0276, + "step": 1430 + }, + { + "epoch": 0.79, + "learning_rate": 4.1850572471991924e-05, + "loss": 0.9909, + "step": 1435 + }, + { + "epoch": 0.8, + "learning_rate": 4.1797019046425264e-05, + "loss": 0.9794, + "step": 1440 + }, + { + "epoch": 0.8, + "learning_rate": 4.1743324746385914e-05, + "loss": 1.0073, + "step": 1445 + }, + { + "epoch": 0.8, + "learning_rate": 4.1689490022201154e-05, + "loss": 1.0106, + "step": 1450 + }, + { + "epoch": 0.8, + "learning_rate": 4.163551532537601e-05, + "loss": 1.0241, + "step": 1455 + }, + { + "epoch": 0.81, + "learning_rate": 4.1581401108589425e-05, + "loss": 1.0178, + "step": 1460 + }, + { + "epoch": 0.81, + "learning_rate": 4.1527147825690495e-05, + "loss": 0.9426, + "step": 1465 + }, + { + "epoch": 0.81, + "learning_rate": 4.1472755931694626e-05, + "loss": 0.9642, + "step": 1470 + }, + { + "epoch": 0.82, + "learning_rate": 4.141822588277976e-05, + "loss": 0.9678, + "step": 1475 + }, + { + "epoch": 0.82, + "learning_rate": 4.136355813628251e-05, + "loss": 0.9654, + "step": 1480 + }, + { + "epoch": 0.82, + "learning_rate": 4.130875315069435e-05, + "loss": 0.9748, + "step": 1485 + }, + { + "epoch": 0.82, + "learning_rate": 4.125381138565775e-05, + "loss": 0.9321, + "step": 1490 + }, + { + "epoch": 0.83, + "learning_rate": 4.1198733301962346e-05, + "loss": 0.9885, + "step": 1495 + }, + { + "epoch": 0.83, + "learning_rate": 4.114351936154105e-05, + "loss": 0.9385, + "step": 1500 + }, + { + "epoch": 0.83, + "learning_rate": 4.108817002746619e-05, + "loss": 0.9362, + "step": 1505 + }, + { + "epoch": 0.83, + "learning_rate": 4.1032685763945625e-05, + "loss": 0.9764, + "step": 1510 + }, + { + "epoch": 0.84, + "learning_rate": 4.097706703631886e-05, + "loss": 0.961, + "step": 1515 + }, + { + "epoch": 0.84, + "learning_rate": 4.092131431105312e-05, + "loss": 0.9818, + "step": 1520 + }, + { + "epoch": 0.84, + "learning_rate": 4.086542805573945e-05, + "loss": 0.9855, + "step": 1525 + }, + { + "epoch": 0.85, + "learning_rate": 4.080940873908881e-05, + "loss": 0.9618, + "step": 1530 + }, + { + "epoch": 0.85, + "learning_rate": 4.07532568309281e-05, + "loss": 1.0102, + "step": 1535 + }, + { + "epoch": 0.85, + "learning_rate": 4.069697280219628e-05, + "loss": 0.9773, + "step": 1540 + }, + { + "epoch": 0.85, + "learning_rate": 4.0640557124940376e-05, + "loss": 0.9687, + "step": 1545 + }, + { + "epoch": 0.86, + "learning_rate": 4.058401027231152e-05, + "loss": 0.9818, + "step": 1550 + }, + { + "epoch": 0.86, + "learning_rate": 4.052733271856103e-05, + "loss": 0.9911, + "step": 1555 + }, + { + "epoch": 0.86, + "learning_rate": 4.0470524939036355e-05, + "loss": 0.9694, + "step": 1560 + }, + { + "epoch": 0.87, + "learning_rate": 4.0413587410177155e-05, + "loss": 0.9374, + "step": 1565 + }, + { + "epoch": 0.87, + "learning_rate": 4.035652060951128e-05, + "loss": 0.956, + "step": 1570 + }, + { + "epoch": 0.87, + "learning_rate": 4.0299325015650774e-05, + "loss": 0.9491, + "step": 1575 + }, + { + "epoch": 0.87, + "learning_rate": 4.024200110828783e-05, + "loss": 0.9569, + "step": 1580 + }, + { + "epoch": 0.88, + "learning_rate": 4.018454936819082e-05, + "loss": 0.9627, + "step": 1585 + }, + { + "epoch": 0.88, + "learning_rate": 4.012697027720018e-05, + "loss": 0.9922, + "step": 1590 + }, + { + "epoch": 0.88, + "learning_rate": 4.0069264318224506e-05, + "loss": 0.9703, + "step": 1595 + }, + { + "epoch": 0.88, + "learning_rate": 4.0011431975236337e-05, + "loss": 0.9436, + "step": 1600 + }, + { + "epoch": 0.89, + "learning_rate": 3.995347373326822e-05, + "loss": 0.961, + "step": 1605 + }, + { + "epoch": 0.89, + "learning_rate": 3.989539007840861e-05, + "loss": 0.9247, + "step": 1610 + }, + { + "epoch": 0.89, + "learning_rate": 3.983718149779775e-05, + "loss": 0.9537, + "step": 1615 + }, + { + "epoch": 0.9, + "learning_rate": 3.9778848479623656e-05, + "loss": 0.9415, + "step": 1620 + }, + { + "epoch": 0.9, + "learning_rate": 3.972039151311795e-05, + "loss": 0.9954, + "step": 1625 + }, + { + "epoch": 0.9, + "learning_rate": 3.966181108855183e-05, + "loss": 0.9451, + "step": 1630 + }, + { + "epoch": 0.9, + "learning_rate": 3.960310769723189e-05, + "loss": 0.9977, + "step": 1635 + }, + { + "epoch": 0.91, + "learning_rate": 3.9544281831496034e-05, + "loss": 0.9806, + "step": 1640 + }, + { + "epoch": 0.91, + "learning_rate": 3.9485333984709374e-05, + "loss": 0.9851, + "step": 1645 + }, + { + "epoch": 0.91, + "learning_rate": 3.942626465126001e-05, + "loss": 1.0089, + "step": 1650 + }, + { + "epoch": 0.92, + "learning_rate": 3.9367074326555e-05, + "loss": 0.9562, + "step": 1655 + }, + { + "epoch": 0.92, + "learning_rate": 3.930776350701609e-05, + "loss": 0.9892, + "step": 1660 + }, + { + "epoch": 0.92, + "learning_rate": 3.92483326900756e-05, + "loss": 0.9875, + "step": 1665 + }, + { + "epoch": 0.92, + "learning_rate": 3.91887823741723e-05, + "loss": 1.0012, + "step": 1670 + }, + { + "epoch": 0.93, + "learning_rate": 3.9129113058747136e-05, + "loss": 0.97, + "step": 1675 + }, + { + "epoch": 0.93, + "learning_rate": 3.9069325244239095e-05, + "loss": 0.9793, + "step": 1680 + }, + { + "epoch": 0.93, + "learning_rate": 3.900941943208103e-05, + "loss": 0.9864, + "step": 1685 + }, + { + "epoch": 0.93, + "learning_rate": 3.894939612469539e-05, + "loss": 0.9596, + "step": 1690 + }, + { + "epoch": 0.94, + "learning_rate": 3.888925582549006e-05, + "loss": 0.9679, + "step": 1695 + }, + { + "epoch": 0.94, + "learning_rate": 3.882899903885412e-05, + "loss": 0.9395, + "step": 1700 + }, + { + "epoch": 0.94, + "learning_rate": 3.876862627015361e-05, + "loss": 0.9774, + "step": 1705 + }, + { + "epoch": 0.95, + "learning_rate": 3.87081380257273e-05, + "loss": 0.9686, + "step": 1710 + }, + { + "epoch": 0.95, + "learning_rate": 3.864753481288244e-05, + "loss": 0.953, + "step": 1715 + }, + { + "epoch": 0.95, + "learning_rate": 3.8586817139890515e-05, + "loss": 0.9719, + "step": 1720 + }, + { + "epoch": 0.95, + "learning_rate": 3.852598551598294e-05, + "loss": 0.9748, + "step": 1725 + }, + { + "epoch": 0.96, + "learning_rate": 3.8465040451346874e-05, + "loss": 0.9741, + "step": 1730 + }, + { + "epoch": 0.96, + "learning_rate": 3.8403982457120836e-05, + "loss": 0.9747, + "step": 1735 + }, + { + "epoch": 0.96, + "learning_rate": 3.834281204539051e-05, + "loss": 0.9791, + "step": 1740 + }, + { + "epoch": 0.96, + "learning_rate": 3.828152972918438e-05, + "loss": 0.9704, + "step": 1745 + }, + { + "epoch": 0.97, + "learning_rate": 3.82201360224695e-05, + "loss": 0.9556, + "step": 1750 + }, + { + "epoch": 0.97, + "learning_rate": 3.815863144014711e-05, + "loss": 0.9552, + "step": 1755 + }, + { + "epoch": 0.97, + "learning_rate": 3.809701649804834e-05, + "loss": 0.9594, + "step": 1760 + }, + { + "epoch": 0.98, + "learning_rate": 3.8035291712929926e-05, + "loss": 0.959, + "step": 1765 + }, + { + "epoch": 0.98, + "learning_rate": 3.797345760246982e-05, + "loss": 1.0254, + "step": 1770 + }, + { + "epoch": 0.98, + "learning_rate": 3.791151468526289e-05, + "loss": 0.9925, + "step": 1775 + }, + { + "epoch": 0.98, + "learning_rate": 3.784946348081654e-05, + "loss": 0.9516, + "step": 1780 + }, + { + "epoch": 0.99, + "learning_rate": 3.7787304509546365e-05, + "loss": 0.954, + "step": 1785 + }, + { + "epoch": 0.99, + "learning_rate": 3.7725038292771774e-05, + "loss": 0.9465, + "step": 1790 + }, + { + "epoch": 0.99, + "learning_rate": 3.766266535271167e-05, + "loss": 0.9792, + "step": 1795 + }, + { + "epoch": 1.0, + "learning_rate": 3.760018621248e-05, + "loss": 0.964, + "step": 1800 + }, + { + "epoch": 1.0, + "learning_rate": 3.75376013960814e-05, + "loss": 0.9419, + "step": 1805 + }, + { + "epoch": 1.0, + "learning_rate": 3.747491142840681e-05, + "loss": 0.9818, + "step": 1810 + }, + { + "epoch": 1.0, + "learning_rate": 3.741211683522904e-05, + "loss": 0.9153, + "step": 1815 + }, + { + "epoch": 1.01, + "learning_rate": 3.734921814319841e-05, + "loss": 0.955, + "step": 1820 + }, + { + "epoch": 1.01, + "learning_rate": 3.728621587983828e-05, + "loss": 0.9467, + "step": 1825 + }, + { + "epoch": 1.01, + "learning_rate": 3.722311057354067e-05, + "loss": 0.9816, + "step": 1830 + }, + { + "epoch": 1.01, + "learning_rate": 3.715990275356178e-05, + "loss": 0.9727, + "step": 1835 + }, + { + "epoch": 1.02, + "learning_rate": 3.7096592950017617e-05, + "loss": 0.9822, + "step": 1840 + }, + { + "epoch": 1.02, + "learning_rate": 3.703318169387947e-05, + "loss": 0.942, + "step": 1845 + }, + { + "epoch": 1.02, + "learning_rate": 3.696966951696952e-05, + "loss": 0.9306, + "step": 1850 + }, + { + "epoch": 1.03, + "learning_rate": 3.690605695195637e-05, + "loss": 0.9654, + "step": 1855 + }, + { + "epoch": 1.03, + "learning_rate": 3.684234453235054e-05, + "loss": 0.9592, + "step": 1860 + }, + { + "epoch": 1.03, + "learning_rate": 3.677853279250003e-05, + "loss": 0.988, + "step": 1865 + }, + { + "epoch": 1.03, + "learning_rate": 3.671462226758583e-05, + "loss": 0.9462, + "step": 1870 + }, + { + "epoch": 1.04, + "learning_rate": 3.665061349361742e-05, + "loss": 0.9685, + "step": 1875 + }, + { + "epoch": 1.04, + "learning_rate": 3.658650700742828e-05, + "loss": 0.9772, + "step": 1880 + }, + { + "epoch": 1.04, + "learning_rate": 3.6522303346671404e-05, + "loss": 0.9482, + "step": 1885 + }, + { + "epoch": 1.05, + "learning_rate": 3.645800304981477e-05, + "loss": 1.0069, + "step": 1890 + }, + { + "epoch": 1.05, + "learning_rate": 3.639360665613683e-05, + "loss": 0.9003, + "step": 1895 + }, + { + "epoch": 1.05, + "learning_rate": 3.632911470572197e-05, + "loss": 0.9279, + "step": 1900 + }, + { + "epoch": 1.05, + "learning_rate": 3.626452773945603e-05, + "loss": 0.9237, + "step": 1905 + }, + { + "epoch": 1.06, + "learning_rate": 3.619984629902172e-05, + "loss": 1.0086, + "step": 1910 + }, + { + "epoch": 1.06, + "learning_rate": 3.613507092689409e-05, + "loss": 0.9625, + "step": 1915 + }, + { + "epoch": 1.06, + "learning_rate": 3.607020216633599e-05, + "loss": 0.9297, + "step": 1920 + }, + { + "epoch": 1.06, + "learning_rate": 3.60052405613935e-05, + "loss": 0.9718, + "step": 1925 + }, + { + "epoch": 1.07, + "learning_rate": 3.594018665689139e-05, + "loss": 0.9512, + "step": 1930 + }, + { + "epoch": 1.07, + "learning_rate": 3.5875040998428513e-05, + "loss": 0.9923, + "step": 1935 + }, + { + "epoch": 1.07, + "learning_rate": 3.5809804132373253e-05, + "loss": 0.9518, + "step": 1940 + }, + { + "epoch": 1.08, + "learning_rate": 3.574447660585897e-05, + "loss": 0.9324, + "step": 1945 + }, + { + "epoch": 1.08, + "learning_rate": 3.5679058966779344e-05, + "loss": 0.9724, + "step": 1950 + }, + { + "epoch": 1.08, + "learning_rate": 3.561355176378384e-05, + "loss": 0.978, + "step": 1955 + }, + { + "epoch": 1.08, + "learning_rate": 3.554795554627307e-05, + "loss": 0.9893, + "step": 1960 + }, + { + "epoch": 1.09, + "learning_rate": 3.548227086439422e-05, + "loss": 0.967, + "step": 1965 + }, + { + "epoch": 1.09, + "learning_rate": 3.541649826903639e-05, + "loss": 0.9529, + "step": 1970 + }, + { + "epoch": 1.09, + "learning_rate": 3.535063831182602e-05, + "loss": 0.9506, + "step": 1975 + }, + { + "epoch": 1.09, + "learning_rate": 3.528469154512224e-05, + "loss": 0.9525, + "step": 1980 + }, + { + "epoch": 1.1, + "learning_rate": 3.521865852201223e-05, + "loss": 0.9258, + "step": 1985 + }, + { + "epoch": 1.1, + "learning_rate": 3.5152539796306596e-05, + "loss": 0.9417, + "step": 1990 + }, + { + "epoch": 1.1, + "learning_rate": 3.508633592253472e-05, + "loss": 0.9465, + "step": 1995 + }, + { + "epoch": 1.11, + "learning_rate": 3.502004745594011e-05, + "loss": 0.9494, + "step": 2000 + }, + { + "epoch": 1.11, + "learning_rate": 3.4953674952475755e-05, + "loss": 0.9709, + "step": 2005 + }, + { + "epoch": 1.11, + "learning_rate": 3.488721896879943e-05, + "loss": 0.9581, + "step": 2010 + }, + { + "epoch": 1.11, + "learning_rate": 3.4820680062269074e-05, + "loss": 0.974, + "step": 2015 + }, + { + "epoch": 1.12, + "learning_rate": 3.4754058790938046e-05, + "loss": 0.9768, + "step": 2020 + }, + { + "epoch": 1.12, + "learning_rate": 3.468735571355055e-05, + "loss": 0.982, + "step": 2025 + }, + { + "epoch": 1.12, + "learning_rate": 3.4620571389536825e-05, + "loss": 0.984, + "step": 2030 + }, + { + "epoch": 1.13, + "learning_rate": 3.455370637900856e-05, + "loss": 0.9604, + "step": 2035 + }, + { + "epoch": 1.13, + "learning_rate": 3.448676124275414e-05, + "loss": 0.897, + "step": 2040 + }, + { + "epoch": 1.13, + "learning_rate": 3.4419736542233925e-05, + "loss": 0.9968, + "step": 2045 + }, + { + "epoch": 1.13, + "learning_rate": 3.4352632839575616e-05, + "loss": 0.9479, + "step": 2050 + }, + { + "epoch": 1.14, + "learning_rate": 3.428545069756946e-05, + "loss": 0.9724, + "step": 2055 + }, + { + "epoch": 1.14, + "learning_rate": 3.42181906796636e-05, + "loss": 0.9493, + "step": 2060 + }, + { + "epoch": 1.14, + "learning_rate": 3.415085334995927e-05, + "loss": 0.9348, + "step": 2065 + }, + { + "epoch": 1.14, + "learning_rate": 3.408343927320613e-05, + "loss": 0.9702, + "step": 2070 + }, + { + "epoch": 1.15, + "learning_rate": 3.401594901479753e-05, + "loss": 0.9089, + "step": 2075 + }, + { + "epoch": 1.15, + "learning_rate": 3.394838314076572e-05, + "loss": 0.9606, + "step": 2080 + }, + { + "epoch": 1.15, + "learning_rate": 3.3880742217777115e-05, + "loss": 0.9743, + "step": 2085 + }, + { + "epoch": 1.16, + "learning_rate": 3.381302681312759e-05, + "loss": 0.9469, + "step": 2090 + }, + { + "epoch": 1.16, + "learning_rate": 3.374523749473767e-05, + "loss": 0.949, + "step": 2095 + }, + { + "epoch": 1.16, + "learning_rate": 3.367737483114779e-05, + "loss": 0.9567, + "step": 2100 + }, + { + "epoch": 1.16, + "learning_rate": 3.360943939151351e-05, + "loss": 0.9718, + "step": 2105 + }, + { + "epoch": 1.17, + "learning_rate": 3.354143174560078e-05, + "loss": 0.9626, + "step": 2110 + }, + { + "epoch": 1.17, + "learning_rate": 3.3473352463781105e-05, + "loss": 0.9346, + "step": 2115 + }, + { + "epoch": 1.17, + "learning_rate": 3.340520211702681e-05, + "loss": 0.9208, + "step": 2120 + }, + { + "epoch": 1.18, + "learning_rate": 3.333698127690623e-05, + "loss": 0.9856, + "step": 2125 + }, + { + "epoch": 1.18, + "learning_rate": 3.326869051557891e-05, + "loss": 0.9049, + "step": 2130 + }, + { + "epoch": 1.18, + "learning_rate": 3.320033040579082e-05, + "loss": 0.9222, + "step": 2135 + }, + { + "epoch": 1.18, + "learning_rate": 3.3131901520869565e-05, + "loss": 0.9648, + "step": 2140 + }, + { + "epoch": 1.19, + "learning_rate": 3.306340443471951e-05, + "loss": 0.9538, + "step": 2145 + }, + { + "epoch": 1.19, + "learning_rate": 3.299483972181708e-05, + "loss": 0.9314, + "step": 2150 + }, + { + "epoch": 1.19, + "learning_rate": 3.292620795720583e-05, + "loss": 0.9576, + "step": 2155 + }, + { + "epoch": 1.19, + "learning_rate": 3.285750971649167e-05, + "loss": 0.9499, + "step": 2160 + }, + { + "epoch": 1.2, + "learning_rate": 3.278874557583807e-05, + "loss": 0.9568, + "step": 2165 + }, + { + "epoch": 1.2, + "learning_rate": 3.271991611196117e-05, + "loss": 0.9642, + "step": 2170 + }, + { + "epoch": 1.2, + "learning_rate": 3.265102190212497e-05, + "loss": 0.9526, + "step": 2175 + }, + { + "epoch": 1.21, + "learning_rate": 3.258206352413648e-05, + "loss": 0.933, + "step": 2180 + }, + { + "epoch": 1.21, + "learning_rate": 3.2513041556340887e-05, + "loss": 0.9683, + "step": 2185 + }, + { + "epoch": 1.21, + "learning_rate": 3.244395657761671e-05, + "loss": 0.9155, + "step": 2190 + }, + { + "epoch": 1.21, + "learning_rate": 3.2374809167370924e-05, + "loss": 0.9262, + "step": 2195 + }, + { + "epoch": 1.22, + "learning_rate": 3.230559990553409e-05, + "loss": 0.9778, + "step": 2200 + }, + { + "epoch": 1.22, + "learning_rate": 3.2236329372555544e-05, + "loss": 0.9577, + "step": 2205 + }, + { + "epoch": 1.22, + "learning_rate": 3.2166998149398465e-05, + "loss": 0.9286, + "step": 2210 + }, + { + "epoch": 1.22, + "learning_rate": 3.209760681753505e-05, + "loss": 0.9634, + "step": 2215 + }, + { + "epoch": 1.23, + "learning_rate": 3.2028155958941615e-05, + "loss": 0.9467, + "step": 2220 + }, + { + "epoch": 1.23, + "learning_rate": 3.195864615609373e-05, + "loss": 0.9543, + "step": 2225 + }, + { + "epoch": 1.23, + "learning_rate": 3.1889077991961304e-05, + "loss": 0.9914, + "step": 2230 + }, + { + "epoch": 1.24, + "learning_rate": 3.181945205000373e-05, + "loss": 0.9309, + "step": 2235 + }, + { + "epoch": 1.24, + "learning_rate": 3.1749768914164955e-05, + "loss": 0.9299, + "step": 2240 + }, + { + "epoch": 1.24, + "learning_rate": 3.168002916886864e-05, + "loss": 0.9462, + "step": 2245 + }, + { + "epoch": 1.24, + "learning_rate": 3.1610233399013194e-05, + "loss": 0.948, + "step": 2250 + }, + { + "epoch": 1.25, + "learning_rate": 3.15403821899669e-05, + "loss": 0.9335, + "step": 2255 + }, + { + "epoch": 1.25, + "learning_rate": 3.147047612756302e-05, + "loss": 0.952, + "step": 2260 + }, + { + "epoch": 1.25, + "learning_rate": 3.140051579809484e-05, + "loss": 0.9532, + "step": 2265 + }, + { + "epoch": 1.26, + "learning_rate": 3.133050178831079e-05, + "loss": 0.9853, + "step": 2270 + }, + { + "epoch": 1.26, + "learning_rate": 3.12604346854095e-05, + "loss": 0.8925, + "step": 2275 + }, + { + "epoch": 1.26, + "learning_rate": 3.119031507703491e-05, + "loss": 0.942, + "step": 2280 + }, + { + "epoch": 1.26, + "learning_rate": 3.112014355127129e-05, + "loss": 0.9132, + "step": 2285 + }, + { + "epoch": 1.27, + "learning_rate": 3.104992069663835e-05, + "loss": 0.9335, + "step": 2290 + }, + { + "epoch": 1.27, + "learning_rate": 3.0979647102086273e-05, + "loss": 0.9403, + "step": 2295 + }, + { + "epoch": 1.27, + "learning_rate": 3.090932335699081e-05, + "loss": 0.9246, + "step": 2300 + }, + { + "epoch": 1.27, + "learning_rate": 3.083895005114831e-05, + "loss": 0.912, + "step": 2305 + }, + { + "epoch": 1.28, + "learning_rate": 3.076852777477079e-05, + "loss": 0.9334, + "step": 2310 + }, + { + "epoch": 1.28, + "learning_rate": 3.069805711848096e-05, + "loss": 0.933, + "step": 2315 + }, + { + "epoch": 1.28, + "learning_rate": 3.062753867330729e-05, + "loss": 0.9348, + "step": 2320 + }, + { + "epoch": 1.29, + "learning_rate": 3.055697303067905e-05, + "loss": 0.9997, + "step": 2325 + }, + { + "epoch": 1.29, + "learning_rate": 3.048636078242137e-05, + "loss": 0.9196, + "step": 2330 + }, + { + "epoch": 1.29, + "learning_rate": 3.0415702520750235e-05, + "loss": 0.9735, + "step": 2335 + }, + { + "epoch": 1.29, + "learning_rate": 3.0344998838267525e-05, + "loss": 0.9395, + "step": 2340 + }, + { + "epoch": 1.3, + "learning_rate": 3.0274250327956093e-05, + "loss": 0.9455, + "step": 2345 + }, + { + "epoch": 1.3, + "learning_rate": 3.020345758317474e-05, + "loss": 0.968, + "step": 2350 + }, + { + "epoch": 1.3, + "learning_rate": 3.0132621197653245e-05, + "loss": 0.9403, + "step": 2355 + }, + { + "epoch": 1.3, + "learning_rate": 3.0061741765487418e-05, + "loss": 0.9267, + "step": 2360 + }, + { + "epoch": 1.31, + "learning_rate": 2.9990819881134073e-05, + "loss": 0.9734, + "step": 2365 + }, + { + "epoch": 1.31, + "learning_rate": 2.9919856139406093e-05, + "loss": 0.949, + "step": 2370 + }, + { + "epoch": 1.31, + "learning_rate": 2.9848851135467386e-05, + "loss": 0.9464, + "step": 2375 + }, + { + "epoch": 1.32, + "learning_rate": 2.977780546482794e-05, + "loss": 0.9709, + "step": 2380 + }, + { + "epoch": 1.32, + "learning_rate": 2.9706719723338795e-05, + "loss": 0.9202, + "step": 2385 + }, + { + "epoch": 1.32, + "learning_rate": 2.9635594507187074e-05, + "loss": 0.9625, + "step": 2390 + }, + { + "epoch": 1.32, + "learning_rate": 2.956443041289096e-05, + "loss": 0.9307, + "step": 2395 + }, + { + "epoch": 1.33, + "learning_rate": 2.9493228037294702e-05, + "loss": 0.907, + "step": 2400 + }, + { + "epoch": 1.33, + "learning_rate": 2.9421987977563613e-05, + "loss": 0.9487, + "step": 2405 + }, + { + "epoch": 1.33, + "learning_rate": 2.935071083117907e-05, + "loss": 0.9259, + "step": 2410 + }, + { + "epoch": 1.34, + "learning_rate": 2.9279397195933457e-05, + "loss": 0.9357, + "step": 2415 + }, + { + "epoch": 1.34, + "learning_rate": 2.920804766992521e-05, + "loss": 0.9149, + "step": 2420 + }, + { + "epoch": 1.34, + "learning_rate": 2.9136662851553787e-05, + "loss": 0.9664, + "step": 2425 + }, + { + "epoch": 1.34, + "learning_rate": 2.906524333951461e-05, + "loss": 0.973, + "step": 2430 + }, + { + "epoch": 1.35, + "learning_rate": 2.899378973279409e-05, + "loss": 0.9797, + "step": 2435 + }, + { + "epoch": 1.35, + "learning_rate": 2.892230263066459e-05, + "loss": 0.9604, + "step": 2440 + }, + { + "epoch": 1.35, + "learning_rate": 2.885078263267938e-05, + "loss": 0.9147, + "step": 2445 + }, + { + "epoch": 1.35, + "learning_rate": 2.8779230338667634e-05, + "loss": 0.9827, + "step": 2450 + }, + { + "epoch": 1.36, + "learning_rate": 2.870764634872939e-05, + "loss": 0.9362, + "step": 2455 + }, + { + "epoch": 1.36, + "learning_rate": 2.86360312632305e-05, + "loss": 0.9373, + "step": 2460 + }, + { + "epoch": 1.36, + "learning_rate": 2.8564385682797622e-05, + "loss": 0.955, + "step": 2465 + }, + { + "epoch": 1.37, + "learning_rate": 2.8492710208313177e-05, + "loss": 0.9108, + "step": 2470 + }, + { + "epoch": 1.37, + "learning_rate": 2.8421005440910303e-05, + "loss": 0.9259, + "step": 2475 + }, + { + "epoch": 1.37, + "learning_rate": 2.8349271981967797e-05, + "loss": 0.9333, + "step": 2480 + }, + { + "epoch": 1.37, + "learning_rate": 2.8277510433105102e-05, + "loss": 0.9437, + "step": 2485 + }, + { + "epoch": 1.38, + "learning_rate": 2.820572139617725e-05, + "loss": 0.9189, + "step": 2490 + }, + { + "epoch": 1.38, + "learning_rate": 2.8133905473269802e-05, + "loss": 0.9522, + "step": 2495 + }, + { + "epoch": 1.38, + "learning_rate": 2.8062063266693818e-05, + "loss": 0.9352, + "step": 2500 + }, + { + "epoch": 1.39, + "learning_rate": 2.7990195378980784e-05, + "loss": 0.9232, + "step": 2505 + }, + { + "epoch": 1.39, + "learning_rate": 2.7918302412877583e-05, + "loss": 0.8807, + "step": 2510 + }, + { + "epoch": 1.39, + "learning_rate": 2.7846384971341427e-05, + "loss": 0.9231, + "step": 2515 + }, + { + "epoch": 1.39, + "learning_rate": 2.7774443657534788e-05, + "loss": 0.9253, + "step": 2520 + }, + { + "epoch": 1.4, + "learning_rate": 2.770247907482036e-05, + "loss": 0.9646, + "step": 2525 + }, + { + "epoch": 1.4, + "learning_rate": 2.763049182675599e-05, + "loss": 0.9575, + "step": 2530 + }, + { + "epoch": 1.4, + "learning_rate": 2.7558482517089617e-05, + "loss": 0.9234, + "step": 2535 + }, + { + "epoch": 1.4, + "learning_rate": 2.748645174975421e-05, + "loss": 0.9215, + "step": 2540 + }, + { + "epoch": 1.41, + "learning_rate": 2.74144001288627e-05, + "loss": 0.959, + "step": 2545 + }, + { + "epoch": 1.41, + "learning_rate": 2.7342328258702894e-05, + "loss": 0.9509, + "step": 2550 + }, + { + "epoch": 1.41, + "learning_rate": 2.727023674373246e-05, + "loss": 0.8989, + "step": 2555 + }, + { + "epoch": 1.42, + "learning_rate": 2.7198126188573807e-05, + "loss": 0.9653, + "step": 2560 + }, + { + "epoch": 1.42, + "learning_rate": 2.7125997198009028e-05, + "loss": 0.9046, + "step": 2565 + }, + { + "epoch": 1.42, + "learning_rate": 2.7053850376974848e-05, + "loss": 0.9318, + "step": 2570 + }, + { + "epoch": 1.42, + "learning_rate": 2.6981686330557516e-05, + "loss": 0.9292, + "step": 2575 + }, + { + "epoch": 1.43, + "learning_rate": 2.6909505663987756e-05, + "loss": 0.99, + "step": 2580 + }, + { + "epoch": 1.43, + "learning_rate": 2.6837308982635678e-05, + "loss": 0.9737, + "step": 2585 + }, + { + "epoch": 1.43, + "learning_rate": 2.6765096892005726e-05, + "loss": 0.9649, + "step": 2590 + }, + { + "epoch": 1.43, + "learning_rate": 2.6692869997731545e-05, + "loss": 0.9687, + "step": 2595 + }, + { + "epoch": 1.44, + "learning_rate": 2.6620628905570964e-05, + "loss": 0.9708, + "step": 2600 + }, + { + "epoch": 1.44, + "learning_rate": 2.6548374221400884e-05, + "loss": 0.9498, + "step": 2605 + }, + { + "epoch": 1.44, + "learning_rate": 2.6476106551212188e-05, + "loss": 0.9512, + "step": 2610 + }, + { + "epoch": 1.45, + "learning_rate": 2.6403826501104682e-05, + "loss": 0.964, + "step": 2615 + }, + { + "epoch": 1.45, + "learning_rate": 2.6331534677281998e-05, + "loss": 0.9321, + "step": 2620 + }, + { + "epoch": 1.45, + "learning_rate": 2.6259231686046508e-05, + "loss": 0.9032, + "step": 2625 + }, + { + "epoch": 1.45, + "learning_rate": 2.6186918133794252e-05, + "loss": 0.9543, + "step": 2630 + }, + { + "epoch": 1.46, + "learning_rate": 2.6114594627009847e-05, + "loss": 0.9355, + "step": 2635 + }, + { + "epoch": 1.46, + "learning_rate": 2.604226177226137e-05, + "loss": 0.9111, + "step": 2640 + }, + { + "epoch": 1.46, + "learning_rate": 2.596992017619534e-05, + "loss": 0.9896, + "step": 2645 + }, + { + "epoch": 1.47, + "learning_rate": 2.589757044553155e-05, + "loss": 0.9715, + "step": 2650 + }, + { + "epoch": 1.47, + "learning_rate": 2.5825213187058045e-05, + "loss": 0.8911, + "step": 2655 + }, + { + "epoch": 1.47, + "learning_rate": 2.5752849007625986e-05, + "loss": 0.9446, + "step": 2660 + }, + { + "epoch": 1.47, + "learning_rate": 2.568047851414459e-05, + "loss": 0.9453, + "step": 2665 + }, + { + "epoch": 1.48, + "learning_rate": 2.5608102313576027e-05, + "loss": 0.9244, + "step": 2670 + }, + { + "epoch": 1.48, + "learning_rate": 2.553572101293033e-05, + "loss": 0.9478, + "step": 2675 + }, + { + "epoch": 1.48, + "learning_rate": 2.546333521926031e-05, + "loss": 0.9708, + "step": 2680 + }, + { + "epoch": 1.48, + "learning_rate": 2.5390945539656445e-05, + "loss": 0.9266, + "step": 2685 + }, + { + "epoch": 1.49, + "learning_rate": 2.5318552581241822e-05, + "loss": 0.9199, + "step": 2690 + }, + { + "epoch": 1.49, + "learning_rate": 2.524615695116702e-05, + "loss": 0.9666, + "step": 2695 + }, + { + "epoch": 1.49, + "learning_rate": 2.5173759256605027e-05, + "loss": 0.9247, + "step": 2700 + } + ], + "max_steps": 5424, + "num_train_epochs": 3, + "total_flos": 8.773995313338778e+17, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-2700/training_args.bin b/checkpoint-2700/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..77433459daca6603950afc463ff989e817f81ac3 --- /dev/null +++ b/checkpoint-2700/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0f6a8853599a52b1f15984d75ce4b6434c335fdc67d4509d0b2ca0d8c613590a +size 3362 diff --git a/checkpoint-2800/README.md b/checkpoint-2800/README.md new file mode 100644 index 0000000000000000000000000000000000000000..53c0fe6b436905ad7a2c060e98fe1ebdeac1a2fe --- /dev/null +++ b/checkpoint-2800/README.md @@ -0,0 +1,15 @@ +--- +library_name: peft +--- +## Training procedure + + +The following `bitsandbytes` quantization config was used during training: +- load_in_8bit: True +- llm_int8_threshold: 6.0 +- llm_int8_skip_modules: None +- llm_int8_enable_fp32_cpu_offload: False +### Framework versions + + +- PEFT 0.5.0 diff --git a/checkpoint-2800/adapter_config.json b/checkpoint-2800/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..514ae99ac7471cc26016f7575e44dd8304f29b07 --- /dev/null +++ b/checkpoint-2800/adapter_config.json @@ -0,0 +1,21 @@ +{ + "auto_mapping": null, + "base_model_name_or_path": "meta-llama/Llama-2-7b-chat-hf", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "q_proj", + "v_proj" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-2800/adapter_model.bin b/checkpoint-2800/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..9a06885237cdcbfc04a8b8a6ab2f2591f6572fcd --- /dev/null +++ b/checkpoint-2800/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d72e133973a3e71290d1f9dfd93f567e4ef71b7723b62abd735249736b408bb3 +size 16821197 diff --git a/checkpoint-2800/finetuning_args.json b/checkpoint-2800/finetuning_args.json new file mode 100644 index 0000000000000000000000000000000000000000..d01efc206b59c6f88548e8f3940579f2ed2af33b --- /dev/null +++ b/checkpoint-2800/finetuning_args.json @@ -0,0 +1,16 @@ +{ + "dpo_beta": 0.1, + "finetuning_type": "lora", + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "lora_rank": 8, + "lora_target": [ + "q_proj", + "v_proj" + ], + "name_module_trainable": "mlp", + "num_hidden_layers": 32, + "num_layer_trainable": 3, + "ppo_score_norm": false, + "resume_lora_training": true +} diff --git a/checkpoint-2800/optimizer.pt b/checkpoint-2800/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..c290587a29280fad99c6980673fd6269f766b40e --- /dev/null +++ b/checkpoint-2800/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0e96033a32885d3e7b0670660746df5f56271e786065b033139f6a92df649728 +size 33629893 diff --git a/checkpoint-2800/rng_state.pth b/checkpoint-2800/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..e19112548972ac83ba86b62201ebff2041d58f96 --- /dev/null +++ b/checkpoint-2800/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e1b51f5747dbe1e6f9e118f95427c0ce5edb0f93e6eb5d2b84db90e3a32ef034 +size 14575 diff --git a/checkpoint-2800/scaler.pt b/checkpoint-2800/scaler.pt new file mode 100644 index 0000000000000000000000000000000000000000..0a5dc5ba7d38bb3cd99133530311d77ce670bf05 --- /dev/null +++ b/checkpoint-2800/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b60a0810fa8dcebfda25e97911bd0bcfce1f18e5b33d45888d2690d365527ab +size 557 diff --git a/checkpoint-2800/scheduler.pt b/checkpoint-2800/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..b4c97d0c18bdbdf973901be92d8c60c26cfe343d --- /dev/null +++ b/checkpoint-2800/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5644edc0ef59a0726c37a2e7c7fa2e2237e875ba32f72069a45642b40d7c54a4 +size 627 diff --git a/checkpoint-2800/trainer_state.json b/checkpoint-2800/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..c5cd549d188e07b46b0529e35223ea60d9711592 --- /dev/null +++ b/checkpoint-2800/trainer_state.json @@ -0,0 +1,3376 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.5482444014376555, + "global_step": 2800, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.9999895164082156e-05, + "loss": 1.6349, + "step": 5 + }, + { + "epoch": 0.01, + "learning_rate": 4.999958065720787e-05, + "loss": 1.6199, + "step": 10 + }, + { + "epoch": 0.01, + "learning_rate": 4.999905648201487e-05, + "loss": 1.4834, + "step": 15 + }, + { + "epoch": 0.01, + "learning_rate": 4.999832264289934e-05, + "loss": 1.3882, + "step": 20 + }, + { + "epoch": 0.01, + "learning_rate": 4.999737914601591e-05, + "loss": 1.3679, + "step": 25 + }, + { + "epoch": 0.02, + "learning_rate": 4.999622599927756e-05, + "loss": 1.2396, + "step": 30 + }, + { + "epoch": 0.02, + "learning_rate": 4.999486321235559e-05, + "loss": 1.321, + "step": 35 + }, + { + "epoch": 0.02, + "learning_rate": 4.9993290796679516e-05, + "loss": 1.2874, + "step": 40 + }, + { + "epoch": 0.02, + "learning_rate": 4.999150876543699e-05, + "loss": 1.2607, + "step": 45 + }, + { + "epoch": 0.03, + "learning_rate": 4.9989517133573694e-05, + "loss": 1.2454, + "step": 50 + }, + { + "epoch": 0.03, + "learning_rate": 4.9987315917793174e-05, + "loss": 1.2799, + "step": 55 + }, + { + "epoch": 0.03, + "learning_rate": 4.998490513655676e-05, + "loss": 1.2575, + "step": 60 + }, + { + "epoch": 0.04, + "learning_rate": 4.998228481008337e-05, + "loss": 1.2404, + "step": 65 + }, + { + "epoch": 0.04, + "learning_rate": 4.997945496034934e-05, + "loss": 1.2219, + "step": 70 + }, + { + "epoch": 0.04, + "learning_rate": 4.9976415611088267e-05, + "loss": 1.2241, + "step": 75 + }, + { + "epoch": 0.04, + "learning_rate": 4.997316678779079e-05, + "loss": 1.1716, + "step": 80 + }, + { + "epoch": 0.05, + "learning_rate": 4.996970851770438e-05, + "loss": 1.1883, + "step": 85 + }, + { + "epoch": 0.05, + "learning_rate": 4.9966040829833115e-05, + "loss": 1.205, + "step": 90 + }, + { + "epoch": 0.05, + "learning_rate": 4.9962163754937426e-05, + "loss": 1.1246, + "step": 95 + }, + { + "epoch": 0.06, + "learning_rate": 4.995807732553384e-05, + "loss": 1.1636, + "step": 100 + }, + { + "epoch": 0.06, + "learning_rate": 4.9953781575894723e-05, + "loss": 1.158, + "step": 105 + }, + { + "epoch": 0.06, + "learning_rate": 4.9949276542048e-05, + "loss": 1.1477, + "step": 110 + }, + { + "epoch": 0.06, + "learning_rate": 4.9944562261776805e-05, + "loss": 1.1678, + "step": 115 + }, + { + "epoch": 0.07, + "learning_rate": 4.9939638774619216e-05, + "loss": 1.1501, + "step": 120 + }, + { + "epoch": 0.07, + "learning_rate": 4.99345061218679e-05, + "loss": 1.1955, + "step": 125 + }, + { + "epoch": 0.07, + "learning_rate": 4.9929164346569756e-05, + "loss": 1.1724, + "step": 130 + }, + { + "epoch": 0.07, + "learning_rate": 4.9923613493525576e-05, + "loss": 1.177, + "step": 135 + }, + { + "epoch": 0.08, + "learning_rate": 4.991785360928968e-05, + "loss": 1.1418, + "step": 140 + }, + { + "epoch": 0.08, + "learning_rate": 4.991188474216947e-05, + "loss": 1.1898, + "step": 145 + }, + { + "epoch": 0.08, + "learning_rate": 4.9905706942225094e-05, + "loss": 1.1479, + "step": 150 + }, + { + "epoch": 0.09, + "learning_rate": 4.9899320261268966e-05, + "loss": 1.1356, + "step": 155 + }, + { + "epoch": 0.09, + "learning_rate": 4.989272475286537e-05, + "loss": 1.1397, + "step": 160 + }, + { + "epoch": 0.09, + "learning_rate": 4.9885920472330004e-05, + "loss": 1.1215, + "step": 165 + }, + { + "epoch": 0.09, + "learning_rate": 4.9878907476729516e-05, + "loss": 1.167, + "step": 170 + }, + { + "epoch": 0.1, + "learning_rate": 4.9871685824881e-05, + "loss": 1.1219, + "step": 175 + }, + { + "epoch": 0.1, + "learning_rate": 4.9864255577351534e-05, + "loss": 1.0835, + "step": 180 + }, + { + "epoch": 0.1, + "learning_rate": 4.985661679645769e-05, + "loss": 1.0721, + "step": 185 + }, + { + "epoch": 0.11, + "learning_rate": 4.9848769546264915e-05, + "loss": 1.0692, + "step": 190 + }, + { + "epoch": 0.11, + "learning_rate": 4.9840713892587146e-05, + "loss": 1.0488, + "step": 195 + }, + { + "epoch": 0.11, + "learning_rate": 4.983244990298609e-05, + "loss": 1.1285, + "step": 200 + }, + { + "epoch": 0.11, + "learning_rate": 4.982397764677081e-05, + "loss": 1.0832, + "step": 205 + }, + { + "epoch": 0.12, + "learning_rate": 4.981529719499704e-05, + "loss": 1.0652, + "step": 210 + }, + { + "epoch": 0.12, + "learning_rate": 4.980640862046663e-05, + "loss": 1.1043, + "step": 215 + }, + { + "epoch": 0.12, + "learning_rate": 4.979731199772693e-05, + "loss": 1.112, + "step": 220 + }, + { + "epoch": 0.12, + "learning_rate": 4.9788007403070146e-05, + "loss": 1.1029, + "step": 225 + }, + { + "epoch": 0.13, + "learning_rate": 4.977849491453277e-05, + "loss": 1.0869, + "step": 230 + }, + { + "epoch": 0.13, + "learning_rate": 4.976877461189481e-05, + "loss": 1.0843, + "step": 235 + }, + { + "epoch": 0.13, + "learning_rate": 4.975884657667922e-05, + "loss": 1.0789, + "step": 240 + }, + { + "epoch": 0.14, + "learning_rate": 4.974871089215118e-05, + "loss": 1.0449, + "step": 245 + }, + { + "epoch": 0.14, + "learning_rate": 4.9738367643317405e-05, + "loss": 1.1053, + "step": 250 + }, + { + "epoch": 0.14, + "learning_rate": 4.9727816916925395e-05, + "loss": 1.0651, + "step": 255 + }, + { + "epoch": 0.14, + "learning_rate": 4.971705880146276e-05, + "loss": 1.0828, + "step": 260 + }, + { + "epoch": 0.15, + "learning_rate": 4.970609338715646e-05, + "loss": 1.0932, + "step": 265 + }, + { + "epoch": 0.15, + "learning_rate": 4.969492076597203e-05, + "loss": 1.0648, + "step": 270 + }, + { + "epoch": 0.15, + "learning_rate": 4.968354103161283e-05, + "loss": 1.0948, + "step": 275 + }, + { + "epoch": 0.15, + "learning_rate": 4.967195427951926e-05, + "loss": 1.0721, + "step": 280 + }, + { + "epoch": 0.16, + "learning_rate": 4.9660160606867936e-05, + "loss": 1.124, + "step": 285 + }, + { + "epoch": 0.16, + "learning_rate": 4.9648160112570896e-05, + "loss": 1.0963, + "step": 290 + }, + { + "epoch": 0.16, + "learning_rate": 4.9635952897274773e-05, + "loss": 1.1078, + "step": 295 + }, + { + "epoch": 0.17, + "learning_rate": 4.9623539063359925e-05, + "loss": 1.1059, + "step": 300 + }, + { + "epoch": 0.17, + "learning_rate": 4.961091871493962e-05, + "loss": 1.1032, + "step": 305 + }, + { + "epoch": 0.17, + "learning_rate": 4.959809195785912e-05, + "loss": 1.0595, + "step": 310 + }, + { + "epoch": 0.17, + "learning_rate": 4.958505889969481e-05, + "loss": 1.1096, + "step": 315 + }, + { + "epoch": 0.18, + "learning_rate": 4.957181964975329e-05, + "loss": 1.0589, + "step": 320 + }, + { + "epoch": 0.18, + "learning_rate": 4.955837431907049e-05, + "loss": 1.0608, + "step": 325 + }, + { + "epoch": 0.18, + "learning_rate": 4.954472302041069e-05, + "loss": 1.0819, + "step": 330 + }, + { + "epoch": 0.19, + "learning_rate": 4.9530865868265605e-05, + "loss": 1.0759, + "step": 335 + }, + { + "epoch": 0.19, + "learning_rate": 4.951680297885342e-05, + "loss": 1.0515, + "step": 340 + }, + { + "epoch": 0.19, + "learning_rate": 4.950253447011779e-05, + "loss": 1.0371, + "step": 345 + }, + { + "epoch": 0.19, + "learning_rate": 4.948806046172691e-05, + "loss": 1.0619, + "step": 350 + }, + { + "epoch": 0.2, + "learning_rate": 4.947338107507245e-05, + "loss": 1.0757, + "step": 355 + }, + { + "epoch": 0.2, + "learning_rate": 4.945849643326857e-05, + "loss": 1.0686, + "step": 360 + }, + { + "epoch": 0.2, + "learning_rate": 4.9443406661150874e-05, + "loss": 1.0809, + "step": 365 + }, + { + "epoch": 0.2, + "learning_rate": 4.942811188527537e-05, + "loss": 1.0704, + "step": 370 + }, + { + "epoch": 0.21, + "learning_rate": 4.941261223391742e-05, + "loss": 1.0655, + "step": 375 + }, + { + "epoch": 0.21, + "learning_rate": 4.939690783707063e-05, + "loss": 1.1182, + "step": 380 + }, + { + "epoch": 0.21, + "learning_rate": 4.938099882644578e-05, + "loss": 1.081, + "step": 385 + }, + { + "epoch": 0.22, + "learning_rate": 4.9364885335469734e-05, + "loss": 1.0792, + "step": 390 + }, + { + "epoch": 0.22, + "learning_rate": 4.93485674992843e-05, + "loss": 1.0244, + "step": 395 + }, + { + "epoch": 0.22, + "learning_rate": 4.933204545474511e-05, + "loss": 1.0531, + "step": 400 + }, + { + "epoch": 0.22, + "learning_rate": 4.9315319340420465e-05, + "loss": 1.048, + "step": 405 + }, + { + "epoch": 0.23, + "learning_rate": 4.929838929659015e-05, + "loss": 1.04, + "step": 410 + }, + { + "epoch": 0.23, + "learning_rate": 4.9281255465244314e-05, + "loss": 1.0686, + "step": 415 + }, + { + "epoch": 0.23, + "learning_rate": 4.926391799008223e-05, + "loss": 1.0933, + "step": 420 + }, + { + "epoch": 0.24, + "learning_rate": 4.924637701651111e-05, + "loss": 1.0313, + "step": 425 + }, + { + "epoch": 0.24, + "learning_rate": 4.9228632691644874e-05, + "loss": 1.0811, + "step": 430 + }, + { + "epoch": 0.24, + "learning_rate": 4.921068516430293e-05, + "loss": 1.0617, + "step": 435 + }, + { + "epoch": 0.24, + "learning_rate": 4.919253458500892e-05, + "loss": 1.0482, + "step": 440 + }, + { + "epoch": 0.25, + "learning_rate": 4.9174181105989445e-05, + "loss": 1.0681, + "step": 445 + }, + { + "epoch": 0.25, + "learning_rate": 4.9155624881172834e-05, + "loss": 1.0476, + "step": 450 + }, + { + "epoch": 0.25, + "learning_rate": 4.913686606618777e-05, + "loss": 1.0463, + "step": 455 + }, + { + "epoch": 0.25, + "learning_rate": 4.911790481836208e-05, + "loss": 1.0213, + "step": 460 + }, + { + "epoch": 0.26, + "learning_rate": 4.909874129672133e-05, + "loss": 0.9855, + "step": 465 + }, + { + "epoch": 0.26, + "learning_rate": 4.907937566198757e-05, + "loss": 1.045, + "step": 470 + }, + { + "epoch": 0.26, + "learning_rate": 4.9059808076577914e-05, + "loss": 1.0442, + "step": 475 + }, + { + "epoch": 0.27, + "learning_rate": 4.904003870460323e-05, + "loss": 1.0354, + "step": 480 + }, + { + "epoch": 0.27, + "learning_rate": 4.9020067711866735e-05, + "loss": 1.0331, + "step": 485 + }, + { + "epoch": 0.27, + "learning_rate": 4.899989526586261e-05, + "loss": 1.0585, + "step": 490 + }, + { + "epoch": 0.27, + "learning_rate": 4.8979521535774636e-05, + "loss": 1.0223, + "step": 495 + }, + { + "epoch": 0.28, + "learning_rate": 4.895894669247468e-05, + "loss": 1.0118, + "step": 500 + }, + { + "epoch": 0.28, + "learning_rate": 4.8938170908521356e-05, + "loss": 1.0508, + "step": 505 + }, + { + "epoch": 0.28, + "learning_rate": 4.8917194358158534e-05, + "loss": 1.0656, + "step": 510 + }, + { + "epoch": 0.28, + "learning_rate": 4.8896017217313886e-05, + "loss": 1.0655, + "step": 515 + }, + { + "epoch": 0.29, + "learning_rate": 4.887463966359741e-05, + "loss": 1.0833, + "step": 520 + }, + { + "epoch": 0.29, + "learning_rate": 4.8853061876299956e-05, + "loss": 1.068, + "step": 525 + }, + { + "epoch": 0.29, + "learning_rate": 4.8831284036391684e-05, + "loss": 1.0487, + "step": 530 + }, + { + "epoch": 0.3, + "learning_rate": 4.880930632652058e-05, + "loss": 1.059, + "step": 535 + }, + { + "epoch": 0.3, + "learning_rate": 4.878712893101092e-05, + "loss": 1.0408, + "step": 540 + }, + { + "epoch": 0.3, + "learning_rate": 4.876475203586171e-05, + "loss": 0.9976, + "step": 545 + }, + { + "epoch": 0.3, + "learning_rate": 4.874217582874514e-05, + "loss": 1.0358, + "step": 550 + }, + { + "epoch": 0.31, + "learning_rate": 4.8719400499005e-05, + "loss": 1.0073, + "step": 555 + }, + { + "epoch": 0.31, + "learning_rate": 4.869642623765509e-05, + "loss": 1.0742, + "step": 560 + }, + { + "epoch": 0.31, + "learning_rate": 4.867325323737765e-05, + "loss": 1.0463, + "step": 565 + }, + { + "epoch": 0.32, + "learning_rate": 4.864988169252168e-05, + "loss": 1.0242, + "step": 570 + }, + { + "epoch": 0.32, + "learning_rate": 4.8626311799101375e-05, + "loss": 1.0773, + "step": 575 + }, + { + "epoch": 0.32, + "learning_rate": 4.860254375479446e-05, + "loss": 1.0371, + "step": 580 + }, + { + "epoch": 0.32, + "learning_rate": 4.8578577758940504e-05, + "loss": 1.0399, + "step": 585 + }, + { + "epoch": 0.33, + "learning_rate": 4.855441401253928e-05, + "loss": 1.0411, + "step": 590 + }, + { + "epoch": 0.33, + "learning_rate": 4.8530052718249076e-05, + "loss": 1.0476, + "step": 595 + }, + { + "epoch": 0.33, + "learning_rate": 4.850549408038498e-05, + "loss": 1.0075, + "step": 600 + }, + { + "epoch": 0.33, + "learning_rate": 4.848073830491717e-05, + "loss": 1.0045, + "step": 605 + }, + { + "epoch": 0.34, + "learning_rate": 4.845578559946923e-05, + "loss": 1.0368, + "step": 610 + }, + { + "epoch": 0.34, + "learning_rate": 4.8430636173316306e-05, + "loss": 1.0156, + "step": 615 + }, + { + "epoch": 0.34, + "learning_rate": 4.840529023738348e-05, + "loss": 1.0334, + "step": 620 + }, + { + "epoch": 0.35, + "learning_rate": 4.837974800424389e-05, + "loss": 0.9994, + "step": 625 + }, + { + "epoch": 0.35, + "learning_rate": 4.8354009688117026e-05, + "loss": 1.0467, + "step": 630 + }, + { + "epoch": 0.35, + "learning_rate": 4.8328075504866874e-05, + "loss": 0.9968, + "step": 635 + }, + { + "epoch": 0.35, + "learning_rate": 4.8301945672000164e-05, + "loss": 1.041, + "step": 640 + }, + { + "epoch": 0.36, + "learning_rate": 4.8275620408664487e-05, + "loss": 0.9811, + "step": 645 + }, + { + "epoch": 0.36, + "learning_rate": 4.8249099935646494e-05, + "loss": 1.0235, + "step": 650 + }, + { + "epoch": 0.36, + "learning_rate": 4.822238447537003e-05, + "loss": 1.0152, + "step": 655 + }, + { + "epoch": 0.36, + "learning_rate": 4.819547425189429e-05, + "loss": 1.038, + "step": 660 + }, + { + "epoch": 0.37, + "learning_rate": 4.81683694909119e-05, + "loss": 1.0387, + "step": 665 + }, + { + "epoch": 0.37, + "learning_rate": 4.814107041974707e-05, + "loss": 1.019, + "step": 670 + }, + { + "epoch": 0.37, + "learning_rate": 4.811357726735366e-05, + "loss": 1.0403, + "step": 675 + }, + { + "epoch": 0.38, + "learning_rate": 4.808589026431324e-05, + "loss": 1.0828, + "step": 680 + }, + { + "epoch": 0.38, + "learning_rate": 4.805800964283322e-05, + "loss": 1.0128, + "step": 685 + }, + { + "epoch": 0.38, + "learning_rate": 4.802993563674483e-05, + "loss": 1.0502, + "step": 690 + }, + { + "epoch": 0.38, + "learning_rate": 4.80016684815012e-05, + "loss": 1.0169, + "step": 695 + }, + { + "epoch": 0.39, + "learning_rate": 4.7973208414175406e-05, + "loss": 1.0316, + "step": 700 + }, + { + "epoch": 0.39, + "learning_rate": 4.794455567345842e-05, + "loss": 1.0263, + "step": 705 + }, + { + "epoch": 0.39, + "learning_rate": 4.791571049965714e-05, + "loss": 1.0801, + "step": 710 + }, + { + "epoch": 0.4, + "learning_rate": 4.7886673134692404e-05, + "loss": 1.0575, + "step": 715 + }, + { + "epoch": 0.4, + "learning_rate": 4.7857443822096905e-05, + "loss": 0.9767, + "step": 720 + }, + { + "epoch": 0.4, + "learning_rate": 4.782802280701319e-05, + "loss": 0.9666, + "step": 725 + }, + { + "epoch": 0.4, + "learning_rate": 4.77984103361916e-05, + "loss": 0.9983, + "step": 730 + }, + { + "epoch": 0.41, + "learning_rate": 4.776860665798816e-05, + "loss": 1.0219, + "step": 735 + }, + { + "epoch": 0.41, + "learning_rate": 4.773861202236257e-05, + "loss": 0.9963, + "step": 740 + }, + { + "epoch": 0.41, + "learning_rate": 4.770842668087602e-05, + "loss": 1.0219, + "step": 745 + }, + { + "epoch": 0.41, + "learning_rate": 4.767805088668916e-05, + "loss": 1.0029, + "step": 750 + }, + { + "epoch": 0.42, + "learning_rate": 4.7647484894559936e-05, + "loss": 1.0077, + "step": 755 + }, + { + "epoch": 0.42, + "learning_rate": 4.7616728960841444e-05, + "loss": 0.9908, + "step": 760 + }, + { + "epoch": 0.42, + "learning_rate": 4.758578334347981e-05, + "loss": 1.0092, + "step": 765 + }, + { + "epoch": 0.43, + "learning_rate": 4.7554648302012015e-05, + "loss": 0.9874, + "step": 770 + }, + { + "epoch": 0.43, + "learning_rate": 4.7523324097563706e-05, + "loss": 1.0185, + "step": 775 + }, + { + "epoch": 0.43, + "learning_rate": 4.749181099284703e-05, + "loss": 1.0533, + "step": 780 + }, + { + "epoch": 0.43, + "learning_rate": 4.746010925215839e-05, + "loss": 1.0083, + "step": 785 + }, + { + "epoch": 0.44, + "learning_rate": 4.74282191413763e-05, + "loss": 1.0191, + "step": 790 + }, + { + "epoch": 0.44, + "learning_rate": 4.7396140927959045e-05, + "loss": 0.9786, + "step": 795 + }, + { + "epoch": 0.44, + "learning_rate": 4.7363874880942574e-05, + "loss": 1.0197, + "step": 800 + }, + { + "epoch": 0.45, + "learning_rate": 4.733142127093813e-05, + "loss": 1.0471, + "step": 805 + }, + { + "epoch": 0.45, + "learning_rate": 4.7298780370130014e-05, + "loss": 1.0134, + "step": 810 + }, + { + "epoch": 0.45, + "learning_rate": 4.726595245227336e-05, + "loss": 0.9801, + "step": 815 + }, + { + "epoch": 0.45, + "learning_rate": 4.723293779269173e-05, + "loss": 0.9944, + "step": 820 + }, + { + "epoch": 0.46, + "learning_rate": 4.7199736668274924e-05, + "loss": 1.0358, + "step": 825 + }, + { + "epoch": 0.46, + "learning_rate": 4.716634935747655e-05, + "loss": 0.9907, + "step": 830 + }, + { + "epoch": 0.46, + "learning_rate": 4.713277614031177e-05, + "loss": 1.0698, + "step": 835 + }, + { + "epoch": 0.46, + "learning_rate": 4.70990172983549e-05, + "loss": 1.0466, + "step": 840 + }, + { + "epoch": 0.47, + "learning_rate": 4.706507311473707e-05, + "loss": 0.9489, + "step": 845 + }, + { + "epoch": 0.47, + "learning_rate": 4.703094387414385e-05, + "loss": 0.9936, + "step": 850 + }, + { + "epoch": 0.47, + "learning_rate": 4.699662986281288e-05, + "loss": 0.9672, + "step": 855 + }, + { + "epoch": 0.48, + "learning_rate": 4.696213136853141e-05, + "loss": 1.0164, + "step": 860 + }, + { + "epoch": 0.48, + "learning_rate": 4.6927448680633954e-05, + "loss": 0.9817, + "step": 865 + }, + { + "epoch": 0.48, + "learning_rate": 4.689258208999983e-05, + "loss": 1.028, + "step": 870 + }, + { + "epoch": 0.48, + "learning_rate": 4.6857531889050716e-05, + "loss": 1.0239, + "step": 875 + }, + { + "epoch": 0.49, + "learning_rate": 4.682229837174821e-05, + "loss": 1.0378, + "step": 880 + }, + { + "epoch": 0.49, + "learning_rate": 4.678688183359135e-05, + "loss": 0.9908, + "step": 885 + }, + { + "epoch": 0.49, + "learning_rate": 4.675128257161418e-05, + "loss": 0.9384, + "step": 890 + }, + { + "epoch": 0.49, + "learning_rate": 4.671550088438319e-05, + "loss": 0.9481, + "step": 895 + }, + { + "epoch": 0.5, + "learning_rate": 4.6679537071994874e-05, + "loss": 0.9541, + "step": 900 + }, + { + "epoch": 0.5, + "learning_rate": 4.6643391436073165e-05, + "loss": 0.9787, + "step": 905 + }, + { + "epoch": 0.5, + "learning_rate": 4.660706427976693e-05, + "loss": 0.9897, + "step": 910 + }, + { + "epoch": 0.51, + "learning_rate": 4.657055590774745e-05, + "loss": 0.9947, + "step": 915 + }, + { + "epoch": 0.51, + "learning_rate": 4.6533866626205805e-05, + "loss": 0.9614, + "step": 920 + }, + { + "epoch": 0.51, + "learning_rate": 4.649699674285036e-05, + "loss": 1.0323, + "step": 925 + }, + { + "epoch": 0.51, + "learning_rate": 4.645994656690417e-05, + "loss": 1.0345, + "step": 930 + }, + { + "epoch": 0.52, + "learning_rate": 4.642271640910235e-05, + "loss": 1.0432, + "step": 935 + }, + { + "epoch": 0.52, + "learning_rate": 4.638530658168954e-05, + "loss": 1.0102, + "step": 940 + }, + { + "epoch": 0.52, + "learning_rate": 4.6347717398417203e-05, + "loss": 1.0361, + "step": 945 + }, + { + "epoch": 0.53, + "learning_rate": 4.6309949174541096e-05, + "loss": 0.9818, + "step": 950 + }, + { + "epoch": 0.53, + "learning_rate": 4.627200222681851e-05, + "loss": 1.0293, + "step": 955 + }, + { + "epoch": 0.53, + "learning_rate": 4.6233876873505694e-05, + "loss": 1.0125, + "step": 960 + }, + { + "epoch": 0.53, + "learning_rate": 4.619557343435516e-05, + "loss": 1.0039, + "step": 965 + }, + { + "epoch": 0.54, + "learning_rate": 4.615709223061302e-05, + "loss": 1.0146, + "step": 970 + }, + { + "epoch": 0.54, + "learning_rate": 4.611843358501624e-05, + "loss": 1.0352, + "step": 975 + }, + { + "epoch": 0.54, + "learning_rate": 4.6079597821789993e-05, + "loss": 1.0405, + "step": 980 + }, + { + "epoch": 0.54, + "learning_rate": 4.604058526664491e-05, + "loss": 0.9834, + "step": 985 + }, + { + "epoch": 0.55, + "learning_rate": 4.600139624677436e-05, + "loss": 1.0252, + "step": 990 + }, + { + "epoch": 0.55, + "learning_rate": 4.596203109085168e-05, + "loss": 1.0516, + "step": 995 + }, + { + "epoch": 0.55, + "learning_rate": 4.5922490129027464e-05, + "loss": 0.9762, + "step": 1000 + }, + { + "epoch": 0.56, + "learning_rate": 4.588277369292674e-05, + "loss": 0.9801, + "step": 1005 + }, + { + "epoch": 0.56, + "learning_rate": 4.5842882115646234e-05, + "loss": 1.0127, + "step": 1010 + }, + { + "epoch": 0.56, + "learning_rate": 4.580281573175157e-05, + "loss": 1.0415, + "step": 1015 + }, + { + "epoch": 0.56, + "learning_rate": 4.576257487727442e-05, + "loss": 0.9974, + "step": 1020 + }, + { + "epoch": 0.57, + "learning_rate": 4.572215988970974e-05, + "loss": 1.0097, + "step": 1025 + }, + { + "epoch": 0.57, + "learning_rate": 4.568157110801293e-05, + "loss": 1.0004, + "step": 1030 + }, + { + "epoch": 0.57, + "learning_rate": 4.5640808872596944e-05, + "loss": 0.9932, + "step": 1035 + }, + { + "epoch": 0.58, + "learning_rate": 4.5599873525329505e-05, + "loss": 0.9437, + "step": 1040 + }, + { + "epoch": 0.58, + "learning_rate": 4.555876540953019e-05, + "loss": 1.0004, + "step": 1045 + }, + { + "epoch": 0.58, + "learning_rate": 4.551748486996755e-05, + "loss": 0.9743, + "step": 1050 + }, + { + "epoch": 0.58, + "learning_rate": 4.547603225285626e-05, + "loss": 1.0303, + "step": 1055 + }, + { + "epoch": 0.59, + "learning_rate": 4.543440790585417e-05, + "loss": 1.0225, + "step": 1060 + }, + { + "epoch": 0.59, + "learning_rate": 4.539261217805939e-05, + "loss": 1.0102, + "step": 1065 + }, + { + "epoch": 0.59, + "learning_rate": 4.535064542000743e-05, + "loss": 1.0087, + "step": 1070 + }, + { + "epoch": 0.59, + "learning_rate": 4.5308507983668165e-05, + "loss": 0.952, + "step": 1075 + }, + { + "epoch": 0.6, + "learning_rate": 4.526620022244293e-05, + "loss": 0.9767, + "step": 1080 + }, + { + "epoch": 0.6, + "learning_rate": 4.522372249116158e-05, + "loss": 1.0049, + "step": 1085 + }, + { + "epoch": 0.6, + "learning_rate": 4.5181075146079456e-05, + "loss": 1.0397, + "step": 1090 + }, + { + "epoch": 0.61, + "learning_rate": 4.5138258544874455e-05, + "loss": 1.0167, + "step": 1095 + }, + { + "epoch": 0.61, + "learning_rate": 4.5095273046643985e-05, + "loss": 0.9966, + "step": 1100 + }, + { + "epoch": 0.61, + "learning_rate": 4.5052119011901986e-05, + "loss": 0.983, + "step": 1105 + }, + { + "epoch": 0.61, + "learning_rate": 4.500879680257587e-05, + "loss": 1.0301, + "step": 1110 + }, + { + "epoch": 0.62, + "learning_rate": 4.4965306782003535e-05, + "loss": 1.0371, + "step": 1115 + }, + { + "epoch": 0.62, + "learning_rate": 4.492164931493028e-05, + "loss": 1.0083, + "step": 1120 + }, + { + "epoch": 0.62, + "learning_rate": 4.487782476750575e-05, + "loss": 0.9728, + "step": 1125 + }, + { + "epoch": 0.62, + "learning_rate": 4.4833833507280884e-05, + "loss": 0.9892, + "step": 1130 + }, + { + "epoch": 0.63, + "learning_rate": 4.4789675903204805e-05, + "loss": 0.9847, + "step": 1135 + }, + { + "epoch": 0.63, + "learning_rate": 4.474535232562176e-05, + "loss": 1.0545, + "step": 1140 + }, + { + "epoch": 0.63, + "learning_rate": 4.470086314626797e-05, + "loss": 1.0204, + "step": 1145 + }, + { + "epoch": 0.64, + "learning_rate": 4.465620873826856e-05, + "loss": 0.9648, + "step": 1150 + }, + { + "epoch": 0.64, + "learning_rate": 4.46113894761344e-05, + "loss": 0.9545, + "step": 1155 + }, + { + "epoch": 0.64, + "learning_rate": 4.456640573575896e-05, + "loss": 1.0256, + "step": 1160 + }, + { + "epoch": 0.64, + "learning_rate": 4.4521257894415183e-05, + "loss": 0.99, + "step": 1165 + }, + { + "epoch": 0.65, + "learning_rate": 4.44759463307523e-05, + "loss": 0.9906, + "step": 1170 + }, + { + "epoch": 0.65, + "learning_rate": 4.443047142479266e-05, + "loss": 0.9876, + "step": 1175 + }, + { + "epoch": 0.65, + "learning_rate": 4.4384833557928553e-05, + "loss": 1.0495, + "step": 1180 + }, + { + "epoch": 0.66, + "learning_rate": 4.4339033112918966e-05, + "loss": 0.9869, + "step": 1185 + }, + { + "epoch": 0.66, + "learning_rate": 4.4293070473886456e-05, + "loss": 1.0299, + "step": 1190 + }, + { + "epoch": 0.66, + "learning_rate": 4.424694602631385e-05, + "loss": 1.0073, + "step": 1195 + }, + { + "epoch": 0.66, + "learning_rate": 4.420066015704105e-05, + "loss": 1.0023, + "step": 1200 + }, + { + "epoch": 0.67, + "learning_rate": 4.41542132542618e-05, + "loss": 0.9462, + "step": 1205 + }, + { + "epoch": 0.67, + "learning_rate": 4.410760570752037e-05, + "loss": 1.0116, + "step": 1210 + }, + { + "epoch": 0.67, + "learning_rate": 4.4060837907708375e-05, + "loss": 0.9652, + "step": 1215 + }, + { + "epoch": 0.67, + "learning_rate": 4.401391024706142e-05, + "loss": 1.0411, + "step": 1220 + }, + { + "epoch": 0.68, + "learning_rate": 4.396682311915586e-05, + "loss": 0.9691, + "step": 1225 + }, + { + "epoch": 0.68, + "learning_rate": 4.3919576918905495e-05, + "loss": 0.961, + "step": 1230 + }, + { + "epoch": 0.68, + "learning_rate": 4.387217204255819e-05, + "loss": 0.9602, + "step": 1235 + }, + { + "epoch": 0.69, + "learning_rate": 4.3824608887692666e-05, + "loss": 1.0197, + "step": 1240 + }, + { + "epoch": 0.69, + "learning_rate": 4.377688785321507e-05, + "loss": 0.9982, + "step": 1245 + }, + { + "epoch": 0.69, + "learning_rate": 4.372900933935569e-05, + "loss": 0.986, + "step": 1250 + }, + { + "epoch": 0.69, + "learning_rate": 4.368097374766556e-05, + "loss": 0.9744, + "step": 1255 + }, + { + "epoch": 0.7, + "learning_rate": 4.3632781481013105e-05, + "loss": 1.0078, + "step": 1260 + }, + { + "epoch": 0.7, + "learning_rate": 4.358443294358077e-05, + "loss": 0.9865, + "step": 1265 + }, + { + "epoch": 0.7, + "learning_rate": 4.35359285408616e-05, + "loss": 0.976, + "step": 1270 + }, + { + "epoch": 0.71, + "learning_rate": 4.348726867965591e-05, + "loss": 1.0141, + "step": 1275 + }, + { + "epoch": 0.71, + "learning_rate": 4.343845376806777e-05, + "loss": 0.952, + "step": 1280 + }, + { + "epoch": 0.71, + "learning_rate": 4.338948421550169e-05, + "loss": 1.0239, + "step": 1285 + }, + { + "epoch": 0.71, + "learning_rate": 4.334036043265909e-05, + "loss": 0.994, + "step": 1290 + }, + { + "epoch": 0.72, + "learning_rate": 4.329108283153492e-05, + "loss": 1.0505, + "step": 1295 + }, + { + "epoch": 0.72, + "learning_rate": 4.3241651825414195e-05, + "loss": 0.9919, + "step": 1300 + }, + { + "epoch": 0.72, + "learning_rate": 4.31920678288685e-05, + "loss": 1.0169, + "step": 1305 + }, + { + "epoch": 0.72, + "learning_rate": 4.3142331257752546e-05, + "loss": 0.9938, + "step": 1310 + }, + { + "epoch": 0.73, + "learning_rate": 4.309244252920064e-05, + "loss": 0.92, + "step": 1315 + }, + { + "epoch": 0.73, + "learning_rate": 4.304240206162326e-05, + "loss": 0.9537, + "step": 1320 + }, + { + "epoch": 0.73, + "learning_rate": 4.299221027470345e-05, + "loss": 0.996, + "step": 1325 + }, + { + "epoch": 0.74, + "learning_rate": 4.294186758939339e-05, + "loss": 0.9652, + "step": 1330 + }, + { + "epoch": 0.74, + "learning_rate": 4.2891374427910795e-05, + "loss": 1.0138, + "step": 1335 + }, + { + "epoch": 0.74, + "learning_rate": 4.284073121373544e-05, + "loss": 0.9267, + "step": 1340 + }, + { + "epoch": 0.74, + "learning_rate": 4.278993837160553e-05, + "loss": 0.9899, + "step": 1345 + }, + { + "epoch": 0.75, + "learning_rate": 4.273899632751422e-05, + "loss": 0.963, + "step": 1350 + }, + { + "epoch": 0.75, + "learning_rate": 4.2687905508705974e-05, + "loss": 1.0066, + "step": 1355 + }, + { + "epoch": 0.75, + "learning_rate": 4.263666634367303e-05, + "loss": 0.9939, + "step": 1360 + }, + { + "epoch": 0.75, + "learning_rate": 4.258527926215178e-05, + "loss": 0.9667, + "step": 1365 + }, + { + "epoch": 0.76, + "learning_rate": 4.253374469511917e-05, + "loss": 0.9999, + "step": 1370 + }, + { + "epoch": 0.76, + "learning_rate": 4.248206307478909e-05, + "loss": 0.9738, + "step": 1375 + }, + { + "epoch": 0.76, + "learning_rate": 4.243023483460875e-05, + "loss": 0.9829, + "step": 1380 + }, + { + "epoch": 0.77, + "learning_rate": 4.237826040925503e-05, + "loss": 1.0107, + "step": 1385 + }, + { + "epoch": 0.77, + "learning_rate": 4.232614023463088e-05, + "loss": 0.9992, + "step": 1390 + }, + { + "epoch": 0.77, + "learning_rate": 4.227387474786159e-05, + "loss": 0.9566, + "step": 1395 + }, + { + "epoch": 0.77, + "learning_rate": 4.222146438729119e-05, + "loss": 0.9722, + "step": 1400 + }, + { + "epoch": 0.78, + "learning_rate": 4.216890959247873e-05, + "loss": 0.988, + "step": 1405 + }, + { + "epoch": 0.78, + "learning_rate": 4.211621080419463e-05, + "loss": 1.0103, + "step": 1410 + }, + { + "epoch": 0.78, + "learning_rate": 4.206336846441695e-05, + "loss": 0.9805, + "step": 1415 + }, + { + "epoch": 0.79, + "learning_rate": 4.201038301632772e-05, + "loss": 1.0177, + "step": 1420 + }, + { + "epoch": 0.79, + "learning_rate": 4.195725490430917e-05, + "loss": 0.9938, + "step": 1425 + }, + { + "epoch": 0.79, + "learning_rate": 4.190398457394007e-05, + "loss": 1.0276, + "step": 1430 + }, + { + "epoch": 0.79, + "learning_rate": 4.1850572471991924e-05, + "loss": 0.9909, + "step": 1435 + }, + { + "epoch": 0.8, + "learning_rate": 4.1797019046425264e-05, + "loss": 0.9794, + "step": 1440 + }, + { + "epoch": 0.8, + "learning_rate": 4.1743324746385914e-05, + "loss": 1.0073, + "step": 1445 + }, + { + "epoch": 0.8, + "learning_rate": 4.1689490022201154e-05, + "loss": 1.0106, + "step": 1450 + }, + { + "epoch": 0.8, + "learning_rate": 4.163551532537601e-05, + "loss": 1.0241, + "step": 1455 + }, + { + "epoch": 0.81, + "learning_rate": 4.1581401108589425e-05, + "loss": 1.0178, + "step": 1460 + }, + { + "epoch": 0.81, + "learning_rate": 4.1527147825690495e-05, + "loss": 0.9426, + "step": 1465 + }, + { + "epoch": 0.81, + "learning_rate": 4.1472755931694626e-05, + "loss": 0.9642, + "step": 1470 + }, + { + "epoch": 0.82, + "learning_rate": 4.141822588277976e-05, + "loss": 0.9678, + "step": 1475 + }, + { + "epoch": 0.82, + "learning_rate": 4.136355813628251e-05, + "loss": 0.9654, + "step": 1480 + }, + { + "epoch": 0.82, + "learning_rate": 4.130875315069435e-05, + "loss": 0.9748, + "step": 1485 + }, + { + "epoch": 0.82, + "learning_rate": 4.125381138565775e-05, + "loss": 0.9321, + "step": 1490 + }, + { + "epoch": 0.83, + "learning_rate": 4.1198733301962346e-05, + "loss": 0.9885, + "step": 1495 + }, + { + "epoch": 0.83, + "learning_rate": 4.114351936154105e-05, + "loss": 0.9385, + "step": 1500 + }, + { + "epoch": 0.83, + "learning_rate": 4.108817002746619e-05, + "loss": 0.9362, + "step": 1505 + }, + { + "epoch": 0.83, + "learning_rate": 4.1032685763945625e-05, + "loss": 0.9764, + "step": 1510 + }, + { + "epoch": 0.84, + "learning_rate": 4.097706703631886e-05, + "loss": 0.961, + "step": 1515 + }, + { + "epoch": 0.84, + "learning_rate": 4.092131431105312e-05, + "loss": 0.9818, + "step": 1520 + }, + { + "epoch": 0.84, + "learning_rate": 4.086542805573945e-05, + "loss": 0.9855, + "step": 1525 + }, + { + "epoch": 0.85, + "learning_rate": 4.080940873908881e-05, + "loss": 0.9618, + "step": 1530 + }, + { + "epoch": 0.85, + "learning_rate": 4.07532568309281e-05, + "loss": 1.0102, + "step": 1535 + }, + { + "epoch": 0.85, + "learning_rate": 4.069697280219628e-05, + "loss": 0.9773, + "step": 1540 + }, + { + "epoch": 0.85, + "learning_rate": 4.0640557124940376e-05, + "loss": 0.9687, + "step": 1545 + }, + { + "epoch": 0.86, + "learning_rate": 4.058401027231152e-05, + "loss": 0.9818, + "step": 1550 + }, + { + "epoch": 0.86, + "learning_rate": 4.052733271856103e-05, + "loss": 0.9911, + "step": 1555 + }, + { + "epoch": 0.86, + "learning_rate": 4.0470524939036355e-05, + "loss": 0.9694, + "step": 1560 + }, + { + "epoch": 0.87, + "learning_rate": 4.0413587410177155e-05, + "loss": 0.9374, + "step": 1565 + }, + { + "epoch": 0.87, + "learning_rate": 4.035652060951128e-05, + "loss": 0.956, + "step": 1570 + }, + { + "epoch": 0.87, + "learning_rate": 4.0299325015650774e-05, + "loss": 0.9491, + "step": 1575 + }, + { + "epoch": 0.87, + "learning_rate": 4.024200110828783e-05, + "loss": 0.9569, + "step": 1580 + }, + { + "epoch": 0.88, + "learning_rate": 4.018454936819082e-05, + "loss": 0.9627, + "step": 1585 + }, + { + "epoch": 0.88, + "learning_rate": 4.012697027720018e-05, + "loss": 0.9922, + "step": 1590 + }, + { + "epoch": 0.88, + "learning_rate": 4.0069264318224506e-05, + "loss": 0.9703, + "step": 1595 + }, + { + "epoch": 0.88, + "learning_rate": 4.0011431975236337e-05, + "loss": 0.9436, + "step": 1600 + }, + { + "epoch": 0.89, + "learning_rate": 3.995347373326822e-05, + "loss": 0.961, + "step": 1605 + }, + { + "epoch": 0.89, + "learning_rate": 3.989539007840861e-05, + "loss": 0.9247, + "step": 1610 + }, + { + "epoch": 0.89, + "learning_rate": 3.983718149779775e-05, + "loss": 0.9537, + "step": 1615 + }, + { + "epoch": 0.9, + "learning_rate": 3.9778848479623656e-05, + "loss": 0.9415, + "step": 1620 + }, + { + "epoch": 0.9, + "learning_rate": 3.972039151311795e-05, + "loss": 0.9954, + "step": 1625 + }, + { + "epoch": 0.9, + "learning_rate": 3.966181108855183e-05, + "loss": 0.9451, + "step": 1630 + }, + { + "epoch": 0.9, + "learning_rate": 3.960310769723189e-05, + "loss": 0.9977, + "step": 1635 + }, + { + "epoch": 0.91, + "learning_rate": 3.9544281831496034e-05, + "loss": 0.9806, + "step": 1640 + }, + { + "epoch": 0.91, + "learning_rate": 3.9485333984709374e-05, + "loss": 0.9851, + "step": 1645 + }, + { + "epoch": 0.91, + "learning_rate": 3.942626465126001e-05, + "loss": 1.0089, + "step": 1650 + }, + { + "epoch": 0.92, + "learning_rate": 3.9367074326555e-05, + "loss": 0.9562, + "step": 1655 + }, + { + "epoch": 0.92, + "learning_rate": 3.930776350701609e-05, + "loss": 0.9892, + "step": 1660 + }, + { + "epoch": 0.92, + "learning_rate": 3.92483326900756e-05, + "loss": 0.9875, + "step": 1665 + }, + { + "epoch": 0.92, + "learning_rate": 3.91887823741723e-05, + "loss": 1.0012, + "step": 1670 + }, + { + "epoch": 0.93, + "learning_rate": 3.9129113058747136e-05, + "loss": 0.97, + "step": 1675 + }, + { + "epoch": 0.93, + "learning_rate": 3.9069325244239095e-05, + "loss": 0.9793, + "step": 1680 + }, + { + "epoch": 0.93, + "learning_rate": 3.900941943208103e-05, + "loss": 0.9864, + "step": 1685 + }, + { + "epoch": 0.93, + "learning_rate": 3.894939612469539e-05, + "loss": 0.9596, + "step": 1690 + }, + { + "epoch": 0.94, + "learning_rate": 3.888925582549006e-05, + "loss": 0.9679, + "step": 1695 + }, + { + "epoch": 0.94, + "learning_rate": 3.882899903885412e-05, + "loss": 0.9395, + "step": 1700 + }, + { + "epoch": 0.94, + "learning_rate": 3.876862627015361e-05, + "loss": 0.9774, + "step": 1705 + }, + { + "epoch": 0.95, + "learning_rate": 3.87081380257273e-05, + "loss": 0.9686, + "step": 1710 + }, + { + "epoch": 0.95, + "learning_rate": 3.864753481288244e-05, + "loss": 0.953, + "step": 1715 + }, + { + "epoch": 0.95, + "learning_rate": 3.8586817139890515e-05, + "loss": 0.9719, + "step": 1720 + }, + { + "epoch": 0.95, + "learning_rate": 3.852598551598294e-05, + "loss": 0.9748, + "step": 1725 + }, + { + "epoch": 0.96, + "learning_rate": 3.8465040451346874e-05, + "loss": 0.9741, + "step": 1730 + }, + { + "epoch": 0.96, + "learning_rate": 3.8403982457120836e-05, + "loss": 0.9747, + "step": 1735 + }, + { + "epoch": 0.96, + "learning_rate": 3.834281204539051e-05, + "loss": 0.9791, + "step": 1740 + }, + { + "epoch": 0.96, + "learning_rate": 3.828152972918438e-05, + "loss": 0.9704, + "step": 1745 + }, + { + "epoch": 0.97, + "learning_rate": 3.82201360224695e-05, + "loss": 0.9556, + "step": 1750 + }, + { + "epoch": 0.97, + "learning_rate": 3.815863144014711e-05, + "loss": 0.9552, + "step": 1755 + }, + { + "epoch": 0.97, + "learning_rate": 3.809701649804834e-05, + "loss": 0.9594, + "step": 1760 + }, + { + "epoch": 0.98, + "learning_rate": 3.8035291712929926e-05, + "loss": 0.959, + "step": 1765 + }, + { + "epoch": 0.98, + "learning_rate": 3.797345760246982e-05, + "loss": 1.0254, + "step": 1770 + }, + { + "epoch": 0.98, + "learning_rate": 3.791151468526289e-05, + "loss": 0.9925, + "step": 1775 + }, + { + "epoch": 0.98, + "learning_rate": 3.784946348081654e-05, + "loss": 0.9516, + "step": 1780 + }, + { + "epoch": 0.99, + "learning_rate": 3.7787304509546365e-05, + "loss": 0.954, + "step": 1785 + }, + { + "epoch": 0.99, + "learning_rate": 3.7725038292771774e-05, + "loss": 0.9465, + "step": 1790 + }, + { + "epoch": 0.99, + "learning_rate": 3.766266535271167e-05, + "loss": 0.9792, + "step": 1795 + }, + { + "epoch": 1.0, + "learning_rate": 3.760018621248e-05, + "loss": 0.964, + "step": 1800 + }, + { + "epoch": 1.0, + "learning_rate": 3.75376013960814e-05, + "loss": 0.9419, + "step": 1805 + }, + { + "epoch": 1.0, + "learning_rate": 3.747491142840681e-05, + "loss": 0.9818, + "step": 1810 + }, + { + "epoch": 1.0, + "learning_rate": 3.741211683522904e-05, + "loss": 0.9153, + "step": 1815 + }, + { + "epoch": 1.01, + "learning_rate": 3.734921814319841e-05, + "loss": 0.955, + "step": 1820 + }, + { + "epoch": 1.01, + "learning_rate": 3.728621587983828e-05, + "loss": 0.9467, + "step": 1825 + }, + { + "epoch": 1.01, + "learning_rate": 3.722311057354067e-05, + "loss": 0.9816, + "step": 1830 + }, + { + "epoch": 1.01, + "learning_rate": 3.715990275356178e-05, + "loss": 0.9727, + "step": 1835 + }, + { + "epoch": 1.02, + "learning_rate": 3.7096592950017617e-05, + "loss": 0.9822, + "step": 1840 + }, + { + "epoch": 1.02, + "learning_rate": 3.703318169387947e-05, + "loss": 0.942, + "step": 1845 + }, + { + "epoch": 1.02, + "learning_rate": 3.696966951696952e-05, + "loss": 0.9306, + "step": 1850 + }, + { + "epoch": 1.03, + "learning_rate": 3.690605695195637e-05, + "loss": 0.9654, + "step": 1855 + }, + { + "epoch": 1.03, + "learning_rate": 3.684234453235054e-05, + "loss": 0.9592, + "step": 1860 + }, + { + "epoch": 1.03, + "learning_rate": 3.677853279250003e-05, + "loss": 0.988, + "step": 1865 + }, + { + "epoch": 1.03, + "learning_rate": 3.671462226758583e-05, + "loss": 0.9462, + "step": 1870 + }, + { + "epoch": 1.04, + "learning_rate": 3.665061349361742e-05, + "loss": 0.9685, + "step": 1875 + }, + { + "epoch": 1.04, + "learning_rate": 3.658650700742828e-05, + "loss": 0.9772, + "step": 1880 + }, + { + "epoch": 1.04, + "learning_rate": 3.6522303346671404e-05, + "loss": 0.9482, + "step": 1885 + }, + { + "epoch": 1.05, + "learning_rate": 3.645800304981477e-05, + "loss": 1.0069, + "step": 1890 + }, + { + "epoch": 1.05, + "learning_rate": 3.639360665613683e-05, + "loss": 0.9003, + "step": 1895 + }, + { + "epoch": 1.05, + "learning_rate": 3.632911470572197e-05, + "loss": 0.9279, + "step": 1900 + }, + { + "epoch": 1.05, + "learning_rate": 3.626452773945603e-05, + "loss": 0.9237, + "step": 1905 + }, + { + "epoch": 1.06, + "learning_rate": 3.619984629902172e-05, + "loss": 1.0086, + "step": 1910 + }, + { + "epoch": 1.06, + "learning_rate": 3.613507092689409e-05, + "loss": 0.9625, + "step": 1915 + }, + { + "epoch": 1.06, + "learning_rate": 3.607020216633599e-05, + "loss": 0.9297, + "step": 1920 + }, + { + "epoch": 1.06, + "learning_rate": 3.60052405613935e-05, + "loss": 0.9718, + "step": 1925 + }, + { + "epoch": 1.07, + "learning_rate": 3.594018665689139e-05, + "loss": 0.9512, + "step": 1930 + }, + { + "epoch": 1.07, + "learning_rate": 3.5875040998428513e-05, + "loss": 0.9923, + "step": 1935 + }, + { + "epoch": 1.07, + "learning_rate": 3.5809804132373253e-05, + "loss": 0.9518, + "step": 1940 + }, + { + "epoch": 1.08, + "learning_rate": 3.574447660585897e-05, + "loss": 0.9324, + "step": 1945 + }, + { + "epoch": 1.08, + "learning_rate": 3.5679058966779344e-05, + "loss": 0.9724, + "step": 1950 + }, + { + "epoch": 1.08, + "learning_rate": 3.561355176378384e-05, + "loss": 0.978, + "step": 1955 + }, + { + "epoch": 1.08, + "learning_rate": 3.554795554627307e-05, + "loss": 0.9893, + "step": 1960 + }, + { + "epoch": 1.09, + "learning_rate": 3.548227086439422e-05, + "loss": 0.967, + "step": 1965 + }, + { + "epoch": 1.09, + "learning_rate": 3.541649826903639e-05, + "loss": 0.9529, + "step": 1970 + }, + { + "epoch": 1.09, + "learning_rate": 3.535063831182602e-05, + "loss": 0.9506, + "step": 1975 + }, + { + "epoch": 1.09, + "learning_rate": 3.528469154512224e-05, + "loss": 0.9525, + "step": 1980 + }, + { + "epoch": 1.1, + "learning_rate": 3.521865852201223e-05, + "loss": 0.9258, + "step": 1985 + }, + { + "epoch": 1.1, + "learning_rate": 3.5152539796306596e-05, + "loss": 0.9417, + "step": 1990 + }, + { + "epoch": 1.1, + "learning_rate": 3.508633592253472e-05, + "loss": 0.9465, + "step": 1995 + }, + { + "epoch": 1.11, + "learning_rate": 3.502004745594011e-05, + "loss": 0.9494, + "step": 2000 + }, + { + "epoch": 1.11, + "learning_rate": 3.4953674952475755e-05, + "loss": 0.9709, + "step": 2005 + }, + { + "epoch": 1.11, + "learning_rate": 3.488721896879943e-05, + "loss": 0.9581, + "step": 2010 + }, + { + "epoch": 1.11, + "learning_rate": 3.4820680062269074e-05, + "loss": 0.974, + "step": 2015 + }, + { + "epoch": 1.12, + "learning_rate": 3.4754058790938046e-05, + "loss": 0.9768, + "step": 2020 + }, + { + "epoch": 1.12, + "learning_rate": 3.468735571355055e-05, + "loss": 0.982, + "step": 2025 + }, + { + "epoch": 1.12, + "learning_rate": 3.4620571389536825e-05, + "loss": 0.984, + "step": 2030 + }, + { + "epoch": 1.13, + "learning_rate": 3.455370637900856e-05, + "loss": 0.9604, + "step": 2035 + }, + { + "epoch": 1.13, + "learning_rate": 3.448676124275414e-05, + "loss": 0.897, + "step": 2040 + }, + { + "epoch": 1.13, + "learning_rate": 3.4419736542233925e-05, + "loss": 0.9968, + "step": 2045 + }, + { + "epoch": 1.13, + "learning_rate": 3.4352632839575616e-05, + "loss": 0.9479, + "step": 2050 + }, + { + "epoch": 1.14, + "learning_rate": 3.428545069756946e-05, + "loss": 0.9724, + "step": 2055 + }, + { + "epoch": 1.14, + "learning_rate": 3.42181906796636e-05, + "loss": 0.9493, + "step": 2060 + }, + { + "epoch": 1.14, + "learning_rate": 3.415085334995927e-05, + "loss": 0.9348, + "step": 2065 + }, + { + "epoch": 1.14, + "learning_rate": 3.408343927320613e-05, + "loss": 0.9702, + "step": 2070 + }, + { + "epoch": 1.15, + "learning_rate": 3.401594901479753e-05, + "loss": 0.9089, + "step": 2075 + }, + { + "epoch": 1.15, + "learning_rate": 3.394838314076572e-05, + "loss": 0.9606, + "step": 2080 + }, + { + "epoch": 1.15, + "learning_rate": 3.3880742217777115e-05, + "loss": 0.9743, + "step": 2085 + }, + { + "epoch": 1.16, + "learning_rate": 3.381302681312759e-05, + "loss": 0.9469, + "step": 2090 + }, + { + "epoch": 1.16, + "learning_rate": 3.374523749473767e-05, + "loss": 0.949, + "step": 2095 + }, + { + "epoch": 1.16, + "learning_rate": 3.367737483114779e-05, + "loss": 0.9567, + "step": 2100 + }, + { + "epoch": 1.16, + "learning_rate": 3.360943939151351e-05, + "loss": 0.9718, + "step": 2105 + }, + { + "epoch": 1.17, + "learning_rate": 3.354143174560078e-05, + "loss": 0.9626, + "step": 2110 + }, + { + "epoch": 1.17, + "learning_rate": 3.3473352463781105e-05, + "loss": 0.9346, + "step": 2115 + }, + { + "epoch": 1.17, + "learning_rate": 3.340520211702681e-05, + "loss": 0.9208, + "step": 2120 + }, + { + "epoch": 1.18, + "learning_rate": 3.333698127690623e-05, + "loss": 0.9856, + "step": 2125 + }, + { + "epoch": 1.18, + "learning_rate": 3.326869051557891e-05, + "loss": 0.9049, + "step": 2130 + }, + { + "epoch": 1.18, + "learning_rate": 3.320033040579082e-05, + "loss": 0.9222, + "step": 2135 + }, + { + "epoch": 1.18, + "learning_rate": 3.3131901520869565e-05, + "loss": 0.9648, + "step": 2140 + }, + { + "epoch": 1.19, + "learning_rate": 3.306340443471951e-05, + "loss": 0.9538, + "step": 2145 + }, + { + "epoch": 1.19, + "learning_rate": 3.299483972181708e-05, + "loss": 0.9314, + "step": 2150 + }, + { + "epoch": 1.19, + "learning_rate": 3.292620795720583e-05, + "loss": 0.9576, + "step": 2155 + }, + { + "epoch": 1.19, + "learning_rate": 3.285750971649167e-05, + "loss": 0.9499, + "step": 2160 + }, + { + "epoch": 1.2, + "learning_rate": 3.278874557583807e-05, + "loss": 0.9568, + "step": 2165 + }, + { + "epoch": 1.2, + "learning_rate": 3.271991611196117e-05, + "loss": 0.9642, + "step": 2170 + }, + { + "epoch": 1.2, + "learning_rate": 3.265102190212497e-05, + "loss": 0.9526, + "step": 2175 + }, + { + "epoch": 1.21, + "learning_rate": 3.258206352413648e-05, + "loss": 0.933, + "step": 2180 + }, + { + "epoch": 1.21, + "learning_rate": 3.2513041556340887e-05, + "loss": 0.9683, + "step": 2185 + }, + { + "epoch": 1.21, + "learning_rate": 3.244395657761671e-05, + "loss": 0.9155, + "step": 2190 + }, + { + "epoch": 1.21, + "learning_rate": 3.2374809167370924e-05, + "loss": 0.9262, + "step": 2195 + }, + { + "epoch": 1.22, + "learning_rate": 3.230559990553409e-05, + "loss": 0.9778, + "step": 2200 + }, + { + "epoch": 1.22, + "learning_rate": 3.2236329372555544e-05, + "loss": 0.9577, + "step": 2205 + }, + { + "epoch": 1.22, + "learning_rate": 3.2166998149398465e-05, + "loss": 0.9286, + "step": 2210 + }, + { + "epoch": 1.22, + "learning_rate": 3.209760681753505e-05, + "loss": 0.9634, + "step": 2215 + }, + { + "epoch": 1.23, + "learning_rate": 3.2028155958941615e-05, + "loss": 0.9467, + "step": 2220 + }, + { + "epoch": 1.23, + "learning_rate": 3.195864615609373e-05, + "loss": 0.9543, + "step": 2225 + }, + { + "epoch": 1.23, + "learning_rate": 3.1889077991961304e-05, + "loss": 0.9914, + "step": 2230 + }, + { + "epoch": 1.24, + "learning_rate": 3.181945205000373e-05, + "loss": 0.9309, + "step": 2235 + }, + { + "epoch": 1.24, + "learning_rate": 3.1749768914164955e-05, + "loss": 0.9299, + "step": 2240 + }, + { + "epoch": 1.24, + "learning_rate": 3.168002916886864e-05, + "loss": 0.9462, + "step": 2245 + }, + { + "epoch": 1.24, + "learning_rate": 3.1610233399013194e-05, + "loss": 0.948, + "step": 2250 + }, + { + "epoch": 1.25, + "learning_rate": 3.15403821899669e-05, + "loss": 0.9335, + "step": 2255 + }, + { + "epoch": 1.25, + "learning_rate": 3.147047612756302e-05, + "loss": 0.952, + "step": 2260 + }, + { + "epoch": 1.25, + "learning_rate": 3.140051579809484e-05, + "loss": 0.9532, + "step": 2265 + }, + { + "epoch": 1.26, + "learning_rate": 3.133050178831079e-05, + "loss": 0.9853, + "step": 2270 + }, + { + "epoch": 1.26, + "learning_rate": 3.12604346854095e-05, + "loss": 0.8925, + "step": 2275 + }, + { + "epoch": 1.26, + "learning_rate": 3.119031507703491e-05, + "loss": 0.942, + "step": 2280 + }, + { + "epoch": 1.26, + "learning_rate": 3.112014355127129e-05, + "loss": 0.9132, + "step": 2285 + }, + { + "epoch": 1.27, + "learning_rate": 3.104992069663835e-05, + "loss": 0.9335, + "step": 2290 + }, + { + "epoch": 1.27, + "learning_rate": 3.0979647102086273e-05, + "loss": 0.9403, + "step": 2295 + }, + { + "epoch": 1.27, + "learning_rate": 3.090932335699081e-05, + "loss": 0.9246, + "step": 2300 + }, + { + "epoch": 1.27, + "learning_rate": 3.083895005114831e-05, + "loss": 0.912, + "step": 2305 + }, + { + "epoch": 1.28, + "learning_rate": 3.076852777477079e-05, + "loss": 0.9334, + "step": 2310 + }, + { + "epoch": 1.28, + "learning_rate": 3.069805711848096e-05, + "loss": 0.933, + "step": 2315 + }, + { + "epoch": 1.28, + "learning_rate": 3.062753867330729e-05, + "loss": 0.9348, + "step": 2320 + }, + { + "epoch": 1.29, + "learning_rate": 3.055697303067905e-05, + "loss": 0.9997, + "step": 2325 + }, + { + "epoch": 1.29, + "learning_rate": 3.048636078242137e-05, + "loss": 0.9196, + "step": 2330 + }, + { + "epoch": 1.29, + "learning_rate": 3.0415702520750235e-05, + "loss": 0.9735, + "step": 2335 + }, + { + "epoch": 1.29, + "learning_rate": 3.0344998838267525e-05, + "loss": 0.9395, + "step": 2340 + }, + { + "epoch": 1.3, + "learning_rate": 3.0274250327956093e-05, + "loss": 0.9455, + "step": 2345 + }, + { + "epoch": 1.3, + "learning_rate": 3.020345758317474e-05, + "loss": 0.968, + "step": 2350 + }, + { + "epoch": 1.3, + "learning_rate": 3.0132621197653245e-05, + "loss": 0.9403, + "step": 2355 + }, + { + "epoch": 1.3, + "learning_rate": 3.0061741765487418e-05, + "loss": 0.9267, + "step": 2360 + }, + { + "epoch": 1.31, + "learning_rate": 2.9990819881134073e-05, + "loss": 0.9734, + "step": 2365 + }, + { + "epoch": 1.31, + "learning_rate": 2.9919856139406093e-05, + "loss": 0.949, + "step": 2370 + }, + { + "epoch": 1.31, + "learning_rate": 2.9848851135467386e-05, + "loss": 0.9464, + "step": 2375 + }, + { + "epoch": 1.32, + "learning_rate": 2.977780546482794e-05, + "loss": 0.9709, + "step": 2380 + }, + { + "epoch": 1.32, + "learning_rate": 2.9706719723338795e-05, + "loss": 0.9202, + "step": 2385 + }, + { + "epoch": 1.32, + "learning_rate": 2.9635594507187074e-05, + "loss": 0.9625, + "step": 2390 + }, + { + "epoch": 1.32, + "learning_rate": 2.956443041289096e-05, + "loss": 0.9307, + "step": 2395 + }, + { + "epoch": 1.33, + "learning_rate": 2.9493228037294702e-05, + "loss": 0.907, + "step": 2400 + }, + { + "epoch": 1.33, + "learning_rate": 2.9421987977563613e-05, + "loss": 0.9487, + "step": 2405 + }, + { + "epoch": 1.33, + "learning_rate": 2.935071083117907e-05, + "loss": 0.9259, + "step": 2410 + }, + { + "epoch": 1.34, + "learning_rate": 2.9279397195933457e-05, + "loss": 0.9357, + "step": 2415 + }, + { + "epoch": 1.34, + "learning_rate": 2.920804766992521e-05, + "loss": 0.9149, + "step": 2420 + }, + { + "epoch": 1.34, + "learning_rate": 2.9136662851553787e-05, + "loss": 0.9664, + "step": 2425 + }, + { + "epoch": 1.34, + "learning_rate": 2.906524333951461e-05, + "loss": 0.973, + "step": 2430 + }, + { + "epoch": 1.35, + "learning_rate": 2.899378973279409e-05, + "loss": 0.9797, + "step": 2435 + }, + { + "epoch": 1.35, + "learning_rate": 2.892230263066459e-05, + "loss": 0.9604, + "step": 2440 + }, + { + "epoch": 1.35, + "learning_rate": 2.885078263267938e-05, + "loss": 0.9147, + "step": 2445 + }, + { + "epoch": 1.35, + "learning_rate": 2.8779230338667634e-05, + "loss": 0.9827, + "step": 2450 + }, + { + "epoch": 1.36, + "learning_rate": 2.870764634872939e-05, + "loss": 0.9362, + "step": 2455 + }, + { + "epoch": 1.36, + "learning_rate": 2.86360312632305e-05, + "loss": 0.9373, + "step": 2460 + }, + { + "epoch": 1.36, + "learning_rate": 2.8564385682797622e-05, + "loss": 0.955, + "step": 2465 + }, + { + "epoch": 1.37, + "learning_rate": 2.8492710208313177e-05, + "loss": 0.9108, + "step": 2470 + }, + { + "epoch": 1.37, + "learning_rate": 2.8421005440910303e-05, + "loss": 0.9259, + "step": 2475 + }, + { + "epoch": 1.37, + "learning_rate": 2.8349271981967797e-05, + "loss": 0.9333, + "step": 2480 + }, + { + "epoch": 1.37, + "learning_rate": 2.8277510433105102e-05, + "loss": 0.9437, + "step": 2485 + }, + { + "epoch": 1.38, + "learning_rate": 2.820572139617725e-05, + "loss": 0.9189, + "step": 2490 + }, + { + "epoch": 1.38, + "learning_rate": 2.8133905473269802e-05, + "loss": 0.9522, + "step": 2495 + }, + { + "epoch": 1.38, + "learning_rate": 2.8062063266693818e-05, + "loss": 0.9352, + "step": 2500 + }, + { + "epoch": 1.39, + "learning_rate": 2.7990195378980784e-05, + "loss": 0.9232, + "step": 2505 + }, + { + "epoch": 1.39, + "learning_rate": 2.7918302412877583e-05, + "loss": 0.8807, + "step": 2510 + }, + { + "epoch": 1.39, + "learning_rate": 2.7846384971341427e-05, + "loss": 0.9231, + "step": 2515 + }, + { + "epoch": 1.39, + "learning_rate": 2.7774443657534788e-05, + "loss": 0.9253, + "step": 2520 + }, + { + "epoch": 1.4, + "learning_rate": 2.770247907482036e-05, + "loss": 0.9646, + "step": 2525 + }, + { + "epoch": 1.4, + "learning_rate": 2.763049182675599e-05, + "loss": 0.9575, + "step": 2530 + }, + { + "epoch": 1.4, + "learning_rate": 2.7558482517089617e-05, + "loss": 0.9234, + "step": 2535 + }, + { + "epoch": 1.4, + "learning_rate": 2.748645174975421e-05, + "loss": 0.9215, + "step": 2540 + }, + { + "epoch": 1.41, + "learning_rate": 2.74144001288627e-05, + "loss": 0.959, + "step": 2545 + }, + { + "epoch": 1.41, + "learning_rate": 2.7342328258702894e-05, + "loss": 0.9509, + "step": 2550 + }, + { + "epoch": 1.41, + "learning_rate": 2.727023674373246e-05, + "loss": 0.8989, + "step": 2555 + }, + { + "epoch": 1.42, + "learning_rate": 2.7198126188573807e-05, + "loss": 0.9653, + "step": 2560 + }, + { + "epoch": 1.42, + "learning_rate": 2.7125997198009028e-05, + "loss": 0.9046, + "step": 2565 + }, + { + "epoch": 1.42, + "learning_rate": 2.7053850376974848e-05, + "loss": 0.9318, + "step": 2570 + }, + { + "epoch": 1.42, + "learning_rate": 2.6981686330557516e-05, + "loss": 0.9292, + "step": 2575 + }, + { + "epoch": 1.43, + "learning_rate": 2.6909505663987756e-05, + "loss": 0.99, + "step": 2580 + }, + { + "epoch": 1.43, + "learning_rate": 2.6837308982635678e-05, + "loss": 0.9737, + "step": 2585 + }, + { + "epoch": 1.43, + "learning_rate": 2.6765096892005726e-05, + "loss": 0.9649, + "step": 2590 + }, + { + "epoch": 1.43, + "learning_rate": 2.6692869997731545e-05, + "loss": 0.9687, + "step": 2595 + }, + { + "epoch": 1.44, + "learning_rate": 2.6620628905570964e-05, + "loss": 0.9708, + "step": 2600 + }, + { + "epoch": 1.44, + "learning_rate": 2.6548374221400884e-05, + "loss": 0.9498, + "step": 2605 + }, + { + "epoch": 1.44, + "learning_rate": 2.6476106551212188e-05, + "loss": 0.9512, + "step": 2610 + }, + { + "epoch": 1.45, + "learning_rate": 2.6403826501104682e-05, + "loss": 0.964, + "step": 2615 + }, + { + "epoch": 1.45, + "learning_rate": 2.6331534677281998e-05, + "loss": 0.9321, + "step": 2620 + }, + { + "epoch": 1.45, + "learning_rate": 2.6259231686046508e-05, + "loss": 0.9032, + "step": 2625 + }, + { + "epoch": 1.45, + "learning_rate": 2.6186918133794252e-05, + "loss": 0.9543, + "step": 2630 + }, + { + "epoch": 1.46, + "learning_rate": 2.6114594627009847e-05, + "loss": 0.9355, + "step": 2635 + }, + { + "epoch": 1.46, + "learning_rate": 2.604226177226137e-05, + "loss": 0.9111, + "step": 2640 + }, + { + "epoch": 1.46, + "learning_rate": 2.596992017619534e-05, + "loss": 0.9896, + "step": 2645 + }, + { + "epoch": 1.47, + "learning_rate": 2.589757044553155e-05, + "loss": 0.9715, + "step": 2650 + }, + { + "epoch": 1.47, + "learning_rate": 2.5825213187058045e-05, + "loss": 0.8911, + "step": 2655 + }, + { + "epoch": 1.47, + "learning_rate": 2.5752849007625986e-05, + "loss": 0.9446, + "step": 2660 + }, + { + "epoch": 1.47, + "learning_rate": 2.568047851414459e-05, + "loss": 0.9453, + "step": 2665 + }, + { + "epoch": 1.48, + "learning_rate": 2.5608102313576027e-05, + "loss": 0.9244, + "step": 2670 + }, + { + "epoch": 1.48, + "learning_rate": 2.553572101293033e-05, + "loss": 0.9478, + "step": 2675 + }, + { + "epoch": 1.48, + "learning_rate": 2.546333521926031e-05, + "loss": 0.9708, + "step": 2680 + }, + { + "epoch": 1.48, + "learning_rate": 2.5390945539656445e-05, + "loss": 0.9266, + "step": 2685 + }, + { + "epoch": 1.49, + "learning_rate": 2.5318552581241822e-05, + "loss": 0.9199, + "step": 2690 + }, + { + "epoch": 1.49, + "learning_rate": 2.524615695116702e-05, + "loss": 0.9666, + "step": 2695 + }, + { + "epoch": 1.49, + "learning_rate": 2.5173759256605027e-05, + "loss": 0.9247, + "step": 2700 + }, + { + "epoch": 1.5, + "learning_rate": 2.510136010474614e-05, + "loss": 0.9534, + "step": 2705 + }, + { + "epoch": 1.5, + "learning_rate": 2.5028960102792887e-05, + "loss": 0.9502, + "step": 2710 + }, + { + "epoch": 1.5, + "learning_rate": 2.4971039897207112e-05, + "loss": 0.9331, + "step": 2715 + }, + { + "epoch": 1.5, + "learning_rate": 2.4898639895253865e-05, + "loss": 0.9806, + "step": 2720 + }, + { + "epoch": 1.51, + "learning_rate": 2.4826240743394982e-05, + "loss": 0.9525, + "step": 2725 + }, + { + "epoch": 1.51, + "learning_rate": 2.4753843048832985e-05, + "loss": 0.937, + "step": 2730 + }, + { + "epoch": 1.51, + "learning_rate": 2.4681447418758187e-05, + "loss": 0.935, + "step": 2735 + }, + { + "epoch": 1.52, + "learning_rate": 2.460905446034356e-05, + "loss": 0.9662, + "step": 2740 + }, + { + "epoch": 1.52, + "learning_rate": 2.45366647807397e-05, + "loss": 0.9249, + "step": 2745 + }, + { + "epoch": 1.52, + "learning_rate": 2.446427898706967e-05, + "loss": 0.9673, + "step": 2750 + }, + { + "epoch": 1.52, + "learning_rate": 2.439189768642398e-05, + "loss": 0.9316, + "step": 2755 + }, + { + "epoch": 1.53, + "learning_rate": 2.431952148585541e-05, + "loss": 0.9297, + "step": 2760 + }, + { + "epoch": 1.53, + "learning_rate": 2.424715099237402e-05, + "loss": 0.9278, + "step": 2765 + }, + { + "epoch": 1.53, + "learning_rate": 2.4174786812941968e-05, + "loss": 0.8954, + "step": 2770 + }, + { + "epoch": 1.53, + "learning_rate": 2.4102429554468456e-05, + "loss": 0.9586, + "step": 2775 + }, + { + "epoch": 1.54, + "learning_rate": 2.4030079823804673e-05, + "loss": 0.9119, + "step": 2780 + }, + { + "epoch": 1.54, + "learning_rate": 2.395773822773863e-05, + "loss": 0.8949, + "step": 2785 + }, + { + "epoch": 1.54, + "learning_rate": 2.3885405372990166e-05, + "loss": 0.9506, + "step": 2790 + }, + { + "epoch": 1.55, + "learning_rate": 2.3813081866205754e-05, + "loss": 0.9087, + "step": 2795 + }, + { + "epoch": 1.55, + "learning_rate": 2.3740768313953494e-05, + "loss": 0.923, + "step": 2800 + } + ], + "max_steps": 5424, + "num_train_epochs": 3, + "total_flos": 9.098965625182618e+17, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-2800/training_args.bin b/checkpoint-2800/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..77433459daca6603950afc463ff989e817f81ac3 --- /dev/null +++ b/checkpoint-2800/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0f6a8853599a52b1f15984d75ce4b6434c335fdc67d4509d0b2ca0d8c613590a +size 3362 diff --git a/checkpoint-2900/README.md b/checkpoint-2900/README.md new file mode 100644 index 0000000000000000000000000000000000000000..53c0fe6b436905ad7a2c060e98fe1ebdeac1a2fe --- /dev/null +++ b/checkpoint-2900/README.md @@ -0,0 +1,15 @@ +--- +library_name: peft +--- +## Training procedure + + +The following `bitsandbytes` quantization config was used during training: +- load_in_8bit: True +- llm_int8_threshold: 6.0 +- llm_int8_skip_modules: None +- llm_int8_enable_fp32_cpu_offload: False +### Framework versions + + +- PEFT 0.5.0 diff --git a/checkpoint-2900/adapter_config.json b/checkpoint-2900/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..514ae99ac7471cc26016f7575e44dd8304f29b07 --- /dev/null +++ b/checkpoint-2900/adapter_config.json @@ -0,0 +1,21 @@ +{ + "auto_mapping": null, + "base_model_name_or_path": "meta-llama/Llama-2-7b-chat-hf", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "q_proj", + "v_proj" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-2900/adapter_model.bin b/checkpoint-2900/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..46b9eabf82c618b15f96bdb091081b1849ebd6da --- /dev/null +++ b/checkpoint-2900/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:14c102b6ff303cf943381a59ae3102717a10271fd3eb5fb86b086897ceb4772b +size 16821197 diff --git a/checkpoint-2900/finetuning_args.json b/checkpoint-2900/finetuning_args.json new file mode 100644 index 0000000000000000000000000000000000000000..d01efc206b59c6f88548e8f3940579f2ed2af33b --- /dev/null +++ b/checkpoint-2900/finetuning_args.json @@ -0,0 +1,16 @@ +{ + "dpo_beta": 0.1, + "finetuning_type": "lora", + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "lora_rank": 8, + "lora_target": [ + "q_proj", + "v_proj" + ], + "name_module_trainable": "mlp", + "num_hidden_layers": 32, + "num_layer_trainable": 3, + "ppo_score_norm": false, + "resume_lora_training": true +} diff --git a/checkpoint-2900/optimizer.pt b/checkpoint-2900/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..ba3f52fb3fc2a4558aa0baf3974ab4b6109af349 --- /dev/null +++ b/checkpoint-2900/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:283ab1591314bb3a47193f41801dbccee793587daed56bf23c284aa9eab6342f +size 33629893 diff --git a/checkpoint-2900/rng_state.pth b/checkpoint-2900/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..8f37ac0d75968074e06f898db06e7d385f915f94 --- /dev/null +++ b/checkpoint-2900/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8a7f1eee23c924599285f9b6684dab054f15d43f4e517de1ac9f566856c45461 +size 14575 diff --git a/checkpoint-2900/scaler.pt b/checkpoint-2900/scaler.pt new file mode 100644 index 0000000000000000000000000000000000000000..af2065ffbf07b5fe97840b0b4f5819dbc7699412 --- /dev/null +++ b/checkpoint-2900/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c0c08743502d84107212df2909223567e1eca034178ba5fc17a9aa048a739456 +size 557 diff --git a/checkpoint-2900/scheduler.pt b/checkpoint-2900/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..f199f69e655203dfd33399983fb1aeff3ef9d448 --- /dev/null +++ b/checkpoint-2900/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:344e9ac9bb6ece8f62a266d8d73290dd94c5dea4f73434338f81f82f7562c3a5 +size 627 diff --git a/checkpoint-2900/trainer_state.json b/checkpoint-2900/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..7ac0d13cad1433d8433b82b21dd18037f64fbafc --- /dev/null +++ b/checkpoint-2900/trainer_state.json @@ -0,0 +1,3496 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.6035388443461431, + "global_step": 2900, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.9999895164082156e-05, + "loss": 1.6349, + "step": 5 + }, + { + "epoch": 0.01, + "learning_rate": 4.999958065720787e-05, + "loss": 1.6199, + "step": 10 + }, + { + "epoch": 0.01, + "learning_rate": 4.999905648201487e-05, + "loss": 1.4834, + "step": 15 + }, + { + "epoch": 0.01, + "learning_rate": 4.999832264289934e-05, + "loss": 1.3882, + "step": 20 + }, + { + "epoch": 0.01, + "learning_rate": 4.999737914601591e-05, + "loss": 1.3679, + "step": 25 + }, + { + "epoch": 0.02, + "learning_rate": 4.999622599927756e-05, + "loss": 1.2396, + "step": 30 + }, + { + "epoch": 0.02, + "learning_rate": 4.999486321235559e-05, + "loss": 1.321, + "step": 35 + }, + { + "epoch": 0.02, + "learning_rate": 4.9993290796679516e-05, + "loss": 1.2874, + "step": 40 + }, + { + "epoch": 0.02, + "learning_rate": 4.999150876543699e-05, + "loss": 1.2607, + "step": 45 + }, + { + "epoch": 0.03, + "learning_rate": 4.9989517133573694e-05, + "loss": 1.2454, + "step": 50 + }, + { + "epoch": 0.03, + "learning_rate": 4.9987315917793174e-05, + "loss": 1.2799, + "step": 55 + }, + { + "epoch": 0.03, + "learning_rate": 4.998490513655676e-05, + "loss": 1.2575, + "step": 60 + }, + { + "epoch": 0.04, + "learning_rate": 4.998228481008337e-05, + "loss": 1.2404, + "step": 65 + }, + { + "epoch": 0.04, + "learning_rate": 4.997945496034934e-05, + "loss": 1.2219, + "step": 70 + }, + { + "epoch": 0.04, + "learning_rate": 4.9976415611088267e-05, + "loss": 1.2241, + "step": 75 + }, + { + "epoch": 0.04, + "learning_rate": 4.997316678779079e-05, + "loss": 1.1716, + "step": 80 + }, + { + "epoch": 0.05, + "learning_rate": 4.996970851770438e-05, + "loss": 1.1883, + "step": 85 + }, + { + "epoch": 0.05, + "learning_rate": 4.9966040829833115e-05, + "loss": 1.205, + "step": 90 + }, + { + "epoch": 0.05, + "learning_rate": 4.9962163754937426e-05, + "loss": 1.1246, + "step": 95 + }, + { + "epoch": 0.06, + "learning_rate": 4.995807732553384e-05, + "loss": 1.1636, + "step": 100 + }, + { + "epoch": 0.06, + "learning_rate": 4.9953781575894723e-05, + "loss": 1.158, + "step": 105 + }, + { + "epoch": 0.06, + "learning_rate": 4.9949276542048e-05, + "loss": 1.1477, + "step": 110 + }, + { + "epoch": 0.06, + "learning_rate": 4.9944562261776805e-05, + "loss": 1.1678, + "step": 115 + }, + { + "epoch": 0.07, + "learning_rate": 4.9939638774619216e-05, + "loss": 1.1501, + "step": 120 + }, + { + "epoch": 0.07, + "learning_rate": 4.99345061218679e-05, + "loss": 1.1955, + "step": 125 + }, + { + "epoch": 0.07, + "learning_rate": 4.9929164346569756e-05, + "loss": 1.1724, + "step": 130 + }, + { + "epoch": 0.07, + "learning_rate": 4.9923613493525576e-05, + "loss": 1.177, + "step": 135 + }, + { + "epoch": 0.08, + "learning_rate": 4.991785360928968e-05, + "loss": 1.1418, + "step": 140 + }, + { + "epoch": 0.08, + "learning_rate": 4.991188474216947e-05, + "loss": 1.1898, + "step": 145 + }, + { + "epoch": 0.08, + "learning_rate": 4.9905706942225094e-05, + "loss": 1.1479, + "step": 150 + }, + { + "epoch": 0.09, + "learning_rate": 4.9899320261268966e-05, + "loss": 1.1356, + "step": 155 + }, + { + "epoch": 0.09, + "learning_rate": 4.989272475286537e-05, + "loss": 1.1397, + "step": 160 + }, + { + "epoch": 0.09, + "learning_rate": 4.9885920472330004e-05, + "loss": 1.1215, + "step": 165 + }, + { + "epoch": 0.09, + "learning_rate": 4.9878907476729516e-05, + "loss": 1.167, + "step": 170 + }, + { + "epoch": 0.1, + "learning_rate": 4.9871685824881e-05, + "loss": 1.1219, + "step": 175 + }, + { + "epoch": 0.1, + "learning_rate": 4.9864255577351534e-05, + "loss": 1.0835, + "step": 180 + }, + { + "epoch": 0.1, + "learning_rate": 4.985661679645769e-05, + "loss": 1.0721, + "step": 185 + }, + { + "epoch": 0.11, + "learning_rate": 4.9848769546264915e-05, + "loss": 1.0692, + "step": 190 + }, + { + "epoch": 0.11, + "learning_rate": 4.9840713892587146e-05, + "loss": 1.0488, + "step": 195 + }, + { + "epoch": 0.11, + "learning_rate": 4.983244990298609e-05, + "loss": 1.1285, + "step": 200 + }, + { + "epoch": 0.11, + "learning_rate": 4.982397764677081e-05, + "loss": 1.0832, + "step": 205 + }, + { + "epoch": 0.12, + "learning_rate": 4.981529719499704e-05, + "loss": 1.0652, + "step": 210 + }, + { + "epoch": 0.12, + "learning_rate": 4.980640862046663e-05, + "loss": 1.1043, + "step": 215 + }, + { + "epoch": 0.12, + "learning_rate": 4.979731199772693e-05, + "loss": 1.112, + "step": 220 + }, + { + "epoch": 0.12, + "learning_rate": 4.9788007403070146e-05, + "loss": 1.1029, + "step": 225 + }, + { + "epoch": 0.13, + "learning_rate": 4.977849491453277e-05, + "loss": 1.0869, + "step": 230 + }, + { + "epoch": 0.13, + "learning_rate": 4.976877461189481e-05, + "loss": 1.0843, + "step": 235 + }, + { + "epoch": 0.13, + "learning_rate": 4.975884657667922e-05, + "loss": 1.0789, + "step": 240 + }, + { + "epoch": 0.14, + "learning_rate": 4.974871089215118e-05, + "loss": 1.0449, + "step": 245 + }, + { + "epoch": 0.14, + "learning_rate": 4.9738367643317405e-05, + "loss": 1.1053, + "step": 250 + }, + { + "epoch": 0.14, + "learning_rate": 4.9727816916925395e-05, + "loss": 1.0651, + "step": 255 + }, + { + "epoch": 0.14, + "learning_rate": 4.971705880146276e-05, + "loss": 1.0828, + "step": 260 + }, + { + "epoch": 0.15, + "learning_rate": 4.970609338715646e-05, + "loss": 1.0932, + "step": 265 + }, + { + "epoch": 0.15, + "learning_rate": 4.969492076597203e-05, + "loss": 1.0648, + "step": 270 + }, + { + "epoch": 0.15, + "learning_rate": 4.968354103161283e-05, + "loss": 1.0948, + "step": 275 + }, + { + "epoch": 0.15, + "learning_rate": 4.967195427951926e-05, + "loss": 1.0721, + "step": 280 + }, + { + "epoch": 0.16, + "learning_rate": 4.9660160606867936e-05, + "loss": 1.124, + "step": 285 + }, + { + "epoch": 0.16, + "learning_rate": 4.9648160112570896e-05, + "loss": 1.0963, + "step": 290 + }, + { + "epoch": 0.16, + "learning_rate": 4.9635952897274773e-05, + "loss": 1.1078, + "step": 295 + }, + { + "epoch": 0.17, + "learning_rate": 4.9623539063359925e-05, + "loss": 1.1059, + "step": 300 + }, + { + "epoch": 0.17, + "learning_rate": 4.961091871493962e-05, + "loss": 1.1032, + "step": 305 + }, + { + "epoch": 0.17, + "learning_rate": 4.959809195785912e-05, + "loss": 1.0595, + "step": 310 + }, + { + "epoch": 0.17, + "learning_rate": 4.958505889969481e-05, + "loss": 1.1096, + "step": 315 + }, + { + "epoch": 0.18, + "learning_rate": 4.957181964975329e-05, + "loss": 1.0589, + "step": 320 + }, + { + "epoch": 0.18, + "learning_rate": 4.955837431907049e-05, + "loss": 1.0608, + "step": 325 + }, + { + "epoch": 0.18, + "learning_rate": 4.954472302041069e-05, + "loss": 1.0819, + "step": 330 + }, + { + "epoch": 0.19, + "learning_rate": 4.9530865868265605e-05, + "loss": 1.0759, + "step": 335 + }, + { + "epoch": 0.19, + "learning_rate": 4.951680297885342e-05, + "loss": 1.0515, + "step": 340 + }, + { + "epoch": 0.19, + "learning_rate": 4.950253447011779e-05, + "loss": 1.0371, + "step": 345 + }, + { + "epoch": 0.19, + "learning_rate": 4.948806046172691e-05, + "loss": 1.0619, + "step": 350 + }, + { + "epoch": 0.2, + "learning_rate": 4.947338107507245e-05, + "loss": 1.0757, + "step": 355 + }, + { + "epoch": 0.2, + "learning_rate": 4.945849643326857e-05, + "loss": 1.0686, + "step": 360 + }, + { + "epoch": 0.2, + "learning_rate": 4.9443406661150874e-05, + "loss": 1.0809, + "step": 365 + }, + { + "epoch": 0.2, + "learning_rate": 4.942811188527537e-05, + "loss": 1.0704, + "step": 370 + }, + { + "epoch": 0.21, + "learning_rate": 4.941261223391742e-05, + "loss": 1.0655, + "step": 375 + }, + { + "epoch": 0.21, + "learning_rate": 4.939690783707063e-05, + "loss": 1.1182, + "step": 380 + }, + { + "epoch": 0.21, + "learning_rate": 4.938099882644578e-05, + "loss": 1.081, + "step": 385 + }, + { + "epoch": 0.22, + "learning_rate": 4.9364885335469734e-05, + "loss": 1.0792, + "step": 390 + }, + { + "epoch": 0.22, + "learning_rate": 4.93485674992843e-05, + "loss": 1.0244, + "step": 395 + }, + { + "epoch": 0.22, + "learning_rate": 4.933204545474511e-05, + "loss": 1.0531, + "step": 400 + }, + { + "epoch": 0.22, + "learning_rate": 4.9315319340420465e-05, + "loss": 1.048, + "step": 405 + }, + { + "epoch": 0.23, + "learning_rate": 4.929838929659015e-05, + "loss": 1.04, + "step": 410 + }, + { + "epoch": 0.23, + "learning_rate": 4.9281255465244314e-05, + "loss": 1.0686, + "step": 415 + }, + { + "epoch": 0.23, + "learning_rate": 4.926391799008223e-05, + "loss": 1.0933, + "step": 420 + }, + { + "epoch": 0.24, + "learning_rate": 4.924637701651111e-05, + "loss": 1.0313, + "step": 425 + }, + { + "epoch": 0.24, + "learning_rate": 4.9228632691644874e-05, + "loss": 1.0811, + "step": 430 + }, + { + "epoch": 0.24, + "learning_rate": 4.921068516430293e-05, + "loss": 1.0617, + "step": 435 + }, + { + "epoch": 0.24, + "learning_rate": 4.919253458500892e-05, + "loss": 1.0482, + "step": 440 + }, + { + "epoch": 0.25, + "learning_rate": 4.9174181105989445e-05, + "loss": 1.0681, + "step": 445 + }, + { + "epoch": 0.25, + "learning_rate": 4.9155624881172834e-05, + "loss": 1.0476, + "step": 450 + }, + { + "epoch": 0.25, + "learning_rate": 4.913686606618777e-05, + "loss": 1.0463, + "step": 455 + }, + { + "epoch": 0.25, + "learning_rate": 4.911790481836208e-05, + "loss": 1.0213, + "step": 460 + }, + { + "epoch": 0.26, + "learning_rate": 4.909874129672133e-05, + "loss": 0.9855, + "step": 465 + }, + { + "epoch": 0.26, + "learning_rate": 4.907937566198757e-05, + "loss": 1.045, + "step": 470 + }, + { + "epoch": 0.26, + "learning_rate": 4.9059808076577914e-05, + "loss": 1.0442, + "step": 475 + }, + { + "epoch": 0.27, + "learning_rate": 4.904003870460323e-05, + "loss": 1.0354, + "step": 480 + }, + { + "epoch": 0.27, + "learning_rate": 4.9020067711866735e-05, + "loss": 1.0331, + "step": 485 + }, + { + "epoch": 0.27, + "learning_rate": 4.899989526586261e-05, + "loss": 1.0585, + "step": 490 + }, + { + "epoch": 0.27, + "learning_rate": 4.8979521535774636e-05, + "loss": 1.0223, + "step": 495 + }, + { + "epoch": 0.28, + "learning_rate": 4.895894669247468e-05, + "loss": 1.0118, + "step": 500 + }, + { + "epoch": 0.28, + "learning_rate": 4.8938170908521356e-05, + "loss": 1.0508, + "step": 505 + }, + { + "epoch": 0.28, + "learning_rate": 4.8917194358158534e-05, + "loss": 1.0656, + "step": 510 + }, + { + "epoch": 0.28, + "learning_rate": 4.8896017217313886e-05, + "loss": 1.0655, + "step": 515 + }, + { + "epoch": 0.29, + "learning_rate": 4.887463966359741e-05, + "loss": 1.0833, + "step": 520 + }, + { + "epoch": 0.29, + "learning_rate": 4.8853061876299956e-05, + "loss": 1.068, + "step": 525 + }, + { + "epoch": 0.29, + "learning_rate": 4.8831284036391684e-05, + "loss": 1.0487, + "step": 530 + }, + { + "epoch": 0.3, + "learning_rate": 4.880930632652058e-05, + "loss": 1.059, + "step": 535 + }, + { + "epoch": 0.3, + "learning_rate": 4.878712893101092e-05, + "loss": 1.0408, + "step": 540 + }, + { + "epoch": 0.3, + "learning_rate": 4.876475203586171e-05, + "loss": 0.9976, + "step": 545 + }, + { + "epoch": 0.3, + "learning_rate": 4.874217582874514e-05, + "loss": 1.0358, + "step": 550 + }, + { + "epoch": 0.31, + "learning_rate": 4.8719400499005e-05, + "loss": 1.0073, + "step": 555 + }, + { + "epoch": 0.31, + "learning_rate": 4.869642623765509e-05, + "loss": 1.0742, + "step": 560 + }, + { + "epoch": 0.31, + "learning_rate": 4.867325323737765e-05, + "loss": 1.0463, + "step": 565 + }, + { + "epoch": 0.32, + "learning_rate": 4.864988169252168e-05, + "loss": 1.0242, + "step": 570 + }, + { + "epoch": 0.32, + "learning_rate": 4.8626311799101375e-05, + "loss": 1.0773, + "step": 575 + }, + { + "epoch": 0.32, + "learning_rate": 4.860254375479446e-05, + "loss": 1.0371, + "step": 580 + }, + { + "epoch": 0.32, + "learning_rate": 4.8578577758940504e-05, + "loss": 1.0399, + "step": 585 + }, + { + "epoch": 0.33, + "learning_rate": 4.855441401253928e-05, + "loss": 1.0411, + "step": 590 + }, + { + "epoch": 0.33, + "learning_rate": 4.8530052718249076e-05, + "loss": 1.0476, + "step": 595 + }, + { + "epoch": 0.33, + "learning_rate": 4.850549408038498e-05, + "loss": 1.0075, + "step": 600 + }, + { + "epoch": 0.33, + "learning_rate": 4.848073830491717e-05, + "loss": 1.0045, + "step": 605 + }, + { + "epoch": 0.34, + "learning_rate": 4.845578559946923e-05, + "loss": 1.0368, + "step": 610 + }, + { + "epoch": 0.34, + "learning_rate": 4.8430636173316306e-05, + "loss": 1.0156, + "step": 615 + }, + { + "epoch": 0.34, + "learning_rate": 4.840529023738348e-05, + "loss": 1.0334, + "step": 620 + }, + { + "epoch": 0.35, + "learning_rate": 4.837974800424389e-05, + "loss": 0.9994, + "step": 625 + }, + { + "epoch": 0.35, + "learning_rate": 4.8354009688117026e-05, + "loss": 1.0467, + "step": 630 + }, + { + "epoch": 0.35, + "learning_rate": 4.8328075504866874e-05, + "loss": 0.9968, + "step": 635 + }, + { + "epoch": 0.35, + "learning_rate": 4.8301945672000164e-05, + "loss": 1.041, + "step": 640 + }, + { + "epoch": 0.36, + "learning_rate": 4.8275620408664487e-05, + "loss": 0.9811, + "step": 645 + }, + { + "epoch": 0.36, + "learning_rate": 4.8249099935646494e-05, + "loss": 1.0235, + "step": 650 + }, + { + "epoch": 0.36, + "learning_rate": 4.822238447537003e-05, + "loss": 1.0152, + "step": 655 + }, + { + "epoch": 0.36, + "learning_rate": 4.819547425189429e-05, + "loss": 1.038, + "step": 660 + }, + { + "epoch": 0.37, + "learning_rate": 4.81683694909119e-05, + "loss": 1.0387, + "step": 665 + }, + { + "epoch": 0.37, + "learning_rate": 4.814107041974707e-05, + "loss": 1.019, + "step": 670 + }, + { + "epoch": 0.37, + "learning_rate": 4.811357726735366e-05, + "loss": 1.0403, + "step": 675 + }, + { + "epoch": 0.38, + "learning_rate": 4.808589026431324e-05, + "loss": 1.0828, + "step": 680 + }, + { + "epoch": 0.38, + "learning_rate": 4.805800964283322e-05, + "loss": 1.0128, + "step": 685 + }, + { + "epoch": 0.38, + "learning_rate": 4.802993563674483e-05, + "loss": 1.0502, + "step": 690 + }, + { + "epoch": 0.38, + "learning_rate": 4.80016684815012e-05, + "loss": 1.0169, + "step": 695 + }, + { + "epoch": 0.39, + "learning_rate": 4.7973208414175406e-05, + "loss": 1.0316, + "step": 700 + }, + { + "epoch": 0.39, + "learning_rate": 4.794455567345842e-05, + "loss": 1.0263, + "step": 705 + }, + { + "epoch": 0.39, + "learning_rate": 4.791571049965714e-05, + "loss": 1.0801, + "step": 710 + }, + { + "epoch": 0.4, + "learning_rate": 4.7886673134692404e-05, + "loss": 1.0575, + "step": 715 + }, + { + "epoch": 0.4, + "learning_rate": 4.7857443822096905e-05, + "loss": 0.9767, + "step": 720 + }, + { + "epoch": 0.4, + "learning_rate": 4.782802280701319e-05, + "loss": 0.9666, + "step": 725 + }, + { + "epoch": 0.4, + "learning_rate": 4.77984103361916e-05, + "loss": 0.9983, + "step": 730 + }, + { + "epoch": 0.41, + "learning_rate": 4.776860665798816e-05, + "loss": 1.0219, + "step": 735 + }, + { + "epoch": 0.41, + "learning_rate": 4.773861202236257e-05, + "loss": 0.9963, + "step": 740 + }, + { + "epoch": 0.41, + "learning_rate": 4.770842668087602e-05, + "loss": 1.0219, + "step": 745 + }, + { + "epoch": 0.41, + "learning_rate": 4.767805088668916e-05, + "loss": 1.0029, + "step": 750 + }, + { + "epoch": 0.42, + "learning_rate": 4.7647484894559936e-05, + "loss": 1.0077, + "step": 755 + }, + { + "epoch": 0.42, + "learning_rate": 4.7616728960841444e-05, + "loss": 0.9908, + "step": 760 + }, + { + "epoch": 0.42, + "learning_rate": 4.758578334347981e-05, + "loss": 1.0092, + "step": 765 + }, + { + "epoch": 0.43, + "learning_rate": 4.7554648302012015e-05, + "loss": 0.9874, + "step": 770 + }, + { + "epoch": 0.43, + "learning_rate": 4.7523324097563706e-05, + "loss": 1.0185, + "step": 775 + }, + { + "epoch": 0.43, + "learning_rate": 4.749181099284703e-05, + "loss": 1.0533, + "step": 780 + }, + { + "epoch": 0.43, + "learning_rate": 4.746010925215839e-05, + "loss": 1.0083, + "step": 785 + }, + { + "epoch": 0.44, + "learning_rate": 4.74282191413763e-05, + "loss": 1.0191, + "step": 790 + }, + { + "epoch": 0.44, + "learning_rate": 4.7396140927959045e-05, + "loss": 0.9786, + "step": 795 + }, + { + "epoch": 0.44, + "learning_rate": 4.7363874880942574e-05, + "loss": 1.0197, + "step": 800 + }, + { + "epoch": 0.45, + "learning_rate": 4.733142127093813e-05, + "loss": 1.0471, + "step": 805 + }, + { + "epoch": 0.45, + "learning_rate": 4.7298780370130014e-05, + "loss": 1.0134, + "step": 810 + }, + { + "epoch": 0.45, + "learning_rate": 4.726595245227336e-05, + "loss": 0.9801, + "step": 815 + }, + { + "epoch": 0.45, + "learning_rate": 4.723293779269173e-05, + "loss": 0.9944, + "step": 820 + }, + { + "epoch": 0.46, + "learning_rate": 4.7199736668274924e-05, + "loss": 1.0358, + "step": 825 + }, + { + "epoch": 0.46, + "learning_rate": 4.716634935747655e-05, + "loss": 0.9907, + "step": 830 + }, + { + "epoch": 0.46, + "learning_rate": 4.713277614031177e-05, + "loss": 1.0698, + "step": 835 + }, + { + "epoch": 0.46, + "learning_rate": 4.70990172983549e-05, + "loss": 1.0466, + "step": 840 + }, + { + "epoch": 0.47, + "learning_rate": 4.706507311473707e-05, + "loss": 0.9489, + "step": 845 + }, + { + "epoch": 0.47, + "learning_rate": 4.703094387414385e-05, + "loss": 0.9936, + "step": 850 + }, + { + "epoch": 0.47, + "learning_rate": 4.699662986281288e-05, + "loss": 0.9672, + "step": 855 + }, + { + "epoch": 0.48, + "learning_rate": 4.696213136853141e-05, + "loss": 1.0164, + "step": 860 + }, + { + "epoch": 0.48, + "learning_rate": 4.6927448680633954e-05, + "loss": 0.9817, + "step": 865 + }, + { + "epoch": 0.48, + "learning_rate": 4.689258208999983e-05, + "loss": 1.028, + "step": 870 + }, + { + "epoch": 0.48, + "learning_rate": 4.6857531889050716e-05, + "loss": 1.0239, + "step": 875 + }, + { + "epoch": 0.49, + "learning_rate": 4.682229837174821e-05, + "loss": 1.0378, + "step": 880 + }, + { + "epoch": 0.49, + "learning_rate": 4.678688183359135e-05, + "loss": 0.9908, + "step": 885 + }, + { + "epoch": 0.49, + "learning_rate": 4.675128257161418e-05, + "loss": 0.9384, + "step": 890 + }, + { + "epoch": 0.49, + "learning_rate": 4.671550088438319e-05, + "loss": 0.9481, + "step": 895 + }, + { + "epoch": 0.5, + "learning_rate": 4.6679537071994874e-05, + "loss": 0.9541, + "step": 900 + }, + { + "epoch": 0.5, + "learning_rate": 4.6643391436073165e-05, + "loss": 0.9787, + "step": 905 + }, + { + "epoch": 0.5, + "learning_rate": 4.660706427976693e-05, + "loss": 0.9897, + "step": 910 + }, + { + "epoch": 0.51, + "learning_rate": 4.657055590774745e-05, + "loss": 0.9947, + "step": 915 + }, + { + "epoch": 0.51, + "learning_rate": 4.6533866626205805e-05, + "loss": 0.9614, + "step": 920 + }, + { + "epoch": 0.51, + "learning_rate": 4.649699674285036e-05, + "loss": 1.0323, + "step": 925 + }, + { + "epoch": 0.51, + "learning_rate": 4.645994656690417e-05, + "loss": 1.0345, + "step": 930 + }, + { + "epoch": 0.52, + "learning_rate": 4.642271640910235e-05, + "loss": 1.0432, + "step": 935 + }, + { + "epoch": 0.52, + "learning_rate": 4.638530658168954e-05, + "loss": 1.0102, + "step": 940 + }, + { + "epoch": 0.52, + "learning_rate": 4.6347717398417203e-05, + "loss": 1.0361, + "step": 945 + }, + { + "epoch": 0.53, + "learning_rate": 4.6309949174541096e-05, + "loss": 0.9818, + "step": 950 + }, + { + "epoch": 0.53, + "learning_rate": 4.627200222681851e-05, + "loss": 1.0293, + "step": 955 + }, + { + "epoch": 0.53, + "learning_rate": 4.6233876873505694e-05, + "loss": 1.0125, + "step": 960 + }, + { + "epoch": 0.53, + "learning_rate": 4.619557343435516e-05, + "loss": 1.0039, + "step": 965 + }, + { + "epoch": 0.54, + "learning_rate": 4.615709223061302e-05, + "loss": 1.0146, + "step": 970 + }, + { + "epoch": 0.54, + "learning_rate": 4.611843358501624e-05, + "loss": 1.0352, + "step": 975 + }, + { + "epoch": 0.54, + "learning_rate": 4.6079597821789993e-05, + "loss": 1.0405, + "step": 980 + }, + { + "epoch": 0.54, + "learning_rate": 4.604058526664491e-05, + "loss": 0.9834, + "step": 985 + }, + { + "epoch": 0.55, + "learning_rate": 4.600139624677436e-05, + "loss": 1.0252, + "step": 990 + }, + { + "epoch": 0.55, + "learning_rate": 4.596203109085168e-05, + "loss": 1.0516, + "step": 995 + }, + { + "epoch": 0.55, + "learning_rate": 4.5922490129027464e-05, + "loss": 0.9762, + "step": 1000 + }, + { + "epoch": 0.56, + "learning_rate": 4.588277369292674e-05, + "loss": 0.9801, + "step": 1005 + }, + { + "epoch": 0.56, + "learning_rate": 4.5842882115646234e-05, + "loss": 1.0127, + "step": 1010 + }, + { + "epoch": 0.56, + "learning_rate": 4.580281573175157e-05, + "loss": 1.0415, + "step": 1015 + }, + { + "epoch": 0.56, + "learning_rate": 4.576257487727442e-05, + "loss": 0.9974, + "step": 1020 + }, + { + "epoch": 0.57, + "learning_rate": 4.572215988970974e-05, + "loss": 1.0097, + "step": 1025 + }, + { + "epoch": 0.57, + "learning_rate": 4.568157110801293e-05, + "loss": 1.0004, + "step": 1030 + }, + { + "epoch": 0.57, + "learning_rate": 4.5640808872596944e-05, + "loss": 0.9932, + "step": 1035 + }, + { + "epoch": 0.58, + "learning_rate": 4.5599873525329505e-05, + "loss": 0.9437, + "step": 1040 + }, + { + "epoch": 0.58, + "learning_rate": 4.555876540953019e-05, + "loss": 1.0004, + "step": 1045 + }, + { + "epoch": 0.58, + "learning_rate": 4.551748486996755e-05, + "loss": 0.9743, + "step": 1050 + }, + { + "epoch": 0.58, + "learning_rate": 4.547603225285626e-05, + "loss": 1.0303, + "step": 1055 + }, + { + "epoch": 0.59, + "learning_rate": 4.543440790585417e-05, + "loss": 1.0225, + "step": 1060 + }, + { + "epoch": 0.59, + "learning_rate": 4.539261217805939e-05, + "loss": 1.0102, + "step": 1065 + }, + { + "epoch": 0.59, + "learning_rate": 4.535064542000743e-05, + "loss": 1.0087, + "step": 1070 + }, + { + "epoch": 0.59, + "learning_rate": 4.5308507983668165e-05, + "loss": 0.952, + "step": 1075 + }, + { + "epoch": 0.6, + "learning_rate": 4.526620022244293e-05, + "loss": 0.9767, + "step": 1080 + }, + { + "epoch": 0.6, + "learning_rate": 4.522372249116158e-05, + "loss": 1.0049, + "step": 1085 + }, + { + "epoch": 0.6, + "learning_rate": 4.5181075146079456e-05, + "loss": 1.0397, + "step": 1090 + }, + { + "epoch": 0.61, + "learning_rate": 4.5138258544874455e-05, + "loss": 1.0167, + "step": 1095 + }, + { + "epoch": 0.61, + "learning_rate": 4.5095273046643985e-05, + "loss": 0.9966, + "step": 1100 + }, + { + "epoch": 0.61, + "learning_rate": 4.5052119011901986e-05, + "loss": 0.983, + "step": 1105 + }, + { + "epoch": 0.61, + "learning_rate": 4.500879680257587e-05, + "loss": 1.0301, + "step": 1110 + }, + { + "epoch": 0.62, + "learning_rate": 4.4965306782003535e-05, + "loss": 1.0371, + "step": 1115 + }, + { + "epoch": 0.62, + "learning_rate": 4.492164931493028e-05, + "loss": 1.0083, + "step": 1120 + }, + { + "epoch": 0.62, + "learning_rate": 4.487782476750575e-05, + "loss": 0.9728, + "step": 1125 + }, + { + "epoch": 0.62, + "learning_rate": 4.4833833507280884e-05, + "loss": 0.9892, + "step": 1130 + }, + { + "epoch": 0.63, + "learning_rate": 4.4789675903204805e-05, + "loss": 0.9847, + "step": 1135 + }, + { + "epoch": 0.63, + "learning_rate": 4.474535232562176e-05, + "loss": 1.0545, + "step": 1140 + }, + { + "epoch": 0.63, + "learning_rate": 4.470086314626797e-05, + "loss": 1.0204, + "step": 1145 + }, + { + "epoch": 0.64, + "learning_rate": 4.465620873826856e-05, + "loss": 0.9648, + "step": 1150 + }, + { + "epoch": 0.64, + "learning_rate": 4.46113894761344e-05, + "loss": 0.9545, + "step": 1155 + }, + { + "epoch": 0.64, + "learning_rate": 4.456640573575896e-05, + "loss": 1.0256, + "step": 1160 + }, + { + "epoch": 0.64, + "learning_rate": 4.4521257894415183e-05, + "loss": 0.99, + "step": 1165 + }, + { + "epoch": 0.65, + "learning_rate": 4.44759463307523e-05, + "loss": 0.9906, + "step": 1170 + }, + { + "epoch": 0.65, + "learning_rate": 4.443047142479266e-05, + "loss": 0.9876, + "step": 1175 + }, + { + "epoch": 0.65, + "learning_rate": 4.4384833557928553e-05, + "loss": 1.0495, + "step": 1180 + }, + { + "epoch": 0.66, + "learning_rate": 4.4339033112918966e-05, + "loss": 0.9869, + "step": 1185 + }, + { + "epoch": 0.66, + "learning_rate": 4.4293070473886456e-05, + "loss": 1.0299, + "step": 1190 + }, + { + "epoch": 0.66, + "learning_rate": 4.424694602631385e-05, + "loss": 1.0073, + "step": 1195 + }, + { + "epoch": 0.66, + "learning_rate": 4.420066015704105e-05, + "loss": 1.0023, + "step": 1200 + }, + { + "epoch": 0.67, + "learning_rate": 4.41542132542618e-05, + "loss": 0.9462, + "step": 1205 + }, + { + "epoch": 0.67, + "learning_rate": 4.410760570752037e-05, + "loss": 1.0116, + "step": 1210 + }, + { + "epoch": 0.67, + "learning_rate": 4.4060837907708375e-05, + "loss": 0.9652, + "step": 1215 + }, + { + "epoch": 0.67, + "learning_rate": 4.401391024706142e-05, + "loss": 1.0411, + "step": 1220 + }, + { + "epoch": 0.68, + "learning_rate": 4.396682311915586e-05, + "loss": 0.9691, + "step": 1225 + }, + { + "epoch": 0.68, + "learning_rate": 4.3919576918905495e-05, + "loss": 0.961, + "step": 1230 + }, + { + "epoch": 0.68, + "learning_rate": 4.387217204255819e-05, + "loss": 0.9602, + "step": 1235 + }, + { + "epoch": 0.69, + "learning_rate": 4.3824608887692666e-05, + "loss": 1.0197, + "step": 1240 + }, + { + "epoch": 0.69, + "learning_rate": 4.377688785321507e-05, + "loss": 0.9982, + "step": 1245 + }, + { + "epoch": 0.69, + "learning_rate": 4.372900933935569e-05, + "loss": 0.986, + "step": 1250 + }, + { + "epoch": 0.69, + "learning_rate": 4.368097374766556e-05, + "loss": 0.9744, + "step": 1255 + }, + { + "epoch": 0.7, + "learning_rate": 4.3632781481013105e-05, + "loss": 1.0078, + "step": 1260 + }, + { + "epoch": 0.7, + "learning_rate": 4.358443294358077e-05, + "loss": 0.9865, + "step": 1265 + }, + { + "epoch": 0.7, + "learning_rate": 4.35359285408616e-05, + "loss": 0.976, + "step": 1270 + }, + { + "epoch": 0.71, + "learning_rate": 4.348726867965591e-05, + "loss": 1.0141, + "step": 1275 + }, + { + "epoch": 0.71, + "learning_rate": 4.343845376806777e-05, + "loss": 0.952, + "step": 1280 + }, + { + "epoch": 0.71, + "learning_rate": 4.338948421550169e-05, + "loss": 1.0239, + "step": 1285 + }, + { + "epoch": 0.71, + "learning_rate": 4.334036043265909e-05, + "loss": 0.994, + "step": 1290 + }, + { + "epoch": 0.72, + "learning_rate": 4.329108283153492e-05, + "loss": 1.0505, + "step": 1295 + }, + { + "epoch": 0.72, + "learning_rate": 4.3241651825414195e-05, + "loss": 0.9919, + "step": 1300 + }, + { + "epoch": 0.72, + "learning_rate": 4.31920678288685e-05, + "loss": 1.0169, + "step": 1305 + }, + { + "epoch": 0.72, + "learning_rate": 4.3142331257752546e-05, + "loss": 0.9938, + "step": 1310 + }, + { + "epoch": 0.73, + "learning_rate": 4.309244252920064e-05, + "loss": 0.92, + "step": 1315 + }, + { + "epoch": 0.73, + "learning_rate": 4.304240206162326e-05, + "loss": 0.9537, + "step": 1320 + }, + { + "epoch": 0.73, + "learning_rate": 4.299221027470345e-05, + "loss": 0.996, + "step": 1325 + }, + { + "epoch": 0.74, + "learning_rate": 4.294186758939339e-05, + "loss": 0.9652, + "step": 1330 + }, + { + "epoch": 0.74, + "learning_rate": 4.2891374427910795e-05, + "loss": 1.0138, + "step": 1335 + }, + { + "epoch": 0.74, + "learning_rate": 4.284073121373544e-05, + "loss": 0.9267, + "step": 1340 + }, + { + "epoch": 0.74, + "learning_rate": 4.278993837160553e-05, + "loss": 0.9899, + "step": 1345 + }, + { + "epoch": 0.75, + "learning_rate": 4.273899632751422e-05, + "loss": 0.963, + "step": 1350 + }, + { + "epoch": 0.75, + "learning_rate": 4.2687905508705974e-05, + "loss": 1.0066, + "step": 1355 + }, + { + "epoch": 0.75, + "learning_rate": 4.263666634367303e-05, + "loss": 0.9939, + "step": 1360 + }, + { + "epoch": 0.75, + "learning_rate": 4.258527926215178e-05, + "loss": 0.9667, + "step": 1365 + }, + { + "epoch": 0.76, + "learning_rate": 4.253374469511917e-05, + "loss": 0.9999, + "step": 1370 + }, + { + "epoch": 0.76, + "learning_rate": 4.248206307478909e-05, + "loss": 0.9738, + "step": 1375 + }, + { + "epoch": 0.76, + "learning_rate": 4.243023483460875e-05, + "loss": 0.9829, + "step": 1380 + }, + { + "epoch": 0.77, + "learning_rate": 4.237826040925503e-05, + "loss": 1.0107, + "step": 1385 + }, + { + "epoch": 0.77, + "learning_rate": 4.232614023463088e-05, + "loss": 0.9992, + "step": 1390 + }, + { + "epoch": 0.77, + "learning_rate": 4.227387474786159e-05, + "loss": 0.9566, + "step": 1395 + }, + { + "epoch": 0.77, + "learning_rate": 4.222146438729119e-05, + "loss": 0.9722, + "step": 1400 + }, + { + "epoch": 0.78, + "learning_rate": 4.216890959247873e-05, + "loss": 0.988, + "step": 1405 + }, + { + "epoch": 0.78, + "learning_rate": 4.211621080419463e-05, + "loss": 1.0103, + "step": 1410 + }, + { + "epoch": 0.78, + "learning_rate": 4.206336846441695e-05, + "loss": 0.9805, + "step": 1415 + }, + { + "epoch": 0.79, + "learning_rate": 4.201038301632772e-05, + "loss": 1.0177, + "step": 1420 + }, + { + "epoch": 0.79, + "learning_rate": 4.195725490430917e-05, + "loss": 0.9938, + "step": 1425 + }, + { + "epoch": 0.79, + "learning_rate": 4.190398457394007e-05, + "loss": 1.0276, + "step": 1430 + }, + { + "epoch": 0.79, + "learning_rate": 4.1850572471991924e-05, + "loss": 0.9909, + "step": 1435 + }, + { + "epoch": 0.8, + "learning_rate": 4.1797019046425264e-05, + "loss": 0.9794, + "step": 1440 + }, + { + "epoch": 0.8, + "learning_rate": 4.1743324746385914e-05, + "loss": 1.0073, + "step": 1445 + }, + { + "epoch": 0.8, + "learning_rate": 4.1689490022201154e-05, + "loss": 1.0106, + "step": 1450 + }, + { + "epoch": 0.8, + "learning_rate": 4.163551532537601e-05, + "loss": 1.0241, + "step": 1455 + }, + { + "epoch": 0.81, + "learning_rate": 4.1581401108589425e-05, + "loss": 1.0178, + "step": 1460 + }, + { + "epoch": 0.81, + "learning_rate": 4.1527147825690495e-05, + "loss": 0.9426, + "step": 1465 + }, + { + "epoch": 0.81, + "learning_rate": 4.1472755931694626e-05, + "loss": 0.9642, + "step": 1470 + }, + { + "epoch": 0.82, + "learning_rate": 4.141822588277976e-05, + "loss": 0.9678, + "step": 1475 + }, + { + "epoch": 0.82, + "learning_rate": 4.136355813628251e-05, + "loss": 0.9654, + "step": 1480 + }, + { + "epoch": 0.82, + "learning_rate": 4.130875315069435e-05, + "loss": 0.9748, + "step": 1485 + }, + { + "epoch": 0.82, + "learning_rate": 4.125381138565775e-05, + "loss": 0.9321, + "step": 1490 + }, + { + "epoch": 0.83, + "learning_rate": 4.1198733301962346e-05, + "loss": 0.9885, + "step": 1495 + }, + { + "epoch": 0.83, + "learning_rate": 4.114351936154105e-05, + "loss": 0.9385, + "step": 1500 + }, + { + "epoch": 0.83, + "learning_rate": 4.108817002746619e-05, + "loss": 0.9362, + "step": 1505 + }, + { + "epoch": 0.83, + "learning_rate": 4.1032685763945625e-05, + "loss": 0.9764, + "step": 1510 + }, + { + "epoch": 0.84, + "learning_rate": 4.097706703631886e-05, + "loss": 0.961, + "step": 1515 + }, + { + "epoch": 0.84, + "learning_rate": 4.092131431105312e-05, + "loss": 0.9818, + "step": 1520 + }, + { + "epoch": 0.84, + "learning_rate": 4.086542805573945e-05, + "loss": 0.9855, + "step": 1525 + }, + { + "epoch": 0.85, + "learning_rate": 4.080940873908881e-05, + "loss": 0.9618, + "step": 1530 + }, + { + "epoch": 0.85, + "learning_rate": 4.07532568309281e-05, + "loss": 1.0102, + "step": 1535 + }, + { + "epoch": 0.85, + "learning_rate": 4.069697280219628e-05, + "loss": 0.9773, + "step": 1540 + }, + { + "epoch": 0.85, + "learning_rate": 4.0640557124940376e-05, + "loss": 0.9687, + "step": 1545 + }, + { + "epoch": 0.86, + "learning_rate": 4.058401027231152e-05, + "loss": 0.9818, + "step": 1550 + }, + { + "epoch": 0.86, + "learning_rate": 4.052733271856103e-05, + "loss": 0.9911, + "step": 1555 + }, + { + "epoch": 0.86, + "learning_rate": 4.0470524939036355e-05, + "loss": 0.9694, + "step": 1560 + }, + { + "epoch": 0.87, + "learning_rate": 4.0413587410177155e-05, + "loss": 0.9374, + "step": 1565 + }, + { + "epoch": 0.87, + "learning_rate": 4.035652060951128e-05, + "loss": 0.956, + "step": 1570 + }, + { + "epoch": 0.87, + "learning_rate": 4.0299325015650774e-05, + "loss": 0.9491, + "step": 1575 + }, + { + "epoch": 0.87, + "learning_rate": 4.024200110828783e-05, + "loss": 0.9569, + "step": 1580 + }, + { + "epoch": 0.88, + "learning_rate": 4.018454936819082e-05, + "loss": 0.9627, + "step": 1585 + }, + { + "epoch": 0.88, + "learning_rate": 4.012697027720018e-05, + "loss": 0.9922, + "step": 1590 + }, + { + "epoch": 0.88, + "learning_rate": 4.0069264318224506e-05, + "loss": 0.9703, + "step": 1595 + }, + { + "epoch": 0.88, + "learning_rate": 4.0011431975236337e-05, + "loss": 0.9436, + "step": 1600 + }, + { + "epoch": 0.89, + "learning_rate": 3.995347373326822e-05, + "loss": 0.961, + "step": 1605 + }, + { + "epoch": 0.89, + "learning_rate": 3.989539007840861e-05, + "loss": 0.9247, + "step": 1610 + }, + { + "epoch": 0.89, + "learning_rate": 3.983718149779775e-05, + "loss": 0.9537, + "step": 1615 + }, + { + "epoch": 0.9, + "learning_rate": 3.9778848479623656e-05, + "loss": 0.9415, + "step": 1620 + }, + { + "epoch": 0.9, + "learning_rate": 3.972039151311795e-05, + "loss": 0.9954, + "step": 1625 + }, + { + "epoch": 0.9, + "learning_rate": 3.966181108855183e-05, + "loss": 0.9451, + "step": 1630 + }, + { + "epoch": 0.9, + "learning_rate": 3.960310769723189e-05, + "loss": 0.9977, + "step": 1635 + }, + { + "epoch": 0.91, + "learning_rate": 3.9544281831496034e-05, + "loss": 0.9806, + "step": 1640 + }, + { + "epoch": 0.91, + "learning_rate": 3.9485333984709374e-05, + "loss": 0.9851, + "step": 1645 + }, + { + "epoch": 0.91, + "learning_rate": 3.942626465126001e-05, + "loss": 1.0089, + "step": 1650 + }, + { + "epoch": 0.92, + "learning_rate": 3.9367074326555e-05, + "loss": 0.9562, + "step": 1655 + }, + { + "epoch": 0.92, + "learning_rate": 3.930776350701609e-05, + "loss": 0.9892, + "step": 1660 + }, + { + "epoch": 0.92, + "learning_rate": 3.92483326900756e-05, + "loss": 0.9875, + "step": 1665 + }, + { + "epoch": 0.92, + "learning_rate": 3.91887823741723e-05, + "loss": 1.0012, + "step": 1670 + }, + { + "epoch": 0.93, + "learning_rate": 3.9129113058747136e-05, + "loss": 0.97, + "step": 1675 + }, + { + "epoch": 0.93, + "learning_rate": 3.9069325244239095e-05, + "loss": 0.9793, + "step": 1680 + }, + { + "epoch": 0.93, + "learning_rate": 3.900941943208103e-05, + "loss": 0.9864, + "step": 1685 + }, + { + "epoch": 0.93, + "learning_rate": 3.894939612469539e-05, + "loss": 0.9596, + "step": 1690 + }, + { + "epoch": 0.94, + "learning_rate": 3.888925582549006e-05, + "loss": 0.9679, + "step": 1695 + }, + { + "epoch": 0.94, + "learning_rate": 3.882899903885412e-05, + "loss": 0.9395, + "step": 1700 + }, + { + "epoch": 0.94, + "learning_rate": 3.876862627015361e-05, + "loss": 0.9774, + "step": 1705 + }, + { + "epoch": 0.95, + "learning_rate": 3.87081380257273e-05, + "loss": 0.9686, + "step": 1710 + }, + { + "epoch": 0.95, + "learning_rate": 3.864753481288244e-05, + "loss": 0.953, + "step": 1715 + }, + { + "epoch": 0.95, + "learning_rate": 3.8586817139890515e-05, + "loss": 0.9719, + "step": 1720 + }, + { + "epoch": 0.95, + "learning_rate": 3.852598551598294e-05, + "loss": 0.9748, + "step": 1725 + }, + { + "epoch": 0.96, + "learning_rate": 3.8465040451346874e-05, + "loss": 0.9741, + "step": 1730 + }, + { + "epoch": 0.96, + "learning_rate": 3.8403982457120836e-05, + "loss": 0.9747, + "step": 1735 + }, + { + "epoch": 0.96, + "learning_rate": 3.834281204539051e-05, + "loss": 0.9791, + "step": 1740 + }, + { + "epoch": 0.96, + "learning_rate": 3.828152972918438e-05, + "loss": 0.9704, + "step": 1745 + }, + { + "epoch": 0.97, + "learning_rate": 3.82201360224695e-05, + "loss": 0.9556, + "step": 1750 + }, + { + "epoch": 0.97, + "learning_rate": 3.815863144014711e-05, + "loss": 0.9552, + "step": 1755 + }, + { + "epoch": 0.97, + "learning_rate": 3.809701649804834e-05, + "loss": 0.9594, + "step": 1760 + }, + { + "epoch": 0.98, + "learning_rate": 3.8035291712929926e-05, + "loss": 0.959, + "step": 1765 + }, + { + "epoch": 0.98, + "learning_rate": 3.797345760246982e-05, + "loss": 1.0254, + "step": 1770 + }, + { + "epoch": 0.98, + "learning_rate": 3.791151468526289e-05, + "loss": 0.9925, + "step": 1775 + }, + { + "epoch": 0.98, + "learning_rate": 3.784946348081654e-05, + "loss": 0.9516, + "step": 1780 + }, + { + "epoch": 0.99, + "learning_rate": 3.7787304509546365e-05, + "loss": 0.954, + "step": 1785 + }, + { + "epoch": 0.99, + "learning_rate": 3.7725038292771774e-05, + "loss": 0.9465, + "step": 1790 + }, + { + "epoch": 0.99, + "learning_rate": 3.766266535271167e-05, + "loss": 0.9792, + "step": 1795 + }, + { + "epoch": 1.0, + "learning_rate": 3.760018621248e-05, + "loss": 0.964, + "step": 1800 + }, + { + "epoch": 1.0, + "learning_rate": 3.75376013960814e-05, + "loss": 0.9419, + "step": 1805 + }, + { + "epoch": 1.0, + "learning_rate": 3.747491142840681e-05, + "loss": 0.9818, + "step": 1810 + }, + { + "epoch": 1.0, + "learning_rate": 3.741211683522904e-05, + "loss": 0.9153, + "step": 1815 + }, + { + "epoch": 1.01, + "learning_rate": 3.734921814319841e-05, + "loss": 0.955, + "step": 1820 + }, + { + "epoch": 1.01, + "learning_rate": 3.728621587983828e-05, + "loss": 0.9467, + "step": 1825 + }, + { + "epoch": 1.01, + "learning_rate": 3.722311057354067e-05, + "loss": 0.9816, + "step": 1830 + }, + { + "epoch": 1.01, + "learning_rate": 3.715990275356178e-05, + "loss": 0.9727, + "step": 1835 + }, + { + "epoch": 1.02, + "learning_rate": 3.7096592950017617e-05, + "loss": 0.9822, + "step": 1840 + }, + { + "epoch": 1.02, + "learning_rate": 3.703318169387947e-05, + "loss": 0.942, + "step": 1845 + }, + { + "epoch": 1.02, + "learning_rate": 3.696966951696952e-05, + "loss": 0.9306, + "step": 1850 + }, + { + "epoch": 1.03, + "learning_rate": 3.690605695195637e-05, + "loss": 0.9654, + "step": 1855 + }, + { + "epoch": 1.03, + "learning_rate": 3.684234453235054e-05, + "loss": 0.9592, + "step": 1860 + }, + { + "epoch": 1.03, + "learning_rate": 3.677853279250003e-05, + "loss": 0.988, + "step": 1865 + }, + { + "epoch": 1.03, + "learning_rate": 3.671462226758583e-05, + "loss": 0.9462, + "step": 1870 + }, + { + "epoch": 1.04, + "learning_rate": 3.665061349361742e-05, + "loss": 0.9685, + "step": 1875 + }, + { + "epoch": 1.04, + "learning_rate": 3.658650700742828e-05, + "loss": 0.9772, + "step": 1880 + }, + { + "epoch": 1.04, + "learning_rate": 3.6522303346671404e-05, + "loss": 0.9482, + "step": 1885 + }, + { + "epoch": 1.05, + "learning_rate": 3.645800304981477e-05, + "loss": 1.0069, + "step": 1890 + }, + { + "epoch": 1.05, + "learning_rate": 3.639360665613683e-05, + "loss": 0.9003, + "step": 1895 + }, + { + "epoch": 1.05, + "learning_rate": 3.632911470572197e-05, + "loss": 0.9279, + "step": 1900 + }, + { + "epoch": 1.05, + "learning_rate": 3.626452773945603e-05, + "loss": 0.9237, + "step": 1905 + }, + { + "epoch": 1.06, + "learning_rate": 3.619984629902172e-05, + "loss": 1.0086, + "step": 1910 + }, + { + "epoch": 1.06, + "learning_rate": 3.613507092689409e-05, + "loss": 0.9625, + "step": 1915 + }, + { + "epoch": 1.06, + "learning_rate": 3.607020216633599e-05, + "loss": 0.9297, + "step": 1920 + }, + { + "epoch": 1.06, + "learning_rate": 3.60052405613935e-05, + "loss": 0.9718, + "step": 1925 + }, + { + "epoch": 1.07, + "learning_rate": 3.594018665689139e-05, + "loss": 0.9512, + "step": 1930 + }, + { + "epoch": 1.07, + "learning_rate": 3.5875040998428513e-05, + "loss": 0.9923, + "step": 1935 + }, + { + "epoch": 1.07, + "learning_rate": 3.5809804132373253e-05, + "loss": 0.9518, + "step": 1940 + }, + { + "epoch": 1.08, + "learning_rate": 3.574447660585897e-05, + "loss": 0.9324, + "step": 1945 + }, + { + "epoch": 1.08, + "learning_rate": 3.5679058966779344e-05, + "loss": 0.9724, + "step": 1950 + }, + { + "epoch": 1.08, + "learning_rate": 3.561355176378384e-05, + "loss": 0.978, + "step": 1955 + }, + { + "epoch": 1.08, + "learning_rate": 3.554795554627307e-05, + "loss": 0.9893, + "step": 1960 + }, + { + "epoch": 1.09, + "learning_rate": 3.548227086439422e-05, + "loss": 0.967, + "step": 1965 + }, + { + "epoch": 1.09, + "learning_rate": 3.541649826903639e-05, + "loss": 0.9529, + "step": 1970 + }, + { + "epoch": 1.09, + "learning_rate": 3.535063831182602e-05, + "loss": 0.9506, + "step": 1975 + }, + { + "epoch": 1.09, + "learning_rate": 3.528469154512224e-05, + "loss": 0.9525, + "step": 1980 + }, + { + "epoch": 1.1, + "learning_rate": 3.521865852201223e-05, + "loss": 0.9258, + "step": 1985 + }, + { + "epoch": 1.1, + "learning_rate": 3.5152539796306596e-05, + "loss": 0.9417, + "step": 1990 + }, + { + "epoch": 1.1, + "learning_rate": 3.508633592253472e-05, + "loss": 0.9465, + "step": 1995 + }, + { + "epoch": 1.11, + "learning_rate": 3.502004745594011e-05, + "loss": 0.9494, + "step": 2000 + }, + { + "epoch": 1.11, + "learning_rate": 3.4953674952475755e-05, + "loss": 0.9709, + "step": 2005 + }, + { + "epoch": 1.11, + "learning_rate": 3.488721896879943e-05, + "loss": 0.9581, + "step": 2010 + }, + { + "epoch": 1.11, + "learning_rate": 3.4820680062269074e-05, + "loss": 0.974, + "step": 2015 + }, + { + "epoch": 1.12, + "learning_rate": 3.4754058790938046e-05, + "loss": 0.9768, + "step": 2020 + }, + { + "epoch": 1.12, + "learning_rate": 3.468735571355055e-05, + "loss": 0.982, + "step": 2025 + }, + { + "epoch": 1.12, + "learning_rate": 3.4620571389536825e-05, + "loss": 0.984, + "step": 2030 + }, + { + "epoch": 1.13, + "learning_rate": 3.455370637900856e-05, + "loss": 0.9604, + "step": 2035 + }, + { + "epoch": 1.13, + "learning_rate": 3.448676124275414e-05, + "loss": 0.897, + "step": 2040 + }, + { + "epoch": 1.13, + "learning_rate": 3.4419736542233925e-05, + "loss": 0.9968, + "step": 2045 + }, + { + "epoch": 1.13, + "learning_rate": 3.4352632839575616e-05, + "loss": 0.9479, + "step": 2050 + }, + { + "epoch": 1.14, + "learning_rate": 3.428545069756946e-05, + "loss": 0.9724, + "step": 2055 + }, + { + "epoch": 1.14, + "learning_rate": 3.42181906796636e-05, + "loss": 0.9493, + "step": 2060 + }, + { + "epoch": 1.14, + "learning_rate": 3.415085334995927e-05, + "loss": 0.9348, + "step": 2065 + }, + { + "epoch": 1.14, + "learning_rate": 3.408343927320613e-05, + "loss": 0.9702, + "step": 2070 + }, + { + "epoch": 1.15, + "learning_rate": 3.401594901479753e-05, + "loss": 0.9089, + "step": 2075 + }, + { + "epoch": 1.15, + "learning_rate": 3.394838314076572e-05, + "loss": 0.9606, + "step": 2080 + }, + { + "epoch": 1.15, + "learning_rate": 3.3880742217777115e-05, + "loss": 0.9743, + "step": 2085 + }, + { + "epoch": 1.16, + "learning_rate": 3.381302681312759e-05, + "loss": 0.9469, + "step": 2090 + }, + { + "epoch": 1.16, + "learning_rate": 3.374523749473767e-05, + "loss": 0.949, + "step": 2095 + }, + { + "epoch": 1.16, + "learning_rate": 3.367737483114779e-05, + "loss": 0.9567, + "step": 2100 + }, + { + "epoch": 1.16, + "learning_rate": 3.360943939151351e-05, + "loss": 0.9718, + "step": 2105 + }, + { + "epoch": 1.17, + "learning_rate": 3.354143174560078e-05, + "loss": 0.9626, + "step": 2110 + }, + { + "epoch": 1.17, + "learning_rate": 3.3473352463781105e-05, + "loss": 0.9346, + "step": 2115 + }, + { + "epoch": 1.17, + "learning_rate": 3.340520211702681e-05, + "loss": 0.9208, + "step": 2120 + }, + { + "epoch": 1.18, + "learning_rate": 3.333698127690623e-05, + "loss": 0.9856, + "step": 2125 + }, + { + "epoch": 1.18, + "learning_rate": 3.326869051557891e-05, + "loss": 0.9049, + "step": 2130 + }, + { + "epoch": 1.18, + "learning_rate": 3.320033040579082e-05, + "loss": 0.9222, + "step": 2135 + }, + { + "epoch": 1.18, + "learning_rate": 3.3131901520869565e-05, + "loss": 0.9648, + "step": 2140 + }, + { + "epoch": 1.19, + "learning_rate": 3.306340443471951e-05, + "loss": 0.9538, + "step": 2145 + }, + { + "epoch": 1.19, + "learning_rate": 3.299483972181708e-05, + "loss": 0.9314, + "step": 2150 + }, + { + "epoch": 1.19, + "learning_rate": 3.292620795720583e-05, + "loss": 0.9576, + "step": 2155 + }, + { + "epoch": 1.19, + "learning_rate": 3.285750971649167e-05, + "loss": 0.9499, + "step": 2160 + }, + { + "epoch": 1.2, + "learning_rate": 3.278874557583807e-05, + "loss": 0.9568, + "step": 2165 + }, + { + "epoch": 1.2, + "learning_rate": 3.271991611196117e-05, + "loss": 0.9642, + "step": 2170 + }, + { + "epoch": 1.2, + "learning_rate": 3.265102190212497e-05, + "loss": 0.9526, + "step": 2175 + }, + { + "epoch": 1.21, + "learning_rate": 3.258206352413648e-05, + "loss": 0.933, + "step": 2180 + }, + { + "epoch": 1.21, + "learning_rate": 3.2513041556340887e-05, + "loss": 0.9683, + "step": 2185 + }, + { + "epoch": 1.21, + "learning_rate": 3.244395657761671e-05, + "loss": 0.9155, + "step": 2190 + }, + { + "epoch": 1.21, + "learning_rate": 3.2374809167370924e-05, + "loss": 0.9262, + "step": 2195 + }, + { + "epoch": 1.22, + "learning_rate": 3.230559990553409e-05, + "loss": 0.9778, + "step": 2200 + }, + { + "epoch": 1.22, + "learning_rate": 3.2236329372555544e-05, + "loss": 0.9577, + "step": 2205 + }, + { + "epoch": 1.22, + "learning_rate": 3.2166998149398465e-05, + "loss": 0.9286, + "step": 2210 + }, + { + "epoch": 1.22, + "learning_rate": 3.209760681753505e-05, + "loss": 0.9634, + "step": 2215 + }, + { + "epoch": 1.23, + "learning_rate": 3.2028155958941615e-05, + "loss": 0.9467, + "step": 2220 + }, + { + "epoch": 1.23, + "learning_rate": 3.195864615609373e-05, + "loss": 0.9543, + "step": 2225 + }, + { + "epoch": 1.23, + "learning_rate": 3.1889077991961304e-05, + "loss": 0.9914, + "step": 2230 + }, + { + "epoch": 1.24, + "learning_rate": 3.181945205000373e-05, + "loss": 0.9309, + "step": 2235 + }, + { + "epoch": 1.24, + "learning_rate": 3.1749768914164955e-05, + "loss": 0.9299, + "step": 2240 + }, + { + "epoch": 1.24, + "learning_rate": 3.168002916886864e-05, + "loss": 0.9462, + "step": 2245 + }, + { + "epoch": 1.24, + "learning_rate": 3.1610233399013194e-05, + "loss": 0.948, + "step": 2250 + }, + { + "epoch": 1.25, + "learning_rate": 3.15403821899669e-05, + "loss": 0.9335, + "step": 2255 + }, + { + "epoch": 1.25, + "learning_rate": 3.147047612756302e-05, + "loss": 0.952, + "step": 2260 + }, + { + "epoch": 1.25, + "learning_rate": 3.140051579809484e-05, + "loss": 0.9532, + "step": 2265 + }, + { + "epoch": 1.26, + "learning_rate": 3.133050178831079e-05, + "loss": 0.9853, + "step": 2270 + }, + { + "epoch": 1.26, + "learning_rate": 3.12604346854095e-05, + "loss": 0.8925, + "step": 2275 + }, + { + "epoch": 1.26, + "learning_rate": 3.119031507703491e-05, + "loss": 0.942, + "step": 2280 + }, + { + "epoch": 1.26, + "learning_rate": 3.112014355127129e-05, + "loss": 0.9132, + "step": 2285 + }, + { + "epoch": 1.27, + "learning_rate": 3.104992069663835e-05, + "loss": 0.9335, + "step": 2290 + }, + { + "epoch": 1.27, + "learning_rate": 3.0979647102086273e-05, + "loss": 0.9403, + "step": 2295 + }, + { + "epoch": 1.27, + "learning_rate": 3.090932335699081e-05, + "loss": 0.9246, + "step": 2300 + }, + { + "epoch": 1.27, + "learning_rate": 3.083895005114831e-05, + "loss": 0.912, + "step": 2305 + }, + { + "epoch": 1.28, + "learning_rate": 3.076852777477079e-05, + "loss": 0.9334, + "step": 2310 + }, + { + "epoch": 1.28, + "learning_rate": 3.069805711848096e-05, + "loss": 0.933, + "step": 2315 + }, + { + "epoch": 1.28, + "learning_rate": 3.062753867330729e-05, + "loss": 0.9348, + "step": 2320 + }, + { + "epoch": 1.29, + "learning_rate": 3.055697303067905e-05, + "loss": 0.9997, + "step": 2325 + }, + { + "epoch": 1.29, + "learning_rate": 3.048636078242137e-05, + "loss": 0.9196, + "step": 2330 + }, + { + "epoch": 1.29, + "learning_rate": 3.0415702520750235e-05, + "loss": 0.9735, + "step": 2335 + }, + { + "epoch": 1.29, + "learning_rate": 3.0344998838267525e-05, + "loss": 0.9395, + "step": 2340 + }, + { + "epoch": 1.3, + "learning_rate": 3.0274250327956093e-05, + "loss": 0.9455, + "step": 2345 + }, + { + "epoch": 1.3, + "learning_rate": 3.020345758317474e-05, + "loss": 0.968, + "step": 2350 + }, + { + "epoch": 1.3, + "learning_rate": 3.0132621197653245e-05, + "loss": 0.9403, + "step": 2355 + }, + { + "epoch": 1.3, + "learning_rate": 3.0061741765487418e-05, + "loss": 0.9267, + "step": 2360 + }, + { + "epoch": 1.31, + "learning_rate": 2.9990819881134073e-05, + "loss": 0.9734, + "step": 2365 + }, + { + "epoch": 1.31, + "learning_rate": 2.9919856139406093e-05, + "loss": 0.949, + "step": 2370 + }, + { + "epoch": 1.31, + "learning_rate": 2.9848851135467386e-05, + "loss": 0.9464, + "step": 2375 + }, + { + "epoch": 1.32, + "learning_rate": 2.977780546482794e-05, + "loss": 0.9709, + "step": 2380 + }, + { + "epoch": 1.32, + "learning_rate": 2.9706719723338795e-05, + "loss": 0.9202, + "step": 2385 + }, + { + "epoch": 1.32, + "learning_rate": 2.9635594507187074e-05, + "loss": 0.9625, + "step": 2390 + }, + { + "epoch": 1.32, + "learning_rate": 2.956443041289096e-05, + "loss": 0.9307, + "step": 2395 + }, + { + "epoch": 1.33, + "learning_rate": 2.9493228037294702e-05, + "loss": 0.907, + "step": 2400 + }, + { + "epoch": 1.33, + "learning_rate": 2.9421987977563613e-05, + "loss": 0.9487, + "step": 2405 + }, + { + "epoch": 1.33, + "learning_rate": 2.935071083117907e-05, + "loss": 0.9259, + "step": 2410 + }, + { + "epoch": 1.34, + "learning_rate": 2.9279397195933457e-05, + "loss": 0.9357, + "step": 2415 + }, + { + "epoch": 1.34, + "learning_rate": 2.920804766992521e-05, + "loss": 0.9149, + "step": 2420 + }, + { + "epoch": 1.34, + "learning_rate": 2.9136662851553787e-05, + "loss": 0.9664, + "step": 2425 + }, + { + "epoch": 1.34, + "learning_rate": 2.906524333951461e-05, + "loss": 0.973, + "step": 2430 + }, + { + "epoch": 1.35, + "learning_rate": 2.899378973279409e-05, + "loss": 0.9797, + "step": 2435 + }, + { + "epoch": 1.35, + "learning_rate": 2.892230263066459e-05, + "loss": 0.9604, + "step": 2440 + }, + { + "epoch": 1.35, + "learning_rate": 2.885078263267938e-05, + "loss": 0.9147, + "step": 2445 + }, + { + "epoch": 1.35, + "learning_rate": 2.8779230338667634e-05, + "loss": 0.9827, + "step": 2450 + }, + { + "epoch": 1.36, + "learning_rate": 2.870764634872939e-05, + "loss": 0.9362, + "step": 2455 + }, + { + "epoch": 1.36, + "learning_rate": 2.86360312632305e-05, + "loss": 0.9373, + "step": 2460 + }, + { + "epoch": 1.36, + "learning_rate": 2.8564385682797622e-05, + "loss": 0.955, + "step": 2465 + }, + { + "epoch": 1.37, + "learning_rate": 2.8492710208313177e-05, + "loss": 0.9108, + "step": 2470 + }, + { + "epoch": 1.37, + "learning_rate": 2.8421005440910303e-05, + "loss": 0.9259, + "step": 2475 + }, + { + "epoch": 1.37, + "learning_rate": 2.8349271981967797e-05, + "loss": 0.9333, + "step": 2480 + }, + { + "epoch": 1.37, + "learning_rate": 2.8277510433105102e-05, + "loss": 0.9437, + "step": 2485 + }, + { + "epoch": 1.38, + "learning_rate": 2.820572139617725e-05, + "loss": 0.9189, + "step": 2490 + }, + { + "epoch": 1.38, + "learning_rate": 2.8133905473269802e-05, + "loss": 0.9522, + "step": 2495 + }, + { + "epoch": 1.38, + "learning_rate": 2.8062063266693818e-05, + "loss": 0.9352, + "step": 2500 + }, + { + "epoch": 1.39, + "learning_rate": 2.7990195378980784e-05, + "loss": 0.9232, + "step": 2505 + }, + { + "epoch": 1.39, + "learning_rate": 2.7918302412877583e-05, + "loss": 0.8807, + "step": 2510 + }, + { + "epoch": 1.39, + "learning_rate": 2.7846384971341427e-05, + "loss": 0.9231, + "step": 2515 + }, + { + "epoch": 1.39, + "learning_rate": 2.7774443657534788e-05, + "loss": 0.9253, + "step": 2520 + }, + { + "epoch": 1.4, + "learning_rate": 2.770247907482036e-05, + "loss": 0.9646, + "step": 2525 + }, + { + "epoch": 1.4, + "learning_rate": 2.763049182675599e-05, + "loss": 0.9575, + "step": 2530 + }, + { + "epoch": 1.4, + "learning_rate": 2.7558482517089617e-05, + "loss": 0.9234, + "step": 2535 + }, + { + "epoch": 1.4, + "learning_rate": 2.748645174975421e-05, + "loss": 0.9215, + "step": 2540 + }, + { + "epoch": 1.41, + "learning_rate": 2.74144001288627e-05, + "loss": 0.959, + "step": 2545 + }, + { + "epoch": 1.41, + "learning_rate": 2.7342328258702894e-05, + "loss": 0.9509, + "step": 2550 + }, + { + "epoch": 1.41, + "learning_rate": 2.727023674373246e-05, + "loss": 0.8989, + "step": 2555 + }, + { + "epoch": 1.42, + "learning_rate": 2.7198126188573807e-05, + "loss": 0.9653, + "step": 2560 + }, + { + "epoch": 1.42, + "learning_rate": 2.7125997198009028e-05, + "loss": 0.9046, + "step": 2565 + }, + { + "epoch": 1.42, + "learning_rate": 2.7053850376974848e-05, + "loss": 0.9318, + "step": 2570 + }, + { + "epoch": 1.42, + "learning_rate": 2.6981686330557516e-05, + "loss": 0.9292, + "step": 2575 + }, + { + "epoch": 1.43, + "learning_rate": 2.6909505663987756e-05, + "loss": 0.99, + "step": 2580 + }, + { + "epoch": 1.43, + "learning_rate": 2.6837308982635678e-05, + "loss": 0.9737, + "step": 2585 + }, + { + "epoch": 1.43, + "learning_rate": 2.6765096892005726e-05, + "loss": 0.9649, + "step": 2590 + }, + { + "epoch": 1.43, + "learning_rate": 2.6692869997731545e-05, + "loss": 0.9687, + "step": 2595 + }, + { + "epoch": 1.44, + "learning_rate": 2.6620628905570964e-05, + "loss": 0.9708, + "step": 2600 + }, + { + "epoch": 1.44, + "learning_rate": 2.6548374221400884e-05, + "loss": 0.9498, + "step": 2605 + }, + { + "epoch": 1.44, + "learning_rate": 2.6476106551212188e-05, + "loss": 0.9512, + "step": 2610 + }, + { + "epoch": 1.45, + "learning_rate": 2.6403826501104682e-05, + "loss": 0.964, + "step": 2615 + }, + { + "epoch": 1.45, + "learning_rate": 2.6331534677281998e-05, + "loss": 0.9321, + "step": 2620 + }, + { + "epoch": 1.45, + "learning_rate": 2.6259231686046508e-05, + "loss": 0.9032, + "step": 2625 + }, + { + "epoch": 1.45, + "learning_rate": 2.6186918133794252e-05, + "loss": 0.9543, + "step": 2630 + }, + { + "epoch": 1.46, + "learning_rate": 2.6114594627009847e-05, + "loss": 0.9355, + "step": 2635 + }, + { + "epoch": 1.46, + "learning_rate": 2.604226177226137e-05, + "loss": 0.9111, + "step": 2640 + }, + { + "epoch": 1.46, + "learning_rate": 2.596992017619534e-05, + "loss": 0.9896, + "step": 2645 + }, + { + "epoch": 1.47, + "learning_rate": 2.589757044553155e-05, + "loss": 0.9715, + "step": 2650 + }, + { + "epoch": 1.47, + "learning_rate": 2.5825213187058045e-05, + "loss": 0.8911, + "step": 2655 + }, + { + "epoch": 1.47, + "learning_rate": 2.5752849007625986e-05, + "loss": 0.9446, + "step": 2660 + }, + { + "epoch": 1.47, + "learning_rate": 2.568047851414459e-05, + "loss": 0.9453, + "step": 2665 + }, + { + "epoch": 1.48, + "learning_rate": 2.5608102313576027e-05, + "loss": 0.9244, + "step": 2670 + }, + { + "epoch": 1.48, + "learning_rate": 2.553572101293033e-05, + "loss": 0.9478, + "step": 2675 + }, + { + "epoch": 1.48, + "learning_rate": 2.546333521926031e-05, + "loss": 0.9708, + "step": 2680 + }, + { + "epoch": 1.48, + "learning_rate": 2.5390945539656445e-05, + "loss": 0.9266, + "step": 2685 + }, + { + "epoch": 1.49, + "learning_rate": 2.5318552581241822e-05, + "loss": 0.9199, + "step": 2690 + }, + { + "epoch": 1.49, + "learning_rate": 2.524615695116702e-05, + "loss": 0.9666, + "step": 2695 + }, + { + "epoch": 1.49, + "learning_rate": 2.5173759256605027e-05, + "loss": 0.9247, + "step": 2700 + }, + { + "epoch": 1.5, + "learning_rate": 2.510136010474614e-05, + "loss": 0.9534, + "step": 2705 + }, + { + "epoch": 1.5, + "learning_rate": 2.5028960102792887e-05, + "loss": 0.9502, + "step": 2710 + }, + { + "epoch": 1.5, + "learning_rate": 2.4971039897207112e-05, + "loss": 0.9331, + "step": 2715 + }, + { + "epoch": 1.5, + "learning_rate": 2.4898639895253865e-05, + "loss": 0.9806, + "step": 2720 + }, + { + "epoch": 1.51, + "learning_rate": 2.4826240743394982e-05, + "loss": 0.9525, + "step": 2725 + }, + { + "epoch": 1.51, + "learning_rate": 2.4753843048832985e-05, + "loss": 0.937, + "step": 2730 + }, + { + "epoch": 1.51, + "learning_rate": 2.4681447418758187e-05, + "loss": 0.935, + "step": 2735 + }, + { + "epoch": 1.52, + "learning_rate": 2.460905446034356e-05, + "loss": 0.9662, + "step": 2740 + }, + { + "epoch": 1.52, + "learning_rate": 2.45366647807397e-05, + "loss": 0.9249, + "step": 2745 + }, + { + "epoch": 1.52, + "learning_rate": 2.446427898706967e-05, + "loss": 0.9673, + "step": 2750 + }, + { + "epoch": 1.52, + "learning_rate": 2.439189768642398e-05, + "loss": 0.9316, + "step": 2755 + }, + { + "epoch": 1.53, + "learning_rate": 2.431952148585541e-05, + "loss": 0.9297, + "step": 2760 + }, + { + "epoch": 1.53, + "learning_rate": 2.424715099237402e-05, + "loss": 0.9278, + "step": 2765 + }, + { + "epoch": 1.53, + "learning_rate": 2.4174786812941968e-05, + "loss": 0.8954, + "step": 2770 + }, + { + "epoch": 1.53, + "learning_rate": 2.4102429554468456e-05, + "loss": 0.9586, + "step": 2775 + }, + { + "epoch": 1.54, + "learning_rate": 2.4030079823804673e-05, + "loss": 0.9119, + "step": 2780 + }, + { + "epoch": 1.54, + "learning_rate": 2.395773822773863e-05, + "loss": 0.8949, + "step": 2785 + }, + { + "epoch": 1.54, + "learning_rate": 2.3885405372990166e-05, + "loss": 0.9506, + "step": 2790 + }, + { + "epoch": 1.55, + "learning_rate": 2.3813081866205754e-05, + "loss": 0.9087, + "step": 2795 + }, + { + "epoch": 1.55, + "learning_rate": 2.3740768313953494e-05, + "loss": 0.923, + "step": 2800 + }, + { + "epoch": 1.55, + "learning_rate": 2.3668465322718004e-05, + "loss": 1.0212, + "step": 2805 + }, + { + "epoch": 1.55, + "learning_rate": 2.359617349889532e-05, + "loss": 0.9182, + "step": 2810 + }, + { + "epoch": 1.56, + "learning_rate": 2.3523893448787818e-05, + "loss": 0.9316, + "step": 2815 + }, + { + "epoch": 1.56, + "learning_rate": 2.3451625778599122e-05, + "loss": 0.94, + "step": 2820 + }, + { + "epoch": 1.56, + "learning_rate": 2.3379371094429038e-05, + "loss": 0.9181, + "step": 2825 + }, + { + "epoch": 1.56, + "learning_rate": 2.3307130002268457e-05, + "loss": 0.937, + "step": 2830 + }, + { + "epoch": 1.57, + "learning_rate": 2.3234903107994287e-05, + "loss": 0.9026, + "step": 2835 + }, + { + "epoch": 1.57, + "learning_rate": 2.3162691017364317e-05, + "loss": 0.954, + "step": 2840 + }, + { + "epoch": 1.57, + "learning_rate": 2.3090494336012253e-05, + "loss": 0.9661, + "step": 2845 + }, + { + "epoch": 1.58, + "learning_rate": 2.3018313669442483e-05, + "loss": 0.9127, + "step": 2850 + }, + { + "epoch": 1.58, + "learning_rate": 2.2946149623025158e-05, + "loss": 0.9317, + "step": 2855 + }, + { + "epoch": 1.58, + "learning_rate": 2.2874002801990978e-05, + "loss": 0.9856, + "step": 2860 + }, + { + "epoch": 1.58, + "learning_rate": 2.28018738114262e-05, + "loss": 1.0021, + "step": 2865 + }, + { + "epoch": 1.59, + "learning_rate": 2.272976325626755e-05, + "loss": 0.9655, + "step": 2870 + }, + { + "epoch": 1.59, + "learning_rate": 2.265767174129711e-05, + "loss": 0.9619, + "step": 2875 + }, + { + "epoch": 1.59, + "learning_rate": 2.2585599871137313e-05, + "loss": 0.9383, + "step": 2880 + }, + { + "epoch": 1.6, + "learning_rate": 2.251354825024579e-05, + "loss": 0.9332, + "step": 2885 + }, + { + "epoch": 1.6, + "learning_rate": 2.244151748291039e-05, + "loss": 0.9544, + "step": 2890 + }, + { + "epoch": 1.6, + "learning_rate": 2.236950817324401e-05, + "loss": 0.9343, + "step": 2895 + }, + { + "epoch": 1.6, + "learning_rate": 2.2297520925179647e-05, + "loss": 0.9189, + "step": 2900 + } + ], + "max_steps": 5424, + "num_train_epochs": 3, + "total_flos": 9.423935937026458e+17, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-2900/training_args.bin b/checkpoint-2900/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..77433459daca6603950afc463ff989e817f81ac3 --- /dev/null +++ b/checkpoint-2900/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0f6a8853599a52b1f15984d75ce4b6434c335fdc67d4509d0b2ca0d8c613590a +size 3362 diff --git a/checkpoint-300/README.md b/checkpoint-300/README.md new file mode 100644 index 0000000000000000000000000000000000000000..53c0fe6b436905ad7a2c060e98fe1ebdeac1a2fe --- /dev/null +++ b/checkpoint-300/README.md @@ -0,0 +1,15 @@ +--- +library_name: peft +--- +## Training procedure + + +The following `bitsandbytes` quantization config was used during training: +- load_in_8bit: True +- llm_int8_threshold: 6.0 +- llm_int8_skip_modules: None +- llm_int8_enable_fp32_cpu_offload: False +### Framework versions + + +- PEFT 0.5.0 diff --git a/checkpoint-300/adapter_config.json b/checkpoint-300/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..514ae99ac7471cc26016f7575e44dd8304f29b07 --- /dev/null +++ b/checkpoint-300/adapter_config.json @@ -0,0 +1,21 @@ +{ + "auto_mapping": null, + "base_model_name_or_path": "meta-llama/Llama-2-7b-chat-hf", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "q_proj", + "v_proj" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-300/adapter_model.bin b/checkpoint-300/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..ffc7e2dd14d6fabf884b6a1389965b7ec256003b --- /dev/null +++ b/checkpoint-300/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ca7cf736344a1a1f5cee694c18814b64da5a5ea9f505522486df689c644772e3 +size 16821197 diff --git a/checkpoint-300/finetuning_args.json b/checkpoint-300/finetuning_args.json new file mode 100644 index 0000000000000000000000000000000000000000..d01efc206b59c6f88548e8f3940579f2ed2af33b --- /dev/null +++ b/checkpoint-300/finetuning_args.json @@ -0,0 +1,16 @@ +{ + "dpo_beta": 0.1, + "finetuning_type": "lora", + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "lora_rank": 8, + "lora_target": [ + "q_proj", + "v_proj" + ], + "name_module_trainable": "mlp", + "num_hidden_layers": 32, + "num_layer_trainable": 3, + "ppo_score_norm": false, + "resume_lora_training": true +} diff --git a/checkpoint-300/optimizer.pt b/checkpoint-300/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..5405108f2f299b6236a10867d4422117663a2026 --- /dev/null +++ b/checkpoint-300/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1a39a09f19e3211227a8ef3cac2843011149a45a5048103ae3bbb5e56dd1c3a5 +size 33629893 diff --git a/checkpoint-300/rng_state.pth b/checkpoint-300/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..3e848424d7795f701640168e5acee757314e5ee5 --- /dev/null +++ b/checkpoint-300/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4017a40cb9b1bc7ec46cb70f38590e340cd66c41d9e554e6fa7a5f79f3e2ef5e +size 14575 diff --git a/checkpoint-300/scaler.pt b/checkpoint-300/scaler.pt new file mode 100644 index 0000000000000000000000000000000000000000..96d1b8ea2cbad321ffe9c10840a99bb1bcef18f5 --- /dev/null +++ b/checkpoint-300/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:efafd90182e3d39d1b7c4a686f86e5913f5abc094dc3e2f827a6d479c6cef247 +size 557 diff --git a/checkpoint-300/scheduler.pt b/checkpoint-300/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..602f76cb5280c073fbd1a1124e6e7a907d851a75 --- /dev/null +++ b/checkpoint-300/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ac03a879fe529e00fdb3ed4d4fa355e56834dce0e65a5a58a5da0c54d8af1a4a +size 627 diff --git a/checkpoint-300/trainer_state.json b/checkpoint-300/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..eaf62055a7afbb3421e884437b2c8211b5cf3bc9 --- /dev/null +++ b/checkpoint-300/trainer_state.json @@ -0,0 +1,376 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.1658833287254631, + "global_step": 300, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.9999895164082156e-05, + "loss": 1.6349, + "step": 5 + }, + { + "epoch": 0.01, + "learning_rate": 4.999958065720787e-05, + "loss": 1.6199, + "step": 10 + }, + { + "epoch": 0.01, + "learning_rate": 4.999905648201487e-05, + "loss": 1.4834, + "step": 15 + }, + { + "epoch": 0.01, + "learning_rate": 4.999832264289934e-05, + "loss": 1.3882, + "step": 20 + }, + { + "epoch": 0.01, + "learning_rate": 4.999737914601591e-05, + "loss": 1.3679, + "step": 25 + }, + { + "epoch": 0.02, + "learning_rate": 4.999622599927756e-05, + "loss": 1.2396, + "step": 30 + }, + { + "epoch": 0.02, + "learning_rate": 4.999486321235559e-05, + "loss": 1.321, + "step": 35 + }, + { + "epoch": 0.02, + "learning_rate": 4.9993290796679516e-05, + "loss": 1.2874, + "step": 40 + }, + { + "epoch": 0.02, + "learning_rate": 4.999150876543699e-05, + "loss": 1.2607, + "step": 45 + }, + { + "epoch": 0.03, + "learning_rate": 4.9989517133573694e-05, + "loss": 1.2454, + "step": 50 + }, + { + "epoch": 0.03, + "learning_rate": 4.9987315917793174e-05, + "loss": 1.2799, + "step": 55 + }, + { + "epoch": 0.03, + "learning_rate": 4.998490513655676e-05, + "loss": 1.2575, + "step": 60 + }, + { + "epoch": 0.04, + "learning_rate": 4.998228481008337e-05, + "loss": 1.2404, + "step": 65 + }, + { + "epoch": 0.04, + "learning_rate": 4.997945496034934e-05, + "loss": 1.2219, + "step": 70 + }, + { + "epoch": 0.04, + "learning_rate": 4.9976415611088267e-05, + "loss": 1.2241, + "step": 75 + }, + { + "epoch": 0.04, + "learning_rate": 4.997316678779079e-05, + "loss": 1.1716, + "step": 80 + }, + { + "epoch": 0.05, + "learning_rate": 4.996970851770438e-05, + "loss": 1.1883, + "step": 85 + }, + { + "epoch": 0.05, + "learning_rate": 4.9966040829833115e-05, + "loss": 1.205, + "step": 90 + }, + { + "epoch": 0.05, + "learning_rate": 4.9962163754937426e-05, + "loss": 1.1246, + "step": 95 + }, + { + "epoch": 0.06, + "learning_rate": 4.995807732553384e-05, + "loss": 1.1636, + "step": 100 + }, + { + "epoch": 0.06, + "learning_rate": 4.9953781575894723e-05, + "loss": 1.158, + "step": 105 + }, + { + "epoch": 0.06, + "learning_rate": 4.9949276542048e-05, + "loss": 1.1477, + "step": 110 + }, + { + "epoch": 0.06, + "learning_rate": 4.9944562261776805e-05, + "loss": 1.1678, + "step": 115 + }, + { + "epoch": 0.07, + "learning_rate": 4.9939638774619216e-05, + "loss": 1.1501, + "step": 120 + }, + { + "epoch": 0.07, + "learning_rate": 4.99345061218679e-05, + "loss": 1.1955, + "step": 125 + }, + { + "epoch": 0.07, + "learning_rate": 4.9929164346569756e-05, + "loss": 1.1724, + "step": 130 + }, + { + "epoch": 0.07, + "learning_rate": 4.9923613493525576e-05, + "loss": 1.177, + "step": 135 + }, + { + "epoch": 0.08, + "learning_rate": 4.991785360928968e-05, + "loss": 1.1418, + "step": 140 + }, + { + "epoch": 0.08, + "learning_rate": 4.991188474216947e-05, + "loss": 1.1898, + "step": 145 + }, + { + "epoch": 0.08, + "learning_rate": 4.9905706942225094e-05, + "loss": 1.1479, + "step": 150 + }, + { + "epoch": 0.09, + "learning_rate": 4.9899320261268966e-05, + "loss": 1.1356, + "step": 155 + }, + { + "epoch": 0.09, + "learning_rate": 4.989272475286537e-05, + "loss": 1.1397, + "step": 160 + }, + { + "epoch": 0.09, + "learning_rate": 4.9885920472330004e-05, + "loss": 1.1215, + "step": 165 + }, + { + "epoch": 0.09, + "learning_rate": 4.9878907476729516e-05, + "loss": 1.167, + "step": 170 + }, + { + "epoch": 0.1, + "learning_rate": 4.9871685824881e-05, + "loss": 1.1219, + "step": 175 + }, + { + "epoch": 0.1, + "learning_rate": 4.9864255577351534e-05, + "loss": 1.0835, + "step": 180 + }, + { + "epoch": 0.1, + "learning_rate": 4.985661679645769e-05, + "loss": 1.0721, + "step": 185 + }, + { + "epoch": 0.11, + "learning_rate": 4.9848769546264915e-05, + "loss": 1.0692, + "step": 190 + }, + { + "epoch": 0.11, + "learning_rate": 4.9840713892587146e-05, + "loss": 1.0488, + "step": 195 + }, + { + "epoch": 0.11, + "learning_rate": 4.983244990298609e-05, + "loss": 1.1285, + "step": 200 + }, + { + "epoch": 0.11, + "learning_rate": 4.982397764677081e-05, + "loss": 1.0832, + "step": 205 + }, + { + "epoch": 0.12, + "learning_rate": 4.981529719499704e-05, + "loss": 1.0652, + "step": 210 + }, + { + "epoch": 0.12, + "learning_rate": 4.980640862046663e-05, + "loss": 1.1043, + "step": 215 + }, + { + "epoch": 0.12, + "learning_rate": 4.979731199772693e-05, + "loss": 1.112, + "step": 220 + }, + { + "epoch": 0.12, + "learning_rate": 4.9788007403070146e-05, + "loss": 1.1029, + "step": 225 + }, + { + "epoch": 0.13, + "learning_rate": 4.977849491453277e-05, + "loss": 1.0869, + "step": 230 + }, + { + "epoch": 0.13, + "learning_rate": 4.976877461189481e-05, + "loss": 1.0843, + "step": 235 + }, + { + "epoch": 0.13, + "learning_rate": 4.975884657667922e-05, + "loss": 1.0789, + "step": 240 + }, + { + "epoch": 0.14, + "learning_rate": 4.974871089215118e-05, + "loss": 1.0449, + "step": 245 + }, + { + "epoch": 0.14, + "learning_rate": 4.9738367643317405e-05, + "loss": 1.1053, + "step": 250 + }, + { + "epoch": 0.14, + "learning_rate": 4.9727816916925395e-05, + "loss": 1.0651, + "step": 255 + }, + { + "epoch": 0.14, + "learning_rate": 4.971705880146276e-05, + "loss": 1.0828, + "step": 260 + }, + { + "epoch": 0.15, + "learning_rate": 4.970609338715646e-05, + "loss": 1.0932, + "step": 265 + }, + { + "epoch": 0.15, + "learning_rate": 4.969492076597203e-05, + "loss": 1.0648, + "step": 270 + }, + { + "epoch": 0.15, + "learning_rate": 4.968354103161283e-05, + "loss": 1.0948, + "step": 275 + }, + { + "epoch": 0.15, + "learning_rate": 4.967195427951926e-05, + "loss": 1.0721, + "step": 280 + }, + { + "epoch": 0.16, + "learning_rate": 4.9660160606867936e-05, + "loss": 1.124, + "step": 285 + }, + { + "epoch": 0.16, + "learning_rate": 4.9648160112570896e-05, + "loss": 1.0963, + "step": 290 + }, + { + "epoch": 0.16, + "learning_rate": 4.9635952897274773e-05, + "loss": 1.1078, + "step": 295 + }, + { + "epoch": 0.17, + "learning_rate": 4.9623539063359925e-05, + "loss": 1.1059, + "step": 300 + } + ], + "max_steps": 5424, + "num_train_epochs": 3, + "total_flos": 9.7491093553152e+16, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-300/training_args.bin b/checkpoint-300/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..77433459daca6603950afc463ff989e817f81ac3 --- /dev/null +++ b/checkpoint-300/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0f6a8853599a52b1f15984d75ce4b6434c335fdc67d4509d0b2ca0d8c613590a +size 3362 diff --git a/checkpoint-3000/README.md b/checkpoint-3000/README.md new file mode 100644 index 0000000000000000000000000000000000000000..53c0fe6b436905ad7a2c060e98fe1ebdeac1a2fe --- /dev/null +++ b/checkpoint-3000/README.md @@ -0,0 +1,15 @@ +--- +library_name: peft +--- +## Training procedure + + +The following `bitsandbytes` quantization config was used during training: +- load_in_8bit: True +- llm_int8_threshold: 6.0 +- llm_int8_skip_modules: None +- llm_int8_enable_fp32_cpu_offload: False +### Framework versions + + +- PEFT 0.5.0 diff --git a/checkpoint-3000/adapter_config.json b/checkpoint-3000/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..514ae99ac7471cc26016f7575e44dd8304f29b07 --- /dev/null +++ b/checkpoint-3000/adapter_config.json @@ -0,0 +1,21 @@ +{ + "auto_mapping": null, + "base_model_name_or_path": "meta-llama/Llama-2-7b-chat-hf", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "q_proj", + "v_proj" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-3000/adapter_model.bin b/checkpoint-3000/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..41906c5529dd8f9cd4076cfb449a5b21a060a9c3 --- /dev/null +++ b/checkpoint-3000/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:500f5634742c5f745a444367f063540caf10a138110638ec092ebb5851963fe7 +size 16821197 diff --git a/checkpoint-3000/finetuning_args.json b/checkpoint-3000/finetuning_args.json new file mode 100644 index 0000000000000000000000000000000000000000..d01efc206b59c6f88548e8f3940579f2ed2af33b --- /dev/null +++ b/checkpoint-3000/finetuning_args.json @@ -0,0 +1,16 @@ +{ + "dpo_beta": 0.1, + "finetuning_type": "lora", + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "lora_rank": 8, + "lora_target": [ + "q_proj", + "v_proj" + ], + "name_module_trainable": "mlp", + "num_hidden_layers": 32, + "num_layer_trainable": 3, + "ppo_score_norm": false, + "resume_lora_training": true +} diff --git a/checkpoint-3000/optimizer.pt b/checkpoint-3000/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..4bd542f6402b458360cf737725b4eced8f274bbd --- /dev/null +++ b/checkpoint-3000/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2c3ab86b847c4500eeb7ee769c5bd1ee17a084509341f7bff55a5423aa996dd3 +size 33629893 diff --git a/checkpoint-3000/rng_state.pth b/checkpoint-3000/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..d153405baf01590abdfebbfecbe0038462e4dd90 --- /dev/null +++ b/checkpoint-3000/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:48e232c924fa80a1edecd65c897044e6660b644416702faad8f9a7cd90e7d62b +size 14575 diff --git a/checkpoint-3000/scaler.pt b/checkpoint-3000/scaler.pt new file mode 100644 index 0000000000000000000000000000000000000000..fa688268cee4a4f883c7873cc5d8230fa7c23c2e --- /dev/null +++ b/checkpoint-3000/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9e92604eb0ffa7d229c1fe8234f01a6685212faba2eff1f8394731b53c5389fc +size 557 diff --git a/checkpoint-3000/scheduler.pt b/checkpoint-3000/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..36d2a4af110db90db7769bbb0d9f91619eae4131 --- /dev/null +++ b/checkpoint-3000/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ebb12b5edd57745a2158c780555a7103cc9fed9eb3d9628890a6cbeb7cce16bf +size 627 diff --git a/checkpoint-3000/trainer_state.json b/checkpoint-3000/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..6276b99ce1ee6fc26a42ce135d8ee8a8b739e2f2 --- /dev/null +++ b/checkpoint-3000/trainer_state.json @@ -0,0 +1,3616 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.6588332872546308, + "global_step": 3000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.9999895164082156e-05, + "loss": 1.6349, + "step": 5 + }, + { + "epoch": 0.01, + "learning_rate": 4.999958065720787e-05, + "loss": 1.6199, + "step": 10 + }, + { + "epoch": 0.01, + "learning_rate": 4.999905648201487e-05, + "loss": 1.4834, + "step": 15 + }, + { + "epoch": 0.01, + "learning_rate": 4.999832264289934e-05, + "loss": 1.3882, + "step": 20 + }, + { + "epoch": 0.01, + "learning_rate": 4.999737914601591e-05, + "loss": 1.3679, + "step": 25 + }, + { + "epoch": 0.02, + "learning_rate": 4.999622599927756e-05, + "loss": 1.2396, + "step": 30 + }, + { + "epoch": 0.02, + "learning_rate": 4.999486321235559e-05, + "loss": 1.321, + "step": 35 + }, + { + "epoch": 0.02, + "learning_rate": 4.9993290796679516e-05, + "loss": 1.2874, + "step": 40 + }, + { + "epoch": 0.02, + "learning_rate": 4.999150876543699e-05, + "loss": 1.2607, + "step": 45 + }, + { + "epoch": 0.03, + "learning_rate": 4.9989517133573694e-05, + "loss": 1.2454, + "step": 50 + }, + { + "epoch": 0.03, + "learning_rate": 4.9987315917793174e-05, + "loss": 1.2799, + "step": 55 + }, + { + "epoch": 0.03, + "learning_rate": 4.998490513655676e-05, + "loss": 1.2575, + "step": 60 + }, + { + "epoch": 0.04, + "learning_rate": 4.998228481008337e-05, + "loss": 1.2404, + "step": 65 + }, + { + "epoch": 0.04, + "learning_rate": 4.997945496034934e-05, + "loss": 1.2219, + "step": 70 + }, + { + "epoch": 0.04, + "learning_rate": 4.9976415611088267e-05, + "loss": 1.2241, + "step": 75 + }, + { + "epoch": 0.04, + "learning_rate": 4.997316678779079e-05, + "loss": 1.1716, + "step": 80 + }, + { + "epoch": 0.05, + "learning_rate": 4.996970851770438e-05, + "loss": 1.1883, + "step": 85 + }, + { + "epoch": 0.05, + "learning_rate": 4.9966040829833115e-05, + "loss": 1.205, + "step": 90 + }, + { + "epoch": 0.05, + "learning_rate": 4.9962163754937426e-05, + "loss": 1.1246, + "step": 95 + }, + { + "epoch": 0.06, + "learning_rate": 4.995807732553384e-05, + "loss": 1.1636, + "step": 100 + }, + { + "epoch": 0.06, + "learning_rate": 4.9953781575894723e-05, + "loss": 1.158, + "step": 105 + }, + { + "epoch": 0.06, + "learning_rate": 4.9949276542048e-05, + "loss": 1.1477, + "step": 110 + }, + { + "epoch": 0.06, + "learning_rate": 4.9944562261776805e-05, + "loss": 1.1678, + "step": 115 + }, + { + "epoch": 0.07, + "learning_rate": 4.9939638774619216e-05, + "loss": 1.1501, + "step": 120 + }, + { + "epoch": 0.07, + "learning_rate": 4.99345061218679e-05, + "loss": 1.1955, + "step": 125 + }, + { + "epoch": 0.07, + "learning_rate": 4.9929164346569756e-05, + "loss": 1.1724, + "step": 130 + }, + { + "epoch": 0.07, + "learning_rate": 4.9923613493525576e-05, + "loss": 1.177, + "step": 135 + }, + { + "epoch": 0.08, + "learning_rate": 4.991785360928968e-05, + "loss": 1.1418, + "step": 140 + }, + { + "epoch": 0.08, + "learning_rate": 4.991188474216947e-05, + "loss": 1.1898, + "step": 145 + }, + { + "epoch": 0.08, + "learning_rate": 4.9905706942225094e-05, + "loss": 1.1479, + "step": 150 + }, + { + "epoch": 0.09, + "learning_rate": 4.9899320261268966e-05, + "loss": 1.1356, + "step": 155 + }, + { + "epoch": 0.09, + "learning_rate": 4.989272475286537e-05, + "loss": 1.1397, + "step": 160 + }, + { + "epoch": 0.09, + "learning_rate": 4.9885920472330004e-05, + "loss": 1.1215, + "step": 165 + }, + { + "epoch": 0.09, + "learning_rate": 4.9878907476729516e-05, + "loss": 1.167, + "step": 170 + }, + { + "epoch": 0.1, + "learning_rate": 4.9871685824881e-05, + "loss": 1.1219, + "step": 175 + }, + { + "epoch": 0.1, + "learning_rate": 4.9864255577351534e-05, + "loss": 1.0835, + "step": 180 + }, + { + "epoch": 0.1, + "learning_rate": 4.985661679645769e-05, + "loss": 1.0721, + "step": 185 + }, + { + "epoch": 0.11, + "learning_rate": 4.9848769546264915e-05, + "loss": 1.0692, + "step": 190 + }, + { + "epoch": 0.11, + "learning_rate": 4.9840713892587146e-05, + "loss": 1.0488, + "step": 195 + }, + { + "epoch": 0.11, + "learning_rate": 4.983244990298609e-05, + "loss": 1.1285, + "step": 200 + }, + { + "epoch": 0.11, + "learning_rate": 4.982397764677081e-05, + "loss": 1.0832, + "step": 205 + }, + { + "epoch": 0.12, + "learning_rate": 4.981529719499704e-05, + "loss": 1.0652, + "step": 210 + }, + { + "epoch": 0.12, + "learning_rate": 4.980640862046663e-05, + "loss": 1.1043, + "step": 215 + }, + { + "epoch": 0.12, + "learning_rate": 4.979731199772693e-05, + "loss": 1.112, + "step": 220 + }, + { + "epoch": 0.12, + "learning_rate": 4.9788007403070146e-05, + "loss": 1.1029, + "step": 225 + }, + { + "epoch": 0.13, + "learning_rate": 4.977849491453277e-05, + "loss": 1.0869, + "step": 230 + }, + { + "epoch": 0.13, + "learning_rate": 4.976877461189481e-05, + "loss": 1.0843, + "step": 235 + }, + { + "epoch": 0.13, + "learning_rate": 4.975884657667922e-05, + "loss": 1.0789, + "step": 240 + }, + { + "epoch": 0.14, + "learning_rate": 4.974871089215118e-05, + "loss": 1.0449, + "step": 245 + }, + { + "epoch": 0.14, + "learning_rate": 4.9738367643317405e-05, + "loss": 1.1053, + "step": 250 + }, + { + "epoch": 0.14, + "learning_rate": 4.9727816916925395e-05, + "loss": 1.0651, + "step": 255 + }, + { + "epoch": 0.14, + "learning_rate": 4.971705880146276e-05, + "loss": 1.0828, + "step": 260 + }, + { + "epoch": 0.15, + "learning_rate": 4.970609338715646e-05, + "loss": 1.0932, + "step": 265 + }, + { + "epoch": 0.15, + "learning_rate": 4.969492076597203e-05, + "loss": 1.0648, + "step": 270 + }, + { + "epoch": 0.15, + "learning_rate": 4.968354103161283e-05, + "loss": 1.0948, + "step": 275 + }, + { + "epoch": 0.15, + "learning_rate": 4.967195427951926e-05, + "loss": 1.0721, + "step": 280 + }, + { + "epoch": 0.16, + "learning_rate": 4.9660160606867936e-05, + "loss": 1.124, + "step": 285 + }, + { + "epoch": 0.16, + "learning_rate": 4.9648160112570896e-05, + "loss": 1.0963, + "step": 290 + }, + { + "epoch": 0.16, + "learning_rate": 4.9635952897274773e-05, + "loss": 1.1078, + "step": 295 + }, + { + "epoch": 0.17, + "learning_rate": 4.9623539063359925e-05, + "loss": 1.1059, + "step": 300 + }, + { + "epoch": 0.17, + "learning_rate": 4.961091871493962e-05, + "loss": 1.1032, + "step": 305 + }, + { + "epoch": 0.17, + "learning_rate": 4.959809195785912e-05, + "loss": 1.0595, + "step": 310 + }, + { + "epoch": 0.17, + "learning_rate": 4.958505889969481e-05, + "loss": 1.1096, + "step": 315 + }, + { + "epoch": 0.18, + "learning_rate": 4.957181964975329e-05, + "loss": 1.0589, + "step": 320 + }, + { + "epoch": 0.18, + "learning_rate": 4.955837431907049e-05, + "loss": 1.0608, + "step": 325 + }, + { + "epoch": 0.18, + "learning_rate": 4.954472302041069e-05, + "loss": 1.0819, + "step": 330 + }, + { + "epoch": 0.19, + "learning_rate": 4.9530865868265605e-05, + "loss": 1.0759, + "step": 335 + }, + { + "epoch": 0.19, + "learning_rate": 4.951680297885342e-05, + "loss": 1.0515, + "step": 340 + }, + { + "epoch": 0.19, + "learning_rate": 4.950253447011779e-05, + "loss": 1.0371, + "step": 345 + }, + { + "epoch": 0.19, + "learning_rate": 4.948806046172691e-05, + "loss": 1.0619, + "step": 350 + }, + { + "epoch": 0.2, + "learning_rate": 4.947338107507245e-05, + "loss": 1.0757, + "step": 355 + }, + { + "epoch": 0.2, + "learning_rate": 4.945849643326857e-05, + "loss": 1.0686, + "step": 360 + }, + { + "epoch": 0.2, + "learning_rate": 4.9443406661150874e-05, + "loss": 1.0809, + "step": 365 + }, + { + "epoch": 0.2, + "learning_rate": 4.942811188527537e-05, + "loss": 1.0704, + "step": 370 + }, + { + "epoch": 0.21, + "learning_rate": 4.941261223391742e-05, + "loss": 1.0655, + "step": 375 + }, + { + "epoch": 0.21, + "learning_rate": 4.939690783707063e-05, + "loss": 1.1182, + "step": 380 + }, + { + "epoch": 0.21, + "learning_rate": 4.938099882644578e-05, + "loss": 1.081, + "step": 385 + }, + { + "epoch": 0.22, + "learning_rate": 4.9364885335469734e-05, + "loss": 1.0792, + "step": 390 + }, + { + "epoch": 0.22, + "learning_rate": 4.93485674992843e-05, + "loss": 1.0244, + "step": 395 + }, + { + "epoch": 0.22, + "learning_rate": 4.933204545474511e-05, + "loss": 1.0531, + "step": 400 + }, + { + "epoch": 0.22, + "learning_rate": 4.9315319340420465e-05, + "loss": 1.048, + "step": 405 + }, + { + "epoch": 0.23, + "learning_rate": 4.929838929659015e-05, + "loss": 1.04, + "step": 410 + }, + { + "epoch": 0.23, + "learning_rate": 4.9281255465244314e-05, + "loss": 1.0686, + "step": 415 + }, + { + "epoch": 0.23, + "learning_rate": 4.926391799008223e-05, + "loss": 1.0933, + "step": 420 + }, + { + "epoch": 0.24, + "learning_rate": 4.924637701651111e-05, + "loss": 1.0313, + "step": 425 + }, + { + "epoch": 0.24, + "learning_rate": 4.9228632691644874e-05, + "loss": 1.0811, + "step": 430 + }, + { + "epoch": 0.24, + "learning_rate": 4.921068516430293e-05, + "loss": 1.0617, + "step": 435 + }, + { + "epoch": 0.24, + "learning_rate": 4.919253458500892e-05, + "loss": 1.0482, + "step": 440 + }, + { + "epoch": 0.25, + "learning_rate": 4.9174181105989445e-05, + "loss": 1.0681, + "step": 445 + }, + { + "epoch": 0.25, + "learning_rate": 4.9155624881172834e-05, + "loss": 1.0476, + "step": 450 + }, + { + "epoch": 0.25, + "learning_rate": 4.913686606618777e-05, + "loss": 1.0463, + "step": 455 + }, + { + "epoch": 0.25, + "learning_rate": 4.911790481836208e-05, + "loss": 1.0213, + "step": 460 + }, + { + "epoch": 0.26, + "learning_rate": 4.909874129672133e-05, + "loss": 0.9855, + "step": 465 + }, + { + "epoch": 0.26, + "learning_rate": 4.907937566198757e-05, + "loss": 1.045, + "step": 470 + }, + { + "epoch": 0.26, + "learning_rate": 4.9059808076577914e-05, + "loss": 1.0442, + "step": 475 + }, + { + "epoch": 0.27, + "learning_rate": 4.904003870460323e-05, + "loss": 1.0354, + "step": 480 + }, + { + "epoch": 0.27, + "learning_rate": 4.9020067711866735e-05, + "loss": 1.0331, + "step": 485 + }, + { + "epoch": 0.27, + "learning_rate": 4.899989526586261e-05, + "loss": 1.0585, + "step": 490 + }, + { + "epoch": 0.27, + "learning_rate": 4.8979521535774636e-05, + "loss": 1.0223, + "step": 495 + }, + { + "epoch": 0.28, + "learning_rate": 4.895894669247468e-05, + "loss": 1.0118, + "step": 500 + }, + { + "epoch": 0.28, + "learning_rate": 4.8938170908521356e-05, + "loss": 1.0508, + "step": 505 + }, + { + "epoch": 0.28, + "learning_rate": 4.8917194358158534e-05, + "loss": 1.0656, + "step": 510 + }, + { + "epoch": 0.28, + "learning_rate": 4.8896017217313886e-05, + "loss": 1.0655, + "step": 515 + }, + { + "epoch": 0.29, + "learning_rate": 4.887463966359741e-05, + "loss": 1.0833, + "step": 520 + }, + { + "epoch": 0.29, + "learning_rate": 4.8853061876299956e-05, + "loss": 1.068, + "step": 525 + }, + { + "epoch": 0.29, + "learning_rate": 4.8831284036391684e-05, + "loss": 1.0487, + "step": 530 + }, + { + "epoch": 0.3, + "learning_rate": 4.880930632652058e-05, + "loss": 1.059, + "step": 535 + }, + { + "epoch": 0.3, + "learning_rate": 4.878712893101092e-05, + "loss": 1.0408, + "step": 540 + }, + { + "epoch": 0.3, + "learning_rate": 4.876475203586171e-05, + "loss": 0.9976, + "step": 545 + }, + { + "epoch": 0.3, + "learning_rate": 4.874217582874514e-05, + "loss": 1.0358, + "step": 550 + }, + { + "epoch": 0.31, + "learning_rate": 4.8719400499005e-05, + "loss": 1.0073, + "step": 555 + }, + { + "epoch": 0.31, + "learning_rate": 4.869642623765509e-05, + "loss": 1.0742, + "step": 560 + }, + { + "epoch": 0.31, + "learning_rate": 4.867325323737765e-05, + "loss": 1.0463, + "step": 565 + }, + { + "epoch": 0.32, + "learning_rate": 4.864988169252168e-05, + "loss": 1.0242, + "step": 570 + }, + { + "epoch": 0.32, + "learning_rate": 4.8626311799101375e-05, + "loss": 1.0773, + "step": 575 + }, + { + "epoch": 0.32, + "learning_rate": 4.860254375479446e-05, + "loss": 1.0371, + "step": 580 + }, + { + "epoch": 0.32, + "learning_rate": 4.8578577758940504e-05, + "loss": 1.0399, + "step": 585 + }, + { + "epoch": 0.33, + "learning_rate": 4.855441401253928e-05, + "loss": 1.0411, + "step": 590 + }, + { + "epoch": 0.33, + "learning_rate": 4.8530052718249076e-05, + "loss": 1.0476, + "step": 595 + }, + { + "epoch": 0.33, + "learning_rate": 4.850549408038498e-05, + "loss": 1.0075, + "step": 600 + }, + { + "epoch": 0.33, + "learning_rate": 4.848073830491717e-05, + "loss": 1.0045, + "step": 605 + }, + { + "epoch": 0.34, + "learning_rate": 4.845578559946923e-05, + "loss": 1.0368, + "step": 610 + }, + { + "epoch": 0.34, + "learning_rate": 4.8430636173316306e-05, + "loss": 1.0156, + "step": 615 + }, + { + "epoch": 0.34, + "learning_rate": 4.840529023738348e-05, + "loss": 1.0334, + "step": 620 + }, + { + "epoch": 0.35, + "learning_rate": 4.837974800424389e-05, + "loss": 0.9994, + "step": 625 + }, + { + "epoch": 0.35, + "learning_rate": 4.8354009688117026e-05, + "loss": 1.0467, + "step": 630 + }, + { + "epoch": 0.35, + "learning_rate": 4.8328075504866874e-05, + "loss": 0.9968, + "step": 635 + }, + { + "epoch": 0.35, + "learning_rate": 4.8301945672000164e-05, + "loss": 1.041, + "step": 640 + }, + { + "epoch": 0.36, + "learning_rate": 4.8275620408664487e-05, + "loss": 0.9811, + "step": 645 + }, + { + "epoch": 0.36, + "learning_rate": 4.8249099935646494e-05, + "loss": 1.0235, + "step": 650 + }, + { + "epoch": 0.36, + "learning_rate": 4.822238447537003e-05, + "loss": 1.0152, + "step": 655 + }, + { + "epoch": 0.36, + "learning_rate": 4.819547425189429e-05, + "loss": 1.038, + "step": 660 + }, + { + "epoch": 0.37, + "learning_rate": 4.81683694909119e-05, + "loss": 1.0387, + "step": 665 + }, + { + "epoch": 0.37, + "learning_rate": 4.814107041974707e-05, + "loss": 1.019, + "step": 670 + }, + { + "epoch": 0.37, + "learning_rate": 4.811357726735366e-05, + "loss": 1.0403, + "step": 675 + }, + { + "epoch": 0.38, + "learning_rate": 4.808589026431324e-05, + "loss": 1.0828, + "step": 680 + }, + { + "epoch": 0.38, + "learning_rate": 4.805800964283322e-05, + "loss": 1.0128, + "step": 685 + }, + { + "epoch": 0.38, + "learning_rate": 4.802993563674483e-05, + "loss": 1.0502, + "step": 690 + }, + { + "epoch": 0.38, + "learning_rate": 4.80016684815012e-05, + "loss": 1.0169, + "step": 695 + }, + { + "epoch": 0.39, + "learning_rate": 4.7973208414175406e-05, + "loss": 1.0316, + "step": 700 + }, + { + "epoch": 0.39, + "learning_rate": 4.794455567345842e-05, + "loss": 1.0263, + "step": 705 + }, + { + "epoch": 0.39, + "learning_rate": 4.791571049965714e-05, + "loss": 1.0801, + "step": 710 + }, + { + "epoch": 0.4, + "learning_rate": 4.7886673134692404e-05, + "loss": 1.0575, + "step": 715 + }, + { + "epoch": 0.4, + "learning_rate": 4.7857443822096905e-05, + "loss": 0.9767, + "step": 720 + }, + { + "epoch": 0.4, + "learning_rate": 4.782802280701319e-05, + "loss": 0.9666, + "step": 725 + }, + { + "epoch": 0.4, + "learning_rate": 4.77984103361916e-05, + "loss": 0.9983, + "step": 730 + }, + { + "epoch": 0.41, + "learning_rate": 4.776860665798816e-05, + "loss": 1.0219, + "step": 735 + }, + { + "epoch": 0.41, + "learning_rate": 4.773861202236257e-05, + "loss": 0.9963, + "step": 740 + }, + { + "epoch": 0.41, + "learning_rate": 4.770842668087602e-05, + "loss": 1.0219, + "step": 745 + }, + { + "epoch": 0.41, + "learning_rate": 4.767805088668916e-05, + "loss": 1.0029, + "step": 750 + }, + { + "epoch": 0.42, + "learning_rate": 4.7647484894559936e-05, + "loss": 1.0077, + "step": 755 + }, + { + "epoch": 0.42, + "learning_rate": 4.7616728960841444e-05, + "loss": 0.9908, + "step": 760 + }, + { + "epoch": 0.42, + "learning_rate": 4.758578334347981e-05, + "loss": 1.0092, + "step": 765 + }, + { + "epoch": 0.43, + "learning_rate": 4.7554648302012015e-05, + "loss": 0.9874, + "step": 770 + }, + { + "epoch": 0.43, + "learning_rate": 4.7523324097563706e-05, + "loss": 1.0185, + "step": 775 + }, + { + "epoch": 0.43, + "learning_rate": 4.749181099284703e-05, + "loss": 1.0533, + "step": 780 + }, + { + "epoch": 0.43, + "learning_rate": 4.746010925215839e-05, + "loss": 1.0083, + "step": 785 + }, + { + "epoch": 0.44, + "learning_rate": 4.74282191413763e-05, + "loss": 1.0191, + "step": 790 + }, + { + "epoch": 0.44, + "learning_rate": 4.7396140927959045e-05, + "loss": 0.9786, + "step": 795 + }, + { + "epoch": 0.44, + "learning_rate": 4.7363874880942574e-05, + "loss": 1.0197, + "step": 800 + }, + { + "epoch": 0.45, + "learning_rate": 4.733142127093813e-05, + "loss": 1.0471, + "step": 805 + }, + { + "epoch": 0.45, + "learning_rate": 4.7298780370130014e-05, + "loss": 1.0134, + "step": 810 + }, + { + "epoch": 0.45, + "learning_rate": 4.726595245227336e-05, + "loss": 0.9801, + "step": 815 + }, + { + "epoch": 0.45, + "learning_rate": 4.723293779269173e-05, + "loss": 0.9944, + "step": 820 + }, + { + "epoch": 0.46, + "learning_rate": 4.7199736668274924e-05, + "loss": 1.0358, + "step": 825 + }, + { + "epoch": 0.46, + "learning_rate": 4.716634935747655e-05, + "loss": 0.9907, + "step": 830 + }, + { + "epoch": 0.46, + "learning_rate": 4.713277614031177e-05, + "loss": 1.0698, + "step": 835 + }, + { + "epoch": 0.46, + "learning_rate": 4.70990172983549e-05, + "loss": 1.0466, + "step": 840 + }, + { + "epoch": 0.47, + "learning_rate": 4.706507311473707e-05, + "loss": 0.9489, + "step": 845 + }, + { + "epoch": 0.47, + "learning_rate": 4.703094387414385e-05, + "loss": 0.9936, + "step": 850 + }, + { + "epoch": 0.47, + "learning_rate": 4.699662986281288e-05, + "loss": 0.9672, + "step": 855 + }, + { + "epoch": 0.48, + "learning_rate": 4.696213136853141e-05, + "loss": 1.0164, + "step": 860 + }, + { + "epoch": 0.48, + "learning_rate": 4.6927448680633954e-05, + "loss": 0.9817, + "step": 865 + }, + { + "epoch": 0.48, + "learning_rate": 4.689258208999983e-05, + "loss": 1.028, + "step": 870 + }, + { + "epoch": 0.48, + "learning_rate": 4.6857531889050716e-05, + "loss": 1.0239, + "step": 875 + }, + { + "epoch": 0.49, + "learning_rate": 4.682229837174821e-05, + "loss": 1.0378, + "step": 880 + }, + { + "epoch": 0.49, + "learning_rate": 4.678688183359135e-05, + "loss": 0.9908, + "step": 885 + }, + { + "epoch": 0.49, + "learning_rate": 4.675128257161418e-05, + "loss": 0.9384, + "step": 890 + }, + { + "epoch": 0.49, + "learning_rate": 4.671550088438319e-05, + "loss": 0.9481, + "step": 895 + }, + { + "epoch": 0.5, + "learning_rate": 4.6679537071994874e-05, + "loss": 0.9541, + "step": 900 + }, + { + "epoch": 0.5, + "learning_rate": 4.6643391436073165e-05, + "loss": 0.9787, + "step": 905 + }, + { + "epoch": 0.5, + "learning_rate": 4.660706427976693e-05, + "loss": 0.9897, + "step": 910 + }, + { + "epoch": 0.51, + "learning_rate": 4.657055590774745e-05, + "loss": 0.9947, + "step": 915 + }, + { + "epoch": 0.51, + "learning_rate": 4.6533866626205805e-05, + "loss": 0.9614, + "step": 920 + }, + { + "epoch": 0.51, + "learning_rate": 4.649699674285036e-05, + "loss": 1.0323, + "step": 925 + }, + { + "epoch": 0.51, + "learning_rate": 4.645994656690417e-05, + "loss": 1.0345, + "step": 930 + }, + { + "epoch": 0.52, + "learning_rate": 4.642271640910235e-05, + "loss": 1.0432, + "step": 935 + }, + { + "epoch": 0.52, + "learning_rate": 4.638530658168954e-05, + "loss": 1.0102, + "step": 940 + }, + { + "epoch": 0.52, + "learning_rate": 4.6347717398417203e-05, + "loss": 1.0361, + "step": 945 + }, + { + "epoch": 0.53, + "learning_rate": 4.6309949174541096e-05, + "loss": 0.9818, + "step": 950 + }, + { + "epoch": 0.53, + "learning_rate": 4.627200222681851e-05, + "loss": 1.0293, + "step": 955 + }, + { + "epoch": 0.53, + "learning_rate": 4.6233876873505694e-05, + "loss": 1.0125, + "step": 960 + }, + { + "epoch": 0.53, + "learning_rate": 4.619557343435516e-05, + "loss": 1.0039, + "step": 965 + }, + { + "epoch": 0.54, + "learning_rate": 4.615709223061302e-05, + "loss": 1.0146, + "step": 970 + }, + { + "epoch": 0.54, + "learning_rate": 4.611843358501624e-05, + "loss": 1.0352, + "step": 975 + }, + { + "epoch": 0.54, + "learning_rate": 4.6079597821789993e-05, + "loss": 1.0405, + "step": 980 + }, + { + "epoch": 0.54, + "learning_rate": 4.604058526664491e-05, + "loss": 0.9834, + "step": 985 + }, + { + "epoch": 0.55, + "learning_rate": 4.600139624677436e-05, + "loss": 1.0252, + "step": 990 + }, + { + "epoch": 0.55, + "learning_rate": 4.596203109085168e-05, + "loss": 1.0516, + "step": 995 + }, + { + "epoch": 0.55, + "learning_rate": 4.5922490129027464e-05, + "loss": 0.9762, + "step": 1000 + }, + { + "epoch": 0.56, + "learning_rate": 4.588277369292674e-05, + "loss": 0.9801, + "step": 1005 + }, + { + "epoch": 0.56, + "learning_rate": 4.5842882115646234e-05, + "loss": 1.0127, + "step": 1010 + }, + { + "epoch": 0.56, + "learning_rate": 4.580281573175157e-05, + "loss": 1.0415, + "step": 1015 + }, + { + "epoch": 0.56, + "learning_rate": 4.576257487727442e-05, + "loss": 0.9974, + "step": 1020 + }, + { + "epoch": 0.57, + "learning_rate": 4.572215988970974e-05, + "loss": 1.0097, + "step": 1025 + }, + { + "epoch": 0.57, + "learning_rate": 4.568157110801293e-05, + "loss": 1.0004, + "step": 1030 + }, + { + "epoch": 0.57, + "learning_rate": 4.5640808872596944e-05, + "loss": 0.9932, + "step": 1035 + }, + { + "epoch": 0.58, + "learning_rate": 4.5599873525329505e-05, + "loss": 0.9437, + "step": 1040 + }, + { + "epoch": 0.58, + "learning_rate": 4.555876540953019e-05, + "loss": 1.0004, + "step": 1045 + }, + { + "epoch": 0.58, + "learning_rate": 4.551748486996755e-05, + "loss": 0.9743, + "step": 1050 + }, + { + "epoch": 0.58, + "learning_rate": 4.547603225285626e-05, + "loss": 1.0303, + "step": 1055 + }, + { + "epoch": 0.59, + "learning_rate": 4.543440790585417e-05, + "loss": 1.0225, + "step": 1060 + }, + { + "epoch": 0.59, + "learning_rate": 4.539261217805939e-05, + "loss": 1.0102, + "step": 1065 + }, + { + "epoch": 0.59, + "learning_rate": 4.535064542000743e-05, + "loss": 1.0087, + "step": 1070 + }, + { + "epoch": 0.59, + "learning_rate": 4.5308507983668165e-05, + "loss": 0.952, + "step": 1075 + }, + { + "epoch": 0.6, + "learning_rate": 4.526620022244293e-05, + "loss": 0.9767, + "step": 1080 + }, + { + "epoch": 0.6, + "learning_rate": 4.522372249116158e-05, + "loss": 1.0049, + "step": 1085 + }, + { + "epoch": 0.6, + "learning_rate": 4.5181075146079456e-05, + "loss": 1.0397, + "step": 1090 + }, + { + "epoch": 0.61, + "learning_rate": 4.5138258544874455e-05, + "loss": 1.0167, + "step": 1095 + }, + { + "epoch": 0.61, + "learning_rate": 4.5095273046643985e-05, + "loss": 0.9966, + "step": 1100 + }, + { + "epoch": 0.61, + "learning_rate": 4.5052119011901986e-05, + "loss": 0.983, + "step": 1105 + }, + { + "epoch": 0.61, + "learning_rate": 4.500879680257587e-05, + "loss": 1.0301, + "step": 1110 + }, + { + "epoch": 0.62, + "learning_rate": 4.4965306782003535e-05, + "loss": 1.0371, + "step": 1115 + }, + { + "epoch": 0.62, + "learning_rate": 4.492164931493028e-05, + "loss": 1.0083, + "step": 1120 + }, + { + "epoch": 0.62, + "learning_rate": 4.487782476750575e-05, + "loss": 0.9728, + "step": 1125 + }, + { + "epoch": 0.62, + "learning_rate": 4.4833833507280884e-05, + "loss": 0.9892, + "step": 1130 + }, + { + "epoch": 0.63, + "learning_rate": 4.4789675903204805e-05, + "loss": 0.9847, + "step": 1135 + }, + { + "epoch": 0.63, + "learning_rate": 4.474535232562176e-05, + "loss": 1.0545, + "step": 1140 + }, + { + "epoch": 0.63, + "learning_rate": 4.470086314626797e-05, + "loss": 1.0204, + "step": 1145 + }, + { + "epoch": 0.64, + "learning_rate": 4.465620873826856e-05, + "loss": 0.9648, + "step": 1150 + }, + { + "epoch": 0.64, + "learning_rate": 4.46113894761344e-05, + "loss": 0.9545, + "step": 1155 + }, + { + "epoch": 0.64, + "learning_rate": 4.456640573575896e-05, + "loss": 1.0256, + "step": 1160 + }, + { + "epoch": 0.64, + "learning_rate": 4.4521257894415183e-05, + "loss": 0.99, + "step": 1165 + }, + { + "epoch": 0.65, + "learning_rate": 4.44759463307523e-05, + "loss": 0.9906, + "step": 1170 + }, + { + "epoch": 0.65, + "learning_rate": 4.443047142479266e-05, + "loss": 0.9876, + "step": 1175 + }, + { + "epoch": 0.65, + "learning_rate": 4.4384833557928553e-05, + "loss": 1.0495, + "step": 1180 + }, + { + "epoch": 0.66, + "learning_rate": 4.4339033112918966e-05, + "loss": 0.9869, + "step": 1185 + }, + { + "epoch": 0.66, + "learning_rate": 4.4293070473886456e-05, + "loss": 1.0299, + "step": 1190 + }, + { + "epoch": 0.66, + "learning_rate": 4.424694602631385e-05, + "loss": 1.0073, + "step": 1195 + }, + { + "epoch": 0.66, + "learning_rate": 4.420066015704105e-05, + "loss": 1.0023, + "step": 1200 + }, + { + "epoch": 0.67, + "learning_rate": 4.41542132542618e-05, + "loss": 0.9462, + "step": 1205 + }, + { + "epoch": 0.67, + "learning_rate": 4.410760570752037e-05, + "loss": 1.0116, + "step": 1210 + }, + { + "epoch": 0.67, + "learning_rate": 4.4060837907708375e-05, + "loss": 0.9652, + "step": 1215 + }, + { + "epoch": 0.67, + "learning_rate": 4.401391024706142e-05, + "loss": 1.0411, + "step": 1220 + }, + { + "epoch": 0.68, + "learning_rate": 4.396682311915586e-05, + "loss": 0.9691, + "step": 1225 + }, + { + "epoch": 0.68, + "learning_rate": 4.3919576918905495e-05, + "loss": 0.961, + "step": 1230 + }, + { + "epoch": 0.68, + "learning_rate": 4.387217204255819e-05, + "loss": 0.9602, + "step": 1235 + }, + { + "epoch": 0.69, + "learning_rate": 4.3824608887692666e-05, + "loss": 1.0197, + "step": 1240 + }, + { + "epoch": 0.69, + "learning_rate": 4.377688785321507e-05, + "loss": 0.9982, + "step": 1245 + }, + { + "epoch": 0.69, + "learning_rate": 4.372900933935569e-05, + "loss": 0.986, + "step": 1250 + }, + { + "epoch": 0.69, + "learning_rate": 4.368097374766556e-05, + "loss": 0.9744, + "step": 1255 + }, + { + "epoch": 0.7, + "learning_rate": 4.3632781481013105e-05, + "loss": 1.0078, + "step": 1260 + }, + { + "epoch": 0.7, + "learning_rate": 4.358443294358077e-05, + "loss": 0.9865, + "step": 1265 + }, + { + "epoch": 0.7, + "learning_rate": 4.35359285408616e-05, + "loss": 0.976, + "step": 1270 + }, + { + "epoch": 0.71, + "learning_rate": 4.348726867965591e-05, + "loss": 1.0141, + "step": 1275 + }, + { + "epoch": 0.71, + "learning_rate": 4.343845376806777e-05, + "loss": 0.952, + "step": 1280 + }, + { + "epoch": 0.71, + "learning_rate": 4.338948421550169e-05, + "loss": 1.0239, + "step": 1285 + }, + { + "epoch": 0.71, + "learning_rate": 4.334036043265909e-05, + "loss": 0.994, + "step": 1290 + }, + { + "epoch": 0.72, + "learning_rate": 4.329108283153492e-05, + "loss": 1.0505, + "step": 1295 + }, + { + "epoch": 0.72, + "learning_rate": 4.3241651825414195e-05, + "loss": 0.9919, + "step": 1300 + }, + { + "epoch": 0.72, + "learning_rate": 4.31920678288685e-05, + "loss": 1.0169, + "step": 1305 + }, + { + "epoch": 0.72, + "learning_rate": 4.3142331257752546e-05, + "loss": 0.9938, + "step": 1310 + }, + { + "epoch": 0.73, + "learning_rate": 4.309244252920064e-05, + "loss": 0.92, + "step": 1315 + }, + { + "epoch": 0.73, + "learning_rate": 4.304240206162326e-05, + "loss": 0.9537, + "step": 1320 + }, + { + "epoch": 0.73, + "learning_rate": 4.299221027470345e-05, + "loss": 0.996, + "step": 1325 + }, + { + "epoch": 0.74, + "learning_rate": 4.294186758939339e-05, + "loss": 0.9652, + "step": 1330 + }, + { + "epoch": 0.74, + "learning_rate": 4.2891374427910795e-05, + "loss": 1.0138, + "step": 1335 + }, + { + "epoch": 0.74, + "learning_rate": 4.284073121373544e-05, + "loss": 0.9267, + "step": 1340 + }, + { + "epoch": 0.74, + "learning_rate": 4.278993837160553e-05, + "loss": 0.9899, + "step": 1345 + }, + { + "epoch": 0.75, + "learning_rate": 4.273899632751422e-05, + "loss": 0.963, + "step": 1350 + }, + { + "epoch": 0.75, + "learning_rate": 4.2687905508705974e-05, + "loss": 1.0066, + "step": 1355 + }, + { + "epoch": 0.75, + "learning_rate": 4.263666634367303e-05, + "loss": 0.9939, + "step": 1360 + }, + { + "epoch": 0.75, + "learning_rate": 4.258527926215178e-05, + "loss": 0.9667, + "step": 1365 + }, + { + "epoch": 0.76, + "learning_rate": 4.253374469511917e-05, + "loss": 0.9999, + "step": 1370 + }, + { + "epoch": 0.76, + "learning_rate": 4.248206307478909e-05, + "loss": 0.9738, + "step": 1375 + }, + { + "epoch": 0.76, + "learning_rate": 4.243023483460875e-05, + "loss": 0.9829, + "step": 1380 + }, + { + "epoch": 0.77, + "learning_rate": 4.237826040925503e-05, + "loss": 1.0107, + "step": 1385 + }, + { + "epoch": 0.77, + "learning_rate": 4.232614023463088e-05, + "loss": 0.9992, + "step": 1390 + }, + { + "epoch": 0.77, + "learning_rate": 4.227387474786159e-05, + "loss": 0.9566, + "step": 1395 + }, + { + "epoch": 0.77, + "learning_rate": 4.222146438729119e-05, + "loss": 0.9722, + "step": 1400 + }, + { + "epoch": 0.78, + "learning_rate": 4.216890959247873e-05, + "loss": 0.988, + "step": 1405 + }, + { + "epoch": 0.78, + "learning_rate": 4.211621080419463e-05, + "loss": 1.0103, + "step": 1410 + }, + { + "epoch": 0.78, + "learning_rate": 4.206336846441695e-05, + "loss": 0.9805, + "step": 1415 + }, + { + "epoch": 0.79, + "learning_rate": 4.201038301632772e-05, + "loss": 1.0177, + "step": 1420 + }, + { + "epoch": 0.79, + "learning_rate": 4.195725490430917e-05, + "loss": 0.9938, + "step": 1425 + }, + { + "epoch": 0.79, + "learning_rate": 4.190398457394007e-05, + "loss": 1.0276, + "step": 1430 + }, + { + "epoch": 0.79, + "learning_rate": 4.1850572471991924e-05, + "loss": 0.9909, + "step": 1435 + }, + { + "epoch": 0.8, + "learning_rate": 4.1797019046425264e-05, + "loss": 0.9794, + "step": 1440 + }, + { + "epoch": 0.8, + "learning_rate": 4.1743324746385914e-05, + "loss": 1.0073, + "step": 1445 + }, + { + "epoch": 0.8, + "learning_rate": 4.1689490022201154e-05, + "loss": 1.0106, + "step": 1450 + }, + { + "epoch": 0.8, + "learning_rate": 4.163551532537601e-05, + "loss": 1.0241, + "step": 1455 + }, + { + "epoch": 0.81, + "learning_rate": 4.1581401108589425e-05, + "loss": 1.0178, + "step": 1460 + }, + { + "epoch": 0.81, + "learning_rate": 4.1527147825690495e-05, + "loss": 0.9426, + "step": 1465 + }, + { + "epoch": 0.81, + "learning_rate": 4.1472755931694626e-05, + "loss": 0.9642, + "step": 1470 + }, + { + "epoch": 0.82, + "learning_rate": 4.141822588277976e-05, + "loss": 0.9678, + "step": 1475 + }, + { + "epoch": 0.82, + "learning_rate": 4.136355813628251e-05, + "loss": 0.9654, + "step": 1480 + }, + { + "epoch": 0.82, + "learning_rate": 4.130875315069435e-05, + "loss": 0.9748, + "step": 1485 + }, + { + "epoch": 0.82, + "learning_rate": 4.125381138565775e-05, + "loss": 0.9321, + "step": 1490 + }, + { + "epoch": 0.83, + "learning_rate": 4.1198733301962346e-05, + "loss": 0.9885, + "step": 1495 + }, + { + "epoch": 0.83, + "learning_rate": 4.114351936154105e-05, + "loss": 0.9385, + "step": 1500 + }, + { + "epoch": 0.83, + "learning_rate": 4.108817002746619e-05, + "loss": 0.9362, + "step": 1505 + }, + { + "epoch": 0.83, + "learning_rate": 4.1032685763945625e-05, + "loss": 0.9764, + "step": 1510 + }, + { + "epoch": 0.84, + "learning_rate": 4.097706703631886e-05, + "loss": 0.961, + "step": 1515 + }, + { + "epoch": 0.84, + "learning_rate": 4.092131431105312e-05, + "loss": 0.9818, + "step": 1520 + }, + { + "epoch": 0.84, + "learning_rate": 4.086542805573945e-05, + "loss": 0.9855, + "step": 1525 + }, + { + "epoch": 0.85, + "learning_rate": 4.080940873908881e-05, + "loss": 0.9618, + "step": 1530 + }, + { + "epoch": 0.85, + "learning_rate": 4.07532568309281e-05, + "loss": 1.0102, + "step": 1535 + }, + { + "epoch": 0.85, + "learning_rate": 4.069697280219628e-05, + "loss": 0.9773, + "step": 1540 + }, + { + "epoch": 0.85, + "learning_rate": 4.0640557124940376e-05, + "loss": 0.9687, + "step": 1545 + }, + { + "epoch": 0.86, + "learning_rate": 4.058401027231152e-05, + "loss": 0.9818, + "step": 1550 + }, + { + "epoch": 0.86, + "learning_rate": 4.052733271856103e-05, + "loss": 0.9911, + "step": 1555 + }, + { + "epoch": 0.86, + "learning_rate": 4.0470524939036355e-05, + "loss": 0.9694, + "step": 1560 + }, + { + "epoch": 0.87, + "learning_rate": 4.0413587410177155e-05, + "loss": 0.9374, + "step": 1565 + }, + { + "epoch": 0.87, + "learning_rate": 4.035652060951128e-05, + "loss": 0.956, + "step": 1570 + }, + { + "epoch": 0.87, + "learning_rate": 4.0299325015650774e-05, + "loss": 0.9491, + "step": 1575 + }, + { + "epoch": 0.87, + "learning_rate": 4.024200110828783e-05, + "loss": 0.9569, + "step": 1580 + }, + { + "epoch": 0.88, + "learning_rate": 4.018454936819082e-05, + "loss": 0.9627, + "step": 1585 + }, + { + "epoch": 0.88, + "learning_rate": 4.012697027720018e-05, + "loss": 0.9922, + "step": 1590 + }, + { + "epoch": 0.88, + "learning_rate": 4.0069264318224506e-05, + "loss": 0.9703, + "step": 1595 + }, + { + "epoch": 0.88, + "learning_rate": 4.0011431975236337e-05, + "loss": 0.9436, + "step": 1600 + }, + { + "epoch": 0.89, + "learning_rate": 3.995347373326822e-05, + "loss": 0.961, + "step": 1605 + }, + { + "epoch": 0.89, + "learning_rate": 3.989539007840861e-05, + "loss": 0.9247, + "step": 1610 + }, + { + "epoch": 0.89, + "learning_rate": 3.983718149779775e-05, + "loss": 0.9537, + "step": 1615 + }, + { + "epoch": 0.9, + "learning_rate": 3.9778848479623656e-05, + "loss": 0.9415, + "step": 1620 + }, + { + "epoch": 0.9, + "learning_rate": 3.972039151311795e-05, + "loss": 0.9954, + "step": 1625 + }, + { + "epoch": 0.9, + "learning_rate": 3.966181108855183e-05, + "loss": 0.9451, + "step": 1630 + }, + { + "epoch": 0.9, + "learning_rate": 3.960310769723189e-05, + "loss": 0.9977, + "step": 1635 + }, + { + "epoch": 0.91, + "learning_rate": 3.9544281831496034e-05, + "loss": 0.9806, + "step": 1640 + }, + { + "epoch": 0.91, + "learning_rate": 3.9485333984709374e-05, + "loss": 0.9851, + "step": 1645 + }, + { + "epoch": 0.91, + "learning_rate": 3.942626465126001e-05, + "loss": 1.0089, + "step": 1650 + }, + { + "epoch": 0.92, + "learning_rate": 3.9367074326555e-05, + "loss": 0.9562, + "step": 1655 + }, + { + "epoch": 0.92, + "learning_rate": 3.930776350701609e-05, + "loss": 0.9892, + "step": 1660 + }, + { + "epoch": 0.92, + "learning_rate": 3.92483326900756e-05, + "loss": 0.9875, + "step": 1665 + }, + { + "epoch": 0.92, + "learning_rate": 3.91887823741723e-05, + "loss": 1.0012, + "step": 1670 + }, + { + "epoch": 0.93, + "learning_rate": 3.9129113058747136e-05, + "loss": 0.97, + "step": 1675 + }, + { + "epoch": 0.93, + "learning_rate": 3.9069325244239095e-05, + "loss": 0.9793, + "step": 1680 + }, + { + "epoch": 0.93, + "learning_rate": 3.900941943208103e-05, + "loss": 0.9864, + "step": 1685 + }, + { + "epoch": 0.93, + "learning_rate": 3.894939612469539e-05, + "loss": 0.9596, + "step": 1690 + }, + { + "epoch": 0.94, + "learning_rate": 3.888925582549006e-05, + "loss": 0.9679, + "step": 1695 + }, + { + "epoch": 0.94, + "learning_rate": 3.882899903885412e-05, + "loss": 0.9395, + "step": 1700 + }, + { + "epoch": 0.94, + "learning_rate": 3.876862627015361e-05, + "loss": 0.9774, + "step": 1705 + }, + { + "epoch": 0.95, + "learning_rate": 3.87081380257273e-05, + "loss": 0.9686, + "step": 1710 + }, + { + "epoch": 0.95, + "learning_rate": 3.864753481288244e-05, + "loss": 0.953, + "step": 1715 + }, + { + "epoch": 0.95, + "learning_rate": 3.8586817139890515e-05, + "loss": 0.9719, + "step": 1720 + }, + { + "epoch": 0.95, + "learning_rate": 3.852598551598294e-05, + "loss": 0.9748, + "step": 1725 + }, + { + "epoch": 0.96, + "learning_rate": 3.8465040451346874e-05, + "loss": 0.9741, + "step": 1730 + }, + { + "epoch": 0.96, + "learning_rate": 3.8403982457120836e-05, + "loss": 0.9747, + "step": 1735 + }, + { + "epoch": 0.96, + "learning_rate": 3.834281204539051e-05, + "loss": 0.9791, + "step": 1740 + }, + { + "epoch": 0.96, + "learning_rate": 3.828152972918438e-05, + "loss": 0.9704, + "step": 1745 + }, + { + "epoch": 0.97, + "learning_rate": 3.82201360224695e-05, + "loss": 0.9556, + "step": 1750 + }, + { + "epoch": 0.97, + "learning_rate": 3.815863144014711e-05, + "loss": 0.9552, + "step": 1755 + }, + { + "epoch": 0.97, + "learning_rate": 3.809701649804834e-05, + "loss": 0.9594, + "step": 1760 + }, + { + "epoch": 0.98, + "learning_rate": 3.8035291712929926e-05, + "loss": 0.959, + "step": 1765 + }, + { + "epoch": 0.98, + "learning_rate": 3.797345760246982e-05, + "loss": 1.0254, + "step": 1770 + }, + { + "epoch": 0.98, + "learning_rate": 3.791151468526289e-05, + "loss": 0.9925, + "step": 1775 + }, + { + "epoch": 0.98, + "learning_rate": 3.784946348081654e-05, + "loss": 0.9516, + "step": 1780 + }, + { + "epoch": 0.99, + "learning_rate": 3.7787304509546365e-05, + "loss": 0.954, + "step": 1785 + }, + { + "epoch": 0.99, + "learning_rate": 3.7725038292771774e-05, + "loss": 0.9465, + "step": 1790 + }, + { + "epoch": 0.99, + "learning_rate": 3.766266535271167e-05, + "loss": 0.9792, + "step": 1795 + }, + { + "epoch": 1.0, + "learning_rate": 3.760018621248e-05, + "loss": 0.964, + "step": 1800 + }, + { + "epoch": 1.0, + "learning_rate": 3.75376013960814e-05, + "loss": 0.9419, + "step": 1805 + }, + { + "epoch": 1.0, + "learning_rate": 3.747491142840681e-05, + "loss": 0.9818, + "step": 1810 + }, + { + "epoch": 1.0, + "learning_rate": 3.741211683522904e-05, + "loss": 0.9153, + "step": 1815 + }, + { + "epoch": 1.01, + "learning_rate": 3.734921814319841e-05, + "loss": 0.955, + "step": 1820 + }, + { + "epoch": 1.01, + "learning_rate": 3.728621587983828e-05, + "loss": 0.9467, + "step": 1825 + }, + { + "epoch": 1.01, + "learning_rate": 3.722311057354067e-05, + "loss": 0.9816, + "step": 1830 + }, + { + "epoch": 1.01, + "learning_rate": 3.715990275356178e-05, + "loss": 0.9727, + "step": 1835 + }, + { + "epoch": 1.02, + "learning_rate": 3.7096592950017617e-05, + "loss": 0.9822, + "step": 1840 + }, + { + "epoch": 1.02, + "learning_rate": 3.703318169387947e-05, + "loss": 0.942, + "step": 1845 + }, + { + "epoch": 1.02, + "learning_rate": 3.696966951696952e-05, + "loss": 0.9306, + "step": 1850 + }, + { + "epoch": 1.03, + "learning_rate": 3.690605695195637e-05, + "loss": 0.9654, + "step": 1855 + }, + { + "epoch": 1.03, + "learning_rate": 3.684234453235054e-05, + "loss": 0.9592, + "step": 1860 + }, + { + "epoch": 1.03, + "learning_rate": 3.677853279250003e-05, + "loss": 0.988, + "step": 1865 + }, + { + "epoch": 1.03, + "learning_rate": 3.671462226758583e-05, + "loss": 0.9462, + "step": 1870 + }, + { + "epoch": 1.04, + "learning_rate": 3.665061349361742e-05, + "loss": 0.9685, + "step": 1875 + }, + { + "epoch": 1.04, + "learning_rate": 3.658650700742828e-05, + "loss": 0.9772, + "step": 1880 + }, + { + "epoch": 1.04, + "learning_rate": 3.6522303346671404e-05, + "loss": 0.9482, + "step": 1885 + }, + { + "epoch": 1.05, + "learning_rate": 3.645800304981477e-05, + "loss": 1.0069, + "step": 1890 + }, + { + "epoch": 1.05, + "learning_rate": 3.639360665613683e-05, + "loss": 0.9003, + "step": 1895 + }, + { + "epoch": 1.05, + "learning_rate": 3.632911470572197e-05, + "loss": 0.9279, + "step": 1900 + }, + { + "epoch": 1.05, + "learning_rate": 3.626452773945603e-05, + "loss": 0.9237, + "step": 1905 + }, + { + "epoch": 1.06, + "learning_rate": 3.619984629902172e-05, + "loss": 1.0086, + "step": 1910 + }, + { + "epoch": 1.06, + "learning_rate": 3.613507092689409e-05, + "loss": 0.9625, + "step": 1915 + }, + { + "epoch": 1.06, + "learning_rate": 3.607020216633599e-05, + "loss": 0.9297, + "step": 1920 + }, + { + "epoch": 1.06, + "learning_rate": 3.60052405613935e-05, + "loss": 0.9718, + "step": 1925 + }, + { + "epoch": 1.07, + "learning_rate": 3.594018665689139e-05, + "loss": 0.9512, + "step": 1930 + }, + { + "epoch": 1.07, + "learning_rate": 3.5875040998428513e-05, + "loss": 0.9923, + "step": 1935 + }, + { + "epoch": 1.07, + "learning_rate": 3.5809804132373253e-05, + "loss": 0.9518, + "step": 1940 + }, + { + "epoch": 1.08, + "learning_rate": 3.574447660585897e-05, + "loss": 0.9324, + "step": 1945 + }, + { + "epoch": 1.08, + "learning_rate": 3.5679058966779344e-05, + "loss": 0.9724, + "step": 1950 + }, + { + "epoch": 1.08, + "learning_rate": 3.561355176378384e-05, + "loss": 0.978, + "step": 1955 + }, + { + "epoch": 1.08, + "learning_rate": 3.554795554627307e-05, + "loss": 0.9893, + "step": 1960 + }, + { + "epoch": 1.09, + "learning_rate": 3.548227086439422e-05, + "loss": 0.967, + "step": 1965 + }, + { + "epoch": 1.09, + "learning_rate": 3.541649826903639e-05, + "loss": 0.9529, + "step": 1970 + }, + { + "epoch": 1.09, + "learning_rate": 3.535063831182602e-05, + "loss": 0.9506, + "step": 1975 + }, + { + "epoch": 1.09, + "learning_rate": 3.528469154512224e-05, + "loss": 0.9525, + "step": 1980 + }, + { + "epoch": 1.1, + "learning_rate": 3.521865852201223e-05, + "loss": 0.9258, + "step": 1985 + }, + { + "epoch": 1.1, + "learning_rate": 3.5152539796306596e-05, + "loss": 0.9417, + "step": 1990 + }, + { + "epoch": 1.1, + "learning_rate": 3.508633592253472e-05, + "loss": 0.9465, + "step": 1995 + }, + { + "epoch": 1.11, + "learning_rate": 3.502004745594011e-05, + "loss": 0.9494, + "step": 2000 + }, + { + "epoch": 1.11, + "learning_rate": 3.4953674952475755e-05, + "loss": 0.9709, + "step": 2005 + }, + { + "epoch": 1.11, + "learning_rate": 3.488721896879943e-05, + "loss": 0.9581, + "step": 2010 + }, + { + "epoch": 1.11, + "learning_rate": 3.4820680062269074e-05, + "loss": 0.974, + "step": 2015 + }, + { + "epoch": 1.12, + "learning_rate": 3.4754058790938046e-05, + "loss": 0.9768, + "step": 2020 + }, + { + "epoch": 1.12, + "learning_rate": 3.468735571355055e-05, + "loss": 0.982, + "step": 2025 + }, + { + "epoch": 1.12, + "learning_rate": 3.4620571389536825e-05, + "loss": 0.984, + "step": 2030 + }, + { + "epoch": 1.13, + "learning_rate": 3.455370637900856e-05, + "loss": 0.9604, + "step": 2035 + }, + { + "epoch": 1.13, + "learning_rate": 3.448676124275414e-05, + "loss": 0.897, + "step": 2040 + }, + { + "epoch": 1.13, + "learning_rate": 3.4419736542233925e-05, + "loss": 0.9968, + "step": 2045 + }, + { + "epoch": 1.13, + "learning_rate": 3.4352632839575616e-05, + "loss": 0.9479, + "step": 2050 + }, + { + "epoch": 1.14, + "learning_rate": 3.428545069756946e-05, + "loss": 0.9724, + "step": 2055 + }, + { + "epoch": 1.14, + "learning_rate": 3.42181906796636e-05, + "loss": 0.9493, + "step": 2060 + }, + { + "epoch": 1.14, + "learning_rate": 3.415085334995927e-05, + "loss": 0.9348, + "step": 2065 + }, + { + "epoch": 1.14, + "learning_rate": 3.408343927320613e-05, + "loss": 0.9702, + "step": 2070 + }, + { + "epoch": 1.15, + "learning_rate": 3.401594901479753e-05, + "loss": 0.9089, + "step": 2075 + }, + { + "epoch": 1.15, + "learning_rate": 3.394838314076572e-05, + "loss": 0.9606, + "step": 2080 + }, + { + "epoch": 1.15, + "learning_rate": 3.3880742217777115e-05, + "loss": 0.9743, + "step": 2085 + }, + { + "epoch": 1.16, + "learning_rate": 3.381302681312759e-05, + "loss": 0.9469, + "step": 2090 + }, + { + "epoch": 1.16, + "learning_rate": 3.374523749473767e-05, + "loss": 0.949, + "step": 2095 + }, + { + "epoch": 1.16, + "learning_rate": 3.367737483114779e-05, + "loss": 0.9567, + "step": 2100 + }, + { + "epoch": 1.16, + "learning_rate": 3.360943939151351e-05, + "loss": 0.9718, + "step": 2105 + }, + { + "epoch": 1.17, + "learning_rate": 3.354143174560078e-05, + "loss": 0.9626, + "step": 2110 + }, + { + "epoch": 1.17, + "learning_rate": 3.3473352463781105e-05, + "loss": 0.9346, + "step": 2115 + }, + { + "epoch": 1.17, + "learning_rate": 3.340520211702681e-05, + "loss": 0.9208, + "step": 2120 + }, + { + "epoch": 1.18, + "learning_rate": 3.333698127690623e-05, + "loss": 0.9856, + "step": 2125 + }, + { + "epoch": 1.18, + "learning_rate": 3.326869051557891e-05, + "loss": 0.9049, + "step": 2130 + }, + { + "epoch": 1.18, + "learning_rate": 3.320033040579082e-05, + "loss": 0.9222, + "step": 2135 + }, + { + "epoch": 1.18, + "learning_rate": 3.3131901520869565e-05, + "loss": 0.9648, + "step": 2140 + }, + { + "epoch": 1.19, + "learning_rate": 3.306340443471951e-05, + "loss": 0.9538, + "step": 2145 + }, + { + "epoch": 1.19, + "learning_rate": 3.299483972181708e-05, + "loss": 0.9314, + "step": 2150 + }, + { + "epoch": 1.19, + "learning_rate": 3.292620795720583e-05, + "loss": 0.9576, + "step": 2155 + }, + { + "epoch": 1.19, + "learning_rate": 3.285750971649167e-05, + "loss": 0.9499, + "step": 2160 + }, + { + "epoch": 1.2, + "learning_rate": 3.278874557583807e-05, + "loss": 0.9568, + "step": 2165 + }, + { + "epoch": 1.2, + "learning_rate": 3.271991611196117e-05, + "loss": 0.9642, + "step": 2170 + }, + { + "epoch": 1.2, + "learning_rate": 3.265102190212497e-05, + "loss": 0.9526, + "step": 2175 + }, + { + "epoch": 1.21, + "learning_rate": 3.258206352413648e-05, + "loss": 0.933, + "step": 2180 + }, + { + "epoch": 1.21, + "learning_rate": 3.2513041556340887e-05, + "loss": 0.9683, + "step": 2185 + }, + { + "epoch": 1.21, + "learning_rate": 3.244395657761671e-05, + "loss": 0.9155, + "step": 2190 + }, + { + "epoch": 1.21, + "learning_rate": 3.2374809167370924e-05, + "loss": 0.9262, + "step": 2195 + }, + { + "epoch": 1.22, + "learning_rate": 3.230559990553409e-05, + "loss": 0.9778, + "step": 2200 + }, + { + "epoch": 1.22, + "learning_rate": 3.2236329372555544e-05, + "loss": 0.9577, + "step": 2205 + }, + { + "epoch": 1.22, + "learning_rate": 3.2166998149398465e-05, + "loss": 0.9286, + "step": 2210 + }, + { + "epoch": 1.22, + "learning_rate": 3.209760681753505e-05, + "loss": 0.9634, + "step": 2215 + }, + { + "epoch": 1.23, + "learning_rate": 3.2028155958941615e-05, + "loss": 0.9467, + "step": 2220 + }, + { + "epoch": 1.23, + "learning_rate": 3.195864615609373e-05, + "loss": 0.9543, + "step": 2225 + }, + { + "epoch": 1.23, + "learning_rate": 3.1889077991961304e-05, + "loss": 0.9914, + "step": 2230 + }, + { + "epoch": 1.24, + "learning_rate": 3.181945205000373e-05, + "loss": 0.9309, + "step": 2235 + }, + { + "epoch": 1.24, + "learning_rate": 3.1749768914164955e-05, + "loss": 0.9299, + "step": 2240 + }, + { + "epoch": 1.24, + "learning_rate": 3.168002916886864e-05, + "loss": 0.9462, + "step": 2245 + }, + { + "epoch": 1.24, + "learning_rate": 3.1610233399013194e-05, + "loss": 0.948, + "step": 2250 + }, + { + "epoch": 1.25, + "learning_rate": 3.15403821899669e-05, + "loss": 0.9335, + "step": 2255 + }, + { + "epoch": 1.25, + "learning_rate": 3.147047612756302e-05, + "loss": 0.952, + "step": 2260 + }, + { + "epoch": 1.25, + "learning_rate": 3.140051579809484e-05, + "loss": 0.9532, + "step": 2265 + }, + { + "epoch": 1.26, + "learning_rate": 3.133050178831079e-05, + "loss": 0.9853, + "step": 2270 + }, + { + "epoch": 1.26, + "learning_rate": 3.12604346854095e-05, + "loss": 0.8925, + "step": 2275 + }, + { + "epoch": 1.26, + "learning_rate": 3.119031507703491e-05, + "loss": 0.942, + "step": 2280 + }, + { + "epoch": 1.26, + "learning_rate": 3.112014355127129e-05, + "loss": 0.9132, + "step": 2285 + }, + { + "epoch": 1.27, + "learning_rate": 3.104992069663835e-05, + "loss": 0.9335, + "step": 2290 + }, + { + "epoch": 1.27, + "learning_rate": 3.0979647102086273e-05, + "loss": 0.9403, + "step": 2295 + }, + { + "epoch": 1.27, + "learning_rate": 3.090932335699081e-05, + "loss": 0.9246, + "step": 2300 + }, + { + "epoch": 1.27, + "learning_rate": 3.083895005114831e-05, + "loss": 0.912, + "step": 2305 + }, + { + "epoch": 1.28, + "learning_rate": 3.076852777477079e-05, + "loss": 0.9334, + "step": 2310 + }, + { + "epoch": 1.28, + "learning_rate": 3.069805711848096e-05, + "loss": 0.933, + "step": 2315 + }, + { + "epoch": 1.28, + "learning_rate": 3.062753867330729e-05, + "loss": 0.9348, + "step": 2320 + }, + { + "epoch": 1.29, + "learning_rate": 3.055697303067905e-05, + "loss": 0.9997, + "step": 2325 + }, + { + "epoch": 1.29, + "learning_rate": 3.048636078242137e-05, + "loss": 0.9196, + "step": 2330 + }, + { + "epoch": 1.29, + "learning_rate": 3.0415702520750235e-05, + "loss": 0.9735, + "step": 2335 + }, + { + "epoch": 1.29, + "learning_rate": 3.0344998838267525e-05, + "loss": 0.9395, + "step": 2340 + }, + { + "epoch": 1.3, + "learning_rate": 3.0274250327956093e-05, + "loss": 0.9455, + "step": 2345 + }, + { + "epoch": 1.3, + "learning_rate": 3.020345758317474e-05, + "loss": 0.968, + "step": 2350 + }, + { + "epoch": 1.3, + "learning_rate": 3.0132621197653245e-05, + "loss": 0.9403, + "step": 2355 + }, + { + "epoch": 1.3, + "learning_rate": 3.0061741765487418e-05, + "loss": 0.9267, + "step": 2360 + }, + { + "epoch": 1.31, + "learning_rate": 2.9990819881134073e-05, + "loss": 0.9734, + "step": 2365 + }, + { + "epoch": 1.31, + "learning_rate": 2.9919856139406093e-05, + "loss": 0.949, + "step": 2370 + }, + { + "epoch": 1.31, + "learning_rate": 2.9848851135467386e-05, + "loss": 0.9464, + "step": 2375 + }, + { + "epoch": 1.32, + "learning_rate": 2.977780546482794e-05, + "loss": 0.9709, + "step": 2380 + }, + { + "epoch": 1.32, + "learning_rate": 2.9706719723338795e-05, + "loss": 0.9202, + "step": 2385 + }, + { + "epoch": 1.32, + "learning_rate": 2.9635594507187074e-05, + "loss": 0.9625, + "step": 2390 + }, + { + "epoch": 1.32, + "learning_rate": 2.956443041289096e-05, + "loss": 0.9307, + "step": 2395 + }, + { + "epoch": 1.33, + "learning_rate": 2.9493228037294702e-05, + "loss": 0.907, + "step": 2400 + }, + { + "epoch": 1.33, + "learning_rate": 2.9421987977563613e-05, + "loss": 0.9487, + "step": 2405 + }, + { + "epoch": 1.33, + "learning_rate": 2.935071083117907e-05, + "loss": 0.9259, + "step": 2410 + }, + { + "epoch": 1.34, + "learning_rate": 2.9279397195933457e-05, + "loss": 0.9357, + "step": 2415 + }, + { + "epoch": 1.34, + "learning_rate": 2.920804766992521e-05, + "loss": 0.9149, + "step": 2420 + }, + { + "epoch": 1.34, + "learning_rate": 2.9136662851553787e-05, + "loss": 0.9664, + "step": 2425 + }, + { + "epoch": 1.34, + "learning_rate": 2.906524333951461e-05, + "loss": 0.973, + "step": 2430 + }, + { + "epoch": 1.35, + "learning_rate": 2.899378973279409e-05, + "loss": 0.9797, + "step": 2435 + }, + { + "epoch": 1.35, + "learning_rate": 2.892230263066459e-05, + "loss": 0.9604, + "step": 2440 + }, + { + "epoch": 1.35, + "learning_rate": 2.885078263267938e-05, + "loss": 0.9147, + "step": 2445 + }, + { + "epoch": 1.35, + "learning_rate": 2.8779230338667634e-05, + "loss": 0.9827, + "step": 2450 + }, + { + "epoch": 1.36, + "learning_rate": 2.870764634872939e-05, + "loss": 0.9362, + "step": 2455 + }, + { + "epoch": 1.36, + "learning_rate": 2.86360312632305e-05, + "loss": 0.9373, + "step": 2460 + }, + { + "epoch": 1.36, + "learning_rate": 2.8564385682797622e-05, + "loss": 0.955, + "step": 2465 + }, + { + "epoch": 1.37, + "learning_rate": 2.8492710208313177e-05, + "loss": 0.9108, + "step": 2470 + }, + { + "epoch": 1.37, + "learning_rate": 2.8421005440910303e-05, + "loss": 0.9259, + "step": 2475 + }, + { + "epoch": 1.37, + "learning_rate": 2.8349271981967797e-05, + "loss": 0.9333, + "step": 2480 + }, + { + "epoch": 1.37, + "learning_rate": 2.8277510433105102e-05, + "loss": 0.9437, + "step": 2485 + }, + { + "epoch": 1.38, + "learning_rate": 2.820572139617725e-05, + "loss": 0.9189, + "step": 2490 + }, + { + "epoch": 1.38, + "learning_rate": 2.8133905473269802e-05, + "loss": 0.9522, + "step": 2495 + }, + { + "epoch": 1.38, + "learning_rate": 2.8062063266693818e-05, + "loss": 0.9352, + "step": 2500 + }, + { + "epoch": 1.39, + "learning_rate": 2.7990195378980784e-05, + "loss": 0.9232, + "step": 2505 + }, + { + "epoch": 1.39, + "learning_rate": 2.7918302412877583e-05, + "loss": 0.8807, + "step": 2510 + }, + { + "epoch": 1.39, + "learning_rate": 2.7846384971341427e-05, + "loss": 0.9231, + "step": 2515 + }, + { + "epoch": 1.39, + "learning_rate": 2.7774443657534788e-05, + "loss": 0.9253, + "step": 2520 + }, + { + "epoch": 1.4, + "learning_rate": 2.770247907482036e-05, + "loss": 0.9646, + "step": 2525 + }, + { + "epoch": 1.4, + "learning_rate": 2.763049182675599e-05, + "loss": 0.9575, + "step": 2530 + }, + { + "epoch": 1.4, + "learning_rate": 2.7558482517089617e-05, + "loss": 0.9234, + "step": 2535 + }, + { + "epoch": 1.4, + "learning_rate": 2.748645174975421e-05, + "loss": 0.9215, + "step": 2540 + }, + { + "epoch": 1.41, + "learning_rate": 2.74144001288627e-05, + "loss": 0.959, + "step": 2545 + }, + { + "epoch": 1.41, + "learning_rate": 2.7342328258702894e-05, + "loss": 0.9509, + "step": 2550 + }, + { + "epoch": 1.41, + "learning_rate": 2.727023674373246e-05, + "loss": 0.8989, + "step": 2555 + }, + { + "epoch": 1.42, + "learning_rate": 2.7198126188573807e-05, + "loss": 0.9653, + "step": 2560 + }, + { + "epoch": 1.42, + "learning_rate": 2.7125997198009028e-05, + "loss": 0.9046, + "step": 2565 + }, + { + "epoch": 1.42, + "learning_rate": 2.7053850376974848e-05, + "loss": 0.9318, + "step": 2570 + }, + { + "epoch": 1.42, + "learning_rate": 2.6981686330557516e-05, + "loss": 0.9292, + "step": 2575 + }, + { + "epoch": 1.43, + "learning_rate": 2.6909505663987756e-05, + "loss": 0.99, + "step": 2580 + }, + { + "epoch": 1.43, + "learning_rate": 2.6837308982635678e-05, + "loss": 0.9737, + "step": 2585 + }, + { + "epoch": 1.43, + "learning_rate": 2.6765096892005726e-05, + "loss": 0.9649, + "step": 2590 + }, + { + "epoch": 1.43, + "learning_rate": 2.6692869997731545e-05, + "loss": 0.9687, + "step": 2595 + }, + { + "epoch": 1.44, + "learning_rate": 2.6620628905570964e-05, + "loss": 0.9708, + "step": 2600 + }, + { + "epoch": 1.44, + "learning_rate": 2.6548374221400884e-05, + "loss": 0.9498, + "step": 2605 + }, + { + "epoch": 1.44, + "learning_rate": 2.6476106551212188e-05, + "loss": 0.9512, + "step": 2610 + }, + { + "epoch": 1.45, + "learning_rate": 2.6403826501104682e-05, + "loss": 0.964, + "step": 2615 + }, + { + "epoch": 1.45, + "learning_rate": 2.6331534677281998e-05, + "loss": 0.9321, + "step": 2620 + }, + { + "epoch": 1.45, + "learning_rate": 2.6259231686046508e-05, + "loss": 0.9032, + "step": 2625 + }, + { + "epoch": 1.45, + "learning_rate": 2.6186918133794252e-05, + "loss": 0.9543, + "step": 2630 + }, + { + "epoch": 1.46, + "learning_rate": 2.6114594627009847e-05, + "loss": 0.9355, + "step": 2635 + }, + { + "epoch": 1.46, + "learning_rate": 2.604226177226137e-05, + "loss": 0.9111, + "step": 2640 + }, + { + "epoch": 1.46, + "learning_rate": 2.596992017619534e-05, + "loss": 0.9896, + "step": 2645 + }, + { + "epoch": 1.47, + "learning_rate": 2.589757044553155e-05, + "loss": 0.9715, + "step": 2650 + }, + { + "epoch": 1.47, + "learning_rate": 2.5825213187058045e-05, + "loss": 0.8911, + "step": 2655 + }, + { + "epoch": 1.47, + "learning_rate": 2.5752849007625986e-05, + "loss": 0.9446, + "step": 2660 + }, + { + "epoch": 1.47, + "learning_rate": 2.568047851414459e-05, + "loss": 0.9453, + "step": 2665 + }, + { + "epoch": 1.48, + "learning_rate": 2.5608102313576027e-05, + "loss": 0.9244, + "step": 2670 + }, + { + "epoch": 1.48, + "learning_rate": 2.553572101293033e-05, + "loss": 0.9478, + "step": 2675 + }, + { + "epoch": 1.48, + "learning_rate": 2.546333521926031e-05, + "loss": 0.9708, + "step": 2680 + }, + { + "epoch": 1.48, + "learning_rate": 2.5390945539656445e-05, + "loss": 0.9266, + "step": 2685 + }, + { + "epoch": 1.49, + "learning_rate": 2.5318552581241822e-05, + "loss": 0.9199, + "step": 2690 + }, + { + "epoch": 1.49, + "learning_rate": 2.524615695116702e-05, + "loss": 0.9666, + "step": 2695 + }, + { + "epoch": 1.49, + "learning_rate": 2.5173759256605027e-05, + "loss": 0.9247, + "step": 2700 + }, + { + "epoch": 1.5, + "learning_rate": 2.510136010474614e-05, + "loss": 0.9534, + "step": 2705 + }, + { + "epoch": 1.5, + "learning_rate": 2.5028960102792887e-05, + "loss": 0.9502, + "step": 2710 + }, + { + "epoch": 1.5, + "learning_rate": 2.4971039897207112e-05, + "loss": 0.9331, + "step": 2715 + }, + { + "epoch": 1.5, + "learning_rate": 2.4898639895253865e-05, + "loss": 0.9806, + "step": 2720 + }, + { + "epoch": 1.51, + "learning_rate": 2.4826240743394982e-05, + "loss": 0.9525, + "step": 2725 + }, + { + "epoch": 1.51, + "learning_rate": 2.4753843048832985e-05, + "loss": 0.937, + "step": 2730 + }, + { + "epoch": 1.51, + "learning_rate": 2.4681447418758187e-05, + "loss": 0.935, + "step": 2735 + }, + { + "epoch": 1.52, + "learning_rate": 2.460905446034356e-05, + "loss": 0.9662, + "step": 2740 + }, + { + "epoch": 1.52, + "learning_rate": 2.45366647807397e-05, + "loss": 0.9249, + "step": 2745 + }, + { + "epoch": 1.52, + "learning_rate": 2.446427898706967e-05, + "loss": 0.9673, + "step": 2750 + }, + { + "epoch": 1.52, + "learning_rate": 2.439189768642398e-05, + "loss": 0.9316, + "step": 2755 + }, + { + "epoch": 1.53, + "learning_rate": 2.431952148585541e-05, + "loss": 0.9297, + "step": 2760 + }, + { + "epoch": 1.53, + "learning_rate": 2.424715099237402e-05, + "loss": 0.9278, + "step": 2765 + }, + { + "epoch": 1.53, + "learning_rate": 2.4174786812941968e-05, + "loss": 0.8954, + "step": 2770 + }, + { + "epoch": 1.53, + "learning_rate": 2.4102429554468456e-05, + "loss": 0.9586, + "step": 2775 + }, + { + "epoch": 1.54, + "learning_rate": 2.4030079823804673e-05, + "loss": 0.9119, + "step": 2780 + }, + { + "epoch": 1.54, + "learning_rate": 2.395773822773863e-05, + "loss": 0.8949, + "step": 2785 + }, + { + "epoch": 1.54, + "learning_rate": 2.3885405372990166e-05, + "loss": 0.9506, + "step": 2790 + }, + { + "epoch": 1.55, + "learning_rate": 2.3813081866205754e-05, + "loss": 0.9087, + "step": 2795 + }, + { + "epoch": 1.55, + "learning_rate": 2.3740768313953494e-05, + "loss": 0.923, + "step": 2800 + }, + { + "epoch": 1.55, + "learning_rate": 2.3668465322718004e-05, + "loss": 1.0212, + "step": 2805 + }, + { + "epoch": 1.55, + "learning_rate": 2.359617349889532e-05, + "loss": 0.9182, + "step": 2810 + }, + { + "epoch": 1.56, + "learning_rate": 2.3523893448787818e-05, + "loss": 0.9316, + "step": 2815 + }, + { + "epoch": 1.56, + "learning_rate": 2.3451625778599122e-05, + "loss": 0.94, + "step": 2820 + }, + { + "epoch": 1.56, + "learning_rate": 2.3379371094429038e-05, + "loss": 0.9181, + "step": 2825 + }, + { + "epoch": 1.56, + "learning_rate": 2.3307130002268457e-05, + "loss": 0.937, + "step": 2830 + }, + { + "epoch": 1.57, + "learning_rate": 2.3234903107994287e-05, + "loss": 0.9026, + "step": 2835 + }, + { + "epoch": 1.57, + "learning_rate": 2.3162691017364317e-05, + "loss": 0.954, + "step": 2840 + }, + { + "epoch": 1.57, + "learning_rate": 2.3090494336012253e-05, + "loss": 0.9661, + "step": 2845 + }, + { + "epoch": 1.58, + "learning_rate": 2.3018313669442483e-05, + "loss": 0.9127, + "step": 2850 + }, + { + "epoch": 1.58, + "learning_rate": 2.2946149623025158e-05, + "loss": 0.9317, + "step": 2855 + }, + { + "epoch": 1.58, + "learning_rate": 2.2874002801990978e-05, + "loss": 0.9856, + "step": 2860 + }, + { + "epoch": 1.58, + "learning_rate": 2.28018738114262e-05, + "loss": 1.0021, + "step": 2865 + }, + { + "epoch": 1.59, + "learning_rate": 2.272976325626755e-05, + "loss": 0.9655, + "step": 2870 + }, + { + "epoch": 1.59, + "learning_rate": 2.265767174129711e-05, + "loss": 0.9619, + "step": 2875 + }, + { + "epoch": 1.59, + "learning_rate": 2.2585599871137313e-05, + "loss": 0.9383, + "step": 2880 + }, + { + "epoch": 1.6, + "learning_rate": 2.251354825024579e-05, + "loss": 0.9332, + "step": 2885 + }, + { + "epoch": 1.6, + "learning_rate": 2.244151748291039e-05, + "loss": 0.9544, + "step": 2890 + }, + { + "epoch": 1.6, + "learning_rate": 2.236950817324401e-05, + "loss": 0.9343, + "step": 2895 + }, + { + "epoch": 1.6, + "learning_rate": 2.2297520925179647e-05, + "loss": 0.9189, + "step": 2900 + }, + { + "epoch": 1.61, + "learning_rate": 2.222555634246521e-05, + "loss": 0.9341, + "step": 2905 + }, + { + "epoch": 1.61, + "learning_rate": 2.215361502865858e-05, + "loss": 0.9567, + "step": 2910 + }, + { + "epoch": 1.61, + "learning_rate": 2.2081697587122423e-05, + "loss": 0.9047, + "step": 2915 + }, + { + "epoch": 1.61, + "learning_rate": 2.200980462101922e-05, + "loss": 0.9126, + "step": 2920 + }, + { + "epoch": 1.62, + "learning_rate": 2.1937936733306195e-05, + "loss": 0.9523, + "step": 2925 + }, + { + "epoch": 1.62, + "learning_rate": 2.18660945267302e-05, + "loss": 0.8802, + "step": 2930 + }, + { + "epoch": 1.62, + "learning_rate": 2.179427860382276e-05, + "loss": 0.9197, + "step": 2935 + }, + { + "epoch": 1.63, + "learning_rate": 2.1722489566894903e-05, + "loss": 0.9255, + "step": 2940 + }, + { + "epoch": 1.63, + "learning_rate": 2.1650728018032206e-05, + "loss": 0.8921, + "step": 2945 + }, + { + "epoch": 1.63, + "learning_rate": 2.15789945590897e-05, + "loss": 0.9607, + "step": 2950 + }, + { + "epoch": 1.63, + "learning_rate": 2.150728979168683e-05, + "loss": 0.9755, + "step": 2955 + }, + { + "epoch": 1.64, + "learning_rate": 2.1435614317202384e-05, + "loss": 0.9943, + "step": 2960 + }, + { + "epoch": 1.64, + "learning_rate": 2.1363968736769508e-05, + "loss": 0.9463, + "step": 2965 + }, + { + "epoch": 1.64, + "learning_rate": 2.1292353651270617e-05, + "loss": 0.9107, + "step": 2970 + }, + { + "epoch": 1.65, + "learning_rate": 2.1220769661332365e-05, + "loss": 0.9311, + "step": 2975 + }, + { + "epoch": 1.65, + "learning_rate": 2.1149217367320622e-05, + "loss": 0.9459, + "step": 2980 + }, + { + "epoch": 1.65, + "learning_rate": 2.107769736933541e-05, + "loss": 0.9439, + "step": 2985 + }, + { + "epoch": 1.65, + "learning_rate": 2.100621026720591e-05, + "loss": 0.9719, + "step": 2990 + }, + { + "epoch": 1.66, + "learning_rate": 2.093475666048539e-05, + "loss": 0.9569, + "step": 2995 + }, + { + "epoch": 1.66, + "learning_rate": 2.0863337148446222e-05, + "loss": 0.9308, + "step": 3000 + } + ], + "max_steps": 5424, + "num_train_epochs": 3, + "total_flos": 9.748906248870298e+17, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-3000/training_args.bin b/checkpoint-3000/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..77433459daca6603950afc463ff989e817f81ac3 --- /dev/null +++ b/checkpoint-3000/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0f6a8853599a52b1f15984d75ce4b6434c335fdc67d4509d0b2ca0d8c613590a +size 3362 diff --git a/checkpoint-3100/README.md b/checkpoint-3100/README.md new file mode 100644 index 0000000000000000000000000000000000000000..53c0fe6b436905ad7a2c060e98fe1ebdeac1a2fe --- /dev/null +++ b/checkpoint-3100/README.md @@ -0,0 +1,15 @@ +--- +library_name: peft +--- +## Training procedure + + +The following `bitsandbytes` quantization config was used during training: +- load_in_8bit: True +- llm_int8_threshold: 6.0 +- llm_int8_skip_modules: None +- llm_int8_enable_fp32_cpu_offload: False +### Framework versions + + +- PEFT 0.5.0 diff --git a/checkpoint-3100/adapter_config.json b/checkpoint-3100/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..514ae99ac7471cc26016f7575e44dd8304f29b07 --- /dev/null +++ b/checkpoint-3100/adapter_config.json @@ -0,0 +1,21 @@ +{ + "auto_mapping": null, + "base_model_name_or_path": "meta-llama/Llama-2-7b-chat-hf", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "q_proj", + "v_proj" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-3100/adapter_model.bin b/checkpoint-3100/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..fc2c495af4e4c9a7f80586cef1c6020537c6c256 --- /dev/null +++ b/checkpoint-3100/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ff740900412b00d821166b4f5a1ea9442baddbd440e7a64206218d2682310953 +size 16821197 diff --git a/checkpoint-3100/finetuning_args.json b/checkpoint-3100/finetuning_args.json new file mode 100644 index 0000000000000000000000000000000000000000..d01efc206b59c6f88548e8f3940579f2ed2af33b --- /dev/null +++ b/checkpoint-3100/finetuning_args.json @@ -0,0 +1,16 @@ +{ + "dpo_beta": 0.1, + "finetuning_type": "lora", + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "lora_rank": 8, + "lora_target": [ + "q_proj", + "v_proj" + ], + "name_module_trainable": "mlp", + "num_hidden_layers": 32, + "num_layer_trainable": 3, + "ppo_score_norm": false, + "resume_lora_training": true +} diff --git a/checkpoint-3100/optimizer.pt b/checkpoint-3100/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..19e9d2cdfdfe7248d0a3e21627277740bab231aa --- /dev/null +++ b/checkpoint-3100/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5074825f32313125ddd5d48fb1dc5437fc6c94b46d8f26a2176b127e67b0ffa3 +size 33629893 diff --git a/checkpoint-3100/rng_state.pth b/checkpoint-3100/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..66ecc534c4edbad283360b9c47c456c6b5f486cd --- /dev/null +++ b/checkpoint-3100/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1f69ae2d4e0e7e7c5c0f11d912a55cf35966870914bc97c786b4d90eb5e7bed7 +size 14575 diff --git a/checkpoint-3100/scaler.pt b/checkpoint-3100/scaler.pt new file mode 100644 index 0000000000000000000000000000000000000000..b64b2098fb34983cc65001f535fa01d8014b243a --- /dev/null +++ b/checkpoint-3100/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:89281887f63e4bab3e4a2d66122be62dc9a6fca78e0926ff2e3f3cab8d2b4d0e +size 557 diff --git a/checkpoint-3100/scheduler.pt b/checkpoint-3100/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..8b6e15927ad0e856bfc214f676e7216bed306e42 --- /dev/null +++ b/checkpoint-3100/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:811f870de3cef23333be7582cbb9c7044ce76633fd4314d55ee5c0395ed9c896 +size 627 diff --git a/checkpoint-3100/trainer_state.json b/checkpoint-3100/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..d81b572095de5e9b9ddfcc365a7ede03c4ce72d1 --- /dev/null +++ b/checkpoint-3100/trainer_state.json @@ -0,0 +1,3736 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.7141277301631186, + "global_step": 3100, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.9999895164082156e-05, + "loss": 1.6349, + "step": 5 + }, + { + "epoch": 0.01, + "learning_rate": 4.999958065720787e-05, + "loss": 1.6199, + "step": 10 + }, + { + "epoch": 0.01, + "learning_rate": 4.999905648201487e-05, + "loss": 1.4834, + "step": 15 + }, + { + "epoch": 0.01, + "learning_rate": 4.999832264289934e-05, + "loss": 1.3882, + "step": 20 + }, + { + "epoch": 0.01, + "learning_rate": 4.999737914601591e-05, + "loss": 1.3679, + "step": 25 + }, + { + "epoch": 0.02, + "learning_rate": 4.999622599927756e-05, + "loss": 1.2396, + "step": 30 + }, + { + "epoch": 0.02, + "learning_rate": 4.999486321235559e-05, + "loss": 1.321, + "step": 35 + }, + { + "epoch": 0.02, + "learning_rate": 4.9993290796679516e-05, + "loss": 1.2874, + "step": 40 + }, + { + "epoch": 0.02, + "learning_rate": 4.999150876543699e-05, + "loss": 1.2607, + "step": 45 + }, + { + "epoch": 0.03, + "learning_rate": 4.9989517133573694e-05, + "loss": 1.2454, + "step": 50 + }, + { + "epoch": 0.03, + "learning_rate": 4.9987315917793174e-05, + "loss": 1.2799, + "step": 55 + }, + { + "epoch": 0.03, + "learning_rate": 4.998490513655676e-05, + "loss": 1.2575, + "step": 60 + }, + { + "epoch": 0.04, + "learning_rate": 4.998228481008337e-05, + "loss": 1.2404, + "step": 65 + }, + { + "epoch": 0.04, + "learning_rate": 4.997945496034934e-05, + "loss": 1.2219, + "step": 70 + }, + { + "epoch": 0.04, + "learning_rate": 4.9976415611088267e-05, + "loss": 1.2241, + "step": 75 + }, + { + "epoch": 0.04, + "learning_rate": 4.997316678779079e-05, + "loss": 1.1716, + "step": 80 + }, + { + "epoch": 0.05, + "learning_rate": 4.996970851770438e-05, + "loss": 1.1883, + "step": 85 + }, + { + "epoch": 0.05, + "learning_rate": 4.9966040829833115e-05, + "loss": 1.205, + "step": 90 + }, + { + "epoch": 0.05, + "learning_rate": 4.9962163754937426e-05, + "loss": 1.1246, + "step": 95 + }, + { + "epoch": 0.06, + "learning_rate": 4.995807732553384e-05, + "loss": 1.1636, + "step": 100 + }, + { + "epoch": 0.06, + "learning_rate": 4.9953781575894723e-05, + "loss": 1.158, + "step": 105 + }, + { + "epoch": 0.06, + "learning_rate": 4.9949276542048e-05, + "loss": 1.1477, + "step": 110 + }, + { + "epoch": 0.06, + "learning_rate": 4.9944562261776805e-05, + "loss": 1.1678, + "step": 115 + }, + { + "epoch": 0.07, + "learning_rate": 4.9939638774619216e-05, + "loss": 1.1501, + "step": 120 + }, + { + "epoch": 0.07, + "learning_rate": 4.99345061218679e-05, + "loss": 1.1955, + "step": 125 + }, + { + "epoch": 0.07, + "learning_rate": 4.9929164346569756e-05, + "loss": 1.1724, + "step": 130 + }, + { + "epoch": 0.07, + "learning_rate": 4.9923613493525576e-05, + "loss": 1.177, + "step": 135 + }, + { + "epoch": 0.08, + "learning_rate": 4.991785360928968e-05, + "loss": 1.1418, + "step": 140 + }, + { + "epoch": 0.08, + "learning_rate": 4.991188474216947e-05, + "loss": 1.1898, + "step": 145 + }, + { + "epoch": 0.08, + "learning_rate": 4.9905706942225094e-05, + "loss": 1.1479, + "step": 150 + }, + { + "epoch": 0.09, + "learning_rate": 4.9899320261268966e-05, + "loss": 1.1356, + "step": 155 + }, + { + "epoch": 0.09, + "learning_rate": 4.989272475286537e-05, + "loss": 1.1397, + "step": 160 + }, + { + "epoch": 0.09, + "learning_rate": 4.9885920472330004e-05, + "loss": 1.1215, + "step": 165 + }, + { + "epoch": 0.09, + "learning_rate": 4.9878907476729516e-05, + "loss": 1.167, + "step": 170 + }, + { + "epoch": 0.1, + "learning_rate": 4.9871685824881e-05, + "loss": 1.1219, + "step": 175 + }, + { + "epoch": 0.1, + "learning_rate": 4.9864255577351534e-05, + "loss": 1.0835, + "step": 180 + }, + { + "epoch": 0.1, + "learning_rate": 4.985661679645769e-05, + "loss": 1.0721, + "step": 185 + }, + { + "epoch": 0.11, + "learning_rate": 4.9848769546264915e-05, + "loss": 1.0692, + "step": 190 + }, + { + "epoch": 0.11, + "learning_rate": 4.9840713892587146e-05, + "loss": 1.0488, + "step": 195 + }, + { + "epoch": 0.11, + "learning_rate": 4.983244990298609e-05, + "loss": 1.1285, + "step": 200 + }, + { + "epoch": 0.11, + "learning_rate": 4.982397764677081e-05, + "loss": 1.0832, + "step": 205 + }, + { + "epoch": 0.12, + "learning_rate": 4.981529719499704e-05, + "loss": 1.0652, + "step": 210 + }, + { + "epoch": 0.12, + "learning_rate": 4.980640862046663e-05, + "loss": 1.1043, + "step": 215 + }, + { + "epoch": 0.12, + "learning_rate": 4.979731199772693e-05, + "loss": 1.112, + "step": 220 + }, + { + "epoch": 0.12, + "learning_rate": 4.9788007403070146e-05, + "loss": 1.1029, + "step": 225 + }, + { + "epoch": 0.13, + "learning_rate": 4.977849491453277e-05, + "loss": 1.0869, + "step": 230 + }, + { + "epoch": 0.13, + "learning_rate": 4.976877461189481e-05, + "loss": 1.0843, + "step": 235 + }, + { + "epoch": 0.13, + "learning_rate": 4.975884657667922e-05, + "loss": 1.0789, + "step": 240 + }, + { + "epoch": 0.14, + "learning_rate": 4.974871089215118e-05, + "loss": 1.0449, + "step": 245 + }, + { + "epoch": 0.14, + "learning_rate": 4.9738367643317405e-05, + "loss": 1.1053, + "step": 250 + }, + { + "epoch": 0.14, + "learning_rate": 4.9727816916925395e-05, + "loss": 1.0651, + "step": 255 + }, + { + "epoch": 0.14, + "learning_rate": 4.971705880146276e-05, + "loss": 1.0828, + "step": 260 + }, + { + "epoch": 0.15, + "learning_rate": 4.970609338715646e-05, + "loss": 1.0932, + "step": 265 + }, + { + "epoch": 0.15, + "learning_rate": 4.969492076597203e-05, + "loss": 1.0648, + "step": 270 + }, + { + "epoch": 0.15, + "learning_rate": 4.968354103161283e-05, + "loss": 1.0948, + "step": 275 + }, + { + "epoch": 0.15, + "learning_rate": 4.967195427951926e-05, + "loss": 1.0721, + "step": 280 + }, + { + "epoch": 0.16, + "learning_rate": 4.9660160606867936e-05, + "loss": 1.124, + "step": 285 + }, + { + "epoch": 0.16, + "learning_rate": 4.9648160112570896e-05, + "loss": 1.0963, + "step": 290 + }, + { + "epoch": 0.16, + "learning_rate": 4.9635952897274773e-05, + "loss": 1.1078, + "step": 295 + }, + { + "epoch": 0.17, + "learning_rate": 4.9623539063359925e-05, + "loss": 1.1059, + "step": 300 + }, + { + "epoch": 0.17, + "learning_rate": 4.961091871493962e-05, + "loss": 1.1032, + "step": 305 + }, + { + "epoch": 0.17, + "learning_rate": 4.959809195785912e-05, + "loss": 1.0595, + "step": 310 + }, + { + "epoch": 0.17, + "learning_rate": 4.958505889969481e-05, + "loss": 1.1096, + "step": 315 + }, + { + "epoch": 0.18, + "learning_rate": 4.957181964975329e-05, + "loss": 1.0589, + "step": 320 + }, + { + "epoch": 0.18, + "learning_rate": 4.955837431907049e-05, + "loss": 1.0608, + "step": 325 + }, + { + "epoch": 0.18, + "learning_rate": 4.954472302041069e-05, + "loss": 1.0819, + "step": 330 + }, + { + "epoch": 0.19, + "learning_rate": 4.9530865868265605e-05, + "loss": 1.0759, + "step": 335 + }, + { + "epoch": 0.19, + "learning_rate": 4.951680297885342e-05, + "loss": 1.0515, + "step": 340 + }, + { + "epoch": 0.19, + "learning_rate": 4.950253447011779e-05, + "loss": 1.0371, + "step": 345 + }, + { + "epoch": 0.19, + "learning_rate": 4.948806046172691e-05, + "loss": 1.0619, + "step": 350 + }, + { + "epoch": 0.2, + "learning_rate": 4.947338107507245e-05, + "loss": 1.0757, + "step": 355 + }, + { + "epoch": 0.2, + "learning_rate": 4.945849643326857e-05, + "loss": 1.0686, + "step": 360 + }, + { + "epoch": 0.2, + "learning_rate": 4.9443406661150874e-05, + "loss": 1.0809, + "step": 365 + }, + { + "epoch": 0.2, + "learning_rate": 4.942811188527537e-05, + "loss": 1.0704, + "step": 370 + }, + { + "epoch": 0.21, + "learning_rate": 4.941261223391742e-05, + "loss": 1.0655, + "step": 375 + }, + { + "epoch": 0.21, + "learning_rate": 4.939690783707063e-05, + "loss": 1.1182, + "step": 380 + }, + { + "epoch": 0.21, + "learning_rate": 4.938099882644578e-05, + "loss": 1.081, + "step": 385 + }, + { + "epoch": 0.22, + "learning_rate": 4.9364885335469734e-05, + "loss": 1.0792, + "step": 390 + }, + { + "epoch": 0.22, + "learning_rate": 4.93485674992843e-05, + "loss": 1.0244, + "step": 395 + }, + { + "epoch": 0.22, + "learning_rate": 4.933204545474511e-05, + "loss": 1.0531, + "step": 400 + }, + { + "epoch": 0.22, + "learning_rate": 4.9315319340420465e-05, + "loss": 1.048, + "step": 405 + }, + { + "epoch": 0.23, + "learning_rate": 4.929838929659015e-05, + "loss": 1.04, + "step": 410 + }, + { + "epoch": 0.23, + "learning_rate": 4.9281255465244314e-05, + "loss": 1.0686, + "step": 415 + }, + { + "epoch": 0.23, + "learning_rate": 4.926391799008223e-05, + "loss": 1.0933, + "step": 420 + }, + { + "epoch": 0.24, + "learning_rate": 4.924637701651111e-05, + "loss": 1.0313, + "step": 425 + }, + { + "epoch": 0.24, + "learning_rate": 4.9228632691644874e-05, + "loss": 1.0811, + "step": 430 + }, + { + "epoch": 0.24, + "learning_rate": 4.921068516430293e-05, + "loss": 1.0617, + "step": 435 + }, + { + "epoch": 0.24, + "learning_rate": 4.919253458500892e-05, + "loss": 1.0482, + "step": 440 + }, + { + "epoch": 0.25, + "learning_rate": 4.9174181105989445e-05, + "loss": 1.0681, + "step": 445 + }, + { + "epoch": 0.25, + "learning_rate": 4.9155624881172834e-05, + "loss": 1.0476, + "step": 450 + }, + { + "epoch": 0.25, + "learning_rate": 4.913686606618777e-05, + "loss": 1.0463, + "step": 455 + }, + { + "epoch": 0.25, + "learning_rate": 4.911790481836208e-05, + "loss": 1.0213, + "step": 460 + }, + { + "epoch": 0.26, + "learning_rate": 4.909874129672133e-05, + "loss": 0.9855, + "step": 465 + }, + { + "epoch": 0.26, + "learning_rate": 4.907937566198757e-05, + "loss": 1.045, + "step": 470 + }, + { + "epoch": 0.26, + "learning_rate": 4.9059808076577914e-05, + "loss": 1.0442, + "step": 475 + }, + { + "epoch": 0.27, + "learning_rate": 4.904003870460323e-05, + "loss": 1.0354, + "step": 480 + }, + { + "epoch": 0.27, + "learning_rate": 4.9020067711866735e-05, + "loss": 1.0331, + "step": 485 + }, + { + "epoch": 0.27, + "learning_rate": 4.899989526586261e-05, + "loss": 1.0585, + "step": 490 + }, + { + "epoch": 0.27, + "learning_rate": 4.8979521535774636e-05, + "loss": 1.0223, + "step": 495 + }, + { + "epoch": 0.28, + "learning_rate": 4.895894669247468e-05, + "loss": 1.0118, + "step": 500 + }, + { + "epoch": 0.28, + "learning_rate": 4.8938170908521356e-05, + "loss": 1.0508, + "step": 505 + }, + { + "epoch": 0.28, + "learning_rate": 4.8917194358158534e-05, + "loss": 1.0656, + "step": 510 + }, + { + "epoch": 0.28, + "learning_rate": 4.8896017217313886e-05, + "loss": 1.0655, + "step": 515 + }, + { + "epoch": 0.29, + "learning_rate": 4.887463966359741e-05, + "loss": 1.0833, + "step": 520 + }, + { + "epoch": 0.29, + "learning_rate": 4.8853061876299956e-05, + "loss": 1.068, + "step": 525 + }, + { + "epoch": 0.29, + "learning_rate": 4.8831284036391684e-05, + "loss": 1.0487, + "step": 530 + }, + { + "epoch": 0.3, + "learning_rate": 4.880930632652058e-05, + "loss": 1.059, + "step": 535 + }, + { + "epoch": 0.3, + "learning_rate": 4.878712893101092e-05, + "loss": 1.0408, + "step": 540 + }, + { + "epoch": 0.3, + "learning_rate": 4.876475203586171e-05, + "loss": 0.9976, + "step": 545 + }, + { + "epoch": 0.3, + "learning_rate": 4.874217582874514e-05, + "loss": 1.0358, + "step": 550 + }, + { + "epoch": 0.31, + "learning_rate": 4.8719400499005e-05, + "loss": 1.0073, + "step": 555 + }, + { + "epoch": 0.31, + "learning_rate": 4.869642623765509e-05, + "loss": 1.0742, + "step": 560 + }, + { + "epoch": 0.31, + "learning_rate": 4.867325323737765e-05, + "loss": 1.0463, + "step": 565 + }, + { + "epoch": 0.32, + "learning_rate": 4.864988169252168e-05, + "loss": 1.0242, + "step": 570 + }, + { + "epoch": 0.32, + "learning_rate": 4.8626311799101375e-05, + "loss": 1.0773, + "step": 575 + }, + { + "epoch": 0.32, + "learning_rate": 4.860254375479446e-05, + "loss": 1.0371, + "step": 580 + }, + { + "epoch": 0.32, + "learning_rate": 4.8578577758940504e-05, + "loss": 1.0399, + "step": 585 + }, + { + "epoch": 0.33, + "learning_rate": 4.855441401253928e-05, + "loss": 1.0411, + "step": 590 + }, + { + "epoch": 0.33, + "learning_rate": 4.8530052718249076e-05, + "loss": 1.0476, + "step": 595 + }, + { + "epoch": 0.33, + "learning_rate": 4.850549408038498e-05, + "loss": 1.0075, + "step": 600 + }, + { + "epoch": 0.33, + "learning_rate": 4.848073830491717e-05, + "loss": 1.0045, + "step": 605 + }, + { + "epoch": 0.34, + "learning_rate": 4.845578559946923e-05, + "loss": 1.0368, + "step": 610 + }, + { + "epoch": 0.34, + "learning_rate": 4.8430636173316306e-05, + "loss": 1.0156, + "step": 615 + }, + { + "epoch": 0.34, + "learning_rate": 4.840529023738348e-05, + "loss": 1.0334, + "step": 620 + }, + { + "epoch": 0.35, + "learning_rate": 4.837974800424389e-05, + "loss": 0.9994, + "step": 625 + }, + { + "epoch": 0.35, + "learning_rate": 4.8354009688117026e-05, + "loss": 1.0467, + "step": 630 + }, + { + "epoch": 0.35, + "learning_rate": 4.8328075504866874e-05, + "loss": 0.9968, + "step": 635 + }, + { + "epoch": 0.35, + "learning_rate": 4.8301945672000164e-05, + "loss": 1.041, + "step": 640 + }, + { + "epoch": 0.36, + "learning_rate": 4.8275620408664487e-05, + "loss": 0.9811, + "step": 645 + }, + { + "epoch": 0.36, + "learning_rate": 4.8249099935646494e-05, + "loss": 1.0235, + "step": 650 + }, + { + "epoch": 0.36, + "learning_rate": 4.822238447537003e-05, + "loss": 1.0152, + "step": 655 + }, + { + "epoch": 0.36, + "learning_rate": 4.819547425189429e-05, + "loss": 1.038, + "step": 660 + }, + { + "epoch": 0.37, + "learning_rate": 4.81683694909119e-05, + "loss": 1.0387, + "step": 665 + }, + { + "epoch": 0.37, + "learning_rate": 4.814107041974707e-05, + "loss": 1.019, + "step": 670 + }, + { + "epoch": 0.37, + "learning_rate": 4.811357726735366e-05, + "loss": 1.0403, + "step": 675 + }, + { + "epoch": 0.38, + "learning_rate": 4.808589026431324e-05, + "loss": 1.0828, + "step": 680 + }, + { + "epoch": 0.38, + "learning_rate": 4.805800964283322e-05, + "loss": 1.0128, + "step": 685 + }, + { + "epoch": 0.38, + "learning_rate": 4.802993563674483e-05, + "loss": 1.0502, + "step": 690 + }, + { + "epoch": 0.38, + "learning_rate": 4.80016684815012e-05, + "loss": 1.0169, + "step": 695 + }, + { + "epoch": 0.39, + "learning_rate": 4.7973208414175406e-05, + "loss": 1.0316, + "step": 700 + }, + { + "epoch": 0.39, + "learning_rate": 4.794455567345842e-05, + "loss": 1.0263, + "step": 705 + }, + { + "epoch": 0.39, + "learning_rate": 4.791571049965714e-05, + "loss": 1.0801, + "step": 710 + }, + { + "epoch": 0.4, + "learning_rate": 4.7886673134692404e-05, + "loss": 1.0575, + "step": 715 + }, + { + "epoch": 0.4, + "learning_rate": 4.7857443822096905e-05, + "loss": 0.9767, + "step": 720 + }, + { + "epoch": 0.4, + "learning_rate": 4.782802280701319e-05, + "loss": 0.9666, + "step": 725 + }, + { + "epoch": 0.4, + "learning_rate": 4.77984103361916e-05, + "loss": 0.9983, + "step": 730 + }, + { + "epoch": 0.41, + "learning_rate": 4.776860665798816e-05, + "loss": 1.0219, + "step": 735 + }, + { + "epoch": 0.41, + "learning_rate": 4.773861202236257e-05, + "loss": 0.9963, + "step": 740 + }, + { + "epoch": 0.41, + "learning_rate": 4.770842668087602e-05, + "loss": 1.0219, + "step": 745 + }, + { + "epoch": 0.41, + "learning_rate": 4.767805088668916e-05, + "loss": 1.0029, + "step": 750 + }, + { + "epoch": 0.42, + "learning_rate": 4.7647484894559936e-05, + "loss": 1.0077, + "step": 755 + }, + { + "epoch": 0.42, + "learning_rate": 4.7616728960841444e-05, + "loss": 0.9908, + "step": 760 + }, + { + "epoch": 0.42, + "learning_rate": 4.758578334347981e-05, + "loss": 1.0092, + "step": 765 + }, + { + "epoch": 0.43, + "learning_rate": 4.7554648302012015e-05, + "loss": 0.9874, + "step": 770 + }, + { + "epoch": 0.43, + "learning_rate": 4.7523324097563706e-05, + "loss": 1.0185, + "step": 775 + }, + { + "epoch": 0.43, + "learning_rate": 4.749181099284703e-05, + "loss": 1.0533, + "step": 780 + }, + { + "epoch": 0.43, + "learning_rate": 4.746010925215839e-05, + "loss": 1.0083, + "step": 785 + }, + { + "epoch": 0.44, + "learning_rate": 4.74282191413763e-05, + "loss": 1.0191, + "step": 790 + }, + { + "epoch": 0.44, + "learning_rate": 4.7396140927959045e-05, + "loss": 0.9786, + "step": 795 + }, + { + "epoch": 0.44, + "learning_rate": 4.7363874880942574e-05, + "loss": 1.0197, + "step": 800 + }, + { + "epoch": 0.45, + "learning_rate": 4.733142127093813e-05, + "loss": 1.0471, + "step": 805 + }, + { + "epoch": 0.45, + "learning_rate": 4.7298780370130014e-05, + "loss": 1.0134, + "step": 810 + }, + { + "epoch": 0.45, + "learning_rate": 4.726595245227336e-05, + "loss": 0.9801, + "step": 815 + }, + { + "epoch": 0.45, + "learning_rate": 4.723293779269173e-05, + "loss": 0.9944, + "step": 820 + }, + { + "epoch": 0.46, + "learning_rate": 4.7199736668274924e-05, + "loss": 1.0358, + "step": 825 + }, + { + "epoch": 0.46, + "learning_rate": 4.716634935747655e-05, + "loss": 0.9907, + "step": 830 + }, + { + "epoch": 0.46, + "learning_rate": 4.713277614031177e-05, + "loss": 1.0698, + "step": 835 + }, + { + "epoch": 0.46, + "learning_rate": 4.70990172983549e-05, + "loss": 1.0466, + "step": 840 + }, + { + "epoch": 0.47, + "learning_rate": 4.706507311473707e-05, + "loss": 0.9489, + "step": 845 + }, + { + "epoch": 0.47, + "learning_rate": 4.703094387414385e-05, + "loss": 0.9936, + "step": 850 + }, + { + "epoch": 0.47, + "learning_rate": 4.699662986281288e-05, + "loss": 0.9672, + "step": 855 + }, + { + "epoch": 0.48, + "learning_rate": 4.696213136853141e-05, + "loss": 1.0164, + "step": 860 + }, + { + "epoch": 0.48, + "learning_rate": 4.6927448680633954e-05, + "loss": 0.9817, + "step": 865 + }, + { + "epoch": 0.48, + "learning_rate": 4.689258208999983e-05, + "loss": 1.028, + "step": 870 + }, + { + "epoch": 0.48, + "learning_rate": 4.6857531889050716e-05, + "loss": 1.0239, + "step": 875 + }, + { + "epoch": 0.49, + "learning_rate": 4.682229837174821e-05, + "loss": 1.0378, + "step": 880 + }, + { + "epoch": 0.49, + "learning_rate": 4.678688183359135e-05, + "loss": 0.9908, + "step": 885 + }, + { + "epoch": 0.49, + "learning_rate": 4.675128257161418e-05, + "loss": 0.9384, + "step": 890 + }, + { + "epoch": 0.49, + "learning_rate": 4.671550088438319e-05, + "loss": 0.9481, + "step": 895 + }, + { + "epoch": 0.5, + "learning_rate": 4.6679537071994874e-05, + "loss": 0.9541, + "step": 900 + }, + { + "epoch": 0.5, + "learning_rate": 4.6643391436073165e-05, + "loss": 0.9787, + "step": 905 + }, + { + "epoch": 0.5, + "learning_rate": 4.660706427976693e-05, + "loss": 0.9897, + "step": 910 + }, + { + "epoch": 0.51, + "learning_rate": 4.657055590774745e-05, + "loss": 0.9947, + "step": 915 + }, + { + "epoch": 0.51, + "learning_rate": 4.6533866626205805e-05, + "loss": 0.9614, + "step": 920 + }, + { + "epoch": 0.51, + "learning_rate": 4.649699674285036e-05, + "loss": 1.0323, + "step": 925 + }, + { + "epoch": 0.51, + "learning_rate": 4.645994656690417e-05, + "loss": 1.0345, + "step": 930 + }, + { + "epoch": 0.52, + "learning_rate": 4.642271640910235e-05, + "loss": 1.0432, + "step": 935 + }, + { + "epoch": 0.52, + "learning_rate": 4.638530658168954e-05, + "loss": 1.0102, + "step": 940 + }, + { + "epoch": 0.52, + "learning_rate": 4.6347717398417203e-05, + "loss": 1.0361, + "step": 945 + }, + { + "epoch": 0.53, + "learning_rate": 4.6309949174541096e-05, + "loss": 0.9818, + "step": 950 + }, + { + "epoch": 0.53, + "learning_rate": 4.627200222681851e-05, + "loss": 1.0293, + "step": 955 + }, + { + "epoch": 0.53, + "learning_rate": 4.6233876873505694e-05, + "loss": 1.0125, + "step": 960 + }, + { + "epoch": 0.53, + "learning_rate": 4.619557343435516e-05, + "loss": 1.0039, + "step": 965 + }, + { + "epoch": 0.54, + "learning_rate": 4.615709223061302e-05, + "loss": 1.0146, + "step": 970 + }, + { + "epoch": 0.54, + "learning_rate": 4.611843358501624e-05, + "loss": 1.0352, + "step": 975 + }, + { + "epoch": 0.54, + "learning_rate": 4.6079597821789993e-05, + "loss": 1.0405, + "step": 980 + }, + { + "epoch": 0.54, + "learning_rate": 4.604058526664491e-05, + "loss": 0.9834, + "step": 985 + }, + { + "epoch": 0.55, + "learning_rate": 4.600139624677436e-05, + "loss": 1.0252, + "step": 990 + }, + { + "epoch": 0.55, + "learning_rate": 4.596203109085168e-05, + "loss": 1.0516, + "step": 995 + }, + { + "epoch": 0.55, + "learning_rate": 4.5922490129027464e-05, + "loss": 0.9762, + "step": 1000 + }, + { + "epoch": 0.56, + "learning_rate": 4.588277369292674e-05, + "loss": 0.9801, + "step": 1005 + }, + { + "epoch": 0.56, + "learning_rate": 4.5842882115646234e-05, + "loss": 1.0127, + "step": 1010 + }, + { + "epoch": 0.56, + "learning_rate": 4.580281573175157e-05, + "loss": 1.0415, + "step": 1015 + }, + { + "epoch": 0.56, + "learning_rate": 4.576257487727442e-05, + "loss": 0.9974, + "step": 1020 + }, + { + "epoch": 0.57, + "learning_rate": 4.572215988970974e-05, + "loss": 1.0097, + "step": 1025 + }, + { + "epoch": 0.57, + "learning_rate": 4.568157110801293e-05, + "loss": 1.0004, + "step": 1030 + }, + { + "epoch": 0.57, + "learning_rate": 4.5640808872596944e-05, + "loss": 0.9932, + "step": 1035 + }, + { + "epoch": 0.58, + "learning_rate": 4.5599873525329505e-05, + "loss": 0.9437, + "step": 1040 + }, + { + "epoch": 0.58, + "learning_rate": 4.555876540953019e-05, + "loss": 1.0004, + "step": 1045 + }, + { + "epoch": 0.58, + "learning_rate": 4.551748486996755e-05, + "loss": 0.9743, + "step": 1050 + }, + { + "epoch": 0.58, + "learning_rate": 4.547603225285626e-05, + "loss": 1.0303, + "step": 1055 + }, + { + "epoch": 0.59, + "learning_rate": 4.543440790585417e-05, + "loss": 1.0225, + "step": 1060 + }, + { + "epoch": 0.59, + "learning_rate": 4.539261217805939e-05, + "loss": 1.0102, + "step": 1065 + }, + { + "epoch": 0.59, + "learning_rate": 4.535064542000743e-05, + "loss": 1.0087, + "step": 1070 + }, + { + "epoch": 0.59, + "learning_rate": 4.5308507983668165e-05, + "loss": 0.952, + "step": 1075 + }, + { + "epoch": 0.6, + "learning_rate": 4.526620022244293e-05, + "loss": 0.9767, + "step": 1080 + }, + { + "epoch": 0.6, + "learning_rate": 4.522372249116158e-05, + "loss": 1.0049, + "step": 1085 + }, + { + "epoch": 0.6, + "learning_rate": 4.5181075146079456e-05, + "loss": 1.0397, + "step": 1090 + }, + { + "epoch": 0.61, + "learning_rate": 4.5138258544874455e-05, + "loss": 1.0167, + "step": 1095 + }, + { + "epoch": 0.61, + "learning_rate": 4.5095273046643985e-05, + "loss": 0.9966, + "step": 1100 + }, + { + "epoch": 0.61, + "learning_rate": 4.5052119011901986e-05, + "loss": 0.983, + "step": 1105 + }, + { + "epoch": 0.61, + "learning_rate": 4.500879680257587e-05, + "loss": 1.0301, + "step": 1110 + }, + { + "epoch": 0.62, + "learning_rate": 4.4965306782003535e-05, + "loss": 1.0371, + "step": 1115 + }, + { + "epoch": 0.62, + "learning_rate": 4.492164931493028e-05, + "loss": 1.0083, + "step": 1120 + }, + { + "epoch": 0.62, + "learning_rate": 4.487782476750575e-05, + "loss": 0.9728, + "step": 1125 + }, + { + "epoch": 0.62, + "learning_rate": 4.4833833507280884e-05, + "loss": 0.9892, + "step": 1130 + }, + { + "epoch": 0.63, + "learning_rate": 4.4789675903204805e-05, + "loss": 0.9847, + "step": 1135 + }, + { + "epoch": 0.63, + "learning_rate": 4.474535232562176e-05, + "loss": 1.0545, + "step": 1140 + }, + { + "epoch": 0.63, + "learning_rate": 4.470086314626797e-05, + "loss": 1.0204, + "step": 1145 + }, + { + "epoch": 0.64, + "learning_rate": 4.465620873826856e-05, + "loss": 0.9648, + "step": 1150 + }, + { + "epoch": 0.64, + "learning_rate": 4.46113894761344e-05, + "loss": 0.9545, + "step": 1155 + }, + { + "epoch": 0.64, + "learning_rate": 4.456640573575896e-05, + "loss": 1.0256, + "step": 1160 + }, + { + "epoch": 0.64, + "learning_rate": 4.4521257894415183e-05, + "loss": 0.99, + "step": 1165 + }, + { + "epoch": 0.65, + "learning_rate": 4.44759463307523e-05, + "loss": 0.9906, + "step": 1170 + }, + { + "epoch": 0.65, + "learning_rate": 4.443047142479266e-05, + "loss": 0.9876, + "step": 1175 + }, + { + "epoch": 0.65, + "learning_rate": 4.4384833557928553e-05, + "loss": 1.0495, + "step": 1180 + }, + { + "epoch": 0.66, + "learning_rate": 4.4339033112918966e-05, + "loss": 0.9869, + "step": 1185 + }, + { + "epoch": 0.66, + "learning_rate": 4.4293070473886456e-05, + "loss": 1.0299, + "step": 1190 + }, + { + "epoch": 0.66, + "learning_rate": 4.424694602631385e-05, + "loss": 1.0073, + "step": 1195 + }, + { + "epoch": 0.66, + "learning_rate": 4.420066015704105e-05, + "loss": 1.0023, + "step": 1200 + }, + { + "epoch": 0.67, + "learning_rate": 4.41542132542618e-05, + "loss": 0.9462, + "step": 1205 + }, + { + "epoch": 0.67, + "learning_rate": 4.410760570752037e-05, + "loss": 1.0116, + "step": 1210 + }, + { + "epoch": 0.67, + "learning_rate": 4.4060837907708375e-05, + "loss": 0.9652, + "step": 1215 + }, + { + "epoch": 0.67, + "learning_rate": 4.401391024706142e-05, + "loss": 1.0411, + "step": 1220 + }, + { + "epoch": 0.68, + "learning_rate": 4.396682311915586e-05, + "loss": 0.9691, + "step": 1225 + }, + { + "epoch": 0.68, + "learning_rate": 4.3919576918905495e-05, + "loss": 0.961, + "step": 1230 + }, + { + "epoch": 0.68, + "learning_rate": 4.387217204255819e-05, + "loss": 0.9602, + "step": 1235 + }, + { + "epoch": 0.69, + "learning_rate": 4.3824608887692666e-05, + "loss": 1.0197, + "step": 1240 + }, + { + "epoch": 0.69, + "learning_rate": 4.377688785321507e-05, + "loss": 0.9982, + "step": 1245 + }, + { + "epoch": 0.69, + "learning_rate": 4.372900933935569e-05, + "loss": 0.986, + "step": 1250 + }, + { + "epoch": 0.69, + "learning_rate": 4.368097374766556e-05, + "loss": 0.9744, + "step": 1255 + }, + { + "epoch": 0.7, + "learning_rate": 4.3632781481013105e-05, + "loss": 1.0078, + "step": 1260 + }, + { + "epoch": 0.7, + "learning_rate": 4.358443294358077e-05, + "loss": 0.9865, + "step": 1265 + }, + { + "epoch": 0.7, + "learning_rate": 4.35359285408616e-05, + "loss": 0.976, + "step": 1270 + }, + { + "epoch": 0.71, + "learning_rate": 4.348726867965591e-05, + "loss": 1.0141, + "step": 1275 + }, + { + "epoch": 0.71, + "learning_rate": 4.343845376806777e-05, + "loss": 0.952, + "step": 1280 + }, + { + "epoch": 0.71, + "learning_rate": 4.338948421550169e-05, + "loss": 1.0239, + "step": 1285 + }, + { + "epoch": 0.71, + "learning_rate": 4.334036043265909e-05, + "loss": 0.994, + "step": 1290 + }, + { + "epoch": 0.72, + "learning_rate": 4.329108283153492e-05, + "loss": 1.0505, + "step": 1295 + }, + { + "epoch": 0.72, + "learning_rate": 4.3241651825414195e-05, + "loss": 0.9919, + "step": 1300 + }, + { + "epoch": 0.72, + "learning_rate": 4.31920678288685e-05, + "loss": 1.0169, + "step": 1305 + }, + { + "epoch": 0.72, + "learning_rate": 4.3142331257752546e-05, + "loss": 0.9938, + "step": 1310 + }, + { + "epoch": 0.73, + "learning_rate": 4.309244252920064e-05, + "loss": 0.92, + "step": 1315 + }, + { + "epoch": 0.73, + "learning_rate": 4.304240206162326e-05, + "loss": 0.9537, + "step": 1320 + }, + { + "epoch": 0.73, + "learning_rate": 4.299221027470345e-05, + "loss": 0.996, + "step": 1325 + }, + { + "epoch": 0.74, + "learning_rate": 4.294186758939339e-05, + "loss": 0.9652, + "step": 1330 + }, + { + "epoch": 0.74, + "learning_rate": 4.2891374427910795e-05, + "loss": 1.0138, + "step": 1335 + }, + { + "epoch": 0.74, + "learning_rate": 4.284073121373544e-05, + "loss": 0.9267, + "step": 1340 + }, + { + "epoch": 0.74, + "learning_rate": 4.278993837160553e-05, + "loss": 0.9899, + "step": 1345 + }, + { + "epoch": 0.75, + "learning_rate": 4.273899632751422e-05, + "loss": 0.963, + "step": 1350 + }, + { + "epoch": 0.75, + "learning_rate": 4.2687905508705974e-05, + "loss": 1.0066, + "step": 1355 + }, + { + "epoch": 0.75, + "learning_rate": 4.263666634367303e-05, + "loss": 0.9939, + "step": 1360 + }, + { + "epoch": 0.75, + "learning_rate": 4.258527926215178e-05, + "loss": 0.9667, + "step": 1365 + }, + { + "epoch": 0.76, + "learning_rate": 4.253374469511917e-05, + "loss": 0.9999, + "step": 1370 + }, + { + "epoch": 0.76, + "learning_rate": 4.248206307478909e-05, + "loss": 0.9738, + "step": 1375 + }, + { + "epoch": 0.76, + "learning_rate": 4.243023483460875e-05, + "loss": 0.9829, + "step": 1380 + }, + { + "epoch": 0.77, + "learning_rate": 4.237826040925503e-05, + "loss": 1.0107, + "step": 1385 + }, + { + "epoch": 0.77, + "learning_rate": 4.232614023463088e-05, + "loss": 0.9992, + "step": 1390 + }, + { + "epoch": 0.77, + "learning_rate": 4.227387474786159e-05, + "loss": 0.9566, + "step": 1395 + }, + { + "epoch": 0.77, + "learning_rate": 4.222146438729119e-05, + "loss": 0.9722, + "step": 1400 + }, + { + "epoch": 0.78, + "learning_rate": 4.216890959247873e-05, + "loss": 0.988, + "step": 1405 + }, + { + "epoch": 0.78, + "learning_rate": 4.211621080419463e-05, + "loss": 1.0103, + "step": 1410 + }, + { + "epoch": 0.78, + "learning_rate": 4.206336846441695e-05, + "loss": 0.9805, + "step": 1415 + }, + { + "epoch": 0.79, + "learning_rate": 4.201038301632772e-05, + "loss": 1.0177, + "step": 1420 + }, + { + "epoch": 0.79, + "learning_rate": 4.195725490430917e-05, + "loss": 0.9938, + "step": 1425 + }, + { + "epoch": 0.79, + "learning_rate": 4.190398457394007e-05, + "loss": 1.0276, + "step": 1430 + }, + { + "epoch": 0.79, + "learning_rate": 4.1850572471991924e-05, + "loss": 0.9909, + "step": 1435 + }, + { + "epoch": 0.8, + "learning_rate": 4.1797019046425264e-05, + "loss": 0.9794, + "step": 1440 + }, + { + "epoch": 0.8, + "learning_rate": 4.1743324746385914e-05, + "loss": 1.0073, + "step": 1445 + }, + { + "epoch": 0.8, + "learning_rate": 4.1689490022201154e-05, + "loss": 1.0106, + "step": 1450 + }, + { + "epoch": 0.8, + "learning_rate": 4.163551532537601e-05, + "loss": 1.0241, + "step": 1455 + }, + { + "epoch": 0.81, + "learning_rate": 4.1581401108589425e-05, + "loss": 1.0178, + "step": 1460 + }, + { + "epoch": 0.81, + "learning_rate": 4.1527147825690495e-05, + "loss": 0.9426, + "step": 1465 + }, + { + "epoch": 0.81, + "learning_rate": 4.1472755931694626e-05, + "loss": 0.9642, + "step": 1470 + }, + { + "epoch": 0.82, + "learning_rate": 4.141822588277976e-05, + "loss": 0.9678, + "step": 1475 + }, + { + "epoch": 0.82, + "learning_rate": 4.136355813628251e-05, + "loss": 0.9654, + "step": 1480 + }, + { + "epoch": 0.82, + "learning_rate": 4.130875315069435e-05, + "loss": 0.9748, + "step": 1485 + }, + { + "epoch": 0.82, + "learning_rate": 4.125381138565775e-05, + "loss": 0.9321, + "step": 1490 + }, + { + "epoch": 0.83, + "learning_rate": 4.1198733301962346e-05, + "loss": 0.9885, + "step": 1495 + }, + { + "epoch": 0.83, + "learning_rate": 4.114351936154105e-05, + "loss": 0.9385, + "step": 1500 + }, + { + "epoch": 0.83, + "learning_rate": 4.108817002746619e-05, + "loss": 0.9362, + "step": 1505 + }, + { + "epoch": 0.83, + "learning_rate": 4.1032685763945625e-05, + "loss": 0.9764, + "step": 1510 + }, + { + "epoch": 0.84, + "learning_rate": 4.097706703631886e-05, + "loss": 0.961, + "step": 1515 + }, + { + "epoch": 0.84, + "learning_rate": 4.092131431105312e-05, + "loss": 0.9818, + "step": 1520 + }, + { + "epoch": 0.84, + "learning_rate": 4.086542805573945e-05, + "loss": 0.9855, + "step": 1525 + }, + { + "epoch": 0.85, + "learning_rate": 4.080940873908881e-05, + "loss": 0.9618, + "step": 1530 + }, + { + "epoch": 0.85, + "learning_rate": 4.07532568309281e-05, + "loss": 1.0102, + "step": 1535 + }, + { + "epoch": 0.85, + "learning_rate": 4.069697280219628e-05, + "loss": 0.9773, + "step": 1540 + }, + { + "epoch": 0.85, + "learning_rate": 4.0640557124940376e-05, + "loss": 0.9687, + "step": 1545 + }, + { + "epoch": 0.86, + "learning_rate": 4.058401027231152e-05, + "loss": 0.9818, + "step": 1550 + }, + { + "epoch": 0.86, + "learning_rate": 4.052733271856103e-05, + "loss": 0.9911, + "step": 1555 + }, + { + "epoch": 0.86, + "learning_rate": 4.0470524939036355e-05, + "loss": 0.9694, + "step": 1560 + }, + { + "epoch": 0.87, + "learning_rate": 4.0413587410177155e-05, + "loss": 0.9374, + "step": 1565 + }, + { + "epoch": 0.87, + "learning_rate": 4.035652060951128e-05, + "loss": 0.956, + "step": 1570 + }, + { + "epoch": 0.87, + "learning_rate": 4.0299325015650774e-05, + "loss": 0.9491, + "step": 1575 + }, + { + "epoch": 0.87, + "learning_rate": 4.024200110828783e-05, + "loss": 0.9569, + "step": 1580 + }, + { + "epoch": 0.88, + "learning_rate": 4.018454936819082e-05, + "loss": 0.9627, + "step": 1585 + }, + { + "epoch": 0.88, + "learning_rate": 4.012697027720018e-05, + "loss": 0.9922, + "step": 1590 + }, + { + "epoch": 0.88, + "learning_rate": 4.0069264318224506e-05, + "loss": 0.9703, + "step": 1595 + }, + { + "epoch": 0.88, + "learning_rate": 4.0011431975236337e-05, + "loss": 0.9436, + "step": 1600 + }, + { + "epoch": 0.89, + "learning_rate": 3.995347373326822e-05, + "loss": 0.961, + "step": 1605 + }, + { + "epoch": 0.89, + "learning_rate": 3.989539007840861e-05, + "loss": 0.9247, + "step": 1610 + }, + { + "epoch": 0.89, + "learning_rate": 3.983718149779775e-05, + "loss": 0.9537, + "step": 1615 + }, + { + "epoch": 0.9, + "learning_rate": 3.9778848479623656e-05, + "loss": 0.9415, + "step": 1620 + }, + { + "epoch": 0.9, + "learning_rate": 3.972039151311795e-05, + "loss": 0.9954, + "step": 1625 + }, + { + "epoch": 0.9, + "learning_rate": 3.966181108855183e-05, + "loss": 0.9451, + "step": 1630 + }, + { + "epoch": 0.9, + "learning_rate": 3.960310769723189e-05, + "loss": 0.9977, + "step": 1635 + }, + { + "epoch": 0.91, + "learning_rate": 3.9544281831496034e-05, + "loss": 0.9806, + "step": 1640 + }, + { + "epoch": 0.91, + "learning_rate": 3.9485333984709374e-05, + "loss": 0.9851, + "step": 1645 + }, + { + "epoch": 0.91, + "learning_rate": 3.942626465126001e-05, + "loss": 1.0089, + "step": 1650 + }, + { + "epoch": 0.92, + "learning_rate": 3.9367074326555e-05, + "loss": 0.9562, + "step": 1655 + }, + { + "epoch": 0.92, + "learning_rate": 3.930776350701609e-05, + "loss": 0.9892, + "step": 1660 + }, + { + "epoch": 0.92, + "learning_rate": 3.92483326900756e-05, + "loss": 0.9875, + "step": 1665 + }, + { + "epoch": 0.92, + "learning_rate": 3.91887823741723e-05, + "loss": 1.0012, + "step": 1670 + }, + { + "epoch": 0.93, + "learning_rate": 3.9129113058747136e-05, + "loss": 0.97, + "step": 1675 + }, + { + "epoch": 0.93, + "learning_rate": 3.9069325244239095e-05, + "loss": 0.9793, + "step": 1680 + }, + { + "epoch": 0.93, + "learning_rate": 3.900941943208103e-05, + "loss": 0.9864, + "step": 1685 + }, + { + "epoch": 0.93, + "learning_rate": 3.894939612469539e-05, + "loss": 0.9596, + "step": 1690 + }, + { + "epoch": 0.94, + "learning_rate": 3.888925582549006e-05, + "loss": 0.9679, + "step": 1695 + }, + { + "epoch": 0.94, + "learning_rate": 3.882899903885412e-05, + "loss": 0.9395, + "step": 1700 + }, + { + "epoch": 0.94, + "learning_rate": 3.876862627015361e-05, + "loss": 0.9774, + "step": 1705 + }, + { + "epoch": 0.95, + "learning_rate": 3.87081380257273e-05, + "loss": 0.9686, + "step": 1710 + }, + { + "epoch": 0.95, + "learning_rate": 3.864753481288244e-05, + "loss": 0.953, + "step": 1715 + }, + { + "epoch": 0.95, + "learning_rate": 3.8586817139890515e-05, + "loss": 0.9719, + "step": 1720 + }, + { + "epoch": 0.95, + "learning_rate": 3.852598551598294e-05, + "loss": 0.9748, + "step": 1725 + }, + { + "epoch": 0.96, + "learning_rate": 3.8465040451346874e-05, + "loss": 0.9741, + "step": 1730 + }, + { + "epoch": 0.96, + "learning_rate": 3.8403982457120836e-05, + "loss": 0.9747, + "step": 1735 + }, + { + "epoch": 0.96, + "learning_rate": 3.834281204539051e-05, + "loss": 0.9791, + "step": 1740 + }, + { + "epoch": 0.96, + "learning_rate": 3.828152972918438e-05, + "loss": 0.9704, + "step": 1745 + }, + { + "epoch": 0.97, + "learning_rate": 3.82201360224695e-05, + "loss": 0.9556, + "step": 1750 + }, + { + "epoch": 0.97, + "learning_rate": 3.815863144014711e-05, + "loss": 0.9552, + "step": 1755 + }, + { + "epoch": 0.97, + "learning_rate": 3.809701649804834e-05, + "loss": 0.9594, + "step": 1760 + }, + { + "epoch": 0.98, + "learning_rate": 3.8035291712929926e-05, + "loss": 0.959, + "step": 1765 + }, + { + "epoch": 0.98, + "learning_rate": 3.797345760246982e-05, + "loss": 1.0254, + "step": 1770 + }, + { + "epoch": 0.98, + "learning_rate": 3.791151468526289e-05, + "loss": 0.9925, + "step": 1775 + }, + { + "epoch": 0.98, + "learning_rate": 3.784946348081654e-05, + "loss": 0.9516, + "step": 1780 + }, + { + "epoch": 0.99, + "learning_rate": 3.7787304509546365e-05, + "loss": 0.954, + "step": 1785 + }, + { + "epoch": 0.99, + "learning_rate": 3.7725038292771774e-05, + "loss": 0.9465, + "step": 1790 + }, + { + "epoch": 0.99, + "learning_rate": 3.766266535271167e-05, + "loss": 0.9792, + "step": 1795 + }, + { + "epoch": 1.0, + "learning_rate": 3.760018621248e-05, + "loss": 0.964, + "step": 1800 + }, + { + "epoch": 1.0, + "learning_rate": 3.75376013960814e-05, + "loss": 0.9419, + "step": 1805 + }, + { + "epoch": 1.0, + "learning_rate": 3.747491142840681e-05, + "loss": 0.9818, + "step": 1810 + }, + { + "epoch": 1.0, + "learning_rate": 3.741211683522904e-05, + "loss": 0.9153, + "step": 1815 + }, + { + "epoch": 1.01, + "learning_rate": 3.734921814319841e-05, + "loss": 0.955, + "step": 1820 + }, + { + "epoch": 1.01, + "learning_rate": 3.728621587983828e-05, + "loss": 0.9467, + "step": 1825 + }, + { + "epoch": 1.01, + "learning_rate": 3.722311057354067e-05, + "loss": 0.9816, + "step": 1830 + }, + { + "epoch": 1.01, + "learning_rate": 3.715990275356178e-05, + "loss": 0.9727, + "step": 1835 + }, + { + "epoch": 1.02, + "learning_rate": 3.7096592950017617e-05, + "loss": 0.9822, + "step": 1840 + }, + { + "epoch": 1.02, + "learning_rate": 3.703318169387947e-05, + "loss": 0.942, + "step": 1845 + }, + { + "epoch": 1.02, + "learning_rate": 3.696966951696952e-05, + "loss": 0.9306, + "step": 1850 + }, + { + "epoch": 1.03, + "learning_rate": 3.690605695195637e-05, + "loss": 0.9654, + "step": 1855 + }, + { + "epoch": 1.03, + "learning_rate": 3.684234453235054e-05, + "loss": 0.9592, + "step": 1860 + }, + { + "epoch": 1.03, + "learning_rate": 3.677853279250003e-05, + "loss": 0.988, + "step": 1865 + }, + { + "epoch": 1.03, + "learning_rate": 3.671462226758583e-05, + "loss": 0.9462, + "step": 1870 + }, + { + "epoch": 1.04, + "learning_rate": 3.665061349361742e-05, + "loss": 0.9685, + "step": 1875 + }, + { + "epoch": 1.04, + "learning_rate": 3.658650700742828e-05, + "loss": 0.9772, + "step": 1880 + }, + { + "epoch": 1.04, + "learning_rate": 3.6522303346671404e-05, + "loss": 0.9482, + "step": 1885 + }, + { + "epoch": 1.05, + "learning_rate": 3.645800304981477e-05, + "loss": 1.0069, + "step": 1890 + }, + { + "epoch": 1.05, + "learning_rate": 3.639360665613683e-05, + "loss": 0.9003, + "step": 1895 + }, + { + "epoch": 1.05, + "learning_rate": 3.632911470572197e-05, + "loss": 0.9279, + "step": 1900 + }, + { + "epoch": 1.05, + "learning_rate": 3.626452773945603e-05, + "loss": 0.9237, + "step": 1905 + }, + { + "epoch": 1.06, + "learning_rate": 3.619984629902172e-05, + "loss": 1.0086, + "step": 1910 + }, + { + "epoch": 1.06, + "learning_rate": 3.613507092689409e-05, + "loss": 0.9625, + "step": 1915 + }, + { + "epoch": 1.06, + "learning_rate": 3.607020216633599e-05, + "loss": 0.9297, + "step": 1920 + }, + { + "epoch": 1.06, + "learning_rate": 3.60052405613935e-05, + "loss": 0.9718, + "step": 1925 + }, + { + "epoch": 1.07, + "learning_rate": 3.594018665689139e-05, + "loss": 0.9512, + "step": 1930 + }, + { + "epoch": 1.07, + "learning_rate": 3.5875040998428513e-05, + "loss": 0.9923, + "step": 1935 + }, + { + "epoch": 1.07, + "learning_rate": 3.5809804132373253e-05, + "loss": 0.9518, + "step": 1940 + }, + { + "epoch": 1.08, + "learning_rate": 3.574447660585897e-05, + "loss": 0.9324, + "step": 1945 + }, + { + "epoch": 1.08, + "learning_rate": 3.5679058966779344e-05, + "loss": 0.9724, + "step": 1950 + }, + { + "epoch": 1.08, + "learning_rate": 3.561355176378384e-05, + "loss": 0.978, + "step": 1955 + }, + { + "epoch": 1.08, + "learning_rate": 3.554795554627307e-05, + "loss": 0.9893, + "step": 1960 + }, + { + "epoch": 1.09, + "learning_rate": 3.548227086439422e-05, + "loss": 0.967, + "step": 1965 + }, + { + "epoch": 1.09, + "learning_rate": 3.541649826903639e-05, + "loss": 0.9529, + "step": 1970 + }, + { + "epoch": 1.09, + "learning_rate": 3.535063831182602e-05, + "loss": 0.9506, + "step": 1975 + }, + { + "epoch": 1.09, + "learning_rate": 3.528469154512224e-05, + "loss": 0.9525, + "step": 1980 + }, + { + "epoch": 1.1, + "learning_rate": 3.521865852201223e-05, + "loss": 0.9258, + "step": 1985 + }, + { + "epoch": 1.1, + "learning_rate": 3.5152539796306596e-05, + "loss": 0.9417, + "step": 1990 + }, + { + "epoch": 1.1, + "learning_rate": 3.508633592253472e-05, + "loss": 0.9465, + "step": 1995 + }, + { + "epoch": 1.11, + "learning_rate": 3.502004745594011e-05, + "loss": 0.9494, + "step": 2000 + }, + { + "epoch": 1.11, + "learning_rate": 3.4953674952475755e-05, + "loss": 0.9709, + "step": 2005 + }, + { + "epoch": 1.11, + "learning_rate": 3.488721896879943e-05, + "loss": 0.9581, + "step": 2010 + }, + { + "epoch": 1.11, + "learning_rate": 3.4820680062269074e-05, + "loss": 0.974, + "step": 2015 + }, + { + "epoch": 1.12, + "learning_rate": 3.4754058790938046e-05, + "loss": 0.9768, + "step": 2020 + }, + { + "epoch": 1.12, + "learning_rate": 3.468735571355055e-05, + "loss": 0.982, + "step": 2025 + }, + { + "epoch": 1.12, + "learning_rate": 3.4620571389536825e-05, + "loss": 0.984, + "step": 2030 + }, + { + "epoch": 1.13, + "learning_rate": 3.455370637900856e-05, + "loss": 0.9604, + "step": 2035 + }, + { + "epoch": 1.13, + "learning_rate": 3.448676124275414e-05, + "loss": 0.897, + "step": 2040 + }, + { + "epoch": 1.13, + "learning_rate": 3.4419736542233925e-05, + "loss": 0.9968, + "step": 2045 + }, + { + "epoch": 1.13, + "learning_rate": 3.4352632839575616e-05, + "loss": 0.9479, + "step": 2050 + }, + { + "epoch": 1.14, + "learning_rate": 3.428545069756946e-05, + "loss": 0.9724, + "step": 2055 + }, + { + "epoch": 1.14, + "learning_rate": 3.42181906796636e-05, + "loss": 0.9493, + "step": 2060 + }, + { + "epoch": 1.14, + "learning_rate": 3.415085334995927e-05, + "loss": 0.9348, + "step": 2065 + }, + { + "epoch": 1.14, + "learning_rate": 3.408343927320613e-05, + "loss": 0.9702, + "step": 2070 + }, + { + "epoch": 1.15, + "learning_rate": 3.401594901479753e-05, + "loss": 0.9089, + "step": 2075 + }, + { + "epoch": 1.15, + "learning_rate": 3.394838314076572e-05, + "loss": 0.9606, + "step": 2080 + }, + { + "epoch": 1.15, + "learning_rate": 3.3880742217777115e-05, + "loss": 0.9743, + "step": 2085 + }, + { + "epoch": 1.16, + "learning_rate": 3.381302681312759e-05, + "loss": 0.9469, + "step": 2090 + }, + { + "epoch": 1.16, + "learning_rate": 3.374523749473767e-05, + "loss": 0.949, + "step": 2095 + }, + { + "epoch": 1.16, + "learning_rate": 3.367737483114779e-05, + "loss": 0.9567, + "step": 2100 + }, + { + "epoch": 1.16, + "learning_rate": 3.360943939151351e-05, + "loss": 0.9718, + "step": 2105 + }, + { + "epoch": 1.17, + "learning_rate": 3.354143174560078e-05, + "loss": 0.9626, + "step": 2110 + }, + { + "epoch": 1.17, + "learning_rate": 3.3473352463781105e-05, + "loss": 0.9346, + "step": 2115 + }, + { + "epoch": 1.17, + "learning_rate": 3.340520211702681e-05, + "loss": 0.9208, + "step": 2120 + }, + { + "epoch": 1.18, + "learning_rate": 3.333698127690623e-05, + "loss": 0.9856, + "step": 2125 + }, + { + "epoch": 1.18, + "learning_rate": 3.326869051557891e-05, + "loss": 0.9049, + "step": 2130 + }, + { + "epoch": 1.18, + "learning_rate": 3.320033040579082e-05, + "loss": 0.9222, + "step": 2135 + }, + { + "epoch": 1.18, + "learning_rate": 3.3131901520869565e-05, + "loss": 0.9648, + "step": 2140 + }, + { + "epoch": 1.19, + "learning_rate": 3.306340443471951e-05, + "loss": 0.9538, + "step": 2145 + }, + { + "epoch": 1.19, + "learning_rate": 3.299483972181708e-05, + "loss": 0.9314, + "step": 2150 + }, + { + "epoch": 1.19, + "learning_rate": 3.292620795720583e-05, + "loss": 0.9576, + "step": 2155 + }, + { + "epoch": 1.19, + "learning_rate": 3.285750971649167e-05, + "loss": 0.9499, + "step": 2160 + }, + { + "epoch": 1.2, + "learning_rate": 3.278874557583807e-05, + "loss": 0.9568, + "step": 2165 + }, + { + "epoch": 1.2, + "learning_rate": 3.271991611196117e-05, + "loss": 0.9642, + "step": 2170 + }, + { + "epoch": 1.2, + "learning_rate": 3.265102190212497e-05, + "loss": 0.9526, + "step": 2175 + }, + { + "epoch": 1.21, + "learning_rate": 3.258206352413648e-05, + "loss": 0.933, + "step": 2180 + }, + { + "epoch": 1.21, + "learning_rate": 3.2513041556340887e-05, + "loss": 0.9683, + "step": 2185 + }, + { + "epoch": 1.21, + "learning_rate": 3.244395657761671e-05, + "loss": 0.9155, + "step": 2190 + }, + { + "epoch": 1.21, + "learning_rate": 3.2374809167370924e-05, + "loss": 0.9262, + "step": 2195 + }, + { + "epoch": 1.22, + "learning_rate": 3.230559990553409e-05, + "loss": 0.9778, + "step": 2200 + }, + { + "epoch": 1.22, + "learning_rate": 3.2236329372555544e-05, + "loss": 0.9577, + "step": 2205 + }, + { + "epoch": 1.22, + "learning_rate": 3.2166998149398465e-05, + "loss": 0.9286, + "step": 2210 + }, + { + "epoch": 1.22, + "learning_rate": 3.209760681753505e-05, + "loss": 0.9634, + "step": 2215 + }, + { + "epoch": 1.23, + "learning_rate": 3.2028155958941615e-05, + "loss": 0.9467, + "step": 2220 + }, + { + "epoch": 1.23, + "learning_rate": 3.195864615609373e-05, + "loss": 0.9543, + "step": 2225 + }, + { + "epoch": 1.23, + "learning_rate": 3.1889077991961304e-05, + "loss": 0.9914, + "step": 2230 + }, + { + "epoch": 1.24, + "learning_rate": 3.181945205000373e-05, + "loss": 0.9309, + "step": 2235 + }, + { + "epoch": 1.24, + "learning_rate": 3.1749768914164955e-05, + "loss": 0.9299, + "step": 2240 + }, + { + "epoch": 1.24, + "learning_rate": 3.168002916886864e-05, + "loss": 0.9462, + "step": 2245 + }, + { + "epoch": 1.24, + "learning_rate": 3.1610233399013194e-05, + "loss": 0.948, + "step": 2250 + }, + { + "epoch": 1.25, + "learning_rate": 3.15403821899669e-05, + "loss": 0.9335, + "step": 2255 + }, + { + "epoch": 1.25, + "learning_rate": 3.147047612756302e-05, + "loss": 0.952, + "step": 2260 + }, + { + "epoch": 1.25, + "learning_rate": 3.140051579809484e-05, + "loss": 0.9532, + "step": 2265 + }, + { + "epoch": 1.26, + "learning_rate": 3.133050178831079e-05, + "loss": 0.9853, + "step": 2270 + }, + { + "epoch": 1.26, + "learning_rate": 3.12604346854095e-05, + "loss": 0.8925, + "step": 2275 + }, + { + "epoch": 1.26, + "learning_rate": 3.119031507703491e-05, + "loss": 0.942, + "step": 2280 + }, + { + "epoch": 1.26, + "learning_rate": 3.112014355127129e-05, + "loss": 0.9132, + "step": 2285 + }, + { + "epoch": 1.27, + "learning_rate": 3.104992069663835e-05, + "loss": 0.9335, + "step": 2290 + }, + { + "epoch": 1.27, + "learning_rate": 3.0979647102086273e-05, + "loss": 0.9403, + "step": 2295 + }, + { + "epoch": 1.27, + "learning_rate": 3.090932335699081e-05, + "loss": 0.9246, + "step": 2300 + }, + { + "epoch": 1.27, + "learning_rate": 3.083895005114831e-05, + "loss": 0.912, + "step": 2305 + }, + { + "epoch": 1.28, + "learning_rate": 3.076852777477079e-05, + "loss": 0.9334, + "step": 2310 + }, + { + "epoch": 1.28, + "learning_rate": 3.069805711848096e-05, + "loss": 0.933, + "step": 2315 + }, + { + "epoch": 1.28, + "learning_rate": 3.062753867330729e-05, + "loss": 0.9348, + "step": 2320 + }, + { + "epoch": 1.29, + "learning_rate": 3.055697303067905e-05, + "loss": 0.9997, + "step": 2325 + }, + { + "epoch": 1.29, + "learning_rate": 3.048636078242137e-05, + "loss": 0.9196, + "step": 2330 + }, + { + "epoch": 1.29, + "learning_rate": 3.0415702520750235e-05, + "loss": 0.9735, + "step": 2335 + }, + { + "epoch": 1.29, + "learning_rate": 3.0344998838267525e-05, + "loss": 0.9395, + "step": 2340 + }, + { + "epoch": 1.3, + "learning_rate": 3.0274250327956093e-05, + "loss": 0.9455, + "step": 2345 + }, + { + "epoch": 1.3, + "learning_rate": 3.020345758317474e-05, + "loss": 0.968, + "step": 2350 + }, + { + "epoch": 1.3, + "learning_rate": 3.0132621197653245e-05, + "loss": 0.9403, + "step": 2355 + }, + { + "epoch": 1.3, + "learning_rate": 3.0061741765487418e-05, + "loss": 0.9267, + "step": 2360 + }, + { + "epoch": 1.31, + "learning_rate": 2.9990819881134073e-05, + "loss": 0.9734, + "step": 2365 + }, + { + "epoch": 1.31, + "learning_rate": 2.9919856139406093e-05, + "loss": 0.949, + "step": 2370 + }, + { + "epoch": 1.31, + "learning_rate": 2.9848851135467386e-05, + "loss": 0.9464, + "step": 2375 + }, + { + "epoch": 1.32, + "learning_rate": 2.977780546482794e-05, + "loss": 0.9709, + "step": 2380 + }, + { + "epoch": 1.32, + "learning_rate": 2.9706719723338795e-05, + "loss": 0.9202, + "step": 2385 + }, + { + "epoch": 1.32, + "learning_rate": 2.9635594507187074e-05, + "loss": 0.9625, + "step": 2390 + }, + { + "epoch": 1.32, + "learning_rate": 2.956443041289096e-05, + "loss": 0.9307, + "step": 2395 + }, + { + "epoch": 1.33, + "learning_rate": 2.9493228037294702e-05, + "loss": 0.907, + "step": 2400 + }, + { + "epoch": 1.33, + "learning_rate": 2.9421987977563613e-05, + "loss": 0.9487, + "step": 2405 + }, + { + "epoch": 1.33, + "learning_rate": 2.935071083117907e-05, + "loss": 0.9259, + "step": 2410 + }, + { + "epoch": 1.34, + "learning_rate": 2.9279397195933457e-05, + "loss": 0.9357, + "step": 2415 + }, + { + "epoch": 1.34, + "learning_rate": 2.920804766992521e-05, + "loss": 0.9149, + "step": 2420 + }, + { + "epoch": 1.34, + "learning_rate": 2.9136662851553787e-05, + "loss": 0.9664, + "step": 2425 + }, + { + "epoch": 1.34, + "learning_rate": 2.906524333951461e-05, + "loss": 0.973, + "step": 2430 + }, + { + "epoch": 1.35, + "learning_rate": 2.899378973279409e-05, + "loss": 0.9797, + "step": 2435 + }, + { + "epoch": 1.35, + "learning_rate": 2.892230263066459e-05, + "loss": 0.9604, + "step": 2440 + }, + { + "epoch": 1.35, + "learning_rate": 2.885078263267938e-05, + "loss": 0.9147, + "step": 2445 + }, + { + "epoch": 1.35, + "learning_rate": 2.8779230338667634e-05, + "loss": 0.9827, + "step": 2450 + }, + { + "epoch": 1.36, + "learning_rate": 2.870764634872939e-05, + "loss": 0.9362, + "step": 2455 + }, + { + "epoch": 1.36, + "learning_rate": 2.86360312632305e-05, + "loss": 0.9373, + "step": 2460 + }, + { + "epoch": 1.36, + "learning_rate": 2.8564385682797622e-05, + "loss": 0.955, + "step": 2465 + }, + { + "epoch": 1.37, + "learning_rate": 2.8492710208313177e-05, + "loss": 0.9108, + "step": 2470 + }, + { + "epoch": 1.37, + "learning_rate": 2.8421005440910303e-05, + "loss": 0.9259, + "step": 2475 + }, + { + "epoch": 1.37, + "learning_rate": 2.8349271981967797e-05, + "loss": 0.9333, + "step": 2480 + }, + { + "epoch": 1.37, + "learning_rate": 2.8277510433105102e-05, + "loss": 0.9437, + "step": 2485 + }, + { + "epoch": 1.38, + "learning_rate": 2.820572139617725e-05, + "loss": 0.9189, + "step": 2490 + }, + { + "epoch": 1.38, + "learning_rate": 2.8133905473269802e-05, + "loss": 0.9522, + "step": 2495 + }, + { + "epoch": 1.38, + "learning_rate": 2.8062063266693818e-05, + "loss": 0.9352, + "step": 2500 + }, + { + "epoch": 1.39, + "learning_rate": 2.7990195378980784e-05, + "loss": 0.9232, + "step": 2505 + }, + { + "epoch": 1.39, + "learning_rate": 2.7918302412877583e-05, + "loss": 0.8807, + "step": 2510 + }, + { + "epoch": 1.39, + "learning_rate": 2.7846384971341427e-05, + "loss": 0.9231, + "step": 2515 + }, + { + "epoch": 1.39, + "learning_rate": 2.7774443657534788e-05, + "loss": 0.9253, + "step": 2520 + }, + { + "epoch": 1.4, + "learning_rate": 2.770247907482036e-05, + "loss": 0.9646, + "step": 2525 + }, + { + "epoch": 1.4, + "learning_rate": 2.763049182675599e-05, + "loss": 0.9575, + "step": 2530 + }, + { + "epoch": 1.4, + "learning_rate": 2.7558482517089617e-05, + "loss": 0.9234, + "step": 2535 + }, + { + "epoch": 1.4, + "learning_rate": 2.748645174975421e-05, + "loss": 0.9215, + "step": 2540 + }, + { + "epoch": 1.41, + "learning_rate": 2.74144001288627e-05, + "loss": 0.959, + "step": 2545 + }, + { + "epoch": 1.41, + "learning_rate": 2.7342328258702894e-05, + "loss": 0.9509, + "step": 2550 + }, + { + "epoch": 1.41, + "learning_rate": 2.727023674373246e-05, + "loss": 0.8989, + "step": 2555 + }, + { + "epoch": 1.42, + "learning_rate": 2.7198126188573807e-05, + "loss": 0.9653, + "step": 2560 + }, + { + "epoch": 1.42, + "learning_rate": 2.7125997198009028e-05, + "loss": 0.9046, + "step": 2565 + }, + { + "epoch": 1.42, + "learning_rate": 2.7053850376974848e-05, + "loss": 0.9318, + "step": 2570 + }, + { + "epoch": 1.42, + "learning_rate": 2.6981686330557516e-05, + "loss": 0.9292, + "step": 2575 + }, + { + "epoch": 1.43, + "learning_rate": 2.6909505663987756e-05, + "loss": 0.99, + "step": 2580 + }, + { + "epoch": 1.43, + "learning_rate": 2.6837308982635678e-05, + "loss": 0.9737, + "step": 2585 + }, + { + "epoch": 1.43, + "learning_rate": 2.6765096892005726e-05, + "loss": 0.9649, + "step": 2590 + }, + { + "epoch": 1.43, + "learning_rate": 2.6692869997731545e-05, + "loss": 0.9687, + "step": 2595 + }, + { + "epoch": 1.44, + "learning_rate": 2.6620628905570964e-05, + "loss": 0.9708, + "step": 2600 + }, + { + "epoch": 1.44, + "learning_rate": 2.6548374221400884e-05, + "loss": 0.9498, + "step": 2605 + }, + { + "epoch": 1.44, + "learning_rate": 2.6476106551212188e-05, + "loss": 0.9512, + "step": 2610 + }, + { + "epoch": 1.45, + "learning_rate": 2.6403826501104682e-05, + "loss": 0.964, + "step": 2615 + }, + { + "epoch": 1.45, + "learning_rate": 2.6331534677281998e-05, + "loss": 0.9321, + "step": 2620 + }, + { + "epoch": 1.45, + "learning_rate": 2.6259231686046508e-05, + "loss": 0.9032, + "step": 2625 + }, + { + "epoch": 1.45, + "learning_rate": 2.6186918133794252e-05, + "loss": 0.9543, + "step": 2630 + }, + { + "epoch": 1.46, + "learning_rate": 2.6114594627009847e-05, + "loss": 0.9355, + "step": 2635 + }, + { + "epoch": 1.46, + "learning_rate": 2.604226177226137e-05, + "loss": 0.9111, + "step": 2640 + }, + { + "epoch": 1.46, + "learning_rate": 2.596992017619534e-05, + "loss": 0.9896, + "step": 2645 + }, + { + "epoch": 1.47, + "learning_rate": 2.589757044553155e-05, + "loss": 0.9715, + "step": 2650 + }, + { + "epoch": 1.47, + "learning_rate": 2.5825213187058045e-05, + "loss": 0.8911, + "step": 2655 + }, + { + "epoch": 1.47, + "learning_rate": 2.5752849007625986e-05, + "loss": 0.9446, + "step": 2660 + }, + { + "epoch": 1.47, + "learning_rate": 2.568047851414459e-05, + "loss": 0.9453, + "step": 2665 + }, + { + "epoch": 1.48, + "learning_rate": 2.5608102313576027e-05, + "loss": 0.9244, + "step": 2670 + }, + { + "epoch": 1.48, + "learning_rate": 2.553572101293033e-05, + "loss": 0.9478, + "step": 2675 + }, + { + "epoch": 1.48, + "learning_rate": 2.546333521926031e-05, + "loss": 0.9708, + "step": 2680 + }, + { + "epoch": 1.48, + "learning_rate": 2.5390945539656445e-05, + "loss": 0.9266, + "step": 2685 + }, + { + "epoch": 1.49, + "learning_rate": 2.5318552581241822e-05, + "loss": 0.9199, + "step": 2690 + }, + { + "epoch": 1.49, + "learning_rate": 2.524615695116702e-05, + "loss": 0.9666, + "step": 2695 + }, + { + "epoch": 1.49, + "learning_rate": 2.5173759256605027e-05, + "loss": 0.9247, + "step": 2700 + }, + { + "epoch": 1.5, + "learning_rate": 2.510136010474614e-05, + "loss": 0.9534, + "step": 2705 + }, + { + "epoch": 1.5, + "learning_rate": 2.5028960102792887e-05, + "loss": 0.9502, + "step": 2710 + }, + { + "epoch": 1.5, + "learning_rate": 2.4971039897207112e-05, + "loss": 0.9331, + "step": 2715 + }, + { + "epoch": 1.5, + "learning_rate": 2.4898639895253865e-05, + "loss": 0.9806, + "step": 2720 + }, + { + "epoch": 1.51, + "learning_rate": 2.4826240743394982e-05, + "loss": 0.9525, + "step": 2725 + }, + { + "epoch": 1.51, + "learning_rate": 2.4753843048832985e-05, + "loss": 0.937, + "step": 2730 + }, + { + "epoch": 1.51, + "learning_rate": 2.4681447418758187e-05, + "loss": 0.935, + "step": 2735 + }, + { + "epoch": 1.52, + "learning_rate": 2.460905446034356e-05, + "loss": 0.9662, + "step": 2740 + }, + { + "epoch": 1.52, + "learning_rate": 2.45366647807397e-05, + "loss": 0.9249, + "step": 2745 + }, + { + "epoch": 1.52, + "learning_rate": 2.446427898706967e-05, + "loss": 0.9673, + "step": 2750 + }, + { + "epoch": 1.52, + "learning_rate": 2.439189768642398e-05, + "loss": 0.9316, + "step": 2755 + }, + { + "epoch": 1.53, + "learning_rate": 2.431952148585541e-05, + "loss": 0.9297, + "step": 2760 + }, + { + "epoch": 1.53, + "learning_rate": 2.424715099237402e-05, + "loss": 0.9278, + "step": 2765 + }, + { + "epoch": 1.53, + "learning_rate": 2.4174786812941968e-05, + "loss": 0.8954, + "step": 2770 + }, + { + "epoch": 1.53, + "learning_rate": 2.4102429554468456e-05, + "loss": 0.9586, + "step": 2775 + }, + { + "epoch": 1.54, + "learning_rate": 2.4030079823804673e-05, + "loss": 0.9119, + "step": 2780 + }, + { + "epoch": 1.54, + "learning_rate": 2.395773822773863e-05, + "loss": 0.8949, + "step": 2785 + }, + { + "epoch": 1.54, + "learning_rate": 2.3885405372990166e-05, + "loss": 0.9506, + "step": 2790 + }, + { + "epoch": 1.55, + "learning_rate": 2.3813081866205754e-05, + "loss": 0.9087, + "step": 2795 + }, + { + "epoch": 1.55, + "learning_rate": 2.3740768313953494e-05, + "loss": 0.923, + "step": 2800 + }, + { + "epoch": 1.55, + "learning_rate": 2.3668465322718004e-05, + "loss": 1.0212, + "step": 2805 + }, + { + "epoch": 1.55, + "learning_rate": 2.359617349889532e-05, + "loss": 0.9182, + "step": 2810 + }, + { + "epoch": 1.56, + "learning_rate": 2.3523893448787818e-05, + "loss": 0.9316, + "step": 2815 + }, + { + "epoch": 1.56, + "learning_rate": 2.3451625778599122e-05, + "loss": 0.94, + "step": 2820 + }, + { + "epoch": 1.56, + "learning_rate": 2.3379371094429038e-05, + "loss": 0.9181, + "step": 2825 + }, + { + "epoch": 1.56, + "learning_rate": 2.3307130002268457e-05, + "loss": 0.937, + "step": 2830 + }, + { + "epoch": 1.57, + "learning_rate": 2.3234903107994287e-05, + "loss": 0.9026, + "step": 2835 + }, + { + "epoch": 1.57, + "learning_rate": 2.3162691017364317e-05, + "loss": 0.954, + "step": 2840 + }, + { + "epoch": 1.57, + "learning_rate": 2.3090494336012253e-05, + "loss": 0.9661, + "step": 2845 + }, + { + "epoch": 1.58, + "learning_rate": 2.3018313669442483e-05, + "loss": 0.9127, + "step": 2850 + }, + { + "epoch": 1.58, + "learning_rate": 2.2946149623025158e-05, + "loss": 0.9317, + "step": 2855 + }, + { + "epoch": 1.58, + "learning_rate": 2.2874002801990978e-05, + "loss": 0.9856, + "step": 2860 + }, + { + "epoch": 1.58, + "learning_rate": 2.28018738114262e-05, + "loss": 1.0021, + "step": 2865 + }, + { + "epoch": 1.59, + "learning_rate": 2.272976325626755e-05, + "loss": 0.9655, + "step": 2870 + }, + { + "epoch": 1.59, + "learning_rate": 2.265767174129711e-05, + "loss": 0.9619, + "step": 2875 + }, + { + "epoch": 1.59, + "learning_rate": 2.2585599871137313e-05, + "loss": 0.9383, + "step": 2880 + }, + { + "epoch": 1.6, + "learning_rate": 2.251354825024579e-05, + "loss": 0.9332, + "step": 2885 + }, + { + "epoch": 1.6, + "learning_rate": 2.244151748291039e-05, + "loss": 0.9544, + "step": 2890 + }, + { + "epoch": 1.6, + "learning_rate": 2.236950817324401e-05, + "loss": 0.9343, + "step": 2895 + }, + { + "epoch": 1.6, + "learning_rate": 2.2297520925179647e-05, + "loss": 0.9189, + "step": 2900 + }, + { + "epoch": 1.61, + "learning_rate": 2.222555634246521e-05, + "loss": 0.9341, + "step": 2905 + }, + { + "epoch": 1.61, + "learning_rate": 2.215361502865858e-05, + "loss": 0.9567, + "step": 2910 + }, + { + "epoch": 1.61, + "learning_rate": 2.2081697587122423e-05, + "loss": 0.9047, + "step": 2915 + }, + { + "epoch": 1.61, + "learning_rate": 2.200980462101922e-05, + "loss": 0.9126, + "step": 2920 + }, + { + "epoch": 1.62, + "learning_rate": 2.1937936733306195e-05, + "loss": 0.9523, + "step": 2925 + }, + { + "epoch": 1.62, + "learning_rate": 2.18660945267302e-05, + "loss": 0.8802, + "step": 2930 + }, + { + "epoch": 1.62, + "learning_rate": 2.179427860382276e-05, + "loss": 0.9197, + "step": 2935 + }, + { + "epoch": 1.63, + "learning_rate": 2.1722489566894903e-05, + "loss": 0.9255, + "step": 2940 + }, + { + "epoch": 1.63, + "learning_rate": 2.1650728018032206e-05, + "loss": 0.8921, + "step": 2945 + }, + { + "epoch": 1.63, + "learning_rate": 2.15789945590897e-05, + "loss": 0.9607, + "step": 2950 + }, + { + "epoch": 1.63, + "learning_rate": 2.150728979168683e-05, + "loss": 0.9755, + "step": 2955 + }, + { + "epoch": 1.64, + "learning_rate": 2.1435614317202384e-05, + "loss": 0.9943, + "step": 2960 + }, + { + "epoch": 1.64, + "learning_rate": 2.1363968736769508e-05, + "loss": 0.9463, + "step": 2965 + }, + { + "epoch": 1.64, + "learning_rate": 2.1292353651270617e-05, + "loss": 0.9107, + "step": 2970 + }, + { + "epoch": 1.65, + "learning_rate": 2.1220769661332365e-05, + "loss": 0.9311, + "step": 2975 + }, + { + "epoch": 1.65, + "learning_rate": 2.1149217367320622e-05, + "loss": 0.9459, + "step": 2980 + }, + { + "epoch": 1.65, + "learning_rate": 2.107769736933541e-05, + "loss": 0.9439, + "step": 2985 + }, + { + "epoch": 1.65, + "learning_rate": 2.100621026720591e-05, + "loss": 0.9719, + "step": 2990 + }, + { + "epoch": 1.66, + "learning_rate": 2.093475666048539e-05, + "loss": 0.9569, + "step": 2995 + }, + { + "epoch": 1.66, + "learning_rate": 2.0863337148446222e-05, + "loss": 0.9308, + "step": 3000 + }, + { + "epoch": 1.66, + "learning_rate": 2.07919523300748e-05, + "loss": 0.9269, + "step": 3005 + }, + { + "epoch": 1.66, + "learning_rate": 2.0720602804066552e-05, + "loss": 0.9358, + "step": 3010 + }, + { + "epoch": 1.67, + "learning_rate": 2.0649289168820943e-05, + "loss": 0.9291, + "step": 3015 + }, + { + "epoch": 1.67, + "learning_rate": 2.0578012022436386e-05, + "loss": 0.969, + "step": 3020 + }, + { + "epoch": 1.67, + "learning_rate": 2.0506771962705304e-05, + "loss": 0.9319, + "step": 3025 + }, + { + "epoch": 1.68, + "learning_rate": 2.0435569587109042e-05, + "loss": 0.9574, + "step": 3030 + }, + { + "epoch": 1.68, + "learning_rate": 2.036440549281293e-05, + "loss": 0.9654, + "step": 3035 + }, + { + "epoch": 1.68, + "learning_rate": 2.0293280276661204e-05, + "loss": 0.9742, + "step": 3040 + }, + { + "epoch": 1.68, + "learning_rate": 2.0222194535172067e-05, + "loss": 0.9014, + "step": 3045 + }, + { + "epoch": 1.69, + "learning_rate": 2.0151148864532623e-05, + "loss": 0.9246, + "step": 3050 + }, + { + "epoch": 1.69, + "learning_rate": 2.0080143860593913e-05, + "loss": 0.9522, + "step": 3055 + }, + { + "epoch": 1.69, + "learning_rate": 2.0009180118865933e-05, + "loss": 0.9567, + "step": 3060 + }, + { + "epoch": 1.69, + "learning_rate": 1.9938258234512588e-05, + "loss": 0.983, + "step": 3065 + }, + { + "epoch": 1.7, + "learning_rate": 1.9867378802346764e-05, + "loss": 0.8722, + "step": 3070 + }, + { + "epoch": 1.7, + "learning_rate": 1.979654241682527e-05, + "loss": 0.9122, + "step": 3075 + }, + { + "epoch": 1.7, + "learning_rate": 1.972574967204391e-05, + "loss": 0.9362, + "step": 3080 + }, + { + "epoch": 1.71, + "learning_rate": 1.9655001161732478e-05, + "loss": 0.8944, + "step": 3085 + }, + { + "epoch": 1.71, + "learning_rate": 1.9584297479249774e-05, + "loss": 0.9329, + "step": 3090 + }, + { + "epoch": 1.71, + "learning_rate": 1.9513639217578636e-05, + "loss": 0.9707, + "step": 3095 + }, + { + "epoch": 1.71, + "learning_rate": 1.9443026969320955e-05, + "loss": 0.9367, + "step": 3100 + } + ], + "max_steps": 5424, + "num_train_epochs": 3, + "total_flos": 1.0073876560714138e+18, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-3100/training_args.bin b/checkpoint-3100/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..77433459daca6603950afc463ff989e817f81ac3 --- /dev/null +++ b/checkpoint-3100/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0f6a8853599a52b1f15984d75ce4b6434c335fdc67d4509d0b2ca0d8c613590a +size 3362 diff --git a/checkpoint-3200/README.md b/checkpoint-3200/README.md new file mode 100644 index 0000000000000000000000000000000000000000..53c0fe6b436905ad7a2c060e98fe1ebdeac1a2fe --- /dev/null +++ b/checkpoint-3200/README.md @@ -0,0 +1,15 @@ +--- +library_name: peft +--- +## Training procedure + + +The following `bitsandbytes` quantization config was used during training: +- load_in_8bit: True +- llm_int8_threshold: 6.0 +- llm_int8_skip_modules: None +- llm_int8_enable_fp32_cpu_offload: False +### Framework versions + + +- PEFT 0.5.0 diff --git a/checkpoint-3200/adapter_config.json b/checkpoint-3200/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..514ae99ac7471cc26016f7575e44dd8304f29b07 --- /dev/null +++ b/checkpoint-3200/adapter_config.json @@ -0,0 +1,21 @@ +{ + "auto_mapping": null, + "base_model_name_or_path": "meta-llama/Llama-2-7b-chat-hf", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "q_proj", + "v_proj" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-3200/adapter_model.bin b/checkpoint-3200/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..9a7458e170ef0dc14759aa23901c90b3862ed2d5 --- /dev/null +++ b/checkpoint-3200/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d8f105665bd48354d3ea517de552f4102d5262ccc982989b94af05a359982504 +size 16821197 diff --git a/checkpoint-3200/finetuning_args.json b/checkpoint-3200/finetuning_args.json new file mode 100644 index 0000000000000000000000000000000000000000..d01efc206b59c6f88548e8f3940579f2ed2af33b --- /dev/null +++ b/checkpoint-3200/finetuning_args.json @@ -0,0 +1,16 @@ +{ + "dpo_beta": 0.1, + "finetuning_type": "lora", + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "lora_rank": 8, + "lora_target": [ + "q_proj", + "v_proj" + ], + "name_module_trainable": "mlp", + "num_hidden_layers": 32, + "num_layer_trainable": 3, + "ppo_score_norm": false, + "resume_lora_training": true +} diff --git a/checkpoint-3200/optimizer.pt b/checkpoint-3200/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..19ca81fcd7309f89607ef3488e13639e397d53f4 --- /dev/null +++ b/checkpoint-3200/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:edb11534052c3ca27937adb108b97b5ff7bdee29259d26e50ee06246be4806f0 +size 33629893 diff --git a/checkpoint-3200/rng_state.pth b/checkpoint-3200/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..cb2301e152d97b33347186384f04442b74aa50bc --- /dev/null +++ b/checkpoint-3200/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:abd52241dceff3773364844bdbd9d1a77a5721302bc53129c8ab7c39cf361537 +size 14575 diff --git a/checkpoint-3200/scaler.pt b/checkpoint-3200/scaler.pt new file mode 100644 index 0000000000000000000000000000000000000000..6fb7216ab854c51e4b47a72420b42c710ef1385c --- /dev/null +++ b/checkpoint-3200/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:610fe4e989b054a8e84d27ef2d213faf51616598c89cc764011e88603864dada +size 557 diff --git a/checkpoint-3200/scheduler.pt b/checkpoint-3200/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..1991c0cae2ad9c28f11fdce5920a7a08ce89c5c7 --- /dev/null +++ b/checkpoint-3200/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:637ac1faea4404324319cb08a290cb08830db156a7680ba6196cdd2321fc3835 +size 627 diff --git a/checkpoint-3200/trainer_state.json b/checkpoint-3200/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..1bc13acb48914c4fb53031ae95923b6554f4598e --- /dev/null +++ b/checkpoint-3200/trainer_state.json @@ -0,0 +1,3856 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.7694221730716064, + "global_step": 3200, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.9999895164082156e-05, + "loss": 1.6349, + "step": 5 + }, + { + "epoch": 0.01, + "learning_rate": 4.999958065720787e-05, + "loss": 1.6199, + "step": 10 + }, + { + "epoch": 0.01, + "learning_rate": 4.999905648201487e-05, + "loss": 1.4834, + "step": 15 + }, + { + "epoch": 0.01, + "learning_rate": 4.999832264289934e-05, + "loss": 1.3882, + "step": 20 + }, + { + "epoch": 0.01, + "learning_rate": 4.999737914601591e-05, + "loss": 1.3679, + "step": 25 + }, + { + "epoch": 0.02, + "learning_rate": 4.999622599927756e-05, + "loss": 1.2396, + "step": 30 + }, + { + "epoch": 0.02, + "learning_rate": 4.999486321235559e-05, + "loss": 1.321, + "step": 35 + }, + { + "epoch": 0.02, + "learning_rate": 4.9993290796679516e-05, + "loss": 1.2874, + "step": 40 + }, + { + "epoch": 0.02, + "learning_rate": 4.999150876543699e-05, + "loss": 1.2607, + "step": 45 + }, + { + "epoch": 0.03, + "learning_rate": 4.9989517133573694e-05, + "loss": 1.2454, + "step": 50 + }, + { + "epoch": 0.03, + "learning_rate": 4.9987315917793174e-05, + "loss": 1.2799, + "step": 55 + }, + { + "epoch": 0.03, + "learning_rate": 4.998490513655676e-05, + "loss": 1.2575, + "step": 60 + }, + { + "epoch": 0.04, + "learning_rate": 4.998228481008337e-05, + "loss": 1.2404, + "step": 65 + }, + { + "epoch": 0.04, + "learning_rate": 4.997945496034934e-05, + "loss": 1.2219, + "step": 70 + }, + { + "epoch": 0.04, + "learning_rate": 4.9976415611088267e-05, + "loss": 1.2241, + "step": 75 + }, + { + "epoch": 0.04, + "learning_rate": 4.997316678779079e-05, + "loss": 1.1716, + "step": 80 + }, + { + "epoch": 0.05, + "learning_rate": 4.996970851770438e-05, + "loss": 1.1883, + "step": 85 + }, + { + "epoch": 0.05, + "learning_rate": 4.9966040829833115e-05, + "loss": 1.205, + "step": 90 + }, + { + "epoch": 0.05, + "learning_rate": 4.9962163754937426e-05, + "loss": 1.1246, + "step": 95 + }, + { + "epoch": 0.06, + "learning_rate": 4.995807732553384e-05, + "loss": 1.1636, + "step": 100 + }, + { + "epoch": 0.06, + "learning_rate": 4.9953781575894723e-05, + "loss": 1.158, + "step": 105 + }, + { + "epoch": 0.06, + "learning_rate": 4.9949276542048e-05, + "loss": 1.1477, + "step": 110 + }, + { + "epoch": 0.06, + "learning_rate": 4.9944562261776805e-05, + "loss": 1.1678, + "step": 115 + }, + { + "epoch": 0.07, + "learning_rate": 4.9939638774619216e-05, + "loss": 1.1501, + "step": 120 + }, + { + "epoch": 0.07, + "learning_rate": 4.99345061218679e-05, + "loss": 1.1955, + "step": 125 + }, + { + "epoch": 0.07, + "learning_rate": 4.9929164346569756e-05, + "loss": 1.1724, + "step": 130 + }, + { + "epoch": 0.07, + "learning_rate": 4.9923613493525576e-05, + "loss": 1.177, + "step": 135 + }, + { + "epoch": 0.08, + "learning_rate": 4.991785360928968e-05, + "loss": 1.1418, + "step": 140 + }, + { + "epoch": 0.08, + "learning_rate": 4.991188474216947e-05, + "loss": 1.1898, + "step": 145 + }, + { + "epoch": 0.08, + "learning_rate": 4.9905706942225094e-05, + "loss": 1.1479, + "step": 150 + }, + { + "epoch": 0.09, + "learning_rate": 4.9899320261268966e-05, + "loss": 1.1356, + "step": 155 + }, + { + "epoch": 0.09, + "learning_rate": 4.989272475286537e-05, + "loss": 1.1397, + "step": 160 + }, + { + "epoch": 0.09, + "learning_rate": 4.9885920472330004e-05, + "loss": 1.1215, + "step": 165 + }, + { + "epoch": 0.09, + "learning_rate": 4.9878907476729516e-05, + "loss": 1.167, + "step": 170 + }, + { + "epoch": 0.1, + "learning_rate": 4.9871685824881e-05, + "loss": 1.1219, + "step": 175 + }, + { + "epoch": 0.1, + "learning_rate": 4.9864255577351534e-05, + "loss": 1.0835, + "step": 180 + }, + { + "epoch": 0.1, + "learning_rate": 4.985661679645769e-05, + "loss": 1.0721, + "step": 185 + }, + { + "epoch": 0.11, + "learning_rate": 4.9848769546264915e-05, + "loss": 1.0692, + "step": 190 + }, + { + "epoch": 0.11, + "learning_rate": 4.9840713892587146e-05, + "loss": 1.0488, + "step": 195 + }, + { + "epoch": 0.11, + "learning_rate": 4.983244990298609e-05, + "loss": 1.1285, + "step": 200 + }, + { + "epoch": 0.11, + "learning_rate": 4.982397764677081e-05, + "loss": 1.0832, + "step": 205 + }, + { + "epoch": 0.12, + "learning_rate": 4.981529719499704e-05, + "loss": 1.0652, + "step": 210 + }, + { + "epoch": 0.12, + "learning_rate": 4.980640862046663e-05, + "loss": 1.1043, + "step": 215 + }, + { + "epoch": 0.12, + "learning_rate": 4.979731199772693e-05, + "loss": 1.112, + "step": 220 + }, + { + "epoch": 0.12, + "learning_rate": 4.9788007403070146e-05, + "loss": 1.1029, + "step": 225 + }, + { + "epoch": 0.13, + "learning_rate": 4.977849491453277e-05, + "loss": 1.0869, + "step": 230 + }, + { + "epoch": 0.13, + "learning_rate": 4.976877461189481e-05, + "loss": 1.0843, + "step": 235 + }, + { + "epoch": 0.13, + "learning_rate": 4.975884657667922e-05, + "loss": 1.0789, + "step": 240 + }, + { + "epoch": 0.14, + "learning_rate": 4.974871089215118e-05, + "loss": 1.0449, + "step": 245 + }, + { + "epoch": 0.14, + "learning_rate": 4.9738367643317405e-05, + "loss": 1.1053, + "step": 250 + }, + { + "epoch": 0.14, + "learning_rate": 4.9727816916925395e-05, + "loss": 1.0651, + "step": 255 + }, + { + "epoch": 0.14, + "learning_rate": 4.971705880146276e-05, + "loss": 1.0828, + "step": 260 + }, + { + "epoch": 0.15, + "learning_rate": 4.970609338715646e-05, + "loss": 1.0932, + "step": 265 + }, + { + "epoch": 0.15, + "learning_rate": 4.969492076597203e-05, + "loss": 1.0648, + "step": 270 + }, + { + "epoch": 0.15, + "learning_rate": 4.968354103161283e-05, + "loss": 1.0948, + "step": 275 + }, + { + "epoch": 0.15, + "learning_rate": 4.967195427951926e-05, + "loss": 1.0721, + "step": 280 + }, + { + "epoch": 0.16, + "learning_rate": 4.9660160606867936e-05, + "loss": 1.124, + "step": 285 + }, + { + "epoch": 0.16, + "learning_rate": 4.9648160112570896e-05, + "loss": 1.0963, + "step": 290 + }, + { + "epoch": 0.16, + "learning_rate": 4.9635952897274773e-05, + "loss": 1.1078, + "step": 295 + }, + { + "epoch": 0.17, + "learning_rate": 4.9623539063359925e-05, + "loss": 1.1059, + "step": 300 + }, + { + "epoch": 0.17, + "learning_rate": 4.961091871493962e-05, + "loss": 1.1032, + "step": 305 + }, + { + "epoch": 0.17, + "learning_rate": 4.959809195785912e-05, + "loss": 1.0595, + "step": 310 + }, + { + "epoch": 0.17, + "learning_rate": 4.958505889969481e-05, + "loss": 1.1096, + "step": 315 + }, + { + "epoch": 0.18, + "learning_rate": 4.957181964975329e-05, + "loss": 1.0589, + "step": 320 + }, + { + "epoch": 0.18, + "learning_rate": 4.955837431907049e-05, + "loss": 1.0608, + "step": 325 + }, + { + "epoch": 0.18, + "learning_rate": 4.954472302041069e-05, + "loss": 1.0819, + "step": 330 + }, + { + "epoch": 0.19, + "learning_rate": 4.9530865868265605e-05, + "loss": 1.0759, + "step": 335 + }, + { + "epoch": 0.19, + "learning_rate": 4.951680297885342e-05, + "loss": 1.0515, + "step": 340 + }, + { + "epoch": 0.19, + "learning_rate": 4.950253447011779e-05, + "loss": 1.0371, + "step": 345 + }, + { + "epoch": 0.19, + "learning_rate": 4.948806046172691e-05, + "loss": 1.0619, + "step": 350 + }, + { + "epoch": 0.2, + "learning_rate": 4.947338107507245e-05, + "loss": 1.0757, + "step": 355 + }, + { + "epoch": 0.2, + "learning_rate": 4.945849643326857e-05, + "loss": 1.0686, + "step": 360 + }, + { + "epoch": 0.2, + "learning_rate": 4.9443406661150874e-05, + "loss": 1.0809, + "step": 365 + }, + { + "epoch": 0.2, + "learning_rate": 4.942811188527537e-05, + "loss": 1.0704, + "step": 370 + }, + { + "epoch": 0.21, + "learning_rate": 4.941261223391742e-05, + "loss": 1.0655, + "step": 375 + }, + { + "epoch": 0.21, + "learning_rate": 4.939690783707063e-05, + "loss": 1.1182, + "step": 380 + }, + { + "epoch": 0.21, + "learning_rate": 4.938099882644578e-05, + "loss": 1.081, + "step": 385 + }, + { + "epoch": 0.22, + "learning_rate": 4.9364885335469734e-05, + "loss": 1.0792, + "step": 390 + }, + { + "epoch": 0.22, + "learning_rate": 4.93485674992843e-05, + "loss": 1.0244, + "step": 395 + }, + { + "epoch": 0.22, + "learning_rate": 4.933204545474511e-05, + "loss": 1.0531, + "step": 400 + }, + { + "epoch": 0.22, + "learning_rate": 4.9315319340420465e-05, + "loss": 1.048, + "step": 405 + }, + { + "epoch": 0.23, + "learning_rate": 4.929838929659015e-05, + "loss": 1.04, + "step": 410 + }, + { + "epoch": 0.23, + "learning_rate": 4.9281255465244314e-05, + "loss": 1.0686, + "step": 415 + }, + { + "epoch": 0.23, + "learning_rate": 4.926391799008223e-05, + "loss": 1.0933, + "step": 420 + }, + { + "epoch": 0.24, + "learning_rate": 4.924637701651111e-05, + "loss": 1.0313, + "step": 425 + }, + { + "epoch": 0.24, + "learning_rate": 4.9228632691644874e-05, + "loss": 1.0811, + "step": 430 + }, + { + "epoch": 0.24, + "learning_rate": 4.921068516430293e-05, + "loss": 1.0617, + "step": 435 + }, + { + "epoch": 0.24, + "learning_rate": 4.919253458500892e-05, + "loss": 1.0482, + "step": 440 + }, + { + "epoch": 0.25, + "learning_rate": 4.9174181105989445e-05, + "loss": 1.0681, + "step": 445 + }, + { + "epoch": 0.25, + "learning_rate": 4.9155624881172834e-05, + "loss": 1.0476, + "step": 450 + }, + { + "epoch": 0.25, + "learning_rate": 4.913686606618777e-05, + "loss": 1.0463, + "step": 455 + }, + { + "epoch": 0.25, + "learning_rate": 4.911790481836208e-05, + "loss": 1.0213, + "step": 460 + }, + { + "epoch": 0.26, + "learning_rate": 4.909874129672133e-05, + "loss": 0.9855, + "step": 465 + }, + { + "epoch": 0.26, + "learning_rate": 4.907937566198757e-05, + "loss": 1.045, + "step": 470 + }, + { + "epoch": 0.26, + "learning_rate": 4.9059808076577914e-05, + "loss": 1.0442, + "step": 475 + }, + { + "epoch": 0.27, + "learning_rate": 4.904003870460323e-05, + "loss": 1.0354, + "step": 480 + }, + { + "epoch": 0.27, + "learning_rate": 4.9020067711866735e-05, + "loss": 1.0331, + "step": 485 + }, + { + "epoch": 0.27, + "learning_rate": 4.899989526586261e-05, + "loss": 1.0585, + "step": 490 + }, + { + "epoch": 0.27, + "learning_rate": 4.8979521535774636e-05, + "loss": 1.0223, + "step": 495 + }, + { + "epoch": 0.28, + "learning_rate": 4.895894669247468e-05, + "loss": 1.0118, + "step": 500 + }, + { + "epoch": 0.28, + "learning_rate": 4.8938170908521356e-05, + "loss": 1.0508, + "step": 505 + }, + { + "epoch": 0.28, + "learning_rate": 4.8917194358158534e-05, + "loss": 1.0656, + "step": 510 + }, + { + "epoch": 0.28, + "learning_rate": 4.8896017217313886e-05, + "loss": 1.0655, + "step": 515 + }, + { + "epoch": 0.29, + "learning_rate": 4.887463966359741e-05, + "loss": 1.0833, + "step": 520 + }, + { + "epoch": 0.29, + "learning_rate": 4.8853061876299956e-05, + "loss": 1.068, + "step": 525 + }, + { + "epoch": 0.29, + "learning_rate": 4.8831284036391684e-05, + "loss": 1.0487, + "step": 530 + }, + { + "epoch": 0.3, + "learning_rate": 4.880930632652058e-05, + "loss": 1.059, + "step": 535 + }, + { + "epoch": 0.3, + "learning_rate": 4.878712893101092e-05, + "loss": 1.0408, + "step": 540 + }, + { + "epoch": 0.3, + "learning_rate": 4.876475203586171e-05, + "loss": 0.9976, + "step": 545 + }, + { + "epoch": 0.3, + "learning_rate": 4.874217582874514e-05, + "loss": 1.0358, + "step": 550 + }, + { + "epoch": 0.31, + "learning_rate": 4.8719400499005e-05, + "loss": 1.0073, + "step": 555 + }, + { + "epoch": 0.31, + "learning_rate": 4.869642623765509e-05, + "loss": 1.0742, + "step": 560 + }, + { + "epoch": 0.31, + "learning_rate": 4.867325323737765e-05, + "loss": 1.0463, + "step": 565 + }, + { + "epoch": 0.32, + "learning_rate": 4.864988169252168e-05, + "loss": 1.0242, + "step": 570 + }, + { + "epoch": 0.32, + "learning_rate": 4.8626311799101375e-05, + "loss": 1.0773, + "step": 575 + }, + { + "epoch": 0.32, + "learning_rate": 4.860254375479446e-05, + "loss": 1.0371, + "step": 580 + }, + { + "epoch": 0.32, + "learning_rate": 4.8578577758940504e-05, + "loss": 1.0399, + "step": 585 + }, + { + "epoch": 0.33, + "learning_rate": 4.855441401253928e-05, + "loss": 1.0411, + "step": 590 + }, + { + "epoch": 0.33, + "learning_rate": 4.8530052718249076e-05, + "loss": 1.0476, + "step": 595 + }, + { + "epoch": 0.33, + "learning_rate": 4.850549408038498e-05, + "loss": 1.0075, + "step": 600 + }, + { + "epoch": 0.33, + "learning_rate": 4.848073830491717e-05, + "loss": 1.0045, + "step": 605 + }, + { + "epoch": 0.34, + "learning_rate": 4.845578559946923e-05, + "loss": 1.0368, + "step": 610 + }, + { + "epoch": 0.34, + "learning_rate": 4.8430636173316306e-05, + "loss": 1.0156, + "step": 615 + }, + { + "epoch": 0.34, + "learning_rate": 4.840529023738348e-05, + "loss": 1.0334, + "step": 620 + }, + { + "epoch": 0.35, + "learning_rate": 4.837974800424389e-05, + "loss": 0.9994, + "step": 625 + }, + { + "epoch": 0.35, + "learning_rate": 4.8354009688117026e-05, + "loss": 1.0467, + "step": 630 + }, + { + "epoch": 0.35, + "learning_rate": 4.8328075504866874e-05, + "loss": 0.9968, + "step": 635 + }, + { + "epoch": 0.35, + "learning_rate": 4.8301945672000164e-05, + "loss": 1.041, + "step": 640 + }, + { + "epoch": 0.36, + "learning_rate": 4.8275620408664487e-05, + "loss": 0.9811, + "step": 645 + }, + { + "epoch": 0.36, + "learning_rate": 4.8249099935646494e-05, + "loss": 1.0235, + "step": 650 + }, + { + "epoch": 0.36, + "learning_rate": 4.822238447537003e-05, + "loss": 1.0152, + "step": 655 + }, + { + "epoch": 0.36, + "learning_rate": 4.819547425189429e-05, + "loss": 1.038, + "step": 660 + }, + { + "epoch": 0.37, + "learning_rate": 4.81683694909119e-05, + "loss": 1.0387, + "step": 665 + }, + { + "epoch": 0.37, + "learning_rate": 4.814107041974707e-05, + "loss": 1.019, + "step": 670 + }, + { + "epoch": 0.37, + "learning_rate": 4.811357726735366e-05, + "loss": 1.0403, + "step": 675 + }, + { + "epoch": 0.38, + "learning_rate": 4.808589026431324e-05, + "loss": 1.0828, + "step": 680 + }, + { + "epoch": 0.38, + "learning_rate": 4.805800964283322e-05, + "loss": 1.0128, + "step": 685 + }, + { + "epoch": 0.38, + "learning_rate": 4.802993563674483e-05, + "loss": 1.0502, + "step": 690 + }, + { + "epoch": 0.38, + "learning_rate": 4.80016684815012e-05, + "loss": 1.0169, + "step": 695 + }, + { + "epoch": 0.39, + "learning_rate": 4.7973208414175406e-05, + "loss": 1.0316, + "step": 700 + }, + { + "epoch": 0.39, + "learning_rate": 4.794455567345842e-05, + "loss": 1.0263, + "step": 705 + }, + { + "epoch": 0.39, + "learning_rate": 4.791571049965714e-05, + "loss": 1.0801, + "step": 710 + }, + { + "epoch": 0.4, + "learning_rate": 4.7886673134692404e-05, + "loss": 1.0575, + "step": 715 + }, + { + "epoch": 0.4, + "learning_rate": 4.7857443822096905e-05, + "loss": 0.9767, + "step": 720 + }, + { + "epoch": 0.4, + "learning_rate": 4.782802280701319e-05, + "loss": 0.9666, + "step": 725 + }, + { + "epoch": 0.4, + "learning_rate": 4.77984103361916e-05, + "loss": 0.9983, + "step": 730 + }, + { + "epoch": 0.41, + "learning_rate": 4.776860665798816e-05, + "loss": 1.0219, + "step": 735 + }, + { + "epoch": 0.41, + "learning_rate": 4.773861202236257e-05, + "loss": 0.9963, + "step": 740 + }, + { + "epoch": 0.41, + "learning_rate": 4.770842668087602e-05, + "loss": 1.0219, + "step": 745 + }, + { + "epoch": 0.41, + "learning_rate": 4.767805088668916e-05, + "loss": 1.0029, + "step": 750 + }, + { + "epoch": 0.42, + "learning_rate": 4.7647484894559936e-05, + "loss": 1.0077, + "step": 755 + }, + { + "epoch": 0.42, + "learning_rate": 4.7616728960841444e-05, + "loss": 0.9908, + "step": 760 + }, + { + "epoch": 0.42, + "learning_rate": 4.758578334347981e-05, + "loss": 1.0092, + "step": 765 + }, + { + "epoch": 0.43, + "learning_rate": 4.7554648302012015e-05, + "loss": 0.9874, + "step": 770 + }, + { + "epoch": 0.43, + "learning_rate": 4.7523324097563706e-05, + "loss": 1.0185, + "step": 775 + }, + { + "epoch": 0.43, + "learning_rate": 4.749181099284703e-05, + "loss": 1.0533, + "step": 780 + }, + { + "epoch": 0.43, + "learning_rate": 4.746010925215839e-05, + "loss": 1.0083, + "step": 785 + }, + { + "epoch": 0.44, + "learning_rate": 4.74282191413763e-05, + "loss": 1.0191, + "step": 790 + }, + { + "epoch": 0.44, + "learning_rate": 4.7396140927959045e-05, + "loss": 0.9786, + "step": 795 + }, + { + "epoch": 0.44, + "learning_rate": 4.7363874880942574e-05, + "loss": 1.0197, + "step": 800 + }, + { + "epoch": 0.45, + "learning_rate": 4.733142127093813e-05, + "loss": 1.0471, + "step": 805 + }, + { + "epoch": 0.45, + "learning_rate": 4.7298780370130014e-05, + "loss": 1.0134, + "step": 810 + }, + { + "epoch": 0.45, + "learning_rate": 4.726595245227336e-05, + "loss": 0.9801, + "step": 815 + }, + { + "epoch": 0.45, + "learning_rate": 4.723293779269173e-05, + "loss": 0.9944, + "step": 820 + }, + { + "epoch": 0.46, + "learning_rate": 4.7199736668274924e-05, + "loss": 1.0358, + "step": 825 + }, + { + "epoch": 0.46, + "learning_rate": 4.716634935747655e-05, + "loss": 0.9907, + "step": 830 + }, + { + "epoch": 0.46, + "learning_rate": 4.713277614031177e-05, + "loss": 1.0698, + "step": 835 + }, + { + "epoch": 0.46, + "learning_rate": 4.70990172983549e-05, + "loss": 1.0466, + "step": 840 + }, + { + "epoch": 0.47, + "learning_rate": 4.706507311473707e-05, + "loss": 0.9489, + "step": 845 + }, + { + "epoch": 0.47, + "learning_rate": 4.703094387414385e-05, + "loss": 0.9936, + "step": 850 + }, + { + "epoch": 0.47, + "learning_rate": 4.699662986281288e-05, + "loss": 0.9672, + "step": 855 + }, + { + "epoch": 0.48, + "learning_rate": 4.696213136853141e-05, + "loss": 1.0164, + "step": 860 + }, + { + "epoch": 0.48, + "learning_rate": 4.6927448680633954e-05, + "loss": 0.9817, + "step": 865 + }, + { + "epoch": 0.48, + "learning_rate": 4.689258208999983e-05, + "loss": 1.028, + "step": 870 + }, + { + "epoch": 0.48, + "learning_rate": 4.6857531889050716e-05, + "loss": 1.0239, + "step": 875 + }, + { + "epoch": 0.49, + "learning_rate": 4.682229837174821e-05, + "loss": 1.0378, + "step": 880 + }, + { + "epoch": 0.49, + "learning_rate": 4.678688183359135e-05, + "loss": 0.9908, + "step": 885 + }, + { + "epoch": 0.49, + "learning_rate": 4.675128257161418e-05, + "loss": 0.9384, + "step": 890 + }, + { + "epoch": 0.49, + "learning_rate": 4.671550088438319e-05, + "loss": 0.9481, + "step": 895 + }, + { + "epoch": 0.5, + "learning_rate": 4.6679537071994874e-05, + "loss": 0.9541, + "step": 900 + }, + { + "epoch": 0.5, + "learning_rate": 4.6643391436073165e-05, + "loss": 0.9787, + "step": 905 + }, + { + "epoch": 0.5, + "learning_rate": 4.660706427976693e-05, + "loss": 0.9897, + "step": 910 + }, + { + "epoch": 0.51, + "learning_rate": 4.657055590774745e-05, + "loss": 0.9947, + "step": 915 + }, + { + "epoch": 0.51, + "learning_rate": 4.6533866626205805e-05, + "loss": 0.9614, + "step": 920 + }, + { + "epoch": 0.51, + "learning_rate": 4.649699674285036e-05, + "loss": 1.0323, + "step": 925 + }, + { + "epoch": 0.51, + "learning_rate": 4.645994656690417e-05, + "loss": 1.0345, + "step": 930 + }, + { + "epoch": 0.52, + "learning_rate": 4.642271640910235e-05, + "loss": 1.0432, + "step": 935 + }, + { + "epoch": 0.52, + "learning_rate": 4.638530658168954e-05, + "loss": 1.0102, + "step": 940 + }, + { + "epoch": 0.52, + "learning_rate": 4.6347717398417203e-05, + "loss": 1.0361, + "step": 945 + }, + { + "epoch": 0.53, + "learning_rate": 4.6309949174541096e-05, + "loss": 0.9818, + "step": 950 + }, + { + "epoch": 0.53, + "learning_rate": 4.627200222681851e-05, + "loss": 1.0293, + "step": 955 + }, + { + "epoch": 0.53, + "learning_rate": 4.6233876873505694e-05, + "loss": 1.0125, + "step": 960 + }, + { + "epoch": 0.53, + "learning_rate": 4.619557343435516e-05, + "loss": 1.0039, + "step": 965 + }, + { + "epoch": 0.54, + "learning_rate": 4.615709223061302e-05, + "loss": 1.0146, + "step": 970 + }, + { + "epoch": 0.54, + "learning_rate": 4.611843358501624e-05, + "loss": 1.0352, + "step": 975 + }, + { + "epoch": 0.54, + "learning_rate": 4.6079597821789993e-05, + "loss": 1.0405, + "step": 980 + }, + { + "epoch": 0.54, + "learning_rate": 4.604058526664491e-05, + "loss": 0.9834, + "step": 985 + }, + { + "epoch": 0.55, + "learning_rate": 4.600139624677436e-05, + "loss": 1.0252, + "step": 990 + }, + { + "epoch": 0.55, + "learning_rate": 4.596203109085168e-05, + "loss": 1.0516, + "step": 995 + }, + { + "epoch": 0.55, + "learning_rate": 4.5922490129027464e-05, + "loss": 0.9762, + "step": 1000 + }, + { + "epoch": 0.56, + "learning_rate": 4.588277369292674e-05, + "loss": 0.9801, + "step": 1005 + }, + { + "epoch": 0.56, + "learning_rate": 4.5842882115646234e-05, + "loss": 1.0127, + "step": 1010 + }, + { + "epoch": 0.56, + "learning_rate": 4.580281573175157e-05, + "loss": 1.0415, + "step": 1015 + }, + { + "epoch": 0.56, + "learning_rate": 4.576257487727442e-05, + "loss": 0.9974, + "step": 1020 + }, + { + "epoch": 0.57, + "learning_rate": 4.572215988970974e-05, + "loss": 1.0097, + "step": 1025 + }, + { + "epoch": 0.57, + "learning_rate": 4.568157110801293e-05, + "loss": 1.0004, + "step": 1030 + }, + { + "epoch": 0.57, + "learning_rate": 4.5640808872596944e-05, + "loss": 0.9932, + "step": 1035 + }, + { + "epoch": 0.58, + "learning_rate": 4.5599873525329505e-05, + "loss": 0.9437, + "step": 1040 + }, + { + "epoch": 0.58, + "learning_rate": 4.555876540953019e-05, + "loss": 1.0004, + "step": 1045 + }, + { + "epoch": 0.58, + "learning_rate": 4.551748486996755e-05, + "loss": 0.9743, + "step": 1050 + }, + { + "epoch": 0.58, + "learning_rate": 4.547603225285626e-05, + "loss": 1.0303, + "step": 1055 + }, + { + "epoch": 0.59, + "learning_rate": 4.543440790585417e-05, + "loss": 1.0225, + "step": 1060 + }, + { + "epoch": 0.59, + "learning_rate": 4.539261217805939e-05, + "loss": 1.0102, + "step": 1065 + }, + { + "epoch": 0.59, + "learning_rate": 4.535064542000743e-05, + "loss": 1.0087, + "step": 1070 + }, + { + "epoch": 0.59, + "learning_rate": 4.5308507983668165e-05, + "loss": 0.952, + "step": 1075 + }, + { + "epoch": 0.6, + "learning_rate": 4.526620022244293e-05, + "loss": 0.9767, + "step": 1080 + }, + { + "epoch": 0.6, + "learning_rate": 4.522372249116158e-05, + "loss": 1.0049, + "step": 1085 + }, + { + "epoch": 0.6, + "learning_rate": 4.5181075146079456e-05, + "loss": 1.0397, + "step": 1090 + }, + { + "epoch": 0.61, + "learning_rate": 4.5138258544874455e-05, + "loss": 1.0167, + "step": 1095 + }, + { + "epoch": 0.61, + "learning_rate": 4.5095273046643985e-05, + "loss": 0.9966, + "step": 1100 + }, + { + "epoch": 0.61, + "learning_rate": 4.5052119011901986e-05, + "loss": 0.983, + "step": 1105 + }, + { + "epoch": 0.61, + "learning_rate": 4.500879680257587e-05, + "loss": 1.0301, + "step": 1110 + }, + { + "epoch": 0.62, + "learning_rate": 4.4965306782003535e-05, + "loss": 1.0371, + "step": 1115 + }, + { + "epoch": 0.62, + "learning_rate": 4.492164931493028e-05, + "loss": 1.0083, + "step": 1120 + }, + { + "epoch": 0.62, + "learning_rate": 4.487782476750575e-05, + "loss": 0.9728, + "step": 1125 + }, + { + "epoch": 0.62, + "learning_rate": 4.4833833507280884e-05, + "loss": 0.9892, + "step": 1130 + }, + { + "epoch": 0.63, + "learning_rate": 4.4789675903204805e-05, + "loss": 0.9847, + "step": 1135 + }, + { + "epoch": 0.63, + "learning_rate": 4.474535232562176e-05, + "loss": 1.0545, + "step": 1140 + }, + { + "epoch": 0.63, + "learning_rate": 4.470086314626797e-05, + "loss": 1.0204, + "step": 1145 + }, + { + "epoch": 0.64, + "learning_rate": 4.465620873826856e-05, + "loss": 0.9648, + "step": 1150 + }, + { + "epoch": 0.64, + "learning_rate": 4.46113894761344e-05, + "loss": 0.9545, + "step": 1155 + }, + { + "epoch": 0.64, + "learning_rate": 4.456640573575896e-05, + "loss": 1.0256, + "step": 1160 + }, + { + "epoch": 0.64, + "learning_rate": 4.4521257894415183e-05, + "loss": 0.99, + "step": 1165 + }, + { + "epoch": 0.65, + "learning_rate": 4.44759463307523e-05, + "loss": 0.9906, + "step": 1170 + }, + { + "epoch": 0.65, + "learning_rate": 4.443047142479266e-05, + "loss": 0.9876, + "step": 1175 + }, + { + "epoch": 0.65, + "learning_rate": 4.4384833557928553e-05, + "loss": 1.0495, + "step": 1180 + }, + { + "epoch": 0.66, + "learning_rate": 4.4339033112918966e-05, + "loss": 0.9869, + "step": 1185 + }, + { + "epoch": 0.66, + "learning_rate": 4.4293070473886456e-05, + "loss": 1.0299, + "step": 1190 + }, + { + "epoch": 0.66, + "learning_rate": 4.424694602631385e-05, + "loss": 1.0073, + "step": 1195 + }, + { + "epoch": 0.66, + "learning_rate": 4.420066015704105e-05, + "loss": 1.0023, + "step": 1200 + }, + { + "epoch": 0.67, + "learning_rate": 4.41542132542618e-05, + "loss": 0.9462, + "step": 1205 + }, + { + "epoch": 0.67, + "learning_rate": 4.410760570752037e-05, + "loss": 1.0116, + "step": 1210 + }, + { + "epoch": 0.67, + "learning_rate": 4.4060837907708375e-05, + "loss": 0.9652, + "step": 1215 + }, + { + "epoch": 0.67, + "learning_rate": 4.401391024706142e-05, + "loss": 1.0411, + "step": 1220 + }, + { + "epoch": 0.68, + "learning_rate": 4.396682311915586e-05, + "loss": 0.9691, + "step": 1225 + }, + { + "epoch": 0.68, + "learning_rate": 4.3919576918905495e-05, + "loss": 0.961, + "step": 1230 + }, + { + "epoch": 0.68, + "learning_rate": 4.387217204255819e-05, + "loss": 0.9602, + "step": 1235 + }, + { + "epoch": 0.69, + "learning_rate": 4.3824608887692666e-05, + "loss": 1.0197, + "step": 1240 + }, + { + "epoch": 0.69, + "learning_rate": 4.377688785321507e-05, + "loss": 0.9982, + "step": 1245 + }, + { + "epoch": 0.69, + "learning_rate": 4.372900933935569e-05, + "loss": 0.986, + "step": 1250 + }, + { + "epoch": 0.69, + "learning_rate": 4.368097374766556e-05, + "loss": 0.9744, + "step": 1255 + }, + { + "epoch": 0.7, + "learning_rate": 4.3632781481013105e-05, + "loss": 1.0078, + "step": 1260 + }, + { + "epoch": 0.7, + "learning_rate": 4.358443294358077e-05, + "loss": 0.9865, + "step": 1265 + }, + { + "epoch": 0.7, + "learning_rate": 4.35359285408616e-05, + "loss": 0.976, + "step": 1270 + }, + { + "epoch": 0.71, + "learning_rate": 4.348726867965591e-05, + "loss": 1.0141, + "step": 1275 + }, + { + "epoch": 0.71, + "learning_rate": 4.343845376806777e-05, + "loss": 0.952, + "step": 1280 + }, + { + "epoch": 0.71, + "learning_rate": 4.338948421550169e-05, + "loss": 1.0239, + "step": 1285 + }, + { + "epoch": 0.71, + "learning_rate": 4.334036043265909e-05, + "loss": 0.994, + "step": 1290 + }, + { + "epoch": 0.72, + "learning_rate": 4.329108283153492e-05, + "loss": 1.0505, + "step": 1295 + }, + { + "epoch": 0.72, + "learning_rate": 4.3241651825414195e-05, + "loss": 0.9919, + "step": 1300 + }, + { + "epoch": 0.72, + "learning_rate": 4.31920678288685e-05, + "loss": 1.0169, + "step": 1305 + }, + { + "epoch": 0.72, + "learning_rate": 4.3142331257752546e-05, + "loss": 0.9938, + "step": 1310 + }, + { + "epoch": 0.73, + "learning_rate": 4.309244252920064e-05, + "loss": 0.92, + "step": 1315 + }, + { + "epoch": 0.73, + "learning_rate": 4.304240206162326e-05, + "loss": 0.9537, + "step": 1320 + }, + { + "epoch": 0.73, + "learning_rate": 4.299221027470345e-05, + "loss": 0.996, + "step": 1325 + }, + { + "epoch": 0.74, + "learning_rate": 4.294186758939339e-05, + "loss": 0.9652, + "step": 1330 + }, + { + "epoch": 0.74, + "learning_rate": 4.2891374427910795e-05, + "loss": 1.0138, + "step": 1335 + }, + { + "epoch": 0.74, + "learning_rate": 4.284073121373544e-05, + "loss": 0.9267, + "step": 1340 + }, + { + "epoch": 0.74, + "learning_rate": 4.278993837160553e-05, + "loss": 0.9899, + "step": 1345 + }, + { + "epoch": 0.75, + "learning_rate": 4.273899632751422e-05, + "loss": 0.963, + "step": 1350 + }, + { + "epoch": 0.75, + "learning_rate": 4.2687905508705974e-05, + "loss": 1.0066, + "step": 1355 + }, + { + "epoch": 0.75, + "learning_rate": 4.263666634367303e-05, + "loss": 0.9939, + "step": 1360 + }, + { + "epoch": 0.75, + "learning_rate": 4.258527926215178e-05, + "loss": 0.9667, + "step": 1365 + }, + { + "epoch": 0.76, + "learning_rate": 4.253374469511917e-05, + "loss": 0.9999, + "step": 1370 + }, + { + "epoch": 0.76, + "learning_rate": 4.248206307478909e-05, + "loss": 0.9738, + "step": 1375 + }, + { + "epoch": 0.76, + "learning_rate": 4.243023483460875e-05, + "loss": 0.9829, + "step": 1380 + }, + { + "epoch": 0.77, + "learning_rate": 4.237826040925503e-05, + "loss": 1.0107, + "step": 1385 + }, + { + "epoch": 0.77, + "learning_rate": 4.232614023463088e-05, + "loss": 0.9992, + "step": 1390 + }, + { + "epoch": 0.77, + "learning_rate": 4.227387474786159e-05, + "loss": 0.9566, + "step": 1395 + }, + { + "epoch": 0.77, + "learning_rate": 4.222146438729119e-05, + "loss": 0.9722, + "step": 1400 + }, + { + "epoch": 0.78, + "learning_rate": 4.216890959247873e-05, + "loss": 0.988, + "step": 1405 + }, + { + "epoch": 0.78, + "learning_rate": 4.211621080419463e-05, + "loss": 1.0103, + "step": 1410 + }, + { + "epoch": 0.78, + "learning_rate": 4.206336846441695e-05, + "loss": 0.9805, + "step": 1415 + }, + { + "epoch": 0.79, + "learning_rate": 4.201038301632772e-05, + "loss": 1.0177, + "step": 1420 + }, + { + "epoch": 0.79, + "learning_rate": 4.195725490430917e-05, + "loss": 0.9938, + "step": 1425 + }, + { + "epoch": 0.79, + "learning_rate": 4.190398457394007e-05, + "loss": 1.0276, + "step": 1430 + }, + { + "epoch": 0.79, + "learning_rate": 4.1850572471991924e-05, + "loss": 0.9909, + "step": 1435 + }, + { + "epoch": 0.8, + "learning_rate": 4.1797019046425264e-05, + "loss": 0.9794, + "step": 1440 + }, + { + "epoch": 0.8, + "learning_rate": 4.1743324746385914e-05, + "loss": 1.0073, + "step": 1445 + }, + { + "epoch": 0.8, + "learning_rate": 4.1689490022201154e-05, + "loss": 1.0106, + "step": 1450 + }, + { + "epoch": 0.8, + "learning_rate": 4.163551532537601e-05, + "loss": 1.0241, + "step": 1455 + }, + { + "epoch": 0.81, + "learning_rate": 4.1581401108589425e-05, + "loss": 1.0178, + "step": 1460 + }, + { + "epoch": 0.81, + "learning_rate": 4.1527147825690495e-05, + "loss": 0.9426, + "step": 1465 + }, + { + "epoch": 0.81, + "learning_rate": 4.1472755931694626e-05, + "loss": 0.9642, + "step": 1470 + }, + { + "epoch": 0.82, + "learning_rate": 4.141822588277976e-05, + "loss": 0.9678, + "step": 1475 + }, + { + "epoch": 0.82, + "learning_rate": 4.136355813628251e-05, + "loss": 0.9654, + "step": 1480 + }, + { + "epoch": 0.82, + "learning_rate": 4.130875315069435e-05, + "loss": 0.9748, + "step": 1485 + }, + { + "epoch": 0.82, + "learning_rate": 4.125381138565775e-05, + "loss": 0.9321, + "step": 1490 + }, + { + "epoch": 0.83, + "learning_rate": 4.1198733301962346e-05, + "loss": 0.9885, + "step": 1495 + }, + { + "epoch": 0.83, + "learning_rate": 4.114351936154105e-05, + "loss": 0.9385, + "step": 1500 + }, + { + "epoch": 0.83, + "learning_rate": 4.108817002746619e-05, + "loss": 0.9362, + "step": 1505 + }, + { + "epoch": 0.83, + "learning_rate": 4.1032685763945625e-05, + "loss": 0.9764, + "step": 1510 + }, + { + "epoch": 0.84, + "learning_rate": 4.097706703631886e-05, + "loss": 0.961, + "step": 1515 + }, + { + "epoch": 0.84, + "learning_rate": 4.092131431105312e-05, + "loss": 0.9818, + "step": 1520 + }, + { + "epoch": 0.84, + "learning_rate": 4.086542805573945e-05, + "loss": 0.9855, + "step": 1525 + }, + { + "epoch": 0.85, + "learning_rate": 4.080940873908881e-05, + "loss": 0.9618, + "step": 1530 + }, + { + "epoch": 0.85, + "learning_rate": 4.07532568309281e-05, + "loss": 1.0102, + "step": 1535 + }, + { + "epoch": 0.85, + "learning_rate": 4.069697280219628e-05, + "loss": 0.9773, + "step": 1540 + }, + { + "epoch": 0.85, + "learning_rate": 4.0640557124940376e-05, + "loss": 0.9687, + "step": 1545 + }, + { + "epoch": 0.86, + "learning_rate": 4.058401027231152e-05, + "loss": 0.9818, + "step": 1550 + }, + { + "epoch": 0.86, + "learning_rate": 4.052733271856103e-05, + "loss": 0.9911, + "step": 1555 + }, + { + "epoch": 0.86, + "learning_rate": 4.0470524939036355e-05, + "loss": 0.9694, + "step": 1560 + }, + { + "epoch": 0.87, + "learning_rate": 4.0413587410177155e-05, + "loss": 0.9374, + "step": 1565 + }, + { + "epoch": 0.87, + "learning_rate": 4.035652060951128e-05, + "loss": 0.956, + "step": 1570 + }, + { + "epoch": 0.87, + "learning_rate": 4.0299325015650774e-05, + "loss": 0.9491, + "step": 1575 + }, + { + "epoch": 0.87, + "learning_rate": 4.024200110828783e-05, + "loss": 0.9569, + "step": 1580 + }, + { + "epoch": 0.88, + "learning_rate": 4.018454936819082e-05, + "loss": 0.9627, + "step": 1585 + }, + { + "epoch": 0.88, + "learning_rate": 4.012697027720018e-05, + "loss": 0.9922, + "step": 1590 + }, + { + "epoch": 0.88, + "learning_rate": 4.0069264318224506e-05, + "loss": 0.9703, + "step": 1595 + }, + { + "epoch": 0.88, + "learning_rate": 4.0011431975236337e-05, + "loss": 0.9436, + "step": 1600 + }, + { + "epoch": 0.89, + "learning_rate": 3.995347373326822e-05, + "loss": 0.961, + "step": 1605 + }, + { + "epoch": 0.89, + "learning_rate": 3.989539007840861e-05, + "loss": 0.9247, + "step": 1610 + }, + { + "epoch": 0.89, + "learning_rate": 3.983718149779775e-05, + "loss": 0.9537, + "step": 1615 + }, + { + "epoch": 0.9, + "learning_rate": 3.9778848479623656e-05, + "loss": 0.9415, + "step": 1620 + }, + { + "epoch": 0.9, + "learning_rate": 3.972039151311795e-05, + "loss": 0.9954, + "step": 1625 + }, + { + "epoch": 0.9, + "learning_rate": 3.966181108855183e-05, + "loss": 0.9451, + "step": 1630 + }, + { + "epoch": 0.9, + "learning_rate": 3.960310769723189e-05, + "loss": 0.9977, + "step": 1635 + }, + { + "epoch": 0.91, + "learning_rate": 3.9544281831496034e-05, + "loss": 0.9806, + "step": 1640 + }, + { + "epoch": 0.91, + "learning_rate": 3.9485333984709374e-05, + "loss": 0.9851, + "step": 1645 + }, + { + "epoch": 0.91, + "learning_rate": 3.942626465126001e-05, + "loss": 1.0089, + "step": 1650 + }, + { + "epoch": 0.92, + "learning_rate": 3.9367074326555e-05, + "loss": 0.9562, + "step": 1655 + }, + { + "epoch": 0.92, + "learning_rate": 3.930776350701609e-05, + "loss": 0.9892, + "step": 1660 + }, + { + "epoch": 0.92, + "learning_rate": 3.92483326900756e-05, + "loss": 0.9875, + "step": 1665 + }, + { + "epoch": 0.92, + "learning_rate": 3.91887823741723e-05, + "loss": 1.0012, + "step": 1670 + }, + { + "epoch": 0.93, + "learning_rate": 3.9129113058747136e-05, + "loss": 0.97, + "step": 1675 + }, + { + "epoch": 0.93, + "learning_rate": 3.9069325244239095e-05, + "loss": 0.9793, + "step": 1680 + }, + { + "epoch": 0.93, + "learning_rate": 3.900941943208103e-05, + "loss": 0.9864, + "step": 1685 + }, + { + "epoch": 0.93, + "learning_rate": 3.894939612469539e-05, + "loss": 0.9596, + "step": 1690 + }, + { + "epoch": 0.94, + "learning_rate": 3.888925582549006e-05, + "loss": 0.9679, + "step": 1695 + }, + { + "epoch": 0.94, + "learning_rate": 3.882899903885412e-05, + "loss": 0.9395, + "step": 1700 + }, + { + "epoch": 0.94, + "learning_rate": 3.876862627015361e-05, + "loss": 0.9774, + "step": 1705 + }, + { + "epoch": 0.95, + "learning_rate": 3.87081380257273e-05, + "loss": 0.9686, + "step": 1710 + }, + { + "epoch": 0.95, + "learning_rate": 3.864753481288244e-05, + "loss": 0.953, + "step": 1715 + }, + { + "epoch": 0.95, + "learning_rate": 3.8586817139890515e-05, + "loss": 0.9719, + "step": 1720 + }, + { + "epoch": 0.95, + "learning_rate": 3.852598551598294e-05, + "loss": 0.9748, + "step": 1725 + }, + { + "epoch": 0.96, + "learning_rate": 3.8465040451346874e-05, + "loss": 0.9741, + "step": 1730 + }, + { + "epoch": 0.96, + "learning_rate": 3.8403982457120836e-05, + "loss": 0.9747, + "step": 1735 + }, + { + "epoch": 0.96, + "learning_rate": 3.834281204539051e-05, + "loss": 0.9791, + "step": 1740 + }, + { + "epoch": 0.96, + "learning_rate": 3.828152972918438e-05, + "loss": 0.9704, + "step": 1745 + }, + { + "epoch": 0.97, + "learning_rate": 3.82201360224695e-05, + "loss": 0.9556, + "step": 1750 + }, + { + "epoch": 0.97, + "learning_rate": 3.815863144014711e-05, + "loss": 0.9552, + "step": 1755 + }, + { + "epoch": 0.97, + "learning_rate": 3.809701649804834e-05, + "loss": 0.9594, + "step": 1760 + }, + { + "epoch": 0.98, + "learning_rate": 3.8035291712929926e-05, + "loss": 0.959, + "step": 1765 + }, + { + "epoch": 0.98, + "learning_rate": 3.797345760246982e-05, + "loss": 1.0254, + "step": 1770 + }, + { + "epoch": 0.98, + "learning_rate": 3.791151468526289e-05, + "loss": 0.9925, + "step": 1775 + }, + { + "epoch": 0.98, + "learning_rate": 3.784946348081654e-05, + "loss": 0.9516, + "step": 1780 + }, + { + "epoch": 0.99, + "learning_rate": 3.7787304509546365e-05, + "loss": 0.954, + "step": 1785 + }, + { + "epoch": 0.99, + "learning_rate": 3.7725038292771774e-05, + "loss": 0.9465, + "step": 1790 + }, + { + "epoch": 0.99, + "learning_rate": 3.766266535271167e-05, + "loss": 0.9792, + "step": 1795 + }, + { + "epoch": 1.0, + "learning_rate": 3.760018621248e-05, + "loss": 0.964, + "step": 1800 + }, + { + "epoch": 1.0, + "learning_rate": 3.75376013960814e-05, + "loss": 0.9419, + "step": 1805 + }, + { + "epoch": 1.0, + "learning_rate": 3.747491142840681e-05, + "loss": 0.9818, + "step": 1810 + }, + { + "epoch": 1.0, + "learning_rate": 3.741211683522904e-05, + "loss": 0.9153, + "step": 1815 + }, + { + "epoch": 1.01, + "learning_rate": 3.734921814319841e-05, + "loss": 0.955, + "step": 1820 + }, + { + "epoch": 1.01, + "learning_rate": 3.728621587983828e-05, + "loss": 0.9467, + "step": 1825 + }, + { + "epoch": 1.01, + "learning_rate": 3.722311057354067e-05, + "loss": 0.9816, + "step": 1830 + }, + { + "epoch": 1.01, + "learning_rate": 3.715990275356178e-05, + "loss": 0.9727, + "step": 1835 + }, + { + "epoch": 1.02, + "learning_rate": 3.7096592950017617e-05, + "loss": 0.9822, + "step": 1840 + }, + { + "epoch": 1.02, + "learning_rate": 3.703318169387947e-05, + "loss": 0.942, + "step": 1845 + }, + { + "epoch": 1.02, + "learning_rate": 3.696966951696952e-05, + "loss": 0.9306, + "step": 1850 + }, + { + "epoch": 1.03, + "learning_rate": 3.690605695195637e-05, + "loss": 0.9654, + "step": 1855 + }, + { + "epoch": 1.03, + "learning_rate": 3.684234453235054e-05, + "loss": 0.9592, + "step": 1860 + }, + { + "epoch": 1.03, + "learning_rate": 3.677853279250003e-05, + "loss": 0.988, + "step": 1865 + }, + { + "epoch": 1.03, + "learning_rate": 3.671462226758583e-05, + "loss": 0.9462, + "step": 1870 + }, + { + "epoch": 1.04, + "learning_rate": 3.665061349361742e-05, + "loss": 0.9685, + "step": 1875 + }, + { + "epoch": 1.04, + "learning_rate": 3.658650700742828e-05, + "loss": 0.9772, + "step": 1880 + }, + { + "epoch": 1.04, + "learning_rate": 3.6522303346671404e-05, + "loss": 0.9482, + "step": 1885 + }, + { + "epoch": 1.05, + "learning_rate": 3.645800304981477e-05, + "loss": 1.0069, + "step": 1890 + }, + { + "epoch": 1.05, + "learning_rate": 3.639360665613683e-05, + "loss": 0.9003, + "step": 1895 + }, + { + "epoch": 1.05, + "learning_rate": 3.632911470572197e-05, + "loss": 0.9279, + "step": 1900 + }, + { + "epoch": 1.05, + "learning_rate": 3.626452773945603e-05, + "loss": 0.9237, + "step": 1905 + }, + { + "epoch": 1.06, + "learning_rate": 3.619984629902172e-05, + "loss": 1.0086, + "step": 1910 + }, + { + "epoch": 1.06, + "learning_rate": 3.613507092689409e-05, + "loss": 0.9625, + "step": 1915 + }, + { + "epoch": 1.06, + "learning_rate": 3.607020216633599e-05, + "loss": 0.9297, + "step": 1920 + }, + { + "epoch": 1.06, + "learning_rate": 3.60052405613935e-05, + "loss": 0.9718, + "step": 1925 + }, + { + "epoch": 1.07, + "learning_rate": 3.594018665689139e-05, + "loss": 0.9512, + "step": 1930 + }, + { + "epoch": 1.07, + "learning_rate": 3.5875040998428513e-05, + "loss": 0.9923, + "step": 1935 + }, + { + "epoch": 1.07, + "learning_rate": 3.5809804132373253e-05, + "loss": 0.9518, + "step": 1940 + }, + { + "epoch": 1.08, + "learning_rate": 3.574447660585897e-05, + "loss": 0.9324, + "step": 1945 + }, + { + "epoch": 1.08, + "learning_rate": 3.5679058966779344e-05, + "loss": 0.9724, + "step": 1950 + }, + { + "epoch": 1.08, + "learning_rate": 3.561355176378384e-05, + "loss": 0.978, + "step": 1955 + }, + { + "epoch": 1.08, + "learning_rate": 3.554795554627307e-05, + "loss": 0.9893, + "step": 1960 + }, + { + "epoch": 1.09, + "learning_rate": 3.548227086439422e-05, + "loss": 0.967, + "step": 1965 + }, + { + "epoch": 1.09, + "learning_rate": 3.541649826903639e-05, + "loss": 0.9529, + "step": 1970 + }, + { + "epoch": 1.09, + "learning_rate": 3.535063831182602e-05, + "loss": 0.9506, + "step": 1975 + }, + { + "epoch": 1.09, + "learning_rate": 3.528469154512224e-05, + "loss": 0.9525, + "step": 1980 + }, + { + "epoch": 1.1, + "learning_rate": 3.521865852201223e-05, + "loss": 0.9258, + "step": 1985 + }, + { + "epoch": 1.1, + "learning_rate": 3.5152539796306596e-05, + "loss": 0.9417, + "step": 1990 + }, + { + "epoch": 1.1, + "learning_rate": 3.508633592253472e-05, + "loss": 0.9465, + "step": 1995 + }, + { + "epoch": 1.11, + "learning_rate": 3.502004745594011e-05, + "loss": 0.9494, + "step": 2000 + }, + { + "epoch": 1.11, + "learning_rate": 3.4953674952475755e-05, + "loss": 0.9709, + "step": 2005 + }, + { + "epoch": 1.11, + "learning_rate": 3.488721896879943e-05, + "loss": 0.9581, + "step": 2010 + }, + { + "epoch": 1.11, + "learning_rate": 3.4820680062269074e-05, + "loss": 0.974, + "step": 2015 + }, + { + "epoch": 1.12, + "learning_rate": 3.4754058790938046e-05, + "loss": 0.9768, + "step": 2020 + }, + { + "epoch": 1.12, + "learning_rate": 3.468735571355055e-05, + "loss": 0.982, + "step": 2025 + }, + { + "epoch": 1.12, + "learning_rate": 3.4620571389536825e-05, + "loss": 0.984, + "step": 2030 + }, + { + "epoch": 1.13, + "learning_rate": 3.455370637900856e-05, + "loss": 0.9604, + "step": 2035 + }, + { + "epoch": 1.13, + "learning_rate": 3.448676124275414e-05, + "loss": 0.897, + "step": 2040 + }, + { + "epoch": 1.13, + "learning_rate": 3.4419736542233925e-05, + "loss": 0.9968, + "step": 2045 + }, + { + "epoch": 1.13, + "learning_rate": 3.4352632839575616e-05, + "loss": 0.9479, + "step": 2050 + }, + { + "epoch": 1.14, + "learning_rate": 3.428545069756946e-05, + "loss": 0.9724, + "step": 2055 + }, + { + "epoch": 1.14, + "learning_rate": 3.42181906796636e-05, + "loss": 0.9493, + "step": 2060 + }, + { + "epoch": 1.14, + "learning_rate": 3.415085334995927e-05, + "loss": 0.9348, + "step": 2065 + }, + { + "epoch": 1.14, + "learning_rate": 3.408343927320613e-05, + "loss": 0.9702, + "step": 2070 + }, + { + "epoch": 1.15, + "learning_rate": 3.401594901479753e-05, + "loss": 0.9089, + "step": 2075 + }, + { + "epoch": 1.15, + "learning_rate": 3.394838314076572e-05, + "loss": 0.9606, + "step": 2080 + }, + { + "epoch": 1.15, + "learning_rate": 3.3880742217777115e-05, + "loss": 0.9743, + "step": 2085 + }, + { + "epoch": 1.16, + "learning_rate": 3.381302681312759e-05, + "loss": 0.9469, + "step": 2090 + }, + { + "epoch": 1.16, + "learning_rate": 3.374523749473767e-05, + "loss": 0.949, + "step": 2095 + }, + { + "epoch": 1.16, + "learning_rate": 3.367737483114779e-05, + "loss": 0.9567, + "step": 2100 + }, + { + "epoch": 1.16, + "learning_rate": 3.360943939151351e-05, + "loss": 0.9718, + "step": 2105 + }, + { + "epoch": 1.17, + "learning_rate": 3.354143174560078e-05, + "loss": 0.9626, + "step": 2110 + }, + { + "epoch": 1.17, + "learning_rate": 3.3473352463781105e-05, + "loss": 0.9346, + "step": 2115 + }, + { + "epoch": 1.17, + "learning_rate": 3.340520211702681e-05, + "loss": 0.9208, + "step": 2120 + }, + { + "epoch": 1.18, + "learning_rate": 3.333698127690623e-05, + "loss": 0.9856, + "step": 2125 + }, + { + "epoch": 1.18, + "learning_rate": 3.326869051557891e-05, + "loss": 0.9049, + "step": 2130 + }, + { + "epoch": 1.18, + "learning_rate": 3.320033040579082e-05, + "loss": 0.9222, + "step": 2135 + }, + { + "epoch": 1.18, + "learning_rate": 3.3131901520869565e-05, + "loss": 0.9648, + "step": 2140 + }, + { + "epoch": 1.19, + "learning_rate": 3.306340443471951e-05, + "loss": 0.9538, + "step": 2145 + }, + { + "epoch": 1.19, + "learning_rate": 3.299483972181708e-05, + "loss": 0.9314, + "step": 2150 + }, + { + "epoch": 1.19, + "learning_rate": 3.292620795720583e-05, + "loss": 0.9576, + "step": 2155 + }, + { + "epoch": 1.19, + "learning_rate": 3.285750971649167e-05, + "loss": 0.9499, + "step": 2160 + }, + { + "epoch": 1.2, + "learning_rate": 3.278874557583807e-05, + "loss": 0.9568, + "step": 2165 + }, + { + "epoch": 1.2, + "learning_rate": 3.271991611196117e-05, + "loss": 0.9642, + "step": 2170 + }, + { + "epoch": 1.2, + "learning_rate": 3.265102190212497e-05, + "loss": 0.9526, + "step": 2175 + }, + { + "epoch": 1.21, + "learning_rate": 3.258206352413648e-05, + "loss": 0.933, + "step": 2180 + }, + { + "epoch": 1.21, + "learning_rate": 3.2513041556340887e-05, + "loss": 0.9683, + "step": 2185 + }, + { + "epoch": 1.21, + "learning_rate": 3.244395657761671e-05, + "loss": 0.9155, + "step": 2190 + }, + { + "epoch": 1.21, + "learning_rate": 3.2374809167370924e-05, + "loss": 0.9262, + "step": 2195 + }, + { + "epoch": 1.22, + "learning_rate": 3.230559990553409e-05, + "loss": 0.9778, + "step": 2200 + }, + { + "epoch": 1.22, + "learning_rate": 3.2236329372555544e-05, + "loss": 0.9577, + "step": 2205 + }, + { + "epoch": 1.22, + "learning_rate": 3.2166998149398465e-05, + "loss": 0.9286, + "step": 2210 + }, + { + "epoch": 1.22, + "learning_rate": 3.209760681753505e-05, + "loss": 0.9634, + "step": 2215 + }, + { + "epoch": 1.23, + "learning_rate": 3.2028155958941615e-05, + "loss": 0.9467, + "step": 2220 + }, + { + "epoch": 1.23, + "learning_rate": 3.195864615609373e-05, + "loss": 0.9543, + "step": 2225 + }, + { + "epoch": 1.23, + "learning_rate": 3.1889077991961304e-05, + "loss": 0.9914, + "step": 2230 + }, + { + "epoch": 1.24, + "learning_rate": 3.181945205000373e-05, + "loss": 0.9309, + "step": 2235 + }, + { + "epoch": 1.24, + "learning_rate": 3.1749768914164955e-05, + "loss": 0.9299, + "step": 2240 + }, + { + "epoch": 1.24, + "learning_rate": 3.168002916886864e-05, + "loss": 0.9462, + "step": 2245 + }, + { + "epoch": 1.24, + "learning_rate": 3.1610233399013194e-05, + "loss": 0.948, + "step": 2250 + }, + { + "epoch": 1.25, + "learning_rate": 3.15403821899669e-05, + "loss": 0.9335, + "step": 2255 + }, + { + "epoch": 1.25, + "learning_rate": 3.147047612756302e-05, + "loss": 0.952, + "step": 2260 + }, + { + "epoch": 1.25, + "learning_rate": 3.140051579809484e-05, + "loss": 0.9532, + "step": 2265 + }, + { + "epoch": 1.26, + "learning_rate": 3.133050178831079e-05, + "loss": 0.9853, + "step": 2270 + }, + { + "epoch": 1.26, + "learning_rate": 3.12604346854095e-05, + "loss": 0.8925, + "step": 2275 + }, + { + "epoch": 1.26, + "learning_rate": 3.119031507703491e-05, + "loss": 0.942, + "step": 2280 + }, + { + "epoch": 1.26, + "learning_rate": 3.112014355127129e-05, + "loss": 0.9132, + "step": 2285 + }, + { + "epoch": 1.27, + "learning_rate": 3.104992069663835e-05, + "loss": 0.9335, + "step": 2290 + }, + { + "epoch": 1.27, + "learning_rate": 3.0979647102086273e-05, + "loss": 0.9403, + "step": 2295 + }, + { + "epoch": 1.27, + "learning_rate": 3.090932335699081e-05, + "loss": 0.9246, + "step": 2300 + }, + { + "epoch": 1.27, + "learning_rate": 3.083895005114831e-05, + "loss": 0.912, + "step": 2305 + }, + { + "epoch": 1.28, + "learning_rate": 3.076852777477079e-05, + "loss": 0.9334, + "step": 2310 + }, + { + "epoch": 1.28, + "learning_rate": 3.069805711848096e-05, + "loss": 0.933, + "step": 2315 + }, + { + "epoch": 1.28, + "learning_rate": 3.062753867330729e-05, + "loss": 0.9348, + "step": 2320 + }, + { + "epoch": 1.29, + "learning_rate": 3.055697303067905e-05, + "loss": 0.9997, + "step": 2325 + }, + { + "epoch": 1.29, + "learning_rate": 3.048636078242137e-05, + "loss": 0.9196, + "step": 2330 + }, + { + "epoch": 1.29, + "learning_rate": 3.0415702520750235e-05, + "loss": 0.9735, + "step": 2335 + }, + { + "epoch": 1.29, + "learning_rate": 3.0344998838267525e-05, + "loss": 0.9395, + "step": 2340 + }, + { + "epoch": 1.3, + "learning_rate": 3.0274250327956093e-05, + "loss": 0.9455, + "step": 2345 + }, + { + "epoch": 1.3, + "learning_rate": 3.020345758317474e-05, + "loss": 0.968, + "step": 2350 + }, + { + "epoch": 1.3, + "learning_rate": 3.0132621197653245e-05, + "loss": 0.9403, + "step": 2355 + }, + { + "epoch": 1.3, + "learning_rate": 3.0061741765487418e-05, + "loss": 0.9267, + "step": 2360 + }, + { + "epoch": 1.31, + "learning_rate": 2.9990819881134073e-05, + "loss": 0.9734, + "step": 2365 + }, + { + "epoch": 1.31, + "learning_rate": 2.9919856139406093e-05, + "loss": 0.949, + "step": 2370 + }, + { + "epoch": 1.31, + "learning_rate": 2.9848851135467386e-05, + "loss": 0.9464, + "step": 2375 + }, + { + "epoch": 1.32, + "learning_rate": 2.977780546482794e-05, + "loss": 0.9709, + "step": 2380 + }, + { + "epoch": 1.32, + "learning_rate": 2.9706719723338795e-05, + "loss": 0.9202, + "step": 2385 + }, + { + "epoch": 1.32, + "learning_rate": 2.9635594507187074e-05, + "loss": 0.9625, + "step": 2390 + }, + { + "epoch": 1.32, + "learning_rate": 2.956443041289096e-05, + "loss": 0.9307, + "step": 2395 + }, + { + "epoch": 1.33, + "learning_rate": 2.9493228037294702e-05, + "loss": 0.907, + "step": 2400 + }, + { + "epoch": 1.33, + "learning_rate": 2.9421987977563613e-05, + "loss": 0.9487, + "step": 2405 + }, + { + "epoch": 1.33, + "learning_rate": 2.935071083117907e-05, + "loss": 0.9259, + "step": 2410 + }, + { + "epoch": 1.34, + "learning_rate": 2.9279397195933457e-05, + "loss": 0.9357, + "step": 2415 + }, + { + "epoch": 1.34, + "learning_rate": 2.920804766992521e-05, + "loss": 0.9149, + "step": 2420 + }, + { + "epoch": 1.34, + "learning_rate": 2.9136662851553787e-05, + "loss": 0.9664, + "step": 2425 + }, + { + "epoch": 1.34, + "learning_rate": 2.906524333951461e-05, + "loss": 0.973, + "step": 2430 + }, + { + "epoch": 1.35, + "learning_rate": 2.899378973279409e-05, + "loss": 0.9797, + "step": 2435 + }, + { + "epoch": 1.35, + "learning_rate": 2.892230263066459e-05, + "loss": 0.9604, + "step": 2440 + }, + { + "epoch": 1.35, + "learning_rate": 2.885078263267938e-05, + "loss": 0.9147, + "step": 2445 + }, + { + "epoch": 1.35, + "learning_rate": 2.8779230338667634e-05, + "loss": 0.9827, + "step": 2450 + }, + { + "epoch": 1.36, + "learning_rate": 2.870764634872939e-05, + "loss": 0.9362, + "step": 2455 + }, + { + "epoch": 1.36, + "learning_rate": 2.86360312632305e-05, + "loss": 0.9373, + "step": 2460 + }, + { + "epoch": 1.36, + "learning_rate": 2.8564385682797622e-05, + "loss": 0.955, + "step": 2465 + }, + { + "epoch": 1.37, + "learning_rate": 2.8492710208313177e-05, + "loss": 0.9108, + "step": 2470 + }, + { + "epoch": 1.37, + "learning_rate": 2.8421005440910303e-05, + "loss": 0.9259, + "step": 2475 + }, + { + "epoch": 1.37, + "learning_rate": 2.8349271981967797e-05, + "loss": 0.9333, + "step": 2480 + }, + { + "epoch": 1.37, + "learning_rate": 2.8277510433105102e-05, + "loss": 0.9437, + "step": 2485 + }, + { + "epoch": 1.38, + "learning_rate": 2.820572139617725e-05, + "loss": 0.9189, + "step": 2490 + }, + { + "epoch": 1.38, + "learning_rate": 2.8133905473269802e-05, + "loss": 0.9522, + "step": 2495 + }, + { + "epoch": 1.38, + "learning_rate": 2.8062063266693818e-05, + "loss": 0.9352, + "step": 2500 + }, + { + "epoch": 1.39, + "learning_rate": 2.7990195378980784e-05, + "loss": 0.9232, + "step": 2505 + }, + { + "epoch": 1.39, + "learning_rate": 2.7918302412877583e-05, + "loss": 0.8807, + "step": 2510 + }, + { + "epoch": 1.39, + "learning_rate": 2.7846384971341427e-05, + "loss": 0.9231, + "step": 2515 + }, + { + "epoch": 1.39, + "learning_rate": 2.7774443657534788e-05, + "loss": 0.9253, + "step": 2520 + }, + { + "epoch": 1.4, + "learning_rate": 2.770247907482036e-05, + "loss": 0.9646, + "step": 2525 + }, + { + "epoch": 1.4, + "learning_rate": 2.763049182675599e-05, + "loss": 0.9575, + "step": 2530 + }, + { + "epoch": 1.4, + "learning_rate": 2.7558482517089617e-05, + "loss": 0.9234, + "step": 2535 + }, + { + "epoch": 1.4, + "learning_rate": 2.748645174975421e-05, + "loss": 0.9215, + "step": 2540 + }, + { + "epoch": 1.41, + "learning_rate": 2.74144001288627e-05, + "loss": 0.959, + "step": 2545 + }, + { + "epoch": 1.41, + "learning_rate": 2.7342328258702894e-05, + "loss": 0.9509, + "step": 2550 + }, + { + "epoch": 1.41, + "learning_rate": 2.727023674373246e-05, + "loss": 0.8989, + "step": 2555 + }, + { + "epoch": 1.42, + "learning_rate": 2.7198126188573807e-05, + "loss": 0.9653, + "step": 2560 + }, + { + "epoch": 1.42, + "learning_rate": 2.7125997198009028e-05, + "loss": 0.9046, + "step": 2565 + }, + { + "epoch": 1.42, + "learning_rate": 2.7053850376974848e-05, + "loss": 0.9318, + "step": 2570 + }, + { + "epoch": 1.42, + "learning_rate": 2.6981686330557516e-05, + "loss": 0.9292, + "step": 2575 + }, + { + "epoch": 1.43, + "learning_rate": 2.6909505663987756e-05, + "loss": 0.99, + "step": 2580 + }, + { + "epoch": 1.43, + "learning_rate": 2.6837308982635678e-05, + "loss": 0.9737, + "step": 2585 + }, + { + "epoch": 1.43, + "learning_rate": 2.6765096892005726e-05, + "loss": 0.9649, + "step": 2590 + }, + { + "epoch": 1.43, + "learning_rate": 2.6692869997731545e-05, + "loss": 0.9687, + "step": 2595 + }, + { + "epoch": 1.44, + "learning_rate": 2.6620628905570964e-05, + "loss": 0.9708, + "step": 2600 + }, + { + "epoch": 1.44, + "learning_rate": 2.6548374221400884e-05, + "loss": 0.9498, + "step": 2605 + }, + { + "epoch": 1.44, + "learning_rate": 2.6476106551212188e-05, + "loss": 0.9512, + "step": 2610 + }, + { + "epoch": 1.45, + "learning_rate": 2.6403826501104682e-05, + "loss": 0.964, + "step": 2615 + }, + { + "epoch": 1.45, + "learning_rate": 2.6331534677281998e-05, + "loss": 0.9321, + "step": 2620 + }, + { + "epoch": 1.45, + "learning_rate": 2.6259231686046508e-05, + "loss": 0.9032, + "step": 2625 + }, + { + "epoch": 1.45, + "learning_rate": 2.6186918133794252e-05, + "loss": 0.9543, + "step": 2630 + }, + { + "epoch": 1.46, + "learning_rate": 2.6114594627009847e-05, + "loss": 0.9355, + "step": 2635 + }, + { + "epoch": 1.46, + "learning_rate": 2.604226177226137e-05, + "loss": 0.9111, + "step": 2640 + }, + { + "epoch": 1.46, + "learning_rate": 2.596992017619534e-05, + "loss": 0.9896, + "step": 2645 + }, + { + "epoch": 1.47, + "learning_rate": 2.589757044553155e-05, + "loss": 0.9715, + "step": 2650 + }, + { + "epoch": 1.47, + "learning_rate": 2.5825213187058045e-05, + "loss": 0.8911, + "step": 2655 + }, + { + "epoch": 1.47, + "learning_rate": 2.5752849007625986e-05, + "loss": 0.9446, + "step": 2660 + }, + { + "epoch": 1.47, + "learning_rate": 2.568047851414459e-05, + "loss": 0.9453, + "step": 2665 + }, + { + "epoch": 1.48, + "learning_rate": 2.5608102313576027e-05, + "loss": 0.9244, + "step": 2670 + }, + { + "epoch": 1.48, + "learning_rate": 2.553572101293033e-05, + "loss": 0.9478, + "step": 2675 + }, + { + "epoch": 1.48, + "learning_rate": 2.546333521926031e-05, + "loss": 0.9708, + "step": 2680 + }, + { + "epoch": 1.48, + "learning_rate": 2.5390945539656445e-05, + "loss": 0.9266, + "step": 2685 + }, + { + "epoch": 1.49, + "learning_rate": 2.5318552581241822e-05, + "loss": 0.9199, + "step": 2690 + }, + { + "epoch": 1.49, + "learning_rate": 2.524615695116702e-05, + "loss": 0.9666, + "step": 2695 + }, + { + "epoch": 1.49, + "learning_rate": 2.5173759256605027e-05, + "loss": 0.9247, + "step": 2700 + }, + { + "epoch": 1.5, + "learning_rate": 2.510136010474614e-05, + "loss": 0.9534, + "step": 2705 + }, + { + "epoch": 1.5, + "learning_rate": 2.5028960102792887e-05, + "loss": 0.9502, + "step": 2710 + }, + { + "epoch": 1.5, + "learning_rate": 2.4971039897207112e-05, + "loss": 0.9331, + "step": 2715 + }, + { + "epoch": 1.5, + "learning_rate": 2.4898639895253865e-05, + "loss": 0.9806, + "step": 2720 + }, + { + "epoch": 1.51, + "learning_rate": 2.4826240743394982e-05, + "loss": 0.9525, + "step": 2725 + }, + { + "epoch": 1.51, + "learning_rate": 2.4753843048832985e-05, + "loss": 0.937, + "step": 2730 + }, + { + "epoch": 1.51, + "learning_rate": 2.4681447418758187e-05, + "loss": 0.935, + "step": 2735 + }, + { + "epoch": 1.52, + "learning_rate": 2.460905446034356e-05, + "loss": 0.9662, + "step": 2740 + }, + { + "epoch": 1.52, + "learning_rate": 2.45366647807397e-05, + "loss": 0.9249, + "step": 2745 + }, + { + "epoch": 1.52, + "learning_rate": 2.446427898706967e-05, + "loss": 0.9673, + "step": 2750 + }, + { + "epoch": 1.52, + "learning_rate": 2.439189768642398e-05, + "loss": 0.9316, + "step": 2755 + }, + { + "epoch": 1.53, + "learning_rate": 2.431952148585541e-05, + "loss": 0.9297, + "step": 2760 + }, + { + "epoch": 1.53, + "learning_rate": 2.424715099237402e-05, + "loss": 0.9278, + "step": 2765 + }, + { + "epoch": 1.53, + "learning_rate": 2.4174786812941968e-05, + "loss": 0.8954, + "step": 2770 + }, + { + "epoch": 1.53, + "learning_rate": 2.4102429554468456e-05, + "loss": 0.9586, + "step": 2775 + }, + { + "epoch": 1.54, + "learning_rate": 2.4030079823804673e-05, + "loss": 0.9119, + "step": 2780 + }, + { + "epoch": 1.54, + "learning_rate": 2.395773822773863e-05, + "loss": 0.8949, + "step": 2785 + }, + { + "epoch": 1.54, + "learning_rate": 2.3885405372990166e-05, + "loss": 0.9506, + "step": 2790 + }, + { + "epoch": 1.55, + "learning_rate": 2.3813081866205754e-05, + "loss": 0.9087, + "step": 2795 + }, + { + "epoch": 1.55, + "learning_rate": 2.3740768313953494e-05, + "loss": 0.923, + "step": 2800 + }, + { + "epoch": 1.55, + "learning_rate": 2.3668465322718004e-05, + "loss": 1.0212, + "step": 2805 + }, + { + "epoch": 1.55, + "learning_rate": 2.359617349889532e-05, + "loss": 0.9182, + "step": 2810 + }, + { + "epoch": 1.56, + "learning_rate": 2.3523893448787818e-05, + "loss": 0.9316, + "step": 2815 + }, + { + "epoch": 1.56, + "learning_rate": 2.3451625778599122e-05, + "loss": 0.94, + "step": 2820 + }, + { + "epoch": 1.56, + "learning_rate": 2.3379371094429038e-05, + "loss": 0.9181, + "step": 2825 + }, + { + "epoch": 1.56, + "learning_rate": 2.3307130002268457e-05, + "loss": 0.937, + "step": 2830 + }, + { + "epoch": 1.57, + "learning_rate": 2.3234903107994287e-05, + "loss": 0.9026, + "step": 2835 + }, + { + "epoch": 1.57, + "learning_rate": 2.3162691017364317e-05, + "loss": 0.954, + "step": 2840 + }, + { + "epoch": 1.57, + "learning_rate": 2.3090494336012253e-05, + "loss": 0.9661, + "step": 2845 + }, + { + "epoch": 1.58, + "learning_rate": 2.3018313669442483e-05, + "loss": 0.9127, + "step": 2850 + }, + { + "epoch": 1.58, + "learning_rate": 2.2946149623025158e-05, + "loss": 0.9317, + "step": 2855 + }, + { + "epoch": 1.58, + "learning_rate": 2.2874002801990978e-05, + "loss": 0.9856, + "step": 2860 + }, + { + "epoch": 1.58, + "learning_rate": 2.28018738114262e-05, + "loss": 1.0021, + "step": 2865 + }, + { + "epoch": 1.59, + "learning_rate": 2.272976325626755e-05, + "loss": 0.9655, + "step": 2870 + }, + { + "epoch": 1.59, + "learning_rate": 2.265767174129711e-05, + "loss": 0.9619, + "step": 2875 + }, + { + "epoch": 1.59, + "learning_rate": 2.2585599871137313e-05, + "loss": 0.9383, + "step": 2880 + }, + { + "epoch": 1.6, + "learning_rate": 2.251354825024579e-05, + "loss": 0.9332, + "step": 2885 + }, + { + "epoch": 1.6, + "learning_rate": 2.244151748291039e-05, + "loss": 0.9544, + "step": 2890 + }, + { + "epoch": 1.6, + "learning_rate": 2.236950817324401e-05, + "loss": 0.9343, + "step": 2895 + }, + { + "epoch": 1.6, + "learning_rate": 2.2297520925179647e-05, + "loss": 0.9189, + "step": 2900 + }, + { + "epoch": 1.61, + "learning_rate": 2.222555634246521e-05, + "loss": 0.9341, + "step": 2905 + }, + { + "epoch": 1.61, + "learning_rate": 2.215361502865858e-05, + "loss": 0.9567, + "step": 2910 + }, + { + "epoch": 1.61, + "learning_rate": 2.2081697587122423e-05, + "loss": 0.9047, + "step": 2915 + }, + { + "epoch": 1.61, + "learning_rate": 2.200980462101922e-05, + "loss": 0.9126, + "step": 2920 + }, + { + "epoch": 1.62, + "learning_rate": 2.1937936733306195e-05, + "loss": 0.9523, + "step": 2925 + }, + { + "epoch": 1.62, + "learning_rate": 2.18660945267302e-05, + "loss": 0.8802, + "step": 2930 + }, + { + "epoch": 1.62, + "learning_rate": 2.179427860382276e-05, + "loss": 0.9197, + "step": 2935 + }, + { + "epoch": 1.63, + "learning_rate": 2.1722489566894903e-05, + "loss": 0.9255, + "step": 2940 + }, + { + "epoch": 1.63, + "learning_rate": 2.1650728018032206e-05, + "loss": 0.8921, + "step": 2945 + }, + { + "epoch": 1.63, + "learning_rate": 2.15789945590897e-05, + "loss": 0.9607, + "step": 2950 + }, + { + "epoch": 1.63, + "learning_rate": 2.150728979168683e-05, + "loss": 0.9755, + "step": 2955 + }, + { + "epoch": 1.64, + "learning_rate": 2.1435614317202384e-05, + "loss": 0.9943, + "step": 2960 + }, + { + "epoch": 1.64, + "learning_rate": 2.1363968736769508e-05, + "loss": 0.9463, + "step": 2965 + }, + { + "epoch": 1.64, + "learning_rate": 2.1292353651270617e-05, + "loss": 0.9107, + "step": 2970 + }, + { + "epoch": 1.65, + "learning_rate": 2.1220769661332365e-05, + "loss": 0.9311, + "step": 2975 + }, + { + "epoch": 1.65, + "learning_rate": 2.1149217367320622e-05, + "loss": 0.9459, + "step": 2980 + }, + { + "epoch": 1.65, + "learning_rate": 2.107769736933541e-05, + "loss": 0.9439, + "step": 2985 + }, + { + "epoch": 1.65, + "learning_rate": 2.100621026720591e-05, + "loss": 0.9719, + "step": 2990 + }, + { + "epoch": 1.66, + "learning_rate": 2.093475666048539e-05, + "loss": 0.9569, + "step": 2995 + }, + { + "epoch": 1.66, + "learning_rate": 2.0863337148446222e-05, + "loss": 0.9308, + "step": 3000 + }, + { + "epoch": 1.66, + "learning_rate": 2.07919523300748e-05, + "loss": 0.9269, + "step": 3005 + }, + { + "epoch": 1.66, + "learning_rate": 2.0720602804066552e-05, + "loss": 0.9358, + "step": 3010 + }, + { + "epoch": 1.67, + "learning_rate": 2.0649289168820943e-05, + "loss": 0.9291, + "step": 3015 + }, + { + "epoch": 1.67, + "learning_rate": 2.0578012022436386e-05, + "loss": 0.969, + "step": 3020 + }, + { + "epoch": 1.67, + "learning_rate": 2.0506771962705304e-05, + "loss": 0.9319, + "step": 3025 + }, + { + "epoch": 1.68, + "learning_rate": 2.0435569587109042e-05, + "loss": 0.9574, + "step": 3030 + }, + { + "epoch": 1.68, + "learning_rate": 2.036440549281293e-05, + "loss": 0.9654, + "step": 3035 + }, + { + "epoch": 1.68, + "learning_rate": 2.0293280276661204e-05, + "loss": 0.9742, + "step": 3040 + }, + { + "epoch": 1.68, + "learning_rate": 2.0222194535172067e-05, + "loss": 0.9014, + "step": 3045 + }, + { + "epoch": 1.69, + "learning_rate": 2.0151148864532623e-05, + "loss": 0.9246, + "step": 3050 + }, + { + "epoch": 1.69, + "learning_rate": 2.0080143860593913e-05, + "loss": 0.9522, + "step": 3055 + }, + { + "epoch": 1.69, + "learning_rate": 2.0009180118865933e-05, + "loss": 0.9567, + "step": 3060 + }, + { + "epoch": 1.69, + "learning_rate": 1.9938258234512588e-05, + "loss": 0.983, + "step": 3065 + }, + { + "epoch": 1.7, + "learning_rate": 1.9867378802346764e-05, + "loss": 0.8722, + "step": 3070 + }, + { + "epoch": 1.7, + "learning_rate": 1.979654241682527e-05, + "loss": 0.9122, + "step": 3075 + }, + { + "epoch": 1.7, + "learning_rate": 1.972574967204391e-05, + "loss": 0.9362, + "step": 3080 + }, + { + "epoch": 1.71, + "learning_rate": 1.9655001161732478e-05, + "loss": 0.8944, + "step": 3085 + }, + { + "epoch": 1.71, + "learning_rate": 1.9584297479249774e-05, + "loss": 0.9329, + "step": 3090 + }, + { + "epoch": 1.71, + "learning_rate": 1.9513639217578636e-05, + "loss": 0.9707, + "step": 3095 + }, + { + "epoch": 1.71, + "learning_rate": 1.9443026969320955e-05, + "loss": 0.9367, + "step": 3100 + }, + { + "epoch": 1.72, + "learning_rate": 1.937246132669272e-05, + "loss": 0.896, + "step": 3105 + }, + { + "epoch": 1.72, + "learning_rate": 1.9301942881519047e-05, + "loss": 0.97, + "step": 3110 + }, + { + "epoch": 1.72, + "learning_rate": 1.9231472225229216e-05, + "loss": 0.9638, + "step": 3115 + }, + { + "epoch": 1.73, + "learning_rate": 1.9161049948851684e-05, + "loss": 0.9561, + "step": 3120 + }, + { + "epoch": 1.73, + "learning_rate": 1.9090676643009193e-05, + "loss": 0.9734, + "step": 3125 + }, + { + "epoch": 1.73, + "learning_rate": 1.902035289791373e-05, + "loss": 0.9651, + "step": 3130 + }, + { + "epoch": 1.73, + "learning_rate": 1.8950079303361658e-05, + "loss": 0.9489, + "step": 3135 + }, + { + "epoch": 1.74, + "learning_rate": 1.8879856448728723e-05, + "loss": 0.9893, + "step": 3140 + }, + { + "epoch": 1.74, + "learning_rate": 1.8809684922965097e-05, + "loss": 0.9549, + "step": 3145 + }, + { + "epoch": 1.74, + "learning_rate": 1.8739565314590507e-05, + "loss": 0.9196, + "step": 3150 + }, + { + "epoch": 1.74, + "learning_rate": 1.8669498211689216e-05, + "loss": 0.9568, + "step": 3155 + }, + { + "epoch": 1.75, + "learning_rate": 1.859948420190517e-05, + "loss": 0.904, + "step": 3160 + }, + { + "epoch": 1.75, + "learning_rate": 1.852952387243698e-05, + "loss": 0.9375, + "step": 3165 + }, + { + "epoch": 1.75, + "learning_rate": 1.8459617810033096e-05, + "loss": 0.9048, + "step": 3170 + }, + { + "epoch": 1.76, + "learning_rate": 1.83897666009868e-05, + "loss": 0.9514, + "step": 3175 + }, + { + "epoch": 1.76, + "learning_rate": 1.8319970831131363e-05, + "loss": 0.8855, + "step": 3180 + }, + { + "epoch": 1.76, + "learning_rate": 1.825023108583505e-05, + "loss": 0.9119, + "step": 3185 + }, + { + "epoch": 1.76, + "learning_rate": 1.818054794999628e-05, + "loss": 0.9733, + "step": 3190 + }, + { + "epoch": 1.77, + "learning_rate": 1.8110922008038705e-05, + "loss": 0.9597, + "step": 3195 + }, + { + "epoch": 1.77, + "learning_rate": 1.8041353843906275e-05, + "loss": 0.8952, + "step": 3200 + } + ], + "max_steps": 5424, + "num_train_epochs": 3, + "total_flos": 1.0398846872557978e+18, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-3200/training_args.bin b/checkpoint-3200/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..77433459daca6603950afc463ff989e817f81ac3 --- /dev/null +++ b/checkpoint-3200/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0f6a8853599a52b1f15984d75ce4b6434c335fdc67d4509d0b2ca0d8c613590a +size 3362 diff --git a/checkpoint-3300/README.md b/checkpoint-3300/README.md new file mode 100644 index 0000000000000000000000000000000000000000..53c0fe6b436905ad7a2c060e98fe1ebdeac1a2fe --- /dev/null +++ b/checkpoint-3300/README.md @@ -0,0 +1,15 @@ +--- +library_name: peft +--- +## Training procedure + + +The following `bitsandbytes` quantization config was used during training: +- load_in_8bit: True +- llm_int8_threshold: 6.0 +- llm_int8_skip_modules: None +- llm_int8_enable_fp32_cpu_offload: False +### Framework versions + + +- PEFT 0.5.0 diff --git a/checkpoint-3300/adapter_config.json b/checkpoint-3300/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..514ae99ac7471cc26016f7575e44dd8304f29b07 --- /dev/null +++ b/checkpoint-3300/adapter_config.json @@ -0,0 +1,21 @@ +{ + "auto_mapping": null, + "base_model_name_or_path": "meta-llama/Llama-2-7b-chat-hf", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "q_proj", + "v_proj" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-3300/adapter_model.bin b/checkpoint-3300/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..e700cdc5c7ea5ec6fee7e018095d32180894ddbc --- /dev/null +++ b/checkpoint-3300/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40c171e9c86d26caf15dca94029a9e0bb83464a27583837b0502f98db8a3541 +size 16821197 diff --git a/checkpoint-3300/finetuning_args.json b/checkpoint-3300/finetuning_args.json new file mode 100644 index 0000000000000000000000000000000000000000..d01efc206b59c6f88548e8f3940579f2ed2af33b --- /dev/null +++ b/checkpoint-3300/finetuning_args.json @@ -0,0 +1,16 @@ +{ + "dpo_beta": 0.1, + "finetuning_type": "lora", + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "lora_rank": 8, + "lora_target": [ + "q_proj", + "v_proj" + ], + "name_module_trainable": "mlp", + "num_hidden_layers": 32, + "num_layer_trainable": 3, + "ppo_score_norm": false, + "resume_lora_training": true +} diff --git a/checkpoint-3300/optimizer.pt b/checkpoint-3300/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..b9b6ce0184be84e65b478026aceb5f22bc739dc7 --- /dev/null +++ b/checkpoint-3300/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b927cd485019707d0de2f668fb09311f1fe02c318d28fa0654cf97d49693fc45 +size 33629893 diff --git a/checkpoint-3300/rng_state.pth b/checkpoint-3300/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..33695156609ed83edd5809a2e0dbbcadf3eb8500 --- /dev/null +++ b/checkpoint-3300/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:16d99cbd204eef29568dcf5b2570a5fa1c42a9dc0a95bf6465d9ec6adc15d0f4 +size 14575 diff --git a/checkpoint-3300/scaler.pt b/checkpoint-3300/scaler.pt new file mode 100644 index 0000000000000000000000000000000000000000..2634305285908bc3feb9f66567eacd637c03d8f2 --- /dev/null +++ b/checkpoint-3300/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f0636f8450691b406fdc8b31c4b42e80cdcce41b8a64e7e97731bb2dae1fff46 +size 557 diff --git a/checkpoint-3300/scheduler.pt b/checkpoint-3300/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..010c8bd3b7b1e02117de4bf022774db7ba997298 --- /dev/null +++ b/checkpoint-3300/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:20169ba669122062d555e95dd03f67c830cc01a283910c1095fd66840aeb65ab +size 627 diff --git a/checkpoint-3300/trainer_state.json b/checkpoint-3300/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..ff80422868ce04302288eae08306dde33c3e5cb5 --- /dev/null +++ b/checkpoint-3300/trainer_state.json @@ -0,0 +1,3976 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.824716615980094, + "global_step": 3300, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.9999895164082156e-05, + "loss": 1.6349, + "step": 5 + }, + { + "epoch": 0.01, + "learning_rate": 4.999958065720787e-05, + "loss": 1.6199, + "step": 10 + }, + { + "epoch": 0.01, + "learning_rate": 4.999905648201487e-05, + "loss": 1.4834, + "step": 15 + }, + { + "epoch": 0.01, + "learning_rate": 4.999832264289934e-05, + "loss": 1.3882, + "step": 20 + }, + { + "epoch": 0.01, + "learning_rate": 4.999737914601591e-05, + "loss": 1.3679, + "step": 25 + }, + { + "epoch": 0.02, + "learning_rate": 4.999622599927756e-05, + "loss": 1.2396, + "step": 30 + }, + { + "epoch": 0.02, + "learning_rate": 4.999486321235559e-05, + "loss": 1.321, + "step": 35 + }, + { + "epoch": 0.02, + "learning_rate": 4.9993290796679516e-05, + "loss": 1.2874, + "step": 40 + }, + { + "epoch": 0.02, + "learning_rate": 4.999150876543699e-05, + "loss": 1.2607, + "step": 45 + }, + { + "epoch": 0.03, + "learning_rate": 4.9989517133573694e-05, + "loss": 1.2454, + "step": 50 + }, + { + "epoch": 0.03, + "learning_rate": 4.9987315917793174e-05, + "loss": 1.2799, + "step": 55 + }, + { + "epoch": 0.03, + "learning_rate": 4.998490513655676e-05, + "loss": 1.2575, + "step": 60 + }, + { + "epoch": 0.04, + "learning_rate": 4.998228481008337e-05, + "loss": 1.2404, + "step": 65 + }, + { + "epoch": 0.04, + "learning_rate": 4.997945496034934e-05, + "loss": 1.2219, + "step": 70 + }, + { + "epoch": 0.04, + "learning_rate": 4.9976415611088267e-05, + "loss": 1.2241, + "step": 75 + }, + { + "epoch": 0.04, + "learning_rate": 4.997316678779079e-05, + "loss": 1.1716, + "step": 80 + }, + { + "epoch": 0.05, + "learning_rate": 4.996970851770438e-05, + "loss": 1.1883, + "step": 85 + }, + { + "epoch": 0.05, + "learning_rate": 4.9966040829833115e-05, + "loss": 1.205, + "step": 90 + }, + { + "epoch": 0.05, + "learning_rate": 4.9962163754937426e-05, + "loss": 1.1246, + "step": 95 + }, + { + "epoch": 0.06, + "learning_rate": 4.995807732553384e-05, + "loss": 1.1636, + "step": 100 + }, + { + "epoch": 0.06, + "learning_rate": 4.9953781575894723e-05, + "loss": 1.158, + "step": 105 + }, + { + "epoch": 0.06, + "learning_rate": 4.9949276542048e-05, + "loss": 1.1477, + "step": 110 + }, + { + "epoch": 0.06, + "learning_rate": 4.9944562261776805e-05, + "loss": 1.1678, + "step": 115 + }, + { + "epoch": 0.07, + "learning_rate": 4.9939638774619216e-05, + "loss": 1.1501, + "step": 120 + }, + { + "epoch": 0.07, + "learning_rate": 4.99345061218679e-05, + "loss": 1.1955, + "step": 125 + }, + { + "epoch": 0.07, + "learning_rate": 4.9929164346569756e-05, + "loss": 1.1724, + "step": 130 + }, + { + "epoch": 0.07, + "learning_rate": 4.9923613493525576e-05, + "loss": 1.177, + "step": 135 + }, + { + "epoch": 0.08, + "learning_rate": 4.991785360928968e-05, + "loss": 1.1418, + "step": 140 + }, + { + "epoch": 0.08, + "learning_rate": 4.991188474216947e-05, + "loss": 1.1898, + "step": 145 + }, + { + "epoch": 0.08, + "learning_rate": 4.9905706942225094e-05, + "loss": 1.1479, + "step": 150 + }, + { + "epoch": 0.09, + "learning_rate": 4.9899320261268966e-05, + "loss": 1.1356, + "step": 155 + }, + { + "epoch": 0.09, + "learning_rate": 4.989272475286537e-05, + "loss": 1.1397, + "step": 160 + }, + { + "epoch": 0.09, + "learning_rate": 4.9885920472330004e-05, + "loss": 1.1215, + "step": 165 + }, + { + "epoch": 0.09, + "learning_rate": 4.9878907476729516e-05, + "loss": 1.167, + "step": 170 + }, + { + "epoch": 0.1, + "learning_rate": 4.9871685824881e-05, + "loss": 1.1219, + "step": 175 + }, + { + "epoch": 0.1, + "learning_rate": 4.9864255577351534e-05, + "loss": 1.0835, + "step": 180 + }, + { + "epoch": 0.1, + "learning_rate": 4.985661679645769e-05, + "loss": 1.0721, + "step": 185 + }, + { + "epoch": 0.11, + "learning_rate": 4.9848769546264915e-05, + "loss": 1.0692, + "step": 190 + }, + { + "epoch": 0.11, + "learning_rate": 4.9840713892587146e-05, + "loss": 1.0488, + "step": 195 + }, + { + "epoch": 0.11, + "learning_rate": 4.983244990298609e-05, + "loss": 1.1285, + "step": 200 + }, + { + "epoch": 0.11, + "learning_rate": 4.982397764677081e-05, + "loss": 1.0832, + "step": 205 + }, + { + "epoch": 0.12, + "learning_rate": 4.981529719499704e-05, + "loss": 1.0652, + "step": 210 + }, + { + "epoch": 0.12, + "learning_rate": 4.980640862046663e-05, + "loss": 1.1043, + "step": 215 + }, + { + "epoch": 0.12, + "learning_rate": 4.979731199772693e-05, + "loss": 1.112, + "step": 220 + }, + { + "epoch": 0.12, + "learning_rate": 4.9788007403070146e-05, + "loss": 1.1029, + "step": 225 + }, + { + "epoch": 0.13, + "learning_rate": 4.977849491453277e-05, + "loss": 1.0869, + "step": 230 + }, + { + "epoch": 0.13, + "learning_rate": 4.976877461189481e-05, + "loss": 1.0843, + "step": 235 + }, + { + "epoch": 0.13, + "learning_rate": 4.975884657667922e-05, + "loss": 1.0789, + "step": 240 + }, + { + "epoch": 0.14, + "learning_rate": 4.974871089215118e-05, + "loss": 1.0449, + "step": 245 + }, + { + "epoch": 0.14, + "learning_rate": 4.9738367643317405e-05, + "loss": 1.1053, + "step": 250 + }, + { + "epoch": 0.14, + "learning_rate": 4.9727816916925395e-05, + "loss": 1.0651, + "step": 255 + }, + { + "epoch": 0.14, + "learning_rate": 4.971705880146276e-05, + "loss": 1.0828, + "step": 260 + }, + { + "epoch": 0.15, + "learning_rate": 4.970609338715646e-05, + "loss": 1.0932, + "step": 265 + }, + { + "epoch": 0.15, + "learning_rate": 4.969492076597203e-05, + "loss": 1.0648, + "step": 270 + }, + { + "epoch": 0.15, + "learning_rate": 4.968354103161283e-05, + "loss": 1.0948, + "step": 275 + }, + { + "epoch": 0.15, + "learning_rate": 4.967195427951926e-05, + "loss": 1.0721, + "step": 280 + }, + { + "epoch": 0.16, + "learning_rate": 4.9660160606867936e-05, + "loss": 1.124, + "step": 285 + }, + { + "epoch": 0.16, + "learning_rate": 4.9648160112570896e-05, + "loss": 1.0963, + "step": 290 + }, + { + "epoch": 0.16, + "learning_rate": 4.9635952897274773e-05, + "loss": 1.1078, + "step": 295 + }, + { + "epoch": 0.17, + "learning_rate": 4.9623539063359925e-05, + "loss": 1.1059, + "step": 300 + }, + { + "epoch": 0.17, + "learning_rate": 4.961091871493962e-05, + "loss": 1.1032, + "step": 305 + }, + { + "epoch": 0.17, + "learning_rate": 4.959809195785912e-05, + "loss": 1.0595, + "step": 310 + }, + { + "epoch": 0.17, + "learning_rate": 4.958505889969481e-05, + "loss": 1.1096, + "step": 315 + }, + { + "epoch": 0.18, + "learning_rate": 4.957181964975329e-05, + "loss": 1.0589, + "step": 320 + }, + { + "epoch": 0.18, + "learning_rate": 4.955837431907049e-05, + "loss": 1.0608, + "step": 325 + }, + { + "epoch": 0.18, + "learning_rate": 4.954472302041069e-05, + "loss": 1.0819, + "step": 330 + }, + { + "epoch": 0.19, + "learning_rate": 4.9530865868265605e-05, + "loss": 1.0759, + "step": 335 + }, + { + "epoch": 0.19, + "learning_rate": 4.951680297885342e-05, + "loss": 1.0515, + "step": 340 + }, + { + "epoch": 0.19, + "learning_rate": 4.950253447011779e-05, + "loss": 1.0371, + "step": 345 + }, + { + "epoch": 0.19, + "learning_rate": 4.948806046172691e-05, + "loss": 1.0619, + "step": 350 + }, + { + "epoch": 0.2, + "learning_rate": 4.947338107507245e-05, + "loss": 1.0757, + "step": 355 + }, + { + "epoch": 0.2, + "learning_rate": 4.945849643326857e-05, + "loss": 1.0686, + "step": 360 + }, + { + "epoch": 0.2, + "learning_rate": 4.9443406661150874e-05, + "loss": 1.0809, + "step": 365 + }, + { + "epoch": 0.2, + "learning_rate": 4.942811188527537e-05, + "loss": 1.0704, + "step": 370 + }, + { + "epoch": 0.21, + "learning_rate": 4.941261223391742e-05, + "loss": 1.0655, + "step": 375 + }, + { + "epoch": 0.21, + "learning_rate": 4.939690783707063e-05, + "loss": 1.1182, + "step": 380 + }, + { + "epoch": 0.21, + "learning_rate": 4.938099882644578e-05, + "loss": 1.081, + "step": 385 + }, + { + "epoch": 0.22, + "learning_rate": 4.9364885335469734e-05, + "loss": 1.0792, + "step": 390 + }, + { + "epoch": 0.22, + "learning_rate": 4.93485674992843e-05, + "loss": 1.0244, + "step": 395 + }, + { + "epoch": 0.22, + "learning_rate": 4.933204545474511e-05, + "loss": 1.0531, + "step": 400 + }, + { + "epoch": 0.22, + "learning_rate": 4.9315319340420465e-05, + "loss": 1.048, + "step": 405 + }, + { + "epoch": 0.23, + "learning_rate": 4.929838929659015e-05, + "loss": 1.04, + "step": 410 + }, + { + "epoch": 0.23, + "learning_rate": 4.9281255465244314e-05, + "loss": 1.0686, + "step": 415 + }, + { + "epoch": 0.23, + "learning_rate": 4.926391799008223e-05, + "loss": 1.0933, + "step": 420 + }, + { + "epoch": 0.24, + "learning_rate": 4.924637701651111e-05, + "loss": 1.0313, + "step": 425 + }, + { + "epoch": 0.24, + "learning_rate": 4.9228632691644874e-05, + "loss": 1.0811, + "step": 430 + }, + { + "epoch": 0.24, + "learning_rate": 4.921068516430293e-05, + "loss": 1.0617, + "step": 435 + }, + { + "epoch": 0.24, + "learning_rate": 4.919253458500892e-05, + "loss": 1.0482, + "step": 440 + }, + { + "epoch": 0.25, + "learning_rate": 4.9174181105989445e-05, + "loss": 1.0681, + "step": 445 + }, + { + "epoch": 0.25, + "learning_rate": 4.9155624881172834e-05, + "loss": 1.0476, + "step": 450 + }, + { + "epoch": 0.25, + "learning_rate": 4.913686606618777e-05, + "loss": 1.0463, + "step": 455 + }, + { + "epoch": 0.25, + "learning_rate": 4.911790481836208e-05, + "loss": 1.0213, + "step": 460 + }, + { + "epoch": 0.26, + "learning_rate": 4.909874129672133e-05, + "loss": 0.9855, + "step": 465 + }, + { + "epoch": 0.26, + "learning_rate": 4.907937566198757e-05, + "loss": 1.045, + "step": 470 + }, + { + "epoch": 0.26, + "learning_rate": 4.9059808076577914e-05, + "loss": 1.0442, + "step": 475 + }, + { + "epoch": 0.27, + "learning_rate": 4.904003870460323e-05, + "loss": 1.0354, + "step": 480 + }, + { + "epoch": 0.27, + "learning_rate": 4.9020067711866735e-05, + "loss": 1.0331, + "step": 485 + }, + { + "epoch": 0.27, + "learning_rate": 4.899989526586261e-05, + "loss": 1.0585, + "step": 490 + }, + { + "epoch": 0.27, + "learning_rate": 4.8979521535774636e-05, + "loss": 1.0223, + "step": 495 + }, + { + "epoch": 0.28, + "learning_rate": 4.895894669247468e-05, + "loss": 1.0118, + "step": 500 + }, + { + "epoch": 0.28, + "learning_rate": 4.8938170908521356e-05, + "loss": 1.0508, + "step": 505 + }, + { + "epoch": 0.28, + "learning_rate": 4.8917194358158534e-05, + "loss": 1.0656, + "step": 510 + }, + { + "epoch": 0.28, + "learning_rate": 4.8896017217313886e-05, + "loss": 1.0655, + "step": 515 + }, + { + "epoch": 0.29, + "learning_rate": 4.887463966359741e-05, + "loss": 1.0833, + "step": 520 + }, + { + "epoch": 0.29, + "learning_rate": 4.8853061876299956e-05, + "loss": 1.068, + "step": 525 + }, + { + "epoch": 0.29, + "learning_rate": 4.8831284036391684e-05, + "loss": 1.0487, + "step": 530 + }, + { + "epoch": 0.3, + "learning_rate": 4.880930632652058e-05, + "loss": 1.059, + "step": 535 + }, + { + "epoch": 0.3, + "learning_rate": 4.878712893101092e-05, + "loss": 1.0408, + "step": 540 + }, + { + "epoch": 0.3, + "learning_rate": 4.876475203586171e-05, + "loss": 0.9976, + "step": 545 + }, + { + "epoch": 0.3, + "learning_rate": 4.874217582874514e-05, + "loss": 1.0358, + "step": 550 + }, + { + "epoch": 0.31, + "learning_rate": 4.8719400499005e-05, + "loss": 1.0073, + "step": 555 + }, + { + "epoch": 0.31, + "learning_rate": 4.869642623765509e-05, + "loss": 1.0742, + "step": 560 + }, + { + "epoch": 0.31, + "learning_rate": 4.867325323737765e-05, + "loss": 1.0463, + "step": 565 + }, + { + "epoch": 0.32, + "learning_rate": 4.864988169252168e-05, + "loss": 1.0242, + "step": 570 + }, + { + "epoch": 0.32, + "learning_rate": 4.8626311799101375e-05, + "loss": 1.0773, + "step": 575 + }, + { + "epoch": 0.32, + "learning_rate": 4.860254375479446e-05, + "loss": 1.0371, + "step": 580 + }, + { + "epoch": 0.32, + "learning_rate": 4.8578577758940504e-05, + "loss": 1.0399, + "step": 585 + }, + { + "epoch": 0.33, + "learning_rate": 4.855441401253928e-05, + "loss": 1.0411, + "step": 590 + }, + { + "epoch": 0.33, + "learning_rate": 4.8530052718249076e-05, + "loss": 1.0476, + "step": 595 + }, + { + "epoch": 0.33, + "learning_rate": 4.850549408038498e-05, + "loss": 1.0075, + "step": 600 + }, + { + "epoch": 0.33, + "learning_rate": 4.848073830491717e-05, + "loss": 1.0045, + "step": 605 + }, + { + "epoch": 0.34, + "learning_rate": 4.845578559946923e-05, + "loss": 1.0368, + "step": 610 + }, + { + "epoch": 0.34, + "learning_rate": 4.8430636173316306e-05, + "loss": 1.0156, + "step": 615 + }, + { + "epoch": 0.34, + "learning_rate": 4.840529023738348e-05, + "loss": 1.0334, + "step": 620 + }, + { + "epoch": 0.35, + "learning_rate": 4.837974800424389e-05, + "loss": 0.9994, + "step": 625 + }, + { + "epoch": 0.35, + "learning_rate": 4.8354009688117026e-05, + "loss": 1.0467, + "step": 630 + }, + { + "epoch": 0.35, + "learning_rate": 4.8328075504866874e-05, + "loss": 0.9968, + "step": 635 + }, + { + "epoch": 0.35, + "learning_rate": 4.8301945672000164e-05, + "loss": 1.041, + "step": 640 + }, + { + "epoch": 0.36, + "learning_rate": 4.8275620408664487e-05, + "loss": 0.9811, + "step": 645 + }, + { + "epoch": 0.36, + "learning_rate": 4.8249099935646494e-05, + "loss": 1.0235, + "step": 650 + }, + { + "epoch": 0.36, + "learning_rate": 4.822238447537003e-05, + "loss": 1.0152, + "step": 655 + }, + { + "epoch": 0.36, + "learning_rate": 4.819547425189429e-05, + "loss": 1.038, + "step": 660 + }, + { + "epoch": 0.37, + "learning_rate": 4.81683694909119e-05, + "loss": 1.0387, + "step": 665 + }, + { + "epoch": 0.37, + "learning_rate": 4.814107041974707e-05, + "loss": 1.019, + "step": 670 + }, + { + "epoch": 0.37, + "learning_rate": 4.811357726735366e-05, + "loss": 1.0403, + "step": 675 + }, + { + "epoch": 0.38, + "learning_rate": 4.808589026431324e-05, + "loss": 1.0828, + "step": 680 + }, + { + "epoch": 0.38, + "learning_rate": 4.805800964283322e-05, + "loss": 1.0128, + "step": 685 + }, + { + "epoch": 0.38, + "learning_rate": 4.802993563674483e-05, + "loss": 1.0502, + "step": 690 + }, + { + "epoch": 0.38, + "learning_rate": 4.80016684815012e-05, + "loss": 1.0169, + "step": 695 + }, + { + "epoch": 0.39, + "learning_rate": 4.7973208414175406e-05, + "loss": 1.0316, + "step": 700 + }, + { + "epoch": 0.39, + "learning_rate": 4.794455567345842e-05, + "loss": 1.0263, + "step": 705 + }, + { + "epoch": 0.39, + "learning_rate": 4.791571049965714e-05, + "loss": 1.0801, + "step": 710 + }, + { + "epoch": 0.4, + "learning_rate": 4.7886673134692404e-05, + "loss": 1.0575, + "step": 715 + }, + { + "epoch": 0.4, + "learning_rate": 4.7857443822096905e-05, + "loss": 0.9767, + "step": 720 + }, + { + "epoch": 0.4, + "learning_rate": 4.782802280701319e-05, + "loss": 0.9666, + "step": 725 + }, + { + "epoch": 0.4, + "learning_rate": 4.77984103361916e-05, + "loss": 0.9983, + "step": 730 + }, + { + "epoch": 0.41, + "learning_rate": 4.776860665798816e-05, + "loss": 1.0219, + "step": 735 + }, + { + "epoch": 0.41, + "learning_rate": 4.773861202236257e-05, + "loss": 0.9963, + "step": 740 + }, + { + "epoch": 0.41, + "learning_rate": 4.770842668087602e-05, + "loss": 1.0219, + "step": 745 + }, + { + "epoch": 0.41, + "learning_rate": 4.767805088668916e-05, + "loss": 1.0029, + "step": 750 + }, + { + "epoch": 0.42, + "learning_rate": 4.7647484894559936e-05, + "loss": 1.0077, + "step": 755 + }, + { + "epoch": 0.42, + "learning_rate": 4.7616728960841444e-05, + "loss": 0.9908, + "step": 760 + }, + { + "epoch": 0.42, + "learning_rate": 4.758578334347981e-05, + "loss": 1.0092, + "step": 765 + }, + { + "epoch": 0.43, + "learning_rate": 4.7554648302012015e-05, + "loss": 0.9874, + "step": 770 + }, + { + "epoch": 0.43, + "learning_rate": 4.7523324097563706e-05, + "loss": 1.0185, + "step": 775 + }, + { + "epoch": 0.43, + "learning_rate": 4.749181099284703e-05, + "loss": 1.0533, + "step": 780 + }, + { + "epoch": 0.43, + "learning_rate": 4.746010925215839e-05, + "loss": 1.0083, + "step": 785 + }, + { + "epoch": 0.44, + "learning_rate": 4.74282191413763e-05, + "loss": 1.0191, + "step": 790 + }, + { + "epoch": 0.44, + "learning_rate": 4.7396140927959045e-05, + "loss": 0.9786, + "step": 795 + }, + { + "epoch": 0.44, + "learning_rate": 4.7363874880942574e-05, + "loss": 1.0197, + "step": 800 + }, + { + "epoch": 0.45, + "learning_rate": 4.733142127093813e-05, + "loss": 1.0471, + "step": 805 + }, + { + "epoch": 0.45, + "learning_rate": 4.7298780370130014e-05, + "loss": 1.0134, + "step": 810 + }, + { + "epoch": 0.45, + "learning_rate": 4.726595245227336e-05, + "loss": 0.9801, + "step": 815 + }, + { + "epoch": 0.45, + "learning_rate": 4.723293779269173e-05, + "loss": 0.9944, + "step": 820 + }, + { + "epoch": 0.46, + "learning_rate": 4.7199736668274924e-05, + "loss": 1.0358, + "step": 825 + }, + { + "epoch": 0.46, + "learning_rate": 4.716634935747655e-05, + "loss": 0.9907, + "step": 830 + }, + { + "epoch": 0.46, + "learning_rate": 4.713277614031177e-05, + "loss": 1.0698, + "step": 835 + }, + { + "epoch": 0.46, + "learning_rate": 4.70990172983549e-05, + "loss": 1.0466, + "step": 840 + }, + { + "epoch": 0.47, + "learning_rate": 4.706507311473707e-05, + "loss": 0.9489, + "step": 845 + }, + { + "epoch": 0.47, + "learning_rate": 4.703094387414385e-05, + "loss": 0.9936, + "step": 850 + }, + { + "epoch": 0.47, + "learning_rate": 4.699662986281288e-05, + "loss": 0.9672, + "step": 855 + }, + { + "epoch": 0.48, + "learning_rate": 4.696213136853141e-05, + "loss": 1.0164, + "step": 860 + }, + { + "epoch": 0.48, + "learning_rate": 4.6927448680633954e-05, + "loss": 0.9817, + "step": 865 + }, + { + "epoch": 0.48, + "learning_rate": 4.689258208999983e-05, + "loss": 1.028, + "step": 870 + }, + { + "epoch": 0.48, + "learning_rate": 4.6857531889050716e-05, + "loss": 1.0239, + "step": 875 + }, + { + "epoch": 0.49, + "learning_rate": 4.682229837174821e-05, + "loss": 1.0378, + "step": 880 + }, + { + "epoch": 0.49, + "learning_rate": 4.678688183359135e-05, + "loss": 0.9908, + "step": 885 + }, + { + "epoch": 0.49, + "learning_rate": 4.675128257161418e-05, + "loss": 0.9384, + "step": 890 + }, + { + "epoch": 0.49, + "learning_rate": 4.671550088438319e-05, + "loss": 0.9481, + "step": 895 + }, + { + "epoch": 0.5, + "learning_rate": 4.6679537071994874e-05, + "loss": 0.9541, + "step": 900 + }, + { + "epoch": 0.5, + "learning_rate": 4.6643391436073165e-05, + "loss": 0.9787, + "step": 905 + }, + { + "epoch": 0.5, + "learning_rate": 4.660706427976693e-05, + "loss": 0.9897, + "step": 910 + }, + { + "epoch": 0.51, + "learning_rate": 4.657055590774745e-05, + "loss": 0.9947, + "step": 915 + }, + { + "epoch": 0.51, + "learning_rate": 4.6533866626205805e-05, + "loss": 0.9614, + "step": 920 + }, + { + "epoch": 0.51, + "learning_rate": 4.649699674285036e-05, + "loss": 1.0323, + "step": 925 + }, + { + "epoch": 0.51, + "learning_rate": 4.645994656690417e-05, + "loss": 1.0345, + "step": 930 + }, + { + "epoch": 0.52, + "learning_rate": 4.642271640910235e-05, + "loss": 1.0432, + "step": 935 + }, + { + "epoch": 0.52, + "learning_rate": 4.638530658168954e-05, + "loss": 1.0102, + "step": 940 + }, + { + "epoch": 0.52, + "learning_rate": 4.6347717398417203e-05, + "loss": 1.0361, + "step": 945 + }, + { + "epoch": 0.53, + "learning_rate": 4.6309949174541096e-05, + "loss": 0.9818, + "step": 950 + }, + { + "epoch": 0.53, + "learning_rate": 4.627200222681851e-05, + "loss": 1.0293, + "step": 955 + }, + { + "epoch": 0.53, + "learning_rate": 4.6233876873505694e-05, + "loss": 1.0125, + "step": 960 + }, + { + "epoch": 0.53, + "learning_rate": 4.619557343435516e-05, + "loss": 1.0039, + "step": 965 + }, + { + "epoch": 0.54, + "learning_rate": 4.615709223061302e-05, + "loss": 1.0146, + "step": 970 + }, + { + "epoch": 0.54, + "learning_rate": 4.611843358501624e-05, + "loss": 1.0352, + "step": 975 + }, + { + "epoch": 0.54, + "learning_rate": 4.6079597821789993e-05, + "loss": 1.0405, + "step": 980 + }, + { + "epoch": 0.54, + "learning_rate": 4.604058526664491e-05, + "loss": 0.9834, + "step": 985 + }, + { + "epoch": 0.55, + "learning_rate": 4.600139624677436e-05, + "loss": 1.0252, + "step": 990 + }, + { + "epoch": 0.55, + "learning_rate": 4.596203109085168e-05, + "loss": 1.0516, + "step": 995 + }, + { + "epoch": 0.55, + "learning_rate": 4.5922490129027464e-05, + "loss": 0.9762, + "step": 1000 + }, + { + "epoch": 0.56, + "learning_rate": 4.588277369292674e-05, + "loss": 0.9801, + "step": 1005 + }, + { + "epoch": 0.56, + "learning_rate": 4.5842882115646234e-05, + "loss": 1.0127, + "step": 1010 + }, + { + "epoch": 0.56, + "learning_rate": 4.580281573175157e-05, + "loss": 1.0415, + "step": 1015 + }, + { + "epoch": 0.56, + "learning_rate": 4.576257487727442e-05, + "loss": 0.9974, + "step": 1020 + }, + { + "epoch": 0.57, + "learning_rate": 4.572215988970974e-05, + "loss": 1.0097, + "step": 1025 + }, + { + "epoch": 0.57, + "learning_rate": 4.568157110801293e-05, + "loss": 1.0004, + "step": 1030 + }, + { + "epoch": 0.57, + "learning_rate": 4.5640808872596944e-05, + "loss": 0.9932, + "step": 1035 + }, + { + "epoch": 0.58, + "learning_rate": 4.5599873525329505e-05, + "loss": 0.9437, + "step": 1040 + }, + { + "epoch": 0.58, + "learning_rate": 4.555876540953019e-05, + "loss": 1.0004, + "step": 1045 + }, + { + "epoch": 0.58, + "learning_rate": 4.551748486996755e-05, + "loss": 0.9743, + "step": 1050 + }, + { + "epoch": 0.58, + "learning_rate": 4.547603225285626e-05, + "loss": 1.0303, + "step": 1055 + }, + { + "epoch": 0.59, + "learning_rate": 4.543440790585417e-05, + "loss": 1.0225, + "step": 1060 + }, + { + "epoch": 0.59, + "learning_rate": 4.539261217805939e-05, + "loss": 1.0102, + "step": 1065 + }, + { + "epoch": 0.59, + "learning_rate": 4.535064542000743e-05, + "loss": 1.0087, + "step": 1070 + }, + { + "epoch": 0.59, + "learning_rate": 4.5308507983668165e-05, + "loss": 0.952, + "step": 1075 + }, + { + "epoch": 0.6, + "learning_rate": 4.526620022244293e-05, + "loss": 0.9767, + "step": 1080 + }, + { + "epoch": 0.6, + "learning_rate": 4.522372249116158e-05, + "loss": 1.0049, + "step": 1085 + }, + { + "epoch": 0.6, + "learning_rate": 4.5181075146079456e-05, + "loss": 1.0397, + "step": 1090 + }, + { + "epoch": 0.61, + "learning_rate": 4.5138258544874455e-05, + "loss": 1.0167, + "step": 1095 + }, + { + "epoch": 0.61, + "learning_rate": 4.5095273046643985e-05, + "loss": 0.9966, + "step": 1100 + }, + { + "epoch": 0.61, + "learning_rate": 4.5052119011901986e-05, + "loss": 0.983, + "step": 1105 + }, + { + "epoch": 0.61, + "learning_rate": 4.500879680257587e-05, + "loss": 1.0301, + "step": 1110 + }, + { + "epoch": 0.62, + "learning_rate": 4.4965306782003535e-05, + "loss": 1.0371, + "step": 1115 + }, + { + "epoch": 0.62, + "learning_rate": 4.492164931493028e-05, + "loss": 1.0083, + "step": 1120 + }, + { + "epoch": 0.62, + "learning_rate": 4.487782476750575e-05, + "loss": 0.9728, + "step": 1125 + }, + { + "epoch": 0.62, + "learning_rate": 4.4833833507280884e-05, + "loss": 0.9892, + "step": 1130 + }, + { + "epoch": 0.63, + "learning_rate": 4.4789675903204805e-05, + "loss": 0.9847, + "step": 1135 + }, + { + "epoch": 0.63, + "learning_rate": 4.474535232562176e-05, + "loss": 1.0545, + "step": 1140 + }, + { + "epoch": 0.63, + "learning_rate": 4.470086314626797e-05, + "loss": 1.0204, + "step": 1145 + }, + { + "epoch": 0.64, + "learning_rate": 4.465620873826856e-05, + "loss": 0.9648, + "step": 1150 + }, + { + "epoch": 0.64, + "learning_rate": 4.46113894761344e-05, + "loss": 0.9545, + "step": 1155 + }, + { + "epoch": 0.64, + "learning_rate": 4.456640573575896e-05, + "loss": 1.0256, + "step": 1160 + }, + { + "epoch": 0.64, + "learning_rate": 4.4521257894415183e-05, + "loss": 0.99, + "step": 1165 + }, + { + "epoch": 0.65, + "learning_rate": 4.44759463307523e-05, + "loss": 0.9906, + "step": 1170 + }, + { + "epoch": 0.65, + "learning_rate": 4.443047142479266e-05, + "loss": 0.9876, + "step": 1175 + }, + { + "epoch": 0.65, + "learning_rate": 4.4384833557928553e-05, + "loss": 1.0495, + "step": 1180 + }, + { + "epoch": 0.66, + "learning_rate": 4.4339033112918966e-05, + "loss": 0.9869, + "step": 1185 + }, + { + "epoch": 0.66, + "learning_rate": 4.4293070473886456e-05, + "loss": 1.0299, + "step": 1190 + }, + { + "epoch": 0.66, + "learning_rate": 4.424694602631385e-05, + "loss": 1.0073, + "step": 1195 + }, + { + "epoch": 0.66, + "learning_rate": 4.420066015704105e-05, + "loss": 1.0023, + "step": 1200 + }, + { + "epoch": 0.67, + "learning_rate": 4.41542132542618e-05, + "loss": 0.9462, + "step": 1205 + }, + { + "epoch": 0.67, + "learning_rate": 4.410760570752037e-05, + "loss": 1.0116, + "step": 1210 + }, + { + "epoch": 0.67, + "learning_rate": 4.4060837907708375e-05, + "loss": 0.9652, + "step": 1215 + }, + { + "epoch": 0.67, + "learning_rate": 4.401391024706142e-05, + "loss": 1.0411, + "step": 1220 + }, + { + "epoch": 0.68, + "learning_rate": 4.396682311915586e-05, + "loss": 0.9691, + "step": 1225 + }, + { + "epoch": 0.68, + "learning_rate": 4.3919576918905495e-05, + "loss": 0.961, + "step": 1230 + }, + { + "epoch": 0.68, + "learning_rate": 4.387217204255819e-05, + "loss": 0.9602, + "step": 1235 + }, + { + "epoch": 0.69, + "learning_rate": 4.3824608887692666e-05, + "loss": 1.0197, + "step": 1240 + }, + { + "epoch": 0.69, + "learning_rate": 4.377688785321507e-05, + "loss": 0.9982, + "step": 1245 + }, + { + "epoch": 0.69, + "learning_rate": 4.372900933935569e-05, + "loss": 0.986, + "step": 1250 + }, + { + "epoch": 0.69, + "learning_rate": 4.368097374766556e-05, + "loss": 0.9744, + "step": 1255 + }, + { + "epoch": 0.7, + "learning_rate": 4.3632781481013105e-05, + "loss": 1.0078, + "step": 1260 + }, + { + "epoch": 0.7, + "learning_rate": 4.358443294358077e-05, + "loss": 0.9865, + "step": 1265 + }, + { + "epoch": 0.7, + "learning_rate": 4.35359285408616e-05, + "loss": 0.976, + "step": 1270 + }, + { + "epoch": 0.71, + "learning_rate": 4.348726867965591e-05, + "loss": 1.0141, + "step": 1275 + }, + { + "epoch": 0.71, + "learning_rate": 4.343845376806777e-05, + "loss": 0.952, + "step": 1280 + }, + { + "epoch": 0.71, + "learning_rate": 4.338948421550169e-05, + "loss": 1.0239, + "step": 1285 + }, + { + "epoch": 0.71, + "learning_rate": 4.334036043265909e-05, + "loss": 0.994, + "step": 1290 + }, + { + "epoch": 0.72, + "learning_rate": 4.329108283153492e-05, + "loss": 1.0505, + "step": 1295 + }, + { + "epoch": 0.72, + "learning_rate": 4.3241651825414195e-05, + "loss": 0.9919, + "step": 1300 + }, + { + "epoch": 0.72, + "learning_rate": 4.31920678288685e-05, + "loss": 1.0169, + "step": 1305 + }, + { + "epoch": 0.72, + "learning_rate": 4.3142331257752546e-05, + "loss": 0.9938, + "step": 1310 + }, + { + "epoch": 0.73, + "learning_rate": 4.309244252920064e-05, + "loss": 0.92, + "step": 1315 + }, + { + "epoch": 0.73, + "learning_rate": 4.304240206162326e-05, + "loss": 0.9537, + "step": 1320 + }, + { + "epoch": 0.73, + "learning_rate": 4.299221027470345e-05, + "loss": 0.996, + "step": 1325 + }, + { + "epoch": 0.74, + "learning_rate": 4.294186758939339e-05, + "loss": 0.9652, + "step": 1330 + }, + { + "epoch": 0.74, + "learning_rate": 4.2891374427910795e-05, + "loss": 1.0138, + "step": 1335 + }, + { + "epoch": 0.74, + "learning_rate": 4.284073121373544e-05, + "loss": 0.9267, + "step": 1340 + }, + { + "epoch": 0.74, + "learning_rate": 4.278993837160553e-05, + "loss": 0.9899, + "step": 1345 + }, + { + "epoch": 0.75, + "learning_rate": 4.273899632751422e-05, + "loss": 0.963, + "step": 1350 + }, + { + "epoch": 0.75, + "learning_rate": 4.2687905508705974e-05, + "loss": 1.0066, + "step": 1355 + }, + { + "epoch": 0.75, + "learning_rate": 4.263666634367303e-05, + "loss": 0.9939, + "step": 1360 + }, + { + "epoch": 0.75, + "learning_rate": 4.258527926215178e-05, + "loss": 0.9667, + "step": 1365 + }, + { + "epoch": 0.76, + "learning_rate": 4.253374469511917e-05, + "loss": 0.9999, + "step": 1370 + }, + { + "epoch": 0.76, + "learning_rate": 4.248206307478909e-05, + "loss": 0.9738, + "step": 1375 + }, + { + "epoch": 0.76, + "learning_rate": 4.243023483460875e-05, + "loss": 0.9829, + "step": 1380 + }, + { + "epoch": 0.77, + "learning_rate": 4.237826040925503e-05, + "loss": 1.0107, + "step": 1385 + }, + { + "epoch": 0.77, + "learning_rate": 4.232614023463088e-05, + "loss": 0.9992, + "step": 1390 + }, + { + "epoch": 0.77, + "learning_rate": 4.227387474786159e-05, + "loss": 0.9566, + "step": 1395 + }, + { + "epoch": 0.77, + "learning_rate": 4.222146438729119e-05, + "loss": 0.9722, + "step": 1400 + }, + { + "epoch": 0.78, + "learning_rate": 4.216890959247873e-05, + "loss": 0.988, + "step": 1405 + }, + { + "epoch": 0.78, + "learning_rate": 4.211621080419463e-05, + "loss": 1.0103, + "step": 1410 + }, + { + "epoch": 0.78, + "learning_rate": 4.206336846441695e-05, + "loss": 0.9805, + "step": 1415 + }, + { + "epoch": 0.79, + "learning_rate": 4.201038301632772e-05, + "loss": 1.0177, + "step": 1420 + }, + { + "epoch": 0.79, + "learning_rate": 4.195725490430917e-05, + "loss": 0.9938, + "step": 1425 + }, + { + "epoch": 0.79, + "learning_rate": 4.190398457394007e-05, + "loss": 1.0276, + "step": 1430 + }, + { + "epoch": 0.79, + "learning_rate": 4.1850572471991924e-05, + "loss": 0.9909, + "step": 1435 + }, + { + "epoch": 0.8, + "learning_rate": 4.1797019046425264e-05, + "loss": 0.9794, + "step": 1440 + }, + { + "epoch": 0.8, + "learning_rate": 4.1743324746385914e-05, + "loss": 1.0073, + "step": 1445 + }, + { + "epoch": 0.8, + "learning_rate": 4.1689490022201154e-05, + "loss": 1.0106, + "step": 1450 + }, + { + "epoch": 0.8, + "learning_rate": 4.163551532537601e-05, + "loss": 1.0241, + "step": 1455 + }, + { + "epoch": 0.81, + "learning_rate": 4.1581401108589425e-05, + "loss": 1.0178, + "step": 1460 + }, + { + "epoch": 0.81, + "learning_rate": 4.1527147825690495e-05, + "loss": 0.9426, + "step": 1465 + }, + { + "epoch": 0.81, + "learning_rate": 4.1472755931694626e-05, + "loss": 0.9642, + "step": 1470 + }, + { + "epoch": 0.82, + "learning_rate": 4.141822588277976e-05, + "loss": 0.9678, + "step": 1475 + }, + { + "epoch": 0.82, + "learning_rate": 4.136355813628251e-05, + "loss": 0.9654, + "step": 1480 + }, + { + "epoch": 0.82, + "learning_rate": 4.130875315069435e-05, + "loss": 0.9748, + "step": 1485 + }, + { + "epoch": 0.82, + "learning_rate": 4.125381138565775e-05, + "loss": 0.9321, + "step": 1490 + }, + { + "epoch": 0.83, + "learning_rate": 4.1198733301962346e-05, + "loss": 0.9885, + "step": 1495 + }, + { + "epoch": 0.83, + "learning_rate": 4.114351936154105e-05, + "loss": 0.9385, + "step": 1500 + }, + { + "epoch": 0.83, + "learning_rate": 4.108817002746619e-05, + "loss": 0.9362, + "step": 1505 + }, + { + "epoch": 0.83, + "learning_rate": 4.1032685763945625e-05, + "loss": 0.9764, + "step": 1510 + }, + { + "epoch": 0.84, + "learning_rate": 4.097706703631886e-05, + "loss": 0.961, + "step": 1515 + }, + { + "epoch": 0.84, + "learning_rate": 4.092131431105312e-05, + "loss": 0.9818, + "step": 1520 + }, + { + "epoch": 0.84, + "learning_rate": 4.086542805573945e-05, + "loss": 0.9855, + "step": 1525 + }, + { + "epoch": 0.85, + "learning_rate": 4.080940873908881e-05, + "loss": 0.9618, + "step": 1530 + }, + { + "epoch": 0.85, + "learning_rate": 4.07532568309281e-05, + "loss": 1.0102, + "step": 1535 + }, + { + "epoch": 0.85, + "learning_rate": 4.069697280219628e-05, + "loss": 0.9773, + "step": 1540 + }, + { + "epoch": 0.85, + "learning_rate": 4.0640557124940376e-05, + "loss": 0.9687, + "step": 1545 + }, + { + "epoch": 0.86, + "learning_rate": 4.058401027231152e-05, + "loss": 0.9818, + "step": 1550 + }, + { + "epoch": 0.86, + "learning_rate": 4.052733271856103e-05, + "loss": 0.9911, + "step": 1555 + }, + { + "epoch": 0.86, + "learning_rate": 4.0470524939036355e-05, + "loss": 0.9694, + "step": 1560 + }, + { + "epoch": 0.87, + "learning_rate": 4.0413587410177155e-05, + "loss": 0.9374, + "step": 1565 + }, + { + "epoch": 0.87, + "learning_rate": 4.035652060951128e-05, + "loss": 0.956, + "step": 1570 + }, + { + "epoch": 0.87, + "learning_rate": 4.0299325015650774e-05, + "loss": 0.9491, + "step": 1575 + }, + { + "epoch": 0.87, + "learning_rate": 4.024200110828783e-05, + "loss": 0.9569, + "step": 1580 + }, + { + "epoch": 0.88, + "learning_rate": 4.018454936819082e-05, + "loss": 0.9627, + "step": 1585 + }, + { + "epoch": 0.88, + "learning_rate": 4.012697027720018e-05, + "loss": 0.9922, + "step": 1590 + }, + { + "epoch": 0.88, + "learning_rate": 4.0069264318224506e-05, + "loss": 0.9703, + "step": 1595 + }, + { + "epoch": 0.88, + "learning_rate": 4.0011431975236337e-05, + "loss": 0.9436, + "step": 1600 + }, + { + "epoch": 0.89, + "learning_rate": 3.995347373326822e-05, + "loss": 0.961, + "step": 1605 + }, + { + "epoch": 0.89, + "learning_rate": 3.989539007840861e-05, + "loss": 0.9247, + "step": 1610 + }, + { + "epoch": 0.89, + "learning_rate": 3.983718149779775e-05, + "loss": 0.9537, + "step": 1615 + }, + { + "epoch": 0.9, + "learning_rate": 3.9778848479623656e-05, + "loss": 0.9415, + "step": 1620 + }, + { + "epoch": 0.9, + "learning_rate": 3.972039151311795e-05, + "loss": 0.9954, + "step": 1625 + }, + { + "epoch": 0.9, + "learning_rate": 3.966181108855183e-05, + "loss": 0.9451, + "step": 1630 + }, + { + "epoch": 0.9, + "learning_rate": 3.960310769723189e-05, + "loss": 0.9977, + "step": 1635 + }, + { + "epoch": 0.91, + "learning_rate": 3.9544281831496034e-05, + "loss": 0.9806, + "step": 1640 + }, + { + "epoch": 0.91, + "learning_rate": 3.9485333984709374e-05, + "loss": 0.9851, + "step": 1645 + }, + { + "epoch": 0.91, + "learning_rate": 3.942626465126001e-05, + "loss": 1.0089, + "step": 1650 + }, + { + "epoch": 0.92, + "learning_rate": 3.9367074326555e-05, + "loss": 0.9562, + "step": 1655 + }, + { + "epoch": 0.92, + "learning_rate": 3.930776350701609e-05, + "loss": 0.9892, + "step": 1660 + }, + { + "epoch": 0.92, + "learning_rate": 3.92483326900756e-05, + "loss": 0.9875, + "step": 1665 + }, + { + "epoch": 0.92, + "learning_rate": 3.91887823741723e-05, + "loss": 1.0012, + "step": 1670 + }, + { + "epoch": 0.93, + "learning_rate": 3.9129113058747136e-05, + "loss": 0.97, + "step": 1675 + }, + { + "epoch": 0.93, + "learning_rate": 3.9069325244239095e-05, + "loss": 0.9793, + "step": 1680 + }, + { + "epoch": 0.93, + "learning_rate": 3.900941943208103e-05, + "loss": 0.9864, + "step": 1685 + }, + { + "epoch": 0.93, + "learning_rate": 3.894939612469539e-05, + "loss": 0.9596, + "step": 1690 + }, + { + "epoch": 0.94, + "learning_rate": 3.888925582549006e-05, + "loss": 0.9679, + "step": 1695 + }, + { + "epoch": 0.94, + "learning_rate": 3.882899903885412e-05, + "loss": 0.9395, + "step": 1700 + }, + { + "epoch": 0.94, + "learning_rate": 3.876862627015361e-05, + "loss": 0.9774, + "step": 1705 + }, + { + "epoch": 0.95, + "learning_rate": 3.87081380257273e-05, + "loss": 0.9686, + "step": 1710 + }, + { + "epoch": 0.95, + "learning_rate": 3.864753481288244e-05, + "loss": 0.953, + "step": 1715 + }, + { + "epoch": 0.95, + "learning_rate": 3.8586817139890515e-05, + "loss": 0.9719, + "step": 1720 + }, + { + "epoch": 0.95, + "learning_rate": 3.852598551598294e-05, + "loss": 0.9748, + "step": 1725 + }, + { + "epoch": 0.96, + "learning_rate": 3.8465040451346874e-05, + "loss": 0.9741, + "step": 1730 + }, + { + "epoch": 0.96, + "learning_rate": 3.8403982457120836e-05, + "loss": 0.9747, + "step": 1735 + }, + { + "epoch": 0.96, + "learning_rate": 3.834281204539051e-05, + "loss": 0.9791, + "step": 1740 + }, + { + "epoch": 0.96, + "learning_rate": 3.828152972918438e-05, + "loss": 0.9704, + "step": 1745 + }, + { + "epoch": 0.97, + "learning_rate": 3.82201360224695e-05, + "loss": 0.9556, + "step": 1750 + }, + { + "epoch": 0.97, + "learning_rate": 3.815863144014711e-05, + "loss": 0.9552, + "step": 1755 + }, + { + "epoch": 0.97, + "learning_rate": 3.809701649804834e-05, + "loss": 0.9594, + "step": 1760 + }, + { + "epoch": 0.98, + "learning_rate": 3.8035291712929926e-05, + "loss": 0.959, + "step": 1765 + }, + { + "epoch": 0.98, + "learning_rate": 3.797345760246982e-05, + "loss": 1.0254, + "step": 1770 + }, + { + "epoch": 0.98, + "learning_rate": 3.791151468526289e-05, + "loss": 0.9925, + "step": 1775 + }, + { + "epoch": 0.98, + "learning_rate": 3.784946348081654e-05, + "loss": 0.9516, + "step": 1780 + }, + { + "epoch": 0.99, + "learning_rate": 3.7787304509546365e-05, + "loss": 0.954, + "step": 1785 + }, + { + "epoch": 0.99, + "learning_rate": 3.7725038292771774e-05, + "loss": 0.9465, + "step": 1790 + }, + { + "epoch": 0.99, + "learning_rate": 3.766266535271167e-05, + "loss": 0.9792, + "step": 1795 + }, + { + "epoch": 1.0, + "learning_rate": 3.760018621248e-05, + "loss": 0.964, + "step": 1800 + }, + { + "epoch": 1.0, + "learning_rate": 3.75376013960814e-05, + "loss": 0.9419, + "step": 1805 + }, + { + "epoch": 1.0, + "learning_rate": 3.747491142840681e-05, + "loss": 0.9818, + "step": 1810 + }, + { + "epoch": 1.0, + "learning_rate": 3.741211683522904e-05, + "loss": 0.9153, + "step": 1815 + }, + { + "epoch": 1.01, + "learning_rate": 3.734921814319841e-05, + "loss": 0.955, + "step": 1820 + }, + { + "epoch": 1.01, + "learning_rate": 3.728621587983828e-05, + "loss": 0.9467, + "step": 1825 + }, + { + "epoch": 1.01, + "learning_rate": 3.722311057354067e-05, + "loss": 0.9816, + "step": 1830 + }, + { + "epoch": 1.01, + "learning_rate": 3.715990275356178e-05, + "loss": 0.9727, + "step": 1835 + }, + { + "epoch": 1.02, + "learning_rate": 3.7096592950017617e-05, + "loss": 0.9822, + "step": 1840 + }, + { + "epoch": 1.02, + "learning_rate": 3.703318169387947e-05, + "loss": 0.942, + "step": 1845 + }, + { + "epoch": 1.02, + "learning_rate": 3.696966951696952e-05, + "loss": 0.9306, + "step": 1850 + }, + { + "epoch": 1.03, + "learning_rate": 3.690605695195637e-05, + "loss": 0.9654, + "step": 1855 + }, + { + "epoch": 1.03, + "learning_rate": 3.684234453235054e-05, + "loss": 0.9592, + "step": 1860 + }, + { + "epoch": 1.03, + "learning_rate": 3.677853279250003e-05, + "loss": 0.988, + "step": 1865 + }, + { + "epoch": 1.03, + "learning_rate": 3.671462226758583e-05, + "loss": 0.9462, + "step": 1870 + }, + { + "epoch": 1.04, + "learning_rate": 3.665061349361742e-05, + "loss": 0.9685, + "step": 1875 + }, + { + "epoch": 1.04, + "learning_rate": 3.658650700742828e-05, + "loss": 0.9772, + "step": 1880 + }, + { + "epoch": 1.04, + "learning_rate": 3.6522303346671404e-05, + "loss": 0.9482, + "step": 1885 + }, + { + "epoch": 1.05, + "learning_rate": 3.645800304981477e-05, + "loss": 1.0069, + "step": 1890 + }, + { + "epoch": 1.05, + "learning_rate": 3.639360665613683e-05, + "loss": 0.9003, + "step": 1895 + }, + { + "epoch": 1.05, + "learning_rate": 3.632911470572197e-05, + "loss": 0.9279, + "step": 1900 + }, + { + "epoch": 1.05, + "learning_rate": 3.626452773945603e-05, + "loss": 0.9237, + "step": 1905 + }, + { + "epoch": 1.06, + "learning_rate": 3.619984629902172e-05, + "loss": 1.0086, + "step": 1910 + }, + { + "epoch": 1.06, + "learning_rate": 3.613507092689409e-05, + "loss": 0.9625, + "step": 1915 + }, + { + "epoch": 1.06, + "learning_rate": 3.607020216633599e-05, + "loss": 0.9297, + "step": 1920 + }, + { + "epoch": 1.06, + "learning_rate": 3.60052405613935e-05, + "loss": 0.9718, + "step": 1925 + }, + { + "epoch": 1.07, + "learning_rate": 3.594018665689139e-05, + "loss": 0.9512, + "step": 1930 + }, + { + "epoch": 1.07, + "learning_rate": 3.5875040998428513e-05, + "loss": 0.9923, + "step": 1935 + }, + { + "epoch": 1.07, + "learning_rate": 3.5809804132373253e-05, + "loss": 0.9518, + "step": 1940 + }, + { + "epoch": 1.08, + "learning_rate": 3.574447660585897e-05, + "loss": 0.9324, + "step": 1945 + }, + { + "epoch": 1.08, + "learning_rate": 3.5679058966779344e-05, + "loss": 0.9724, + "step": 1950 + }, + { + "epoch": 1.08, + "learning_rate": 3.561355176378384e-05, + "loss": 0.978, + "step": 1955 + }, + { + "epoch": 1.08, + "learning_rate": 3.554795554627307e-05, + "loss": 0.9893, + "step": 1960 + }, + { + "epoch": 1.09, + "learning_rate": 3.548227086439422e-05, + "loss": 0.967, + "step": 1965 + }, + { + "epoch": 1.09, + "learning_rate": 3.541649826903639e-05, + "loss": 0.9529, + "step": 1970 + }, + { + "epoch": 1.09, + "learning_rate": 3.535063831182602e-05, + "loss": 0.9506, + "step": 1975 + }, + { + "epoch": 1.09, + "learning_rate": 3.528469154512224e-05, + "loss": 0.9525, + "step": 1980 + }, + { + "epoch": 1.1, + "learning_rate": 3.521865852201223e-05, + "loss": 0.9258, + "step": 1985 + }, + { + "epoch": 1.1, + "learning_rate": 3.5152539796306596e-05, + "loss": 0.9417, + "step": 1990 + }, + { + "epoch": 1.1, + "learning_rate": 3.508633592253472e-05, + "loss": 0.9465, + "step": 1995 + }, + { + "epoch": 1.11, + "learning_rate": 3.502004745594011e-05, + "loss": 0.9494, + "step": 2000 + }, + { + "epoch": 1.11, + "learning_rate": 3.4953674952475755e-05, + "loss": 0.9709, + "step": 2005 + }, + { + "epoch": 1.11, + "learning_rate": 3.488721896879943e-05, + "loss": 0.9581, + "step": 2010 + }, + { + "epoch": 1.11, + "learning_rate": 3.4820680062269074e-05, + "loss": 0.974, + "step": 2015 + }, + { + "epoch": 1.12, + "learning_rate": 3.4754058790938046e-05, + "loss": 0.9768, + "step": 2020 + }, + { + "epoch": 1.12, + "learning_rate": 3.468735571355055e-05, + "loss": 0.982, + "step": 2025 + }, + { + "epoch": 1.12, + "learning_rate": 3.4620571389536825e-05, + "loss": 0.984, + "step": 2030 + }, + { + "epoch": 1.13, + "learning_rate": 3.455370637900856e-05, + "loss": 0.9604, + "step": 2035 + }, + { + "epoch": 1.13, + "learning_rate": 3.448676124275414e-05, + "loss": 0.897, + "step": 2040 + }, + { + "epoch": 1.13, + "learning_rate": 3.4419736542233925e-05, + "loss": 0.9968, + "step": 2045 + }, + { + "epoch": 1.13, + "learning_rate": 3.4352632839575616e-05, + "loss": 0.9479, + "step": 2050 + }, + { + "epoch": 1.14, + "learning_rate": 3.428545069756946e-05, + "loss": 0.9724, + "step": 2055 + }, + { + "epoch": 1.14, + "learning_rate": 3.42181906796636e-05, + "loss": 0.9493, + "step": 2060 + }, + { + "epoch": 1.14, + "learning_rate": 3.415085334995927e-05, + "loss": 0.9348, + "step": 2065 + }, + { + "epoch": 1.14, + "learning_rate": 3.408343927320613e-05, + "loss": 0.9702, + "step": 2070 + }, + { + "epoch": 1.15, + "learning_rate": 3.401594901479753e-05, + "loss": 0.9089, + "step": 2075 + }, + { + "epoch": 1.15, + "learning_rate": 3.394838314076572e-05, + "loss": 0.9606, + "step": 2080 + }, + { + "epoch": 1.15, + "learning_rate": 3.3880742217777115e-05, + "loss": 0.9743, + "step": 2085 + }, + { + "epoch": 1.16, + "learning_rate": 3.381302681312759e-05, + "loss": 0.9469, + "step": 2090 + }, + { + "epoch": 1.16, + "learning_rate": 3.374523749473767e-05, + "loss": 0.949, + "step": 2095 + }, + { + "epoch": 1.16, + "learning_rate": 3.367737483114779e-05, + "loss": 0.9567, + "step": 2100 + }, + { + "epoch": 1.16, + "learning_rate": 3.360943939151351e-05, + "loss": 0.9718, + "step": 2105 + }, + { + "epoch": 1.17, + "learning_rate": 3.354143174560078e-05, + "loss": 0.9626, + "step": 2110 + }, + { + "epoch": 1.17, + "learning_rate": 3.3473352463781105e-05, + "loss": 0.9346, + "step": 2115 + }, + { + "epoch": 1.17, + "learning_rate": 3.340520211702681e-05, + "loss": 0.9208, + "step": 2120 + }, + { + "epoch": 1.18, + "learning_rate": 3.333698127690623e-05, + "loss": 0.9856, + "step": 2125 + }, + { + "epoch": 1.18, + "learning_rate": 3.326869051557891e-05, + "loss": 0.9049, + "step": 2130 + }, + { + "epoch": 1.18, + "learning_rate": 3.320033040579082e-05, + "loss": 0.9222, + "step": 2135 + }, + { + "epoch": 1.18, + "learning_rate": 3.3131901520869565e-05, + "loss": 0.9648, + "step": 2140 + }, + { + "epoch": 1.19, + "learning_rate": 3.306340443471951e-05, + "loss": 0.9538, + "step": 2145 + }, + { + "epoch": 1.19, + "learning_rate": 3.299483972181708e-05, + "loss": 0.9314, + "step": 2150 + }, + { + "epoch": 1.19, + "learning_rate": 3.292620795720583e-05, + "loss": 0.9576, + "step": 2155 + }, + { + "epoch": 1.19, + "learning_rate": 3.285750971649167e-05, + "loss": 0.9499, + "step": 2160 + }, + { + "epoch": 1.2, + "learning_rate": 3.278874557583807e-05, + "loss": 0.9568, + "step": 2165 + }, + { + "epoch": 1.2, + "learning_rate": 3.271991611196117e-05, + "loss": 0.9642, + "step": 2170 + }, + { + "epoch": 1.2, + "learning_rate": 3.265102190212497e-05, + "loss": 0.9526, + "step": 2175 + }, + { + "epoch": 1.21, + "learning_rate": 3.258206352413648e-05, + "loss": 0.933, + "step": 2180 + }, + { + "epoch": 1.21, + "learning_rate": 3.2513041556340887e-05, + "loss": 0.9683, + "step": 2185 + }, + { + "epoch": 1.21, + "learning_rate": 3.244395657761671e-05, + "loss": 0.9155, + "step": 2190 + }, + { + "epoch": 1.21, + "learning_rate": 3.2374809167370924e-05, + "loss": 0.9262, + "step": 2195 + }, + { + "epoch": 1.22, + "learning_rate": 3.230559990553409e-05, + "loss": 0.9778, + "step": 2200 + }, + { + "epoch": 1.22, + "learning_rate": 3.2236329372555544e-05, + "loss": 0.9577, + "step": 2205 + }, + { + "epoch": 1.22, + "learning_rate": 3.2166998149398465e-05, + "loss": 0.9286, + "step": 2210 + }, + { + "epoch": 1.22, + "learning_rate": 3.209760681753505e-05, + "loss": 0.9634, + "step": 2215 + }, + { + "epoch": 1.23, + "learning_rate": 3.2028155958941615e-05, + "loss": 0.9467, + "step": 2220 + }, + { + "epoch": 1.23, + "learning_rate": 3.195864615609373e-05, + "loss": 0.9543, + "step": 2225 + }, + { + "epoch": 1.23, + "learning_rate": 3.1889077991961304e-05, + "loss": 0.9914, + "step": 2230 + }, + { + "epoch": 1.24, + "learning_rate": 3.181945205000373e-05, + "loss": 0.9309, + "step": 2235 + }, + { + "epoch": 1.24, + "learning_rate": 3.1749768914164955e-05, + "loss": 0.9299, + "step": 2240 + }, + { + "epoch": 1.24, + "learning_rate": 3.168002916886864e-05, + "loss": 0.9462, + "step": 2245 + }, + { + "epoch": 1.24, + "learning_rate": 3.1610233399013194e-05, + "loss": 0.948, + "step": 2250 + }, + { + "epoch": 1.25, + "learning_rate": 3.15403821899669e-05, + "loss": 0.9335, + "step": 2255 + }, + { + "epoch": 1.25, + "learning_rate": 3.147047612756302e-05, + "loss": 0.952, + "step": 2260 + }, + { + "epoch": 1.25, + "learning_rate": 3.140051579809484e-05, + "loss": 0.9532, + "step": 2265 + }, + { + "epoch": 1.26, + "learning_rate": 3.133050178831079e-05, + "loss": 0.9853, + "step": 2270 + }, + { + "epoch": 1.26, + "learning_rate": 3.12604346854095e-05, + "loss": 0.8925, + "step": 2275 + }, + { + "epoch": 1.26, + "learning_rate": 3.119031507703491e-05, + "loss": 0.942, + "step": 2280 + }, + { + "epoch": 1.26, + "learning_rate": 3.112014355127129e-05, + "loss": 0.9132, + "step": 2285 + }, + { + "epoch": 1.27, + "learning_rate": 3.104992069663835e-05, + "loss": 0.9335, + "step": 2290 + }, + { + "epoch": 1.27, + "learning_rate": 3.0979647102086273e-05, + "loss": 0.9403, + "step": 2295 + }, + { + "epoch": 1.27, + "learning_rate": 3.090932335699081e-05, + "loss": 0.9246, + "step": 2300 + }, + { + "epoch": 1.27, + "learning_rate": 3.083895005114831e-05, + "loss": 0.912, + "step": 2305 + }, + { + "epoch": 1.28, + "learning_rate": 3.076852777477079e-05, + "loss": 0.9334, + "step": 2310 + }, + { + "epoch": 1.28, + "learning_rate": 3.069805711848096e-05, + "loss": 0.933, + "step": 2315 + }, + { + "epoch": 1.28, + "learning_rate": 3.062753867330729e-05, + "loss": 0.9348, + "step": 2320 + }, + { + "epoch": 1.29, + "learning_rate": 3.055697303067905e-05, + "loss": 0.9997, + "step": 2325 + }, + { + "epoch": 1.29, + "learning_rate": 3.048636078242137e-05, + "loss": 0.9196, + "step": 2330 + }, + { + "epoch": 1.29, + "learning_rate": 3.0415702520750235e-05, + "loss": 0.9735, + "step": 2335 + }, + { + "epoch": 1.29, + "learning_rate": 3.0344998838267525e-05, + "loss": 0.9395, + "step": 2340 + }, + { + "epoch": 1.3, + "learning_rate": 3.0274250327956093e-05, + "loss": 0.9455, + "step": 2345 + }, + { + "epoch": 1.3, + "learning_rate": 3.020345758317474e-05, + "loss": 0.968, + "step": 2350 + }, + { + "epoch": 1.3, + "learning_rate": 3.0132621197653245e-05, + "loss": 0.9403, + "step": 2355 + }, + { + "epoch": 1.3, + "learning_rate": 3.0061741765487418e-05, + "loss": 0.9267, + "step": 2360 + }, + { + "epoch": 1.31, + "learning_rate": 2.9990819881134073e-05, + "loss": 0.9734, + "step": 2365 + }, + { + "epoch": 1.31, + "learning_rate": 2.9919856139406093e-05, + "loss": 0.949, + "step": 2370 + }, + { + "epoch": 1.31, + "learning_rate": 2.9848851135467386e-05, + "loss": 0.9464, + "step": 2375 + }, + { + "epoch": 1.32, + "learning_rate": 2.977780546482794e-05, + "loss": 0.9709, + "step": 2380 + }, + { + "epoch": 1.32, + "learning_rate": 2.9706719723338795e-05, + "loss": 0.9202, + "step": 2385 + }, + { + "epoch": 1.32, + "learning_rate": 2.9635594507187074e-05, + "loss": 0.9625, + "step": 2390 + }, + { + "epoch": 1.32, + "learning_rate": 2.956443041289096e-05, + "loss": 0.9307, + "step": 2395 + }, + { + "epoch": 1.33, + "learning_rate": 2.9493228037294702e-05, + "loss": 0.907, + "step": 2400 + }, + { + "epoch": 1.33, + "learning_rate": 2.9421987977563613e-05, + "loss": 0.9487, + "step": 2405 + }, + { + "epoch": 1.33, + "learning_rate": 2.935071083117907e-05, + "loss": 0.9259, + "step": 2410 + }, + { + "epoch": 1.34, + "learning_rate": 2.9279397195933457e-05, + "loss": 0.9357, + "step": 2415 + }, + { + "epoch": 1.34, + "learning_rate": 2.920804766992521e-05, + "loss": 0.9149, + "step": 2420 + }, + { + "epoch": 1.34, + "learning_rate": 2.9136662851553787e-05, + "loss": 0.9664, + "step": 2425 + }, + { + "epoch": 1.34, + "learning_rate": 2.906524333951461e-05, + "loss": 0.973, + "step": 2430 + }, + { + "epoch": 1.35, + "learning_rate": 2.899378973279409e-05, + "loss": 0.9797, + "step": 2435 + }, + { + "epoch": 1.35, + "learning_rate": 2.892230263066459e-05, + "loss": 0.9604, + "step": 2440 + }, + { + "epoch": 1.35, + "learning_rate": 2.885078263267938e-05, + "loss": 0.9147, + "step": 2445 + }, + { + "epoch": 1.35, + "learning_rate": 2.8779230338667634e-05, + "loss": 0.9827, + "step": 2450 + }, + { + "epoch": 1.36, + "learning_rate": 2.870764634872939e-05, + "loss": 0.9362, + "step": 2455 + }, + { + "epoch": 1.36, + "learning_rate": 2.86360312632305e-05, + "loss": 0.9373, + "step": 2460 + }, + { + "epoch": 1.36, + "learning_rate": 2.8564385682797622e-05, + "loss": 0.955, + "step": 2465 + }, + { + "epoch": 1.37, + "learning_rate": 2.8492710208313177e-05, + "loss": 0.9108, + "step": 2470 + }, + { + "epoch": 1.37, + "learning_rate": 2.8421005440910303e-05, + "loss": 0.9259, + "step": 2475 + }, + { + "epoch": 1.37, + "learning_rate": 2.8349271981967797e-05, + "loss": 0.9333, + "step": 2480 + }, + { + "epoch": 1.37, + "learning_rate": 2.8277510433105102e-05, + "loss": 0.9437, + "step": 2485 + }, + { + "epoch": 1.38, + "learning_rate": 2.820572139617725e-05, + "loss": 0.9189, + "step": 2490 + }, + { + "epoch": 1.38, + "learning_rate": 2.8133905473269802e-05, + "loss": 0.9522, + "step": 2495 + }, + { + "epoch": 1.38, + "learning_rate": 2.8062063266693818e-05, + "loss": 0.9352, + "step": 2500 + }, + { + "epoch": 1.39, + "learning_rate": 2.7990195378980784e-05, + "loss": 0.9232, + "step": 2505 + }, + { + "epoch": 1.39, + "learning_rate": 2.7918302412877583e-05, + "loss": 0.8807, + "step": 2510 + }, + { + "epoch": 1.39, + "learning_rate": 2.7846384971341427e-05, + "loss": 0.9231, + "step": 2515 + }, + { + "epoch": 1.39, + "learning_rate": 2.7774443657534788e-05, + "loss": 0.9253, + "step": 2520 + }, + { + "epoch": 1.4, + "learning_rate": 2.770247907482036e-05, + "loss": 0.9646, + "step": 2525 + }, + { + "epoch": 1.4, + "learning_rate": 2.763049182675599e-05, + "loss": 0.9575, + "step": 2530 + }, + { + "epoch": 1.4, + "learning_rate": 2.7558482517089617e-05, + "loss": 0.9234, + "step": 2535 + }, + { + "epoch": 1.4, + "learning_rate": 2.748645174975421e-05, + "loss": 0.9215, + "step": 2540 + }, + { + "epoch": 1.41, + "learning_rate": 2.74144001288627e-05, + "loss": 0.959, + "step": 2545 + }, + { + "epoch": 1.41, + "learning_rate": 2.7342328258702894e-05, + "loss": 0.9509, + "step": 2550 + }, + { + "epoch": 1.41, + "learning_rate": 2.727023674373246e-05, + "loss": 0.8989, + "step": 2555 + }, + { + "epoch": 1.42, + "learning_rate": 2.7198126188573807e-05, + "loss": 0.9653, + "step": 2560 + }, + { + "epoch": 1.42, + "learning_rate": 2.7125997198009028e-05, + "loss": 0.9046, + "step": 2565 + }, + { + "epoch": 1.42, + "learning_rate": 2.7053850376974848e-05, + "loss": 0.9318, + "step": 2570 + }, + { + "epoch": 1.42, + "learning_rate": 2.6981686330557516e-05, + "loss": 0.9292, + "step": 2575 + }, + { + "epoch": 1.43, + "learning_rate": 2.6909505663987756e-05, + "loss": 0.99, + "step": 2580 + }, + { + "epoch": 1.43, + "learning_rate": 2.6837308982635678e-05, + "loss": 0.9737, + "step": 2585 + }, + { + "epoch": 1.43, + "learning_rate": 2.6765096892005726e-05, + "loss": 0.9649, + "step": 2590 + }, + { + "epoch": 1.43, + "learning_rate": 2.6692869997731545e-05, + "loss": 0.9687, + "step": 2595 + }, + { + "epoch": 1.44, + "learning_rate": 2.6620628905570964e-05, + "loss": 0.9708, + "step": 2600 + }, + { + "epoch": 1.44, + "learning_rate": 2.6548374221400884e-05, + "loss": 0.9498, + "step": 2605 + }, + { + "epoch": 1.44, + "learning_rate": 2.6476106551212188e-05, + "loss": 0.9512, + "step": 2610 + }, + { + "epoch": 1.45, + "learning_rate": 2.6403826501104682e-05, + "loss": 0.964, + "step": 2615 + }, + { + "epoch": 1.45, + "learning_rate": 2.6331534677281998e-05, + "loss": 0.9321, + "step": 2620 + }, + { + "epoch": 1.45, + "learning_rate": 2.6259231686046508e-05, + "loss": 0.9032, + "step": 2625 + }, + { + "epoch": 1.45, + "learning_rate": 2.6186918133794252e-05, + "loss": 0.9543, + "step": 2630 + }, + { + "epoch": 1.46, + "learning_rate": 2.6114594627009847e-05, + "loss": 0.9355, + "step": 2635 + }, + { + "epoch": 1.46, + "learning_rate": 2.604226177226137e-05, + "loss": 0.9111, + "step": 2640 + }, + { + "epoch": 1.46, + "learning_rate": 2.596992017619534e-05, + "loss": 0.9896, + "step": 2645 + }, + { + "epoch": 1.47, + "learning_rate": 2.589757044553155e-05, + "loss": 0.9715, + "step": 2650 + }, + { + "epoch": 1.47, + "learning_rate": 2.5825213187058045e-05, + "loss": 0.8911, + "step": 2655 + }, + { + "epoch": 1.47, + "learning_rate": 2.5752849007625986e-05, + "loss": 0.9446, + "step": 2660 + }, + { + "epoch": 1.47, + "learning_rate": 2.568047851414459e-05, + "loss": 0.9453, + "step": 2665 + }, + { + "epoch": 1.48, + "learning_rate": 2.5608102313576027e-05, + "loss": 0.9244, + "step": 2670 + }, + { + "epoch": 1.48, + "learning_rate": 2.553572101293033e-05, + "loss": 0.9478, + "step": 2675 + }, + { + "epoch": 1.48, + "learning_rate": 2.546333521926031e-05, + "loss": 0.9708, + "step": 2680 + }, + { + "epoch": 1.48, + "learning_rate": 2.5390945539656445e-05, + "loss": 0.9266, + "step": 2685 + }, + { + "epoch": 1.49, + "learning_rate": 2.5318552581241822e-05, + "loss": 0.9199, + "step": 2690 + }, + { + "epoch": 1.49, + "learning_rate": 2.524615695116702e-05, + "loss": 0.9666, + "step": 2695 + }, + { + "epoch": 1.49, + "learning_rate": 2.5173759256605027e-05, + "loss": 0.9247, + "step": 2700 + }, + { + "epoch": 1.5, + "learning_rate": 2.510136010474614e-05, + "loss": 0.9534, + "step": 2705 + }, + { + "epoch": 1.5, + "learning_rate": 2.5028960102792887e-05, + "loss": 0.9502, + "step": 2710 + }, + { + "epoch": 1.5, + "learning_rate": 2.4971039897207112e-05, + "loss": 0.9331, + "step": 2715 + }, + { + "epoch": 1.5, + "learning_rate": 2.4898639895253865e-05, + "loss": 0.9806, + "step": 2720 + }, + { + "epoch": 1.51, + "learning_rate": 2.4826240743394982e-05, + "loss": 0.9525, + "step": 2725 + }, + { + "epoch": 1.51, + "learning_rate": 2.4753843048832985e-05, + "loss": 0.937, + "step": 2730 + }, + { + "epoch": 1.51, + "learning_rate": 2.4681447418758187e-05, + "loss": 0.935, + "step": 2735 + }, + { + "epoch": 1.52, + "learning_rate": 2.460905446034356e-05, + "loss": 0.9662, + "step": 2740 + }, + { + "epoch": 1.52, + "learning_rate": 2.45366647807397e-05, + "loss": 0.9249, + "step": 2745 + }, + { + "epoch": 1.52, + "learning_rate": 2.446427898706967e-05, + "loss": 0.9673, + "step": 2750 + }, + { + "epoch": 1.52, + "learning_rate": 2.439189768642398e-05, + "loss": 0.9316, + "step": 2755 + }, + { + "epoch": 1.53, + "learning_rate": 2.431952148585541e-05, + "loss": 0.9297, + "step": 2760 + }, + { + "epoch": 1.53, + "learning_rate": 2.424715099237402e-05, + "loss": 0.9278, + "step": 2765 + }, + { + "epoch": 1.53, + "learning_rate": 2.4174786812941968e-05, + "loss": 0.8954, + "step": 2770 + }, + { + "epoch": 1.53, + "learning_rate": 2.4102429554468456e-05, + "loss": 0.9586, + "step": 2775 + }, + { + "epoch": 1.54, + "learning_rate": 2.4030079823804673e-05, + "loss": 0.9119, + "step": 2780 + }, + { + "epoch": 1.54, + "learning_rate": 2.395773822773863e-05, + "loss": 0.8949, + "step": 2785 + }, + { + "epoch": 1.54, + "learning_rate": 2.3885405372990166e-05, + "loss": 0.9506, + "step": 2790 + }, + { + "epoch": 1.55, + "learning_rate": 2.3813081866205754e-05, + "loss": 0.9087, + "step": 2795 + }, + { + "epoch": 1.55, + "learning_rate": 2.3740768313953494e-05, + "loss": 0.923, + "step": 2800 + }, + { + "epoch": 1.55, + "learning_rate": 2.3668465322718004e-05, + "loss": 1.0212, + "step": 2805 + }, + { + "epoch": 1.55, + "learning_rate": 2.359617349889532e-05, + "loss": 0.9182, + "step": 2810 + }, + { + "epoch": 1.56, + "learning_rate": 2.3523893448787818e-05, + "loss": 0.9316, + "step": 2815 + }, + { + "epoch": 1.56, + "learning_rate": 2.3451625778599122e-05, + "loss": 0.94, + "step": 2820 + }, + { + "epoch": 1.56, + "learning_rate": 2.3379371094429038e-05, + "loss": 0.9181, + "step": 2825 + }, + { + "epoch": 1.56, + "learning_rate": 2.3307130002268457e-05, + "loss": 0.937, + "step": 2830 + }, + { + "epoch": 1.57, + "learning_rate": 2.3234903107994287e-05, + "loss": 0.9026, + "step": 2835 + }, + { + "epoch": 1.57, + "learning_rate": 2.3162691017364317e-05, + "loss": 0.954, + "step": 2840 + }, + { + "epoch": 1.57, + "learning_rate": 2.3090494336012253e-05, + "loss": 0.9661, + "step": 2845 + }, + { + "epoch": 1.58, + "learning_rate": 2.3018313669442483e-05, + "loss": 0.9127, + "step": 2850 + }, + { + "epoch": 1.58, + "learning_rate": 2.2946149623025158e-05, + "loss": 0.9317, + "step": 2855 + }, + { + "epoch": 1.58, + "learning_rate": 2.2874002801990978e-05, + "loss": 0.9856, + "step": 2860 + }, + { + "epoch": 1.58, + "learning_rate": 2.28018738114262e-05, + "loss": 1.0021, + "step": 2865 + }, + { + "epoch": 1.59, + "learning_rate": 2.272976325626755e-05, + "loss": 0.9655, + "step": 2870 + }, + { + "epoch": 1.59, + "learning_rate": 2.265767174129711e-05, + "loss": 0.9619, + "step": 2875 + }, + { + "epoch": 1.59, + "learning_rate": 2.2585599871137313e-05, + "loss": 0.9383, + "step": 2880 + }, + { + "epoch": 1.6, + "learning_rate": 2.251354825024579e-05, + "loss": 0.9332, + "step": 2885 + }, + { + "epoch": 1.6, + "learning_rate": 2.244151748291039e-05, + "loss": 0.9544, + "step": 2890 + }, + { + "epoch": 1.6, + "learning_rate": 2.236950817324401e-05, + "loss": 0.9343, + "step": 2895 + }, + { + "epoch": 1.6, + "learning_rate": 2.2297520925179647e-05, + "loss": 0.9189, + "step": 2900 + }, + { + "epoch": 1.61, + "learning_rate": 2.222555634246521e-05, + "loss": 0.9341, + "step": 2905 + }, + { + "epoch": 1.61, + "learning_rate": 2.215361502865858e-05, + "loss": 0.9567, + "step": 2910 + }, + { + "epoch": 1.61, + "learning_rate": 2.2081697587122423e-05, + "loss": 0.9047, + "step": 2915 + }, + { + "epoch": 1.61, + "learning_rate": 2.200980462101922e-05, + "loss": 0.9126, + "step": 2920 + }, + { + "epoch": 1.62, + "learning_rate": 2.1937936733306195e-05, + "loss": 0.9523, + "step": 2925 + }, + { + "epoch": 1.62, + "learning_rate": 2.18660945267302e-05, + "loss": 0.8802, + "step": 2930 + }, + { + "epoch": 1.62, + "learning_rate": 2.179427860382276e-05, + "loss": 0.9197, + "step": 2935 + }, + { + "epoch": 1.63, + "learning_rate": 2.1722489566894903e-05, + "loss": 0.9255, + "step": 2940 + }, + { + "epoch": 1.63, + "learning_rate": 2.1650728018032206e-05, + "loss": 0.8921, + "step": 2945 + }, + { + "epoch": 1.63, + "learning_rate": 2.15789945590897e-05, + "loss": 0.9607, + "step": 2950 + }, + { + "epoch": 1.63, + "learning_rate": 2.150728979168683e-05, + "loss": 0.9755, + "step": 2955 + }, + { + "epoch": 1.64, + "learning_rate": 2.1435614317202384e-05, + "loss": 0.9943, + "step": 2960 + }, + { + "epoch": 1.64, + "learning_rate": 2.1363968736769508e-05, + "loss": 0.9463, + "step": 2965 + }, + { + "epoch": 1.64, + "learning_rate": 2.1292353651270617e-05, + "loss": 0.9107, + "step": 2970 + }, + { + "epoch": 1.65, + "learning_rate": 2.1220769661332365e-05, + "loss": 0.9311, + "step": 2975 + }, + { + "epoch": 1.65, + "learning_rate": 2.1149217367320622e-05, + "loss": 0.9459, + "step": 2980 + }, + { + "epoch": 1.65, + "learning_rate": 2.107769736933541e-05, + "loss": 0.9439, + "step": 2985 + }, + { + "epoch": 1.65, + "learning_rate": 2.100621026720591e-05, + "loss": 0.9719, + "step": 2990 + }, + { + "epoch": 1.66, + "learning_rate": 2.093475666048539e-05, + "loss": 0.9569, + "step": 2995 + }, + { + "epoch": 1.66, + "learning_rate": 2.0863337148446222e-05, + "loss": 0.9308, + "step": 3000 + }, + { + "epoch": 1.66, + "learning_rate": 2.07919523300748e-05, + "loss": 0.9269, + "step": 3005 + }, + { + "epoch": 1.66, + "learning_rate": 2.0720602804066552e-05, + "loss": 0.9358, + "step": 3010 + }, + { + "epoch": 1.67, + "learning_rate": 2.0649289168820943e-05, + "loss": 0.9291, + "step": 3015 + }, + { + "epoch": 1.67, + "learning_rate": 2.0578012022436386e-05, + "loss": 0.969, + "step": 3020 + }, + { + "epoch": 1.67, + "learning_rate": 2.0506771962705304e-05, + "loss": 0.9319, + "step": 3025 + }, + { + "epoch": 1.68, + "learning_rate": 2.0435569587109042e-05, + "loss": 0.9574, + "step": 3030 + }, + { + "epoch": 1.68, + "learning_rate": 2.036440549281293e-05, + "loss": 0.9654, + "step": 3035 + }, + { + "epoch": 1.68, + "learning_rate": 2.0293280276661204e-05, + "loss": 0.9742, + "step": 3040 + }, + { + "epoch": 1.68, + "learning_rate": 2.0222194535172067e-05, + "loss": 0.9014, + "step": 3045 + }, + { + "epoch": 1.69, + "learning_rate": 2.0151148864532623e-05, + "loss": 0.9246, + "step": 3050 + }, + { + "epoch": 1.69, + "learning_rate": 2.0080143860593913e-05, + "loss": 0.9522, + "step": 3055 + }, + { + "epoch": 1.69, + "learning_rate": 2.0009180118865933e-05, + "loss": 0.9567, + "step": 3060 + }, + { + "epoch": 1.69, + "learning_rate": 1.9938258234512588e-05, + "loss": 0.983, + "step": 3065 + }, + { + "epoch": 1.7, + "learning_rate": 1.9867378802346764e-05, + "loss": 0.8722, + "step": 3070 + }, + { + "epoch": 1.7, + "learning_rate": 1.979654241682527e-05, + "loss": 0.9122, + "step": 3075 + }, + { + "epoch": 1.7, + "learning_rate": 1.972574967204391e-05, + "loss": 0.9362, + "step": 3080 + }, + { + "epoch": 1.71, + "learning_rate": 1.9655001161732478e-05, + "loss": 0.8944, + "step": 3085 + }, + { + "epoch": 1.71, + "learning_rate": 1.9584297479249774e-05, + "loss": 0.9329, + "step": 3090 + }, + { + "epoch": 1.71, + "learning_rate": 1.9513639217578636e-05, + "loss": 0.9707, + "step": 3095 + }, + { + "epoch": 1.71, + "learning_rate": 1.9443026969320955e-05, + "loss": 0.9367, + "step": 3100 + }, + { + "epoch": 1.72, + "learning_rate": 1.937246132669272e-05, + "loss": 0.896, + "step": 3105 + }, + { + "epoch": 1.72, + "learning_rate": 1.9301942881519047e-05, + "loss": 0.97, + "step": 3110 + }, + { + "epoch": 1.72, + "learning_rate": 1.9231472225229216e-05, + "loss": 0.9638, + "step": 3115 + }, + { + "epoch": 1.73, + "learning_rate": 1.9161049948851684e-05, + "loss": 0.9561, + "step": 3120 + }, + { + "epoch": 1.73, + "learning_rate": 1.9090676643009193e-05, + "loss": 0.9734, + "step": 3125 + }, + { + "epoch": 1.73, + "learning_rate": 1.902035289791373e-05, + "loss": 0.9651, + "step": 3130 + }, + { + "epoch": 1.73, + "learning_rate": 1.8950079303361658e-05, + "loss": 0.9489, + "step": 3135 + }, + { + "epoch": 1.74, + "learning_rate": 1.8879856448728723e-05, + "loss": 0.9893, + "step": 3140 + }, + { + "epoch": 1.74, + "learning_rate": 1.8809684922965097e-05, + "loss": 0.9549, + "step": 3145 + }, + { + "epoch": 1.74, + "learning_rate": 1.8739565314590507e-05, + "loss": 0.9196, + "step": 3150 + }, + { + "epoch": 1.74, + "learning_rate": 1.8669498211689216e-05, + "loss": 0.9568, + "step": 3155 + }, + { + "epoch": 1.75, + "learning_rate": 1.859948420190517e-05, + "loss": 0.904, + "step": 3160 + }, + { + "epoch": 1.75, + "learning_rate": 1.852952387243698e-05, + "loss": 0.9375, + "step": 3165 + }, + { + "epoch": 1.75, + "learning_rate": 1.8459617810033096e-05, + "loss": 0.9048, + "step": 3170 + }, + { + "epoch": 1.76, + "learning_rate": 1.83897666009868e-05, + "loss": 0.9514, + "step": 3175 + }, + { + "epoch": 1.76, + "learning_rate": 1.8319970831131363e-05, + "loss": 0.8855, + "step": 3180 + }, + { + "epoch": 1.76, + "learning_rate": 1.825023108583505e-05, + "loss": 0.9119, + "step": 3185 + }, + { + "epoch": 1.76, + "learning_rate": 1.818054794999628e-05, + "loss": 0.9733, + "step": 3190 + }, + { + "epoch": 1.77, + "learning_rate": 1.8110922008038705e-05, + "loss": 0.9597, + "step": 3195 + }, + { + "epoch": 1.77, + "learning_rate": 1.8041353843906275e-05, + "loss": 0.8952, + "step": 3200 + }, + { + "epoch": 1.77, + "learning_rate": 1.797184404105839e-05, + "loss": 0.8934, + "step": 3205 + }, + { + "epoch": 1.77, + "learning_rate": 1.7902393182464955e-05, + "loss": 0.9299, + "step": 3210 + }, + { + "epoch": 1.78, + "learning_rate": 1.7833001850601544e-05, + "loss": 0.9247, + "step": 3215 + }, + { + "epoch": 1.78, + "learning_rate": 1.7763670627444465e-05, + "loss": 0.9672, + "step": 3220 + }, + { + "epoch": 1.78, + "learning_rate": 1.7694400094465913e-05, + "loss": 0.9451, + "step": 3225 + }, + { + "epoch": 1.79, + "learning_rate": 1.7625190832629085e-05, + "loss": 0.9294, + "step": 3230 + }, + { + "epoch": 1.79, + "learning_rate": 1.7556043422383293e-05, + "loss": 0.9145, + "step": 3235 + }, + { + "epoch": 1.79, + "learning_rate": 1.7486958443659112e-05, + "loss": 0.9508, + "step": 3240 + }, + { + "epoch": 1.79, + "learning_rate": 1.7417936475863526e-05, + "loss": 0.8725, + "step": 3245 + }, + { + "epoch": 1.8, + "learning_rate": 1.7348978097875036e-05, + "loss": 0.9195, + "step": 3250 + }, + { + "epoch": 1.8, + "learning_rate": 1.728008388803883e-05, + "loss": 0.933, + "step": 3255 + }, + { + "epoch": 1.8, + "learning_rate": 1.7211254424161933e-05, + "loss": 0.9747, + "step": 3260 + }, + { + "epoch": 1.81, + "learning_rate": 1.7142490283508324e-05, + "loss": 0.9168, + "step": 3265 + }, + { + "epoch": 1.81, + "learning_rate": 1.707379204279418e-05, + "loss": 0.9844, + "step": 3270 + }, + { + "epoch": 1.81, + "learning_rate": 1.700516027818293e-05, + "loss": 0.9071, + "step": 3275 + }, + { + "epoch": 1.81, + "learning_rate": 1.6936595565280488e-05, + "loss": 0.9311, + "step": 3280 + }, + { + "epoch": 1.82, + "learning_rate": 1.686809847913045e-05, + "loss": 0.9419, + "step": 3285 + }, + { + "epoch": 1.82, + "learning_rate": 1.679966959420918e-05, + "loss": 0.9679, + "step": 3290 + }, + { + "epoch": 1.82, + "learning_rate": 1.67313094844211e-05, + "loss": 0.9601, + "step": 3295 + }, + { + "epoch": 1.82, + "learning_rate": 1.6663018723093774e-05, + "loss": 0.9022, + "step": 3300 + } + ], + "max_steps": 5424, + "num_train_epochs": 3, + "total_flos": 1.0723817184401818e+18, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-3300/training_args.bin b/checkpoint-3300/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..77433459daca6603950afc463ff989e817f81ac3 --- /dev/null +++ b/checkpoint-3300/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0f6a8853599a52b1f15984d75ce4b6434c335fdc67d4509d0b2ca0d8c613590a +size 3362 diff --git a/checkpoint-3400/README.md b/checkpoint-3400/README.md new file mode 100644 index 0000000000000000000000000000000000000000..53c0fe6b436905ad7a2c060e98fe1ebdeac1a2fe --- /dev/null +++ b/checkpoint-3400/README.md @@ -0,0 +1,15 @@ +--- +library_name: peft +--- +## Training procedure + + +The following `bitsandbytes` quantization config was used during training: +- load_in_8bit: True +- llm_int8_threshold: 6.0 +- llm_int8_skip_modules: None +- llm_int8_enable_fp32_cpu_offload: False +### Framework versions + + +- PEFT 0.5.0 diff --git a/checkpoint-3400/adapter_config.json b/checkpoint-3400/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..514ae99ac7471cc26016f7575e44dd8304f29b07 --- /dev/null +++ b/checkpoint-3400/adapter_config.json @@ -0,0 +1,21 @@ +{ + "auto_mapping": null, + "base_model_name_or_path": "meta-llama/Llama-2-7b-chat-hf", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "q_proj", + "v_proj" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-3400/adapter_model.bin b/checkpoint-3400/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..2022e62e28a6cec5b57d1503a3ce072d8d565949 --- /dev/null +++ b/checkpoint-3400/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:248bc066fc63c703356449bed541740ed23c8534df9aef885f5eddb66745a6ed +size 16821197 diff --git a/checkpoint-3400/finetuning_args.json b/checkpoint-3400/finetuning_args.json new file mode 100644 index 0000000000000000000000000000000000000000..d01efc206b59c6f88548e8f3940579f2ed2af33b --- /dev/null +++ b/checkpoint-3400/finetuning_args.json @@ -0,0 +1,16 @@ +{ + "dpo_beta": 0.1, + "finetuning_type": "lora", + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "lora_rank": 8, + "lora_target": [ + "q_proj", + "v_proj" + ], + "name_module_trainable": "mlp", + "num_hidden_layers": 32, + "num_layer_trainable": 3, + "ppo_score_norm": false, + "resume_lora_training": true +} diff --git a/checkpoint-3400/optimizer.pt b/checkpoint-3400/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..201ac02a6a73503d4025fbfe93b85fea56e86db2 --- /dev/null +++ b/checkpoint-3400/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b0504b2a2ffbf0a0ae03b957e51eb9b3ba871fefc739a885769c59ade4203a58 +size 33629893 diff --git a/checkpoint-3400/rng_state.pth b/checkpoint-3400/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..71271dbedacaf341d2460a45af665959aa5075cf --- /dev/null +++ b/checkpoint-3400/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a5b76b884dc5c76f6da6aaf50efb1cc033cfbd961c45163c4666a715aa78b806 +size 14575 diff --git a/checkpoint-3400/scaler.pt b/checkpoint-3400/scaler.pt new file mode 100644 index 0000000000000000000000000000000000000000..5e29bd9599a15fbd86e48c52dbf31693e3b988bc --- /dev/null +++ b/checkpoint-3400/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b41bbd89d480659208f2c65b95d079219da2d55ffe29a6a534f56f724e89d1a2 +size 557 diff --git a/checkpoint-3400/scheduler.pt b/checkpoint-3400/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..f884e22ca549c081973a057be18a68d70768bead --- /dev/null +++ b/checkpoint-3400/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:092acc458117d2766344122c86b62db4d2ea19e96556517459f0620211e5f8ed +size 627 diff --git a/checkpoint-3400/trainer_state.json b/checkpoint-3400/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..94e819a9ce969253590f6800bfb944a4265c7cb5 --- /dev/null +++ b/checkpoint-3400/trainer_state.json @@ -0,0 +1,4096 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.8800110588885817, + "global_step": 3400, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.9999895164082156e-05, + "loss": 1.6349, + "step": 5 + }, + { + "epoch": 0.01, + "learning_rate": 4.999958065720787e-05, + "loss": 1.6199, + "step": 10 + }, + { + "epoch": 0.01, + "learning_rate": 4.999905648201487e-05, + "loss": 1.4834, + "step": 15 + }, + { + "epoch": 0.01, + "learning_rate": 4.999832264289934e-05, + "loss": 1.3882, + "step": 20 + }, + { + "epoch": 0.01, + "learning_rate": 4.999737914601591e-05, + "loss": 1.3679, + "step": 25 + }, + { + "epoch": 0.02, + "learning_rate": 4.999622599927756e-05, + "loss": 1.2396, + "step": 30 + }, + { + "epoch": 0.02, + "learning_rate": 4.999486321235559e-05, + "loss": 1.321, + "step": 35 + }, + { + "epoch": 0.02, + "learning_rate": 4.9993290796679516e-05, + "loss": 1.2874, + "step": 40 + }, + { + "epoch": 0.02, + "learning_rate": 4.999150876543699e-05, + "loss": 1.2607, + "step": 45 + }, + { + "epoch": 0.03, + "learning_rate": 4.9989517133573694e-05, + "loss": 1.2454, + "step": 50 + }, + { + "epoch": 0.03, + "learning_rate": 4.9987315917793174e-05, + "loss": 1.2799, + "step": 55 + }, + { + "epoch": 0.03, + "learning_rate": 4.998490513655676e-05, + "loss": 1.2575, + "step": 60 + }, + { + "epoch": 0.04, + "learning_rate": 4.998228481008337e-05, + "loss": 1.2404, + "step": 65 + }, + { + "epoch": 0.04, + "learning_rate": 4.997945496034934e-05, + "loss": 1.2219, + "step": 70 + }, + { + "epoch": 0.04, + "learning_rate": 4.9976415611088267e-05, + "loss": 1.2241, + "step": 75 + }, + { + "epoch": 0.04, + "learning_rate": 4.997316678779079e-05, + "loss": 1.1716, + "step": 80 + }, + { + "epoch": 0.05, + "learning_rate": 4.996970851770438e-05, + "loss": 1.1883, + "step": 85 + }, + { + "epoch": 0.05, + "learning_rate": 4.9966040829833115e-05, + "loss": 1.205, + "step": 90 + }, + { + "epoch": 0.05, + "learning_rate": 4.9962163754937426e-05, + "loss": 1.1246, + "step": 95 + }, + { + "epoch": 0.06, + "learning_rate": 4.995807732553384e-05, + "loss": 1.1636, + "step": 100 + }, + { + "epoch": 0.06, + "learning_rate": 4.9953781575894723e-05, + "loss": 1.158, + "step": 105 + }, + { + "epoch": 0.06, + "learning_rate": 4.9949276542048e-05, + "loss": 1.1477, + "step": 110 + }, + { + "epoch": 0.06, + "learning_rate": 4.9944562261776805e-05, + "loss": 1.1678, + "step": 115 + }, + { + "epoch": 0.07, + "learning_rate": 4.9939638774619216e-05, + "loss": 1.1501, + "step": 120 + }, + { + "epoch": 0.07, + "learning_rate": 4.99345061218679e-05, + "loss": 1.1955, + "step": 125 + }, + { + "epoch": 0.07, + "learning_rate": 4.9929164346569756e-05, + "loss": 1.1724, + "step": 130 + }, + { + "epoch": 0.07, + "learning_rate": 4.9923613493525576e-05, + "loss": 1.177, + "step": 135 + }, + { + "epoch": 0.08, + "learning_rate": 4.991785360928968e-05, + "loss": 1.1418, + "step": 140 + }, + { + "epoch": 0.08, + "learning_rate": 4.991188474216947e-05, + "loss": 1.1898, + "step": 145 + }, + { + "epoch": 0.08, + "learning_rate": 4.9905706942225094e-05, + "loss": 1.1479, + "step": 150 + }, + { + "epoch": 0.09, + "learning_rate": 4.9899320261268966e-05, + "loss": 1.1356, + "step": 155 + }, + { + "epoch": 0.09, + "learning_rate": 4.989272475286537e-05, + "loss": 1.1397, + "step": 160 + }, + { + "epoch": 0.09, + "learning_rate": 4.9885920472330004e-05, + "loss": 1.1215, + "step": 165 + }, + { + "epoch": 0.09, + "learning_rate": 4.9878907476729516e-05, + "loss": 1.167, + "step": 170 + }, + { + "epoch": 0.1, + "learning_rate": 4.9871685824881e-05, + "loss": 1.1219, + "step": 175 + }, + { + "epoch": 0.1, + "learning_rate": 4.9864255577351534e-05, + "loss": 1.0835, + "step": 180 + }, + { + "epoch": 0.1, + "learning_rate": 4.985661679645769e-05, + "loss": 1.0721, + "step": 185 + }, + { + "epoch": 0.11, + "learning_rate": 4.9848769546264915e-05, + "loss": 1.0692, + "step": 190 + }, + { + "epoch": 0.11, + "learning_rate": 4.9840713892587146e-05, + "loss": 1.0488, + "step": 195 + }, + { + "epoch": 0.11, + "learning_rate": 4.983244990298609e-05, + "loss": 1.1285, + "step": 200 + }, + { + "epoch": 0.11, + "learning_rate": 4.982397764677081e-05, + "loss": 1.0832, + "step": 205 + }, + { + "epoch": 0.12, + "learning_rate": 4.981529719499704e-05, + "loss": 1.0652, + "step": 210 + }, + { + "epoch": 0.12, + "learning_rate": 4.980640862046663e-05, + "loss": 1.1043, + "step": 215 + }, + { + "epoch": 0.12, + "learning_rate": 4.979731199772693e-05, + "loss": 1.112, + "step": 220 + }, + { + "epoch": 0.12, + "learning_rate": 4.9788007403070146e-05, + "loss": 1.1029, + "step": 225 + }, + { + "epoch": 0.13, + "learning_rate": 4.977849491453277e-05, + "loss": 1.0869, + "step": 230 + }, + { + "epoch": 0.13, + "learning_rate": 4.976877461189481e-05, + "loss": 1.0843, + "step": 235 + }, + { + "epoch": 0.13, + "learning_rate": 4.975884657667922e-05, + "loss": 1.0789, + "step": 240 + }, + { + "epoch": 0.14, + "learning_rate": 4.974871089215118e-05, + "loss": 1.0449, + "step": 245 + }, + { + "epoch": 0.14, + "learning_rate": 4.9738367643317405e-05, + "loss": 1.1053, + "step": 250 + }, + { + "epoch": 0.14, + "learning_rate": 4.9727816916925395e-05, + "loss": 1.0651, + "step": 255 + }, + { + "epoch": 0.14, + "learning_rate": 4.971705880146276e-05, + "loss": 1.0828, + "step": 260 + }, + { + "epoch": 0.15, + "learning_rate": 4.970609338715646e-05, + "loss": 1.0932, + "step": 265 + }, + { + "epoch": 0.15, + "learning_rate": 4.969492076597203e-05, + "loss": 1.0648, + "step": 270 + }, + { + "epoch": 0.15, + "learning_rate": 4.968354103161283e-05, + "loss": 1.0948, + "step": 275 + }, + { + "epoch": 0.15, + "learning_rate": 4.967195427951926e-05, + "loss": 1.0721, + "step": 280 + }, + { + "epoch": 0.16, + "learning_rate": 4.9660160606867936e-05, + "loss": 1.124, + "step": 285 + }, + { + "epoch": 0.16, + "learning_rate": 4.9648160112570896e-05, + "loss": 1.0963, + "step": 290 + }, + { + "epoch": 0.16, + "learning_rate": 4.9635952897274773e-05, + "loss": 1.1078, + "step": 295 + }, + { + "epoch": 0.17, + "learning_rate": 4.9623539063359925e-05, + "loss": 1.1059, + "step": 300 + }, + { + "epoch": 0.17, + "learning_rate": 4.961091871493962e-05, + "loss": 1.1032, + "step": 305 + }, + { + "epoch": 0.17, + "learning_rate": 4.959809195785912e-05, + "loss": 1.0595, + "step": 310 + }, + { + "epoch": 0.17, + "learning_rate": 4.958505889969481e-05, + "loss": 1.1096, + "step": 315 + }, + { + "epoch": 0.18, + "learning_rate": 4.957181964975329e-05, + "loss": 1.0589, + "step": 320 + }, + { + "epoch": 0.18, + "learning_rate": 4.955837431907049e-05, + "loss": 1.0608, + "step": 325 + }, + { + "epoch": 0.18, + "learning_rate": 4.954472302041069e-05, + "loss": 1.0819, + "step": 330 + }, + { + "epoch": 0.19, + "learning_rate": 4.9530865868265605e-05, + "loss": 1.0759, + "step": 335 + }, + { + "epoch": 0.19, + "learning_rate": 4.951680297885342e-05, + "loss": 1.0515, + "step": 340 + }, + { + "epoch": 0.19, + "learning_rate": 4.950253447011779e-05, + "loss": 1.0371, + "step": 345 + }, + { + "epoch": 0.19, + "learning_rate": 4.948806046172691e-05, + "loss": 1.0619, + "step": 350 + }, + { + "epoch": 0.2, + "learning_rate": 4.947338107507245e-05, + "loss": 1.0757, + "step": 355 + }, + { + "epoch": 0.2, + "learning_rate": 4.945849643326857e-05, + "loss": 1.0686, + "step": 360 + }, + { + "epoch": 0.2, + "learning_rate": 4.9443406661150874e-05, + "loss": 1.0809, + "step": 365 + }, + { + "epoch": 0.2, + "learning_rate": 4.942811188527537e-05, + "loss": 1.0704, + "step": 370 + }, + { + "epoch": 0.21, + "learning_rate": 4.941261223391742e-05, + "loss": 1.0655, + "step": 375 + }, + { + "epoch": 0.21, + "learning_rate": 4.939690783707063e-05, + "loss": 1.1182, + "step": 380 + }, + { + "epoch": 0.21, + "learning_rate": 4.938099882644578e-05, + "loss": 1.081, + "step": 385 + }, + { + "epoch": 0.22, + "learning_rate": 4.9364885335469734e-05, + "loss": 1.0792, + "step": 390 + }, + { + "epoch": 0.22, + "learning_rate": 4.93485674992843e-05, + "loss": 1.0244, + "step": 395 + }, + { + "epoch": 0.22, + "learning_rate": 4.933204545474511e-05, + "loss": 1.0531, + "step": 400 + }, + { + "epoch": 0.22, + "learning_rate": 4.9315319340420465e-05, + "loss": 1.048, + "step": 405 + }, + { + "epoch": 0.23, + "learning_rate": 4.929838929659015e-05, + "loss": 1.04, + "step": 410 + }, + { + "epoch": 0.23, + "learning_rate": 4.9281255465244314e-05, + "loss": 1.0686, + "step": 415 + }, + { + "epoch": 0.23, + "learning_rate": 4.926391799008223e-05, + "loss": 1.0933, + "step": 420 + }, + { + "epoch": 0.24, + "learning_rate": 4.924637701651111e-05, + "loss": 1.0313, + "step": 425 + }, + { + "epoch": 0.24, + "learning_rate": 4.9228632691644874e-05, + "loss": 1.0811, + "step": 430 + }, + { + "epoch": 0.24, + "learning_rate": 4.921068516430293e-05, + "loss": 1.0617, + "step": 435 + }, + { + "epoch": 0.24, + "learning_rate": 4.919253458500892e-05, + "loss": 1.0482, + "step": 440 + }, + { + "epoch": 0.25, + "learning_rate": 4.9174181105989445e-05, + "loss": 1.0681, + "step": 445 + }, + { + "epoch": 0.25, + "learning_rate": 4.9155624881172834e-05, + "loss": 1.0476, + "step": 450 + }, + { + "epoch": 0.25, + "learning_rate": 4.913686606618777e-05, + "loss": 1.0463, + "step": 455 + }, + { + "epoch": 0.25, + "learning_rate": 4.911790481836208e-05, + "loss": 1.0213, + "step": 460 + }, + { + "epoch": 0.26, + "learning_rate": 4.909874129672133e-05, + "loss": 0.9855, + "step": 465 + }, + { + "epoch": 0.26, + "learning_rate": 4.907937566198757e-05, + "loss": 1.045, + "step": 470 + }, + { + "epoch": 0.26, + "learning_rate": 4.9059808076577914e-05, + "loss": 1.0442, + "step": 475 + }, + { + "epoch": 0.27, + "learning_rate": 4.904003870460323e-05, + "loss": 1.0354, + "step": 480 + }, + { + "epoch": 0.27, + "learning_rate": 4.9020067711866735e-05, + "loss": 1.0331, + "step": 485 + }, + { + "epoch": 0.27, + "learning_rate": 4.899989526586261e-05, + "loss": 1.0585, + "step": 490 + }, + { + "epoch": 0.27, + "learning_rate": 4.8979521535774636e-05, + "loss": 1.0223, + "step": 495 + }, + { + "epoch": 0.28, + "learning_rate": 4.895894669247468e-05, + "loss": 1.0118, + "step": 500 + }, + { + "epoch": 0.28, + "learning_rate": 4.8938170908521356e-05, + "loss": 1.0508, + "step": 505 + }, + { + "epoch": 0.28, + "learning_rate": 4.8917194358158534e-05, + "loss": 1.0656, + "step": 510 + }, + { + "epoch": 0.28, + "learning_rate": 4.8896017217313886e-05, + "loss": 1.0655, + "step": 515 + }, + { + "epoch": 0.29, + "learning_rate": 4.887463966359741e-05, + "loss": 1.0833, + "step": 520 + }, + { + "epoch": 0.29, + "learning_rate": 4.8853061876299956e-05, + "loss": 1.068, + "step": 525 + }, + { + "epoch": 0.29, + "learning_rate": 4.8831284036391684e-05, + "loss": 1.0487, + "step": 530 + }, + { + "epoch": 0.3, + "learning_rate": 4.880930632652058e-05, + "loss": 1.059, + "step": 535 + }, + { + "epoch": 0.3, + "learning_rate": 4.878712893101092e-05, + "loss": 1.0408, + "step": 540 + }, + { + "epoch": 0.3, + "learning_rate": 4.876475203586171e-05, + "loss": 0.9976, + "step": 545 + }, + { + "epoch": 0.3, + "learning_rate": 4.874217582874514e-05, + "loss": 1.0358, + "step": 550 + }, + { + "epoch": 0.31, + "learning_rate": 4.8719400499005e-05, + "loss": 1.0073, + "step": 555 + }, + { + "epoch": 0.31, + "learning_rate": 4.869642623765509e-05, + "loss": 1.0742, + "step": 560 + }, + { + "epoch": 0.31, + "learning_rate": 4.867325323737765e-05, + "loss": 1.0463, + "step": 565 + }, + { + "epoch": 0.32, + "learning_rate": 4.864988169252168e-05, + "loss": 1.0242, + "step": 570 + }, + { + "epoch": 0.32, + "learning_rate": 4.8626311799101375e-05, + "loss": 1.0773, + "step": 575 + }, + { + "epoch": 0.32, + "learning_rate": 4.860254375479446e-05, + "loss": 1.0371, + "step": 580 + }, + { + "epoch": 0.32, + "learning_rate": 4.8578577758940504e-05, + "loss": 1.0399, + "step": 585 + }, + { + "epoch": 0.33, + "learning_rate": 4.855441401253928e-05, + "loss": 1.0411, + "step": 590 + }, + { + "epoch": 0.33, + "learning_rate": 4.8530052718249076e-05, + "loss": 1.0476, + "step": 595 + }, + { + "epoch": 0.33, + "learning_rate": 4.850549408038498e-05, + "loss": 1.0075, + "step": 600 + }, + { + "epoch": 0.33, + "learning_rate": 4.848073830491717e-05, + "loss": 1.0045, + "step": 605 + }, + { + "epoch": 0.34, + "learning_rate": 4.845578559946923e-05, + "loss": 1.0368, + "step": 610 + }, + { + "epoch": 0.34, + "learning_rate": 4.8430636173316306e-05, + "loss": 1.0156, + "step": 615 + }, + { + "epoch": 0.34, + "learning_rate": 4.840529023738348e-05, + "loss": 1.0334, + "step": 620 + }, + { + "epoch": 0.35, + "learning_rate": 4.837974800424389e-05, + "loss": 0.9994, + "step": 625 + }, + { + "epoch": 0.35, + "learning_rate": 4.8354009688117026e-05, + "loss": 1.0467, + "step": 630 + }, + { + "epoch": 0.35, + "learning_rate": 4.8328075504866874e-05, + "loss": 0.9968, + "step": 635 + }, + { + "epoch": 0.35, + "learning_rate": 4.8301945672000164e-05, + "loss": 1.041, + "step": 640 + }, + { + "epoch": 0.36, + "learning_rate": 4.8275620408664487e-05, + "loss": 0.9811, + "step": 645 + }, + { + "epoch": 0.36, + "learning_rate": 4.8249099935646494e-05, + "loss": 1.0235, + "step": 650 + }, + { + "epoch": 0.36, + "learning_rate": 4.822238447537003e-05, + "loss": 1.0152, + "step": 655 + }, + { + "epoch": 0.36, + "learning_rate": 4.819547425189429e-05, + "loss": 1.038, + "step": 660 + }, + { + "epoch": 0.37, + "learning_rate": 4.81683694909119e-05, + "loss": 1.0387, + "step": 665 + }, + { + "epoch": 0.37, + "learning_rate": 4.814107041974707e-05, + "loss": 1.019, + "step": 670 + }, + { + "epoch": 0.37, + "learning_rate": 4.811357726735366e-05, + "loss": 1.0403, + "step": 675 + }, + { + "epoch": 0.38, + "learning_rate": 4.808589026431324e-05, + "loss": 1.0828, + "step": 680 + }, + { + "epoch": 0.38, + "learning_rate": 4.805800964283322e-05, + "loss": 1.0128, + "step": 685 + }, + { + "epoch": 0.38, + "learning_rate": 4.802993563674483e-05, + "loss": 1.0502, + "step": 690 + }, + { + "epoch": 0.38, + "learning_rate": 4.80016684815012e-05, + "loss": 1.0169, + "step": 695 + }, + { + "epoch": 0.39, + "learning_rate": 4.7973208414175406e-05, + "loss": 1.0316, + "step": 700 + }, + { + "epoch": 0.39, + "learning_rate": 4.794455567345842e-05, + "loss": 1.0263, + "step": 705 + }, + { + "epoch": 0.39, + "learning_rate": 4.791571049965714e-05, + "loss": 1.0801, + "step": 710 + }, + { + "epoch": 0.4, + "learning_rate": 4.7886673134692404e-05, + "loss": 1.0575, + "step": 715 + }, + { + "epoch": 0.4, + "learning_rate": 4.7857443822096905e-05, + "loss": 0.9767, + "step": 720 + }, + { + "epoch": 0.4, + "learning_rate": 4.782802280701319e-05, + "loss": 0.9666, + "step": 725 + }, + { + "epoch": 0.4, + "learning_rate": 4.77984103361916e-05, + "loss": 0.9983, + "step": 730 + }, + { + "epoch": 0.41, + "learning_rate": 4.776860665798816e-05, + "loss": 1.0219, + "step": 735 + }, + { + "epoch": 0.41, + "learning_rate": 4.773861202236257e-05, + "loss": 0.9963, + "step": 740 + }, + { + "epoch": 0.41, + "learning_rate": 4.770842668087602e-05, + "loss": 1.0219, + "step": 745 + }, + { + "epoch": 0.41, + "learning_rate": 4.767805088668916e-05, + "loss": 1.0029, + "step": 750 + }, + { + "epoch": 0.42, + "learning_rate": 4.7647484894559936e-05, + "loss": 1.0077, + "step": 755 + }, + { + "epoch": 0.42, + "learning_rate": 4.7616728960841444e-05, + "loss": 0.9908, + "step": 760 + }, + { + "epoch": 0.42, + "learning_rate": 4.758578334347981e-05, + "loss": 1.0092, + "step": 765 + }, + { + "epoch": 0.43, + "learning_rate": 4.7554648302012015e-05, + "loss": 0.9874, + "step": 770 + }, + { + "epoch": 0.43, + "learning_rate": 4.7523324097563706e-05, + "loss": 1.0185, + "step": 775 + }, + { + "epoch": 0.43, + "learning_rate": 4.749181099284703e-05, + "loss": 1.0533, + "step": 780 + }, + { + "epoch": 0.43, + "learning_rate": 4.746010925215839e-05, + "loss": 1.0083, + "step": 785 + }, + { + "epoch": 0.44, + "learning_rate": 4.74282191413763e-05, + "loss": 1.0191, + "step": 790 + }, + { + "epoch": 0.44, + "learning_rate": 4.7396140927959045e-05, + "loss": 0.9786, + "step": 795 + }, + { + "epoch": 0.44, + "learning_rate": 4.7363874880942574e-05, + "loss": 1.0197, + "step": 800 + }, + { + "epoch": 0.45, + "learning_rate": 4.733142127093813e-05, + "loss": 1.0471, + "step": 805 + }, + { + "epoch": 0.45, + "learning_rate": 4.7298780370130014e-05, + "loss": 1.0134, + "step": 810 + }, + { + "epoch": 0.45, + "learning_rate": 4.726595245227336e-05, + "loss": 0.9801, + "step": 815 + }, + { + "epoch": 0.45, + "learning_rate": 4.723293779269173e-05, + "loss": 0.9944, + "step": 820 + }, + { + "epoch": 0.46, + "learning_rate": 4.7199736668274924e-05, + "loss": 1.0358, + "step": 825 + }, + { + "epoch": 0.46, + "learning_rate": 4.716634935747655e-05, + "loss": 0.9907, + "step": 830 + }, + { + "epoch": 0.46, + "learning_rate": 4.713277614031177e-05, + "loss": 1.0698, + "step": 835 + }, + { + "epoch": 0.46, + "learning_rate": 4.70990172983549e-05, + "loss": 1.0466, + "step": 840 + }, + { + "epoch": 0.47, + "learning_rate": 4.706507311473707e-05, + "loss": 0.9489, + "step": 845 + }, + { + "epoch": 0.47, + "learning_rate": 4.703094387414385e-05, + "loss": 0.9936, + "step": 850 + }, + { + "epoch": 0.47, + "learning_rate": 4.699662986281288e-05, + "loss": 0.9672, + "step": 855 + }, + { + "epoch": 0.48, + "learning_rate": 4.696213136853141e-05, + "loss": 1.0164, + "step": 860 + }, + { + "epoch": 0.48, + "learning_rate": 4.6927448680633954e-05, + "loss": 0.9817, + "step": 865 + }, + { + "epoch": 0.48, + "learning_rate": 4.689258208999983e-05, + "loss": 1.028, + "step": 870 + }, + { + "epoch": 0.48, + "learning_rate": 4.6857531889050716e-05, + "loss": 1.0239, + "step": 875 + }, + { + "epoch": 0.49, + "learning_rate": 4.682229837174821e-05, + "loss": 1.0378, + "step": 880 + }, + { + "epoch": 0.49, + "learning_rate": 4.678688183359135e-05, + "loss": 0.9908, + "step": 885 + }, + { + "epoch": 0.49, + "learning_rate": 4.675128257161418e-05, + "loss": 0.9384, + "step": 890 + }, + { + "epoch": 0.49, + "learning_rate": 4.671550088438319e-05, + "loss": 0.9481, + "step": 895 + }, + { + "epoch": 0.5, + "learning_rate": 4.6679537071994874e-05, + "loss": 0.9541, + "step": 900 + }, + { + "epoch": 0.5, + "learning_rate": 4.6643391436073165e-05, + "loss": 0.9787, + "step": 905 + }, + { + "epoch": 0.5, + "learning_rate": 4.660706427976693e-05, + "loss": 0.9897, + "step": 910 + }, + { + "epoch": 0.51, + "learning_rate": 4.657055590774745e-05, + "loss": 0.9947, + "step": 915 + }, + { + "epoch": 0.51, + "learning_rate": 4.6533866626205805e-05, + "loss": 0.9614, + "step": 920 + }, + { + "epoch": 0.51, + "learning_rate": 4.649699674285036e-05, + "loss": 1.0323, + "step": 925 + }, + { + "epoch": 0.51, + "learning_rate": 4.645994656690417e-05, + "loss": 1.0345, + "step": 930 + }, + { + "epoch": 0.52, + "learning_rate": 4.642271640910235e-05, + "loss": 1.0432, + "step": 935 + }, + { + "epoch": 0.52, + "learning_rate": 4.638530658168954e-05, + "loss": 1.0102, + "step": 940 + }, + { + "epoch": 0.52, + "learning_rate": 4.6347717398417203e-05, + "loss": 1.0361, + "step": 945 + }, + { + "epoch": 0.53, + "learning_rate": 4.6309949174541096e-05, + "loss": 0.9818, + "step": 950 + }, + { + "epoch": 0.53, + "learning_rate": 4.627200222681851e-05, + "loss": 1.0293, + "step": 955 + }, + { + "epoch": 0.53, + "learning_rate": 4.6233876873505694e-05, + "loss": 1.0125, + "step": 960 + }, + { + "epoch": 0.53, + "learning_rate": 4.619557343435516e-05, + "loss": 1.0039, + "step": 965 + }, + { + "epoch": 0.54, + "learning_rate": 4.615709223061302e-05, + "loss": 1.0146, + "step": 970 + }, + { + "epoch": 0.54, + "learning_rate": 4.611843358501624e-05, + "loss": 1.0352, + "step": 975 + }, + { + "epoch": 0.54, + "learning_rate": 4.6079597821789993e-05, + "loss": 1.0405, + "step": 980 + }, + { + "epoch": 0.54, + "learning_rate": 4.604058526664491e-05, + "loss": 0.9834, + "step": 985 + }, + { + "epoch": 0.55, + "learning_rate": 4.600139624677436e-05, + "loss": 1.0252, + "step": 990 + }, + { + "epoch": 0.55, + "learning_rate": 4.596203109085168e-05, + "loss": 1.0516, + "step": 995 + }, + { + "epoch": 0.55, + "learning_rate": 4.5922490129027464e-05, + "loss": 0.9762, + "step": 1000 + }, + { + "epoch": 0.56, + "learning_rate": 4.588277369292674e-05, + "loss": 0.9801, + "step": 1005 + }, + { + "epoch": 0.56, + "learning_rate": 4.5842882115646234e-05, + "loss": 1.0127, + "step": 1010 + }, + { + "epoch": 0.56, + "learning_rate": 4.580281573175157e-05, + "loss": 1.0415, + "step": 1015 + }, + { + "epoch": 0.56, + "learning_rate": 4.576257487727442e-05, + "loss": 0.9974, + "step": 1020 + }, + { + "epoch": 0.57, + "learning_rate": 4.572215988970974e-05, + "loss": 1.0097, + "step": 1025 + }, + { + "epoch": 0.57, + "learning_rate": 4.568157110801293e-05, + "loss": 1.0004, + "step": 1030 + }, + { + "epoch": 0.57, + "learning_rate": 4.5640808872596944e-05, + "loss": 0.9932, + "step": 1035 + }, + { + "epoch": 0.58, + "learning_rate": 4.5599873525329505e-05, + "loss": 0.9437, + "step": 1040 + }, + { + "epoch": 0.58, + "learning_rate": 4.555876540953019e-05, + "loss": 1.0004, + "step": 1045 + }, + { + "epoch": 0.58, + "learning_rate": 4.551748486996755e-05, + "loss": 0.9743, + "step": 1050 + }, + { + "epoch": 0.58, + "learning_rate": 4.547603225285626e-05, + "loss": 1.0303, + "step": 1055 + }, + { + "epoch": 0.59, + "learning_rate": 4.543440790585417e-05, + "loss": 1.0225, + "step": 1060 + }, + { + "epoch": 0.59, + "learning_rate": 4.539261217805939e-05, + "loss": 1.0102, + "step": 1065 + }, + { + "epoch": 0.59, + "learning_rate": 4.535064542000743e-05, + "loss": 1.0087, + "step": 1070 + }, + { + "epoch": 0.59, + "learning_rate": 4.5308507983668165e-05, + "loss": 0.952, + "step": 1075 + }, + { + "epoch": 0.6, + "learning_rate": 4.526620022244293e-05, + "loss": 0.9767, + "step": 1080 + }, + { + "epoch": 0.6, + "learning_rate": 4.522372249116158e-05, + "loss": 1.0049, + "step": 1085 + }, + { + "epoch": 0.6, + "learning_rate": 4.5181075146079456e-05, + "loss": 1.0397, + "step": 1090 + }, + { + "epoch": 0.61, + "learning_rate": 4.5138258544874455e-05, + "loss": 1.0167, + "step": 1095 + }, + { + "epoch": 0.61, + "learning_rate": 4.5095273046643985e-05, + "loss": 0.9966, + "step": 1100 + }, + { + "epoch": 0.61, + "learning_rate": 4.5052119011901986e-05, + "loss": 0.983, + "step": 1105 + }, + { + "epoch": 0.61, + "learning_rate": 4.500879680257587e-05, + "loss": 1.0301, + "step": 1110 + }, + { + "epoch": 0.62, + "learning_rate": 4.4965306782003535e-05, + "loss": 1.0371, + "step": 1115 + }, + { + "epoch": 0.62, + "learning_rate": 4.492164931493028e-05, + "loss": 1.0083, + "step": 1120 + }, + { + "epoch": 0.62, + "learning_rate": 4.487782476750575e-05, + "loss": 0.9728, + "step": 1125 + }, + { + "epoch": 0.62, + "learning_rate": 4.4833833507280884e-05, + "loss": 0.9892, + "step": 1130 + }, + { + "epoch": 0.63, + "learning_rate": 4.4789675903204805e-05, + "loss": 0.9847, + "step": 1135 + }, + { + "epoch": 0.63, + "learning_rate": 4.474535232562176e-05, + "loss": 1.0545, + "step": 1140 + }, + { + "epoch": 0.63, + "learning_rate": 4.470086314626797e-05, + "loss": 1.0204, + "step": 1145 + }, + { + "epoch": 0.64, + "learning_rate": 4.465620873826856e-05, + "loss": 0.9648, + "step": 1150 + }, + { + "epoch": 0.64, + "learning_rate": 4.46113894761344e-05, + "loss": 0.9545, + "step": 1155 + }, + { + "epoch": 0.64, + "learning_rate": 4.456640573575896e-05, + "loss": 1.0256, + "step": 1160 + }, + { + "epoch": 0.64, + "learning_rate": 4.4521257894415183e-05, + "loss": 0.99, + "step": 1165 + }, + { + "epoch": 0.65, + "learning_rate": 4.44759463307523e-05, + "loss": 0.9906, + "step": 1170 + }, + { + "epoch": 0.65, + "learning_rate": 4.443047142479266e-05, + "loss": 0.9876, + "step": 1175 + }, + { + "epoch": 0.65, + "learning_rate": 4.4384833557928553e-05, + "loss": 1.0495, + "step": 1180 + }, + { + "epoch": 0.66, + "learning_rate": 4.4339033112918966e-05, + "loss": 0.9869, + "step": 1185 + }, + { + "epoch": 0.66, + "learning_rate": 4.4293070473886456e-05, + "loss": 1.0299, + "step": 1190 + }, + { + "epoch": 0.66, + "learning_rate": 4.424694602631385e-05, + "loss": 1.0073, + "step": 1195 + }, + { + "epoch": 0.66, + "learning_rate": 4.420066015704105e-05, + "loss": 1.0023, + "step": 1200 + }, + { + "epoch": 0.67, + "learning_rate": 4.41542132542618e-05, + "loss": 0.9462, + "step": 1205 + }, + { + "epoch": 0.67, + "learning_rate": 4.410760570752037e-05, + "loss": 1.0116, + "step": 1210 + }, + { + "epoch": 0.67, + "learning_rate": 4.4060837907708375e-05, + "loss": 0.9652, + "step": 1215 + }, + { + "epoch": 0.67, + "learning_rate": 4.401391024706142e-05, + "loss": 1.0411, + "step": 1220 + }, + { + "epoch": 0.68, + "learning_rate": 4.396682311915586e-05, + "loss": 0.9691, + "step": 1225 + }, + { + "epoch": 0.68, + "learning_rate": 4.3919576918905495e-05, + "loss": 0.961, + "step": 1230 + }, + { + "epoch": 0.68, + "learning_rate": 4.387217204255819e-05, + "loss": 0.9602, + "step": 1235 + }, + { + "epoch": 0.69, + "learning_rate": 4.3824608887692666e-05, + "loss": 1.0197, + "step": 1240 + }, + { + "epoch": 0.69, + "learning_rate": 4.377688785321507e-05, + "loss": 0.9982, + "step": 1245 + }, + { + "epoch": 0.69, + "learning_rate": 4.372900933935569e-05, + "loss": 0.986, + "step": 1250 + }, + { + "epoch": 0.69, + "learning_rate": 4.368097374766556e-05, + "loss": 0.9744, + "step": 1255 + }, + { + "epoch": 0.7, + "learning_rate": 4.3632781481013105e-05, + "loss": 1.0078, + "step": 1260 + }, + { + "epoch": 0.7, + "learning_rate": 4.358443294358077e-05, + "loss": 0.9865, + "step": 1265 + }, + { + "epoch": 0.7, + "learning_rate": 4.35359285408616e-05, + "loss": 0.976, + "step": 1270 + }, + { + "epoch": 0.71, + "learning_rate": 4.348726867965591e-05, + "loss": 1.0141, + "step": 1275 + }, + { + "epoch": 0.71, + "learning_rate": 4.343845376806777e-05, + "loss": 0.952, + "step": 1280 + }, + { + "epoch": 0.71, + "learning_rate": 4.338948421550169e-05, + "loss": 1.0239, + "step": 1285 + }, + { + "epoch": 0.71, + "learning_rate": 4.334036043265909e-05, + "loss": 0.994, + "step": 1290 + }, + { + "epoch": 0.72, + "learning_rate": 4.329108283153492e-05, + "loss": 1.0505, + "step": 1295 + }, + { + "epoch": 0.72, + "learning_rate": 4.3241651825414195e-05, + "loss": 0.9919, + "step": 1300 + }, + { + "epoch": 0.72, + "learning_rate": 4.31920678288685e-05, + "loss": 1.0169, + "step": 1305 + }, + { + "epoch": 0.72, + "learning_rate": 4.3142331257752546e-05, + "loss": 0.9938, + "step": 1310 + }, + { + "epoch": 0.73, + "learning_rate": 4.309244252920064e-05, + "loss": 0.92, + "step": 1315 + }, + { + "epoch": 0.73, + "learning_rate": 4.304240206162326e-05, + "loss": 0.9537, + "step": 1320 + }, + { + "epoch": 0.73, + "learning_rate": 4.299221027470345e-05, + "loss": 0.996, + "step": 1325 + }, + { + "epoch": 0.74, + "learning_rate": 4.294186758939339e-05, + "loss": 0.9652, + "step": 1330 + }, + { + "epoch": 0.74, + "learning_rate": 4.2891374427910795e-05, + "loss": 1.0138, + "step": 1335 + }, + { + "epoch": 0.74, + "learning_rate": 4.284073121373544e-05, + "loss": 0.9267, + "step": 1340 + }, + { + "epoch": 0.74, + "learning_rate": 4.278993837160553e-05, + "loss": 0.9899, + "step": 1345 + }, + { + "epoch": 0.75, + "learning_rate": 4.273899632751422e-05, + "loss": 0.963, + "step": 1350 + }, + { + "epoch": 0.75, + "learning_rate": 4.2687905508705974e-05, + "loss": 1.0066, + "step": 1355 + }, + { + "epoch": 0.75, + "learning_rate": 4.263666634367303e-05, + "loss": 0.9939, + "step": 1360 + }, + { + "epoch": 0.75, + "learning_rate": 4.258527926215178e-05, + "loss": 0.9667, + "step": 1365 + }, + { + "epoch": 0.76, + "learning_rate": 4.253374469511917e-05, + "loss": 0.9999, + "step": 1370 + }, + { + "epoch": 0.76, + "learning_rate": 4.248206307478909e-05, + "loss": 0.9738, + "step": 1375 + }, + { + "epoch": 0.76, + "learning_rate": 4.243023483460875e-05, + "loss": 0.9829, + "step": 1380 + }, + { + "epoch": 0.77, + "learning_rate": 4.237826040925503e-05, + "loss": 1.0107, + "step": 1385 + }, + { + "epoch": 0.77, + "learning_rate": 4.232614023463088e-05, + "loss": 0.9992, + "step": 1390 + }, + { + "epoch": 0.77, + "learning_rate": 4.227387474786159e-05, + "loss": 0.9566, + "step": 1395 + }, + { + "epoch": 0.77, + "learning_rate": 4.222146438729119e-05, + "loss": 0.9722, + "step": 1400 + }, + { + "epoch": 0.78, + "learning_rate": 4.216890959247873e-05, + "loss": 0.988, + "step": 1405 + }, + { + "epoch": 0.78, + "learning_rate": 4.211621080419463e-05, + "loss": 1.0103, + "step": 1410 + }, + { + "epoch": 0.78, + "learning_rate": 4.206336846441695e-05, + "loss": 0.9805, + "step": 1415 + }, + { + "epoch": 0.79, + "learning_rate": 4.201038301632772e-05, + "loss": 1.0177, + "step": 1420 + }, + { + "epoch": 0.79, + "learning_rate": 4.195725490430917e-05, + "loss": 0.9938, + "step": 1425 + }, + { + "epoch": 0.79, + "learning_rate": 4.190398457394007e-05, + "loss": 1.0276, + "step": 1430 + }, + { + "epoch": 0.79, + "learning_rate": 4.1850572471991924e-05, + "loss": 0.9909, + "step": 1435 + }, + { + "epoch": 0.8, + "learning_rate": 4.1797019046425264e-05, + "loss": 0.9794, + "step": 1440 + }, + { + "epoch": 0.8, + "learning_rate": 4.1743324746385914e-05, + "loss": 1.0073, + "step": 1445 + }, + { + "epoch": 0.8, + "learning_rate": 4.1689490022201154e-05, + "loss": 1.0106, + "step": 1450 + }, + { + "epoch": 0.8, + "learning_rate": 4.163551532537601e-05, + "loss": 1.0241, + "step": 1455 + }, + { + "epoch": 0.81, + "learning_rate": 4.1581401108589425e-05, + "loss": 1.0178, + "step": 1460 + }, + { + "epoch": 0.81, + "learning_rate": 4.1527147825690495e-05, + "loss": 0.9426, + "step": 1465 + }, + { + "epoch": 0.81, + "learning_rate": 4.1472755931694626e-05, + "loss": 0.9642, + "step": 1470 + }, + { + "epoch": 0.82, + "learning_rate": 4.141822588277976e-05, + "loss": 0.9678, + "step": 1475 + }, + { + "epoch": 0.82, + "learning_rate": 4.136355813628251e-05, + "loss": 0.9654, + "step": 1480 + }, + { + "epoch": 0.82, + "learning_rate": 4.130875315069435e-05, + "loss": 0.9748, + "step": 1485 + }, + { + "epoch": 0.82, + "learning_rate": 4.125381138565775e-05, + "loss": 0.9321, + "step": 1490 + }, + { + "epoch": 0.83, + "learning_rate": 4.1198733301962346e-05, + "loss": 0.9885, + "step": 1495 + }, + { + "epoch": 0.83, + "learning_rate": 4.114351936154105e-05, + "loss": 0.9385, + "step": 1500 + }, + { + "epoch": 0.83, + "learning_rate": 4.108817002746619e-05, + "loss": 0.9362, + "step": 1505 + }, + { + "epoch": 0.83, + "learning_rate": 4.1032685763945625e-05, + "loss": 0.9764, + "step": 1510 + }, + { + "epoch": 0.84, + "learning_rate": 4.097706703631886e-05, + "loss": 0.961, + "step": 1515 + }, + { + "epoch": 0.84, + "learning_rate": 4.092131431105312e-05, + "loss": 0.9818, + "step": 1520 + }, + { + "epoch": 0.84, + "learning_rate": 4.086542805573945e-05, + "loss": 0.9855, + "step": 1525 + }, + { + "epoch": 0.85, + "learning_rate": 4.080940873908881e-05, + "loss": 0.9618, + "step": 1530 + }, + { + "epoch": 0.85, + "learning_rate": 4.07532568309281e-05, + "loss": 1.0102, + "step": 1535 + }, + { + "epoch": 0.85, + "learning_rate": 4.069697280219628e-05, + "loss": 0.9773, + "step": 1540 + }, + { + "epoch": 0.85, + "learning_rate": 4.0640557124940376e-05, + "loss": 0.9687, + "step": 1545 + }, + { + "epoch": 0.86, + "learning_rate": 4.058401027231152e-05, + "loss": 0.9818, + "step": 1550 + }, + { + "epoch": 0.86, + "learning_rate": 4.052733271856103e-05, + "loss": 0.9911, + "step": 1555 + }, + { + "epoch": 0.86, + "learning_rate": 4.0470524939036355e-05, + "loss": 0.9694, + "step": 1560 + }, + { + "epoch": 0.87, + "learning_rate": 4.0413587410177155e-05, + "loss": 0.9374, + "step": 1565 + }, + { + "epoch": 0.87, + "learning_rate": 4.035652060951128e-05, + "loss": 0.956, + "step": 1570 + }, + { + "epoch": 0.87, + "learning_rate": 4.0299325015650774e-05, + "loss": 0.9491, + "step": 1575 + }, + { + "epoch": 0.87, + "learning_rate": 4.024200110828783e-05, + "loss": 0.9569, + "step": 1580 + }, + { + "epoch": 0.88, + "learning_rate": 4.018454936819082e-05, + "loss": 0.9627, + "step": 1585 + }, + { + "epoch": 0.88, + "learning_rate": 4.012697027720018e-05, + "loss": 0.9922, + "step": 1590 + }, + { + "epoch": 0.88, + "learning_rate": 4.0069264318224506e-05, + "loss": 0.9703, + "step": 1595 + }, + { + "epoch": 0.88, + "learning_rate": 4.0011431975236337e-05, + "loss": 0.9436, + "step": 1600 + }, + { + "epoch": 0.89, + "learning_rate": 3.995347373326822e-05, + "loss": 0.961, + "step": 1605 + }, + { + "epoch": 0.89, + "learning_rate": 3.989539007840861e-05, + "loss": 0.9247, + "step": 1610 + }, + { + "epoch": 0.89, + "learning_rate": 3.983718149779775e-05, + "loss": 0.9537, + "step": 1615 + }, + { + "epoch": 0.9, + "learning_rate": 3.9778848479623656e-05, + "loss": 0.9415, + "step": 1620 + }, + { + "epoch": 0.9, + "learning_rate": 3.972039151311795e-05, + "loss": 0.9954, + "step": 1625 + }, + { + "epoch": 0.9, + "learning_rate": 3.966181108855183e-05, + "loss": 0.9451, + "step": 1630 + }, + { + "epoch": 0.9, + "learning_rate": 3.960310769723189e-05, + "loss": 0.9977, + "step": 1635 + }, + { + "epoch": 0.91, + "learning_rate": 3.9544281831496034e-05, + "loss": 0.9806, + "step": 1640 + }, + { + "epoch": 0.91, + "learning_rate": 3.9485333984709374e-05, + "loss": 0.9851, + "step": 1645 + }, + { + "epoch": 0.91, + "learning_rate": 3.942626465126001e-05, + "loss": 1.0089, + "step": 1650 + }, + { + "epoch": 0.92, + "learning_rate": 3.9367074326555e-05, + "loss": 0.9562, + "step": 1655 + }, + { + "epoch": 0.92, + "learning_rate": 3.930776350701609e-05, + "loss": 0.9892, + "step": 1660 + }, + { + "epoch": 0.92, + "learning_rate": 3.92483326900756e-05, + "loss": 0.9875, + "step": 1665 + }, + { + "epoch": 0.92, + "learning_rate": 3.91887823741723e-05, + "loss": 1.0012, + "step": 1670 + }, + { + "epoch": 0.93, + "learning_rate": 3.9129113058747136e-05, + "loss": 0.97, + "step": 1675 + }, + { + "epoch": 0.93, + "learning_rate": 3.9069325244239095e-05, + "loss": 0.9793, + "step": 1680 + }, + { + "epoch": 0.93, + "learning_rate": 3.900941943208103e-05, + "loss": 0.9864, + "step": 1685 + }, + { + "epoch": 0.93, + "learning_rate": 3.894939612469539e-05, + "loss": 0.9596, + "step": 1690 + }, + { + "epoch": 0.94, + "learning_rate": 3.888925582549006e-05, + "loss": 0.9679, + "step": 1695 + }, + { + "epoch": 0.94, + "learning_rate": 3.882899903885412e-05, + "loss": 0.9395, + "step": 1700 + }, + { + "epoch": 0.94, + "learning_rate": 3.876862627015361e-05, + "loss": 0.9774, + "step": 1705 + }, + { + "epoch": 0.95, + "learning_rate": 3.87081380257273e-05, + "loss": 0.9686, + "step": 1710 + }, + { + "epoch": 0.95, + "learning_rate": 3.864753481288244e-05, + "loss": 0.953, + "step": 1715 + }, + { + "epoch": 0.95, + "learning_rate": 3.8586817139890515e-05, + "loss": 0.9719, + "step": 1720 + }, + { + "epoch": 0.95, + "learning_rate": 3.852598551598294e-05, + "loss": 0.9748, + "step": 1725 + }, + { + "epoch": 0.96, + "learning_rate": 3.8465040451346874e-05, + "loss": 0.9741, + "step": 1730 + }, + { + "epoch": 0.96, + "learning_rate": 3.8403982457120836e-05, + "loss": 0.9747, + "step": 1735 + }, + { + "epoch": 0.96, + "learning_rate": 3.834281204539051e-05, + "loss": 0.9791, + "step": 1740 + }, + { + "epoch": 0.96, + "learning_rate": 3.828152972918438e-05, + "loss": 0.9704, + "step": 1745 + }, + { + "epoch": 0.97, + "learning_rate": 3.82201360224695e-05, + "loss": 0.9556, + "step": 1750 + }, + { + "epoch": 0.97, + "learning_rate": 3.815863144014711e-05, + "loss": 0.9552, + "step": 1755 + }, + { + "epoch": 0.97, + "learning_rate": 3.809701649804834e-05, + "loss": 0.9594, + "step": 1760 + }, + { + "epoch": 0.98, + "learning_rate": 3.8035291712929926e-05, + "loss": 0.959, + "step": 1765 + }, + { + "epoch": 0.98, + "learning_rate": 3.797345760246982e-05, + "loss": 1.0254, + "step": 1770 + }, + { + "epoch": 0.98, + "learning_rate": 3.791151468526289e-05, + "loss": 0.9925, + "step": 1775 + }, + { + "epoch": 0.98, + "learning_rate": 3.784946348081654e-05, + "loss": 0.9516, + "step": 1780 + }, + { + "epoch": 0.99, + "learning_rate": 3.7787304509546365e-05, + "loss": 0.954, + "step": 1785 + }, + { + "epoch": 0.99, + "learning_rate": 3.7725038292771774e-05, + "loss": 0.9465, + "step": 1790 + }, + { + "epoch": 0.99, + "learning_rate": 3.766266535271167e-05, + "loss": 0.9792, + "step": 1795 + }, + { + "epoch": 1.0, + "learning_rate": 3.760018621248e-05, + "loss": 0.964, + "step": 1800 + }, + { + "epoch": 1.0, + "learning_rate": 3.75376013960814e-05, + "loss": 0.9419, + "step": 1805 + }, + { + "epoch": 1.0, + "learning_rate": 3.747491142840681e-05, + "loss": 0.9818, + "step": 1810 + }, + { + "epoch": 1.0, + "learning_rate": 3.741211683522904e-05, + "loss": 0.9153, + "step": 1815 + }, + { + "epoch": 1.01, + "learning_rate": 3.734921814319841e-05, + "loss": 0.955, + "step": 1820 + }, + { + "epoch": 1.01, + "learning_rate": 3.728621587983828e-05, + "loss": 0.9467, + "step": 1825 + }, + { + "epoch": 1.01, + "learning_rate": 3.722311057354067e-05, + "loss": 0.9816, + "step": 1830 + }, + { + "epoch": 1.01, + "learning_rate": 3.715990275356178e-05, + "loss": 0.9727, + "step": 1835 + }, + { + "epoch": 1.02, + "learning_rate": 3.7096592950017617e-05, + "loss": 0.9822, + "step": 1840 + }, + { + "epoch": 1.02, + "learning_rate": 3.703318169387947e-05, + "loss": 0.942, + "step": 1845 + }, + { + "epoch": 1.02, + "learning_rate": 3.696966951696952e-05, + "loss": 0.9306, + "step": 1850 + }, + { + "epoch": 1.03, + "learning_rate": 3.690605695195637e-05, + "loss": 0.9654, + "step": 1855 + }, + { + "epoch": 1.03, + "learning_rate": 3.684234453235054e-05, + "loss": 0.9592, + "step": 1860 + }, + { + "epoch": 1.03, + "learning_rate": 3.677853279250003e-05, + "loss": 0.988, + "step": 1865 + }, + { + "epoch": 1.03, + "learning_rate": 3.671462226758583e-05, + "loss": 0.9462, + "step": 1870 + }, + { + "epoch": 1.04, + "learning_rate": 3.665061349361742e-05, + "loss": 0.9685, + "step": 1875 + }, + { + "epoch": 1.04, + "learning_rate": 3.658650700742828e-05, + "loss": 0.9772, + "step": 1880 + }, + { + "epoch": 1.04, + "learning_rate": 3.6522303346671404e-05, + "loss": 0.9482, + "step": 1885 + }, + { + "epoch": 1.05, + "learning_rate": 3.645800304981477e-05, + "loss": 1.0069, + "step": 1890 + }, + { + "epoch": 1.05, + "learning_rate": 3.639360665613683e-05, + "loss": 0.9003, + "step": 1895 + }, + { + "epoch": 1.05, + "learning_rate": 3.632911470572197e-05, + "loss": 0.9279, + "step": 1900 + }, + { + "epoch": 1.05, + "learning_rate": 3.626452773945603e-05, + "loss": 0.9237, + "step": 1905 + }, + { + "epoch": 1.06, + "learning_rate": 3.619984629902172e-05, + "loss": 1.0086, + "step": 1910 + }, + { + "epoch": 1.06, + "learning_rate": 3.613507092689409e-05, + "loss": 0.9625, + "step": 1915 + }, + { + "epoch": 1.06, + "learning_rate": 3.607020216633599e-05, + "loss": 0.9297, + "step": 1920 + }, + { + "epoch": 1.06, + "learning_rate": 3.60052405613935e-05, + "loss": 0.9718, + "step": 1925 + }, + { + "epoch": 1.07, + "learning_rate": 3.594018665689139e-05, + "loss": 0.9512, + "step": 1930 + }, + { + "epoch": 1.07, + "learning_rate": 3.5875040998428513e-05, + "loss": 0.9923, + "step": 1935 + }, + { + "epoch": 1.07, + "learning_rate": 3.5809804132373253e-05, + "loss": 0.9518, + "step": 1940 + }, + { + "epoch": 1.08, + "learning_rate": 3.574447660585897e-05, + "loss": 0.9324, + "step": 1945 + }, + { + "epoch": 1.08, + "learning_rate": 3.5679058966779344e-05, + "loss": 0.9724, + "step": 1950 + }, + { + "epoch": 1.08, + "learning_rate": 3.561355176378384e-05, + "loss": 0.978, + "step": 1955 + }, + { + "epoch": 1.08, + "learning_rate": 3.554795554627307e-05, + "loss": 0.9893, + "step": 1960 + }, + { + "epoch": 1.09, + "learning_rate": 3.548227086439422e-05, + "loss": 0.967, + "step": 1965 + }, + { + "epoch": 1.09, + "learning_rate": 3.541649826903639e-05, + "loss": 0.9529, + "step": 1970 + }, + { + "epoch": 1.09, + "learning_rate": 3.535063831182602e-05, + "loss": 0.9506, + "step": 1975 + }, + { + "epoch": 1.09, + "learning_rate": 3.528469154512224e-05, + "loss": 0.9525, + "step": 1980 + }, + { + "epoch": 1.1, + "learning_rate": 3.521865852201223e-05, + "loss": 0.9258, + "step": 1985 + }, + { + "epoch": 1.1, + "learning_rate": 3.5152539796306596e-05, + "loss": 0.9417, + "step": 1990 + }, + { + "epoch": 1.1, + "learning_rate": 3.508633592253472e-05, + "loss": 0.9465, + "step": 1995 + }, + { + "epoch": 1.11, + "learning_rate": 3.502004745594011e-05, + "loss": 0.9494, + "step": 2000 + }, + { + "epoch": 1.11, + "learning_rate": 3.4953674952475755e-05, + "loss": 0.9709, + "step": 2005 + }, + { + "epoch": 1.11, + "learning_rate": 3.488721896879943e-05, + "loss": 0.9581, + "step": 2010 + }, + { + "epoch": 1.11, + "learning_rate": 3.4820680062269074e-05, + "loss": 0.974, + "step": 2015 + }, + { + "epoch": 1.12, + "learning_rate": 3.4754058790938046e-05, + "loss": 0.9768, + "step": 2020 + }, + { + "epoch": 1.12, + "learning_rate": 3.468735571355055e-05, + "loss": 0.982, + "step": 2025 + }, + { + "epoch": 1.12, + "learning_rate": 3.4620571389536825e-05, + "loss": 0.984, + "step": 2030 + }, + { + "epoch": 1.13, + "learning_rate": 3.455370637900856e-05, + "loss": 0.9604, + "step": 2035 + }, + { + "epoch": 1.13, + "learning_rate": 3.448676124275414e-05, + "loss": 0.897, + "step": 2040 + }, + { + "epoch": 1.13, + "learning_rate": 3.4419736542233925e-05, + "loss": 0.9968, + "step": 2045 + }, + { + "epoch": 1.13, + "learning_rate": 3.4352632839575616e-05, + "loss": 0.9479, + "step": 2050 + }, + { + "epoch": 1.14, + "learning_rate": 3.428545069756946e-05, + "loss": 0.9724, + "step": 2055 + }, + { + "epoch": 1.14, + "learning_rate": 3.42181906796636e-05, + "loss": 0.9493, + "step": 2060 + }, + { + "epoch": 1.14, + "learning_rate": 3.415085334995927e-05, + "loss": 0.9348, + "step": 2065 + }, + { + "epoch": 1.14, + "learning_rate": 3.408343927320613e-05, + "loss": 0.9702, + "step": 2070 + }, + { + "epoch": 1.15, + "learning_rate": 3.401594901479753e-05, + "loss": 0.9089, + "step": 2075 + }, + { + "epoch": 1.15, + "learning_rate": 3.394838314076572e-05, + "loss": 0.9606, + "step": 2080 + }, + { + "epoch": 1.15, + "learning_rate": 3.3880742217777115e-05, + "loss": 0.9743, + "step": 2085 + }, + { + "epoch": 1.16, + "learning_rate": 3.381302681312759e-05, + "loss": 0.9469, + "step": 2090 + }, + { + "epoch": 1.16, + "learning_rate": 3.374523749473767e-05, + "loss": 0.949, + "step": 2095 + }, + { + "epoch": 1.16, + "learning_rate": 3.367737483114779e-05, + "loss": 0.9567, + "step": 2100 + }, + { + "epoch": 1.16, + "learning_rate": 3.360943939151351e-05, + "loss": 0.9718, + "step": 2105 + }, + { + "epoch": 1.17, + "learning_rate": 3.354143174560078e-05, + "loss": 0.9626, + "step": 2110 + }, + { + "epoch": 1.17, + "learning_rate": 3.3473352463781105e-05, + "loss": 0.9346, + "step": 2115 + }, + { + "epoch": 1.17, + "learning_rate": 3.340520211702681e-05, + "loss": 0.9208, + "step": 2120 + }, + { + "epoch": 1.18, + "learning_rate": 3.333698127690623e-05, + "loss": 0.9856, + "step": 2125 + }, + { + "epoch": 1.18, + "learning_rate": 3.326869051557891e-05, + "loss": 0.9049, + "step": 2130 + }, + { + "epoch": 1.18, + "learning_rate": 3.320033040579082e-05, + "loss": 0.9222, + "step": 2135 + }, + { + "epoch": 1.18, + "learning_rate": 3.3131901520869565e-05, + "loss": 0.9648, + "step": 2140 + }, + { + "epoch": 1.19, + "learning_rate": 3.306340443471951e-05, + "loss": 0.9538, + "step": 2145 + }, + { + "epoch": 1.19, + "learning_rate": 3.299483972181708e-05, + "loss": 0.9314, + "step": 2150 + }, + { + "epoch": 1.19, + "learning_rate": 3.292620795720583e-05, + "loss": 0.9576, + "step": 2155 + }, + { + "epoch": 1.19, + "learning_rate": 3.285750971649167e-05, + "loss": 0.9499, + "step": 2160 + }, + { + "epoch": 1.2, + "learning_rate": 3.278874557583807e-05, + "loss": 0.9568, + "step": 2165 + }, + { + "epoch": 1.2, + "learning_rate": 3.271991611196117e-05, + "loss": 0.9642, + "step": 2170 + }, + { + "epoch": 1.2, + "learning_rate": 3.265102190212497e-05, + "loss": 0.9526, + "step": 2175 + }, + { + "epoch": 1.21, + "learning_rate": 3.258206352413648e-05, + "loss": 0.933, + "step": 2180 + }, + { + "epoch": 1.21, + "learning_rate": 3.2513041556340887e-05, + "loss": 0.9683, + "step": 2185 + }, + { + "epoch": 1.21, + "learning_rate": 3.244395657761671e-05, + "loss": 0.9155, + "step": 2190 + }, + { + "epoch": 1.21, + "learning_rate": 3.2374809167370924e-05, + "loss": 0.9262, + "step": 2195 + }, + { + "epoch": 1.22, + "learning_rate": 3.230559990553409e-05, + "loss": 0.9778, + "step": 2200 + }, + { + "epoch": 1.22, + "learning_rate": 3.2236329372555544e-05, + "loss": 0.9577, + "step": 2205 + }, + { + "epoch": 1.22, + "learning_rate": 3.2166998149398465e-05, + "loss": 0.9286, + "step": 2210 + }, + { + "epoch": 1.22, + "learning_rate": 3.209760681753505e-05, + "loss": 0.9634, + "step": 2215 + }, + { + "epoch": 1.23, + "learning_rate": 3.2028155958941615e-05, + "loss": 0.9467, + "step": 2220 + }, + { + "epoch": 1.23, + "learning_rate": 3.195864615609373e-05, + "loss": 0.9543, + "step": 2225 + }, + { + "epoch": 1.23, + "learning_rate": 3.1889077991961304e-05, + "loss": 0.9914, + "step": 2230 + }, + { + "epoch": 1.24, + "learning_rate": 3.181945205000373e-05, + "loss": 0.9309, + "step": 2235 + }, + { + "epoch": 1.24, + "learning_rate": 3.1749768914164955e-05, + "loss": 0.9299, + "step": 2240 + }, + { + "epoch": 1.24, + "learning_rate": 3.168002916886864e-05, + "loss": 0.9462, + "step": 2245 + }, + { + "epoch": 1.24, + "learning_rate": 3.1610233399013194e-05, + "loss": 0.948, + "step": 2250 + }, + { + "epoch": 1.25, + "learning_rate": 3.15403821899669e-05, + "loss": 0.9335, + "step": 2255 + }, + { + "epoch": 1.25, + "learning_rate": 3.147047612756302e-05, + "loss": 0.952, + "step": 2260 + }, + { + "epoch": 1.25, + "learning_rate": 3.140051579809484e-05, + "loss": 0.9532, + "step": 2265 + }, + { + "epoch": 1.26, + "learning_rate": 3.133050178831079e-05, + "loss": 0.9853, + "step": 2270 + }, + { + "epoch": 1.26, + "learning_rate": 3.12604346854095e-05, + "loss": 0.8925, + "step": 2275 + }, + { + "epoch": 1.26, + "learning_rate": 3.119031507703491e-05, + "loss": 0.942, + "step": 2280 + }, + { + "epoch": 1.26, + "learning_rate": 3.112014355127129e-05, + "loss": 0.9132, + "step": 2285 + }, + { + "epoch": 1.27, + "learning_rate": 3.104992069663835e-05, + "loss": 0.9335, + "step": 2290 + }, + { + "epoch": 1.27, + "learning_rate": 3.0979647102086273e-05, + "loss": 0.9403, + "step": 2295 + }, + { + "epoch": 1.27, + "learning_rate": 3.090932335699081e-05, + "loss": 0.9246, + "step": 2300 + }, + { + "epoch": 1.27, + "learning_rate": 3.083895005114831e-05, + "loss": 0.912, + "step": 2305 + }, + { + "epoch": 1.28, + "learning_rate": 3.076852777477079e-05, + "loss": 0.9334, + "step": 2310 + }, + { + "epoch": 1.28, + "learning_rate": 3.069805711848096e-05, + "loss": 0.933, + "step": 2315 + }, + { + "epoch": 1.28, + "learning_rate": 3.062753867330729e-05, + "loss": 0.9348, + "step": 2320 + }, + { + "epoch": 1.29, + "learning_rate": 3.055697303067905e-05, + "loss": 0.9997, + "step": 2325 + }, + { + "epoch": 1.29, + "learning_rate": 3.048636078242137e-05, + "loss": 0.9196, + "step": 2330 + }, + { + "epoch": 1.29, + "learning_rate": 3.0415702520750235e-05, + "loss": 0.9735, + "step": 2335 + }, + { + "epoch": 1.29, + "learning_rate": 3.0344998838267525e-05, + "loss": 0.9395, + "step": 2340 + }, + { + "epoch": 1.3, + "learning_rate": 3.0274250327956093e-05, + "loss": 0.9455, + "step": 2345 + }, + { + "epoch": 1.3, + "learning_rate": 3.020345758317474e-05, + "loss": 0.968, + "step": 2350 + }, + { + "epoch": 1.3, + "learning_rate": 3.0132621197653245e-05, + "loss": 0.9403, + "step": 2355 + }, + { + "epoch": 1.3, + "learning_rate": 3.0061741765487418e-05, + "loss": 0.9267, + "step": 2360 + }, + { + "epoch": 1.31, + "learning_rate": 2.9990819881134073e-05, + "loss": 0.9734, + "step": 2365 + }, + { + "epoch": 1.31, + "learning_rate": 2.9919856139406093e-05, + "loss": 0.949, + "step": 2370 + }, + { + "epoch": 1.31, + "learning_rate": 2.9848851135467386e-05, + "loss": 0.9464, + "step": 2375 + }, + { + "epoch": 1.32, + "learning_rate": 2.977780546482794e-05, + "loss": 0.9709, + "step": 2380 + }, + { + "epoch": 1.32, + "learning_rate": 2.9706719723338795e-05, + "loss": 0.9202, + "step": 2385 + }, + { + "epoch": 1.32, + "learning_rate": 2.9635594507187074e-05, + "loss": 0.9625, + "step": 2390 + }, + { + "epoch": 1.32, + "learning_rate": 2.956443041289096e-05, + "loss": 0.9307, + "step": 2395 + }, + { + "epoch": 1.33, + "learning_rate": 2.9493228037294702e-05, + "loss": 0.907, + "step": 2400 + }, + { + "epoch": 1.33, + "learning_rate": 2.9421987977563613e-05, + "loss": 0.9487, + "step": 2405 + }, + { + "epoch": 1.33, + "learning_rate": 2.935071083117907e-05, + "loss": 0.9259, + "step": 2410 + }, + { + "epoch": 1.34, + "learning_rate": 2.9279397195933457e-05, + "loss": 0.9357, + "step": 2415 + }, + { + "epoch": 1.34, + "learning_rate": 2.920804766992521e-05, + "loss": 0.9149, + "step": 2420 + }, + { + "epoch": 1.34, + "learning_rate": 2.9136662851553787e-05, + "loss": 0.9664, + "step": 2425 + }, + { + "epoch": 1.34, + "learning_rate": 2.906524333951461e-05, + "loss": 0.973, + "step": 2430 + }, + { + "epoch": 1.35, + "learning_rate": 2.899378973279409e-05, + "loss": 0.9797, + "step": 2435 + }, + { + "epoch": 1.35, + "learning_rate": 2.892230263066459e-05, + "loss": 0.9604, + "step": 2440 + }, + { + "epoch": 1.35, + "learning_rate": 2.885078263267938e-05, + "loss": 0.9147, + "step": 2445 + }, + { + "epoch": 1.35, + "learning_rate": 2.8779230338667634e-05, + "loss": 0.9827, + "step": 2450 + }, + { + "epoch": 1.36, + "learning_rate": 2.870764634872939e-05, + "loss": 0.9362, + "step": 2455 + }, + { + "epoch": 1.36, + "learning_rate": 2.86360312632305e-05, + "loss": 0.9373, + "step": 2460 + }, + { + "epoch": 1.36, + "learning_rate": 2.8564385682797622e-05, + "loss": 0.955, + "step": 2465 + }, + { + "epoch": 1.37, + "learning_rate": 2.8492710208313177e-05, + "loss": 0.9108, + "step": 2470 + }, + { + "epoch": 1.37, + "learning_rate": 2.8421005440910303e-05, + "loss": 0.9259, + "step": 2475 + }, + { + "epoch": 1.37, + "learning_rate": 2.8349271981967797e-05, + "loss": 0.9333, + "step": 2480 + }, + { + "epoch": 1.37, + "learning_rate": 2.8277510433105102e-05, + "loss": 0.9437, + "step": 2485 + }, + { + "epoch": 1.38, + "learning_rate": 2.820572139617725e-05, + "loss": 0.9189, + "step": 2490 + }, + { + "epoch": 1.38, + "learning_rate": 2.8133905473269802e-05, + "loss": 0.9522, + "step": 2495 + }, + { + "epoch": 1.38, + "learning_rate": 2.8062063266693818e-05, + "loss": 0.9352, + "step": 2500 + }, + { + "epoch": 1.39, + "learning_rate": 2.7990195378980784e-05, + "loss": 0.9232, + "step": 2505 + }, + { + "epoch": 1.39, + "learning_rate": 2.7918302412877583e-05, + "loss": 0.8807, + "step": 2510 + }, + { + "epoch": 1.39, + "learning_rate": 2.7846384971341427e-05, + "loss": 0.9231, + "step": 2515 + }, + { + "epoch": 1.39, + "learning_rate": 2.7774443657534788e-05, + "loss": 0.9253, + "step": 2520 + }, + { + "epoch": 1.4, + "learning_rate": 2.770247907482036e-05, + "loss": 0.9646, + "step": 2525 + }, + { + "epoch": 1.4, + "learning_rate": 2.763049182675599e-05, + "loss": 0.9575, + "step": 2530 + }, + { + "epoch": 1.4, + "learning_rate": 2.7558482517089617e-05, + "loss": 0.9234, + "step": 2535 + }, + { + "epoch": 1.4, + "learning_rate": 2.748645174975421e-05, + "loss": 0.9215, + "step": 2540 + }, + { + "epoch": 1.41, + "learning_rate": 2.74144001288627e-05, + "loss": 0.959, + "step": 2545 + }, + { + "epoch": 1.41, + "learning_rate": 2.7342328258702894e-05, + "loss": 0.9509, + "step": 2550 + }, + { + "epoch": 1.41, + "learning_rate": 2.727023674373246e-05, + "loss": 0.8989, + "step": 2555 + }, + { + "epoch": 1.42, + "learning_rate": 2.7198126188573807e-05, + "loss": 0.9653, + "step": 2560 + }, + { + "epoch": 1.42, + "learning_rate": 2.7125997198009028e-05, + "loss": 0.9046, + "step": 2565 + }, + { + "epoch": 1.42, + "learning_rate": 2.7053850376974848e-05, + "loss": 0.9318, + "step": 2570 + }, + { + "epoch": 1.42, + "learning_rate": 2.6981686330557516e-05, + "loss": 0.9292, + "step": 2575 + }, + { + "epoch": 1.43, + "learning_rate": 2.6909505663987756e-05, + "loss": 0.99, + "step": 2580 + }, + { + "epoch": 1.43, + "learning_rate": 2.6837308982635678e-05, + "loss": 0.9737, + "step": 2585 + }, + { + "epoch": 1.43, + "learning_rate": 2.6765096892005726e-05, + "loss": 0.9649, + "step": 2590 + }, + { + "epoch": 1.43, + "learning_rate": 2.6692869997731545e-05, + "loss": 0.9687, + "step": 2595 + }, + { + "epoch": 1.44, + "learning_rate": 2.6620628905570964e-05, + "loss": 0.9708, + "step": 2600 + }, + { + "epoch": 1.44, + "learning_rate": 2.6548374221400884e-05, + "loss": 0.9498, + "step": 2605 + }, + { + "epoch": 1.44, + "learning_rate": 2.6476106551212188e-05, + "loss": 0.9512, + "step": 2610 + }, + { + "epoch": 1.45, + "learning_rate": 2.6403826501104682e-05, + "loss": 0.964, + "step": 2615 + }, + { + "epoch": 1.45, + "learning_rate": 2.6331534677281998e-05, + "loss": 0.9321, + "step": 2620 + }, + { + "epoch": 1.45, + "learning_rate": 2.6259231686046508e-05, + "loss": 0.9032, + "step": 2625 + }, + { + "epoch": 1.45, + "learning_rate": 2.6186918133794252e-05, + "loss": 0.9543, + "step": 2630 + }, + { + "epoch": 1.46, + "learning_rate": 2.6114594627009847e-05, + "loss": 0.9355, + "step": 2635 + }, + { + "epoch": 1.46, + "learning_rate": 2.604226177226137e-05, + "loss": 0.9111, + "step": 2640 + }, + { + "epoch": 1.46, + "learning_rate": 2.596992017619534e-05, + "loss": 0.9896, + "step": 2645 + }, + { + "epoch": 1.47, + "learning_rate": 2.589757044553155e-05, + "loss": 0.9715, + "step": 2650 + }, + { + "epoch": 1.47, + "learning_rate": 2.5825213187058045e-05, + "loss": 0.8911, + "step": 2655 + }, + { + "epoch": 1.47, + "learning_rate": 2.5752849007625986e-05, + "loss": 0.9446, + "step": 2660 + }, + { + "epoch": 1.47, + "learning_rate": 2.568047851414459e-05, + "loss": 0.9453, + "step": 2665 + }, + { + "epoch": 1.48, + "learning_rate": 2.5608102313576027e-05, + "loss": 0.9244, + "step": 2670 + }, + { + "epoch": 1.48, + "learning_rate": 2.553572101293033e-05, + "loss": 0.9478, + "step": 2675 + }, + { + "epoch": 1.48, + "learning_rate": 2.546333521926031e-05, + "loss": 0.9708, + "step": 2680 + }, + { + "epoch": 1.48, + "learning_rate": 2.5390945539656445e-05, + "loss": 0.9266, + "step": 2685 + }, + { + "epoch": 1.49, + "learning_rate": 2.5318552581241822e-05, + "loss": 0.9199, + "step": 2690 + }, + { + "epoch": 1.49, + "learning_rate": 2.524615695116702e-05, + "loss": 0.9666, + "step": 2695 + }, + { + "epoch": 1.49, + "learning_rate": 2.5173759256605027e-05, + "loss": 0.9247, + "step": 2700 + }, + { + "epoch": 1.5, + "learning_rate": 2.510136010474614e-05, + "loss": 0.9534, + "step": 2705 + }, + { + "epoch": 1.5, + "learning_rate": 2.5028960102792887e-05, + "loss": 0.9502, + "step": 2710 + }, + { + "epoch": 1.5, + "learning_rate": 2.4971039897207112e-05, + "loss": 0.9331, + "step": 2715 + }, + { + "epoch": 1.5, + "learning_rate": 2.4898639895253865e-05, + "loss": 0.9806, + "step": 2720 + }, + { + "epoch": 1.51, + "learning_rate": 2.4826240743394982e-05, + "loss": 0.9525, + "step": 2725 + }, + { + "epoch": 1.51, + "learning_rate": 2.4753843048832985e-05, + "loss": 0.937, + "step": 2730 + }, + { + "epoch": 1.51, + "learning_rate": 2.4681447418758187e-05, + "loss": 0.935, + "step": 2735 + }, + { + "epoch": 1.52, + "learning_rate": 2.460905446034356e-05, + "loss": 0.9662, + "step": 2740 + }, + { + "epoch": 1.52, + "learning_rate": 2.45366647807397e-05, + "loss": 0.9249, + "step": 2745 + }, + { + "epoch": 1.52, + "learning_rate": 2.446427898706967e-05, + "loss": 0.9673, + "step": 2750 + }, + { + "epoch": 1.52, + "learning_rate": 2.439189768642398e-05, + "loss": 0.9316, + "step": 2755 + }, + { + "epoch": 1.53, + "learning_rate": 2.431952148585541e-05, + "loss": 0.9297, + "step": 2760 + }, + { + "epoch": 1.53, + "learning_rate": 2.424715099237402e-05, + "loss": 0.9278, + "step": 2765 + }, + { + "epoch": 1.53, + "learning_rate": 2.4174786812941968e-05, + "loss": 0.8954, + "step": 2770 + }, + { + "epoch": 1.53, + "learning_rate": 2.4102429554468456e-05, + "loss": 0.9586, + "step": 2775 + }, + { + "epoch": 1.54, + "learning_rate": 2.4030079823804673e-05, + "loss": 0.9119, + "step": 2780 + }, + { + "epoch": 1.54, + "learning_rate": 2.395773822773863e-05, + "loss": 0.8949, + "step": 2785 + }, + { + "epoch": 1.54, + "learning_rate": 2.3885405372990166e-05, + "loss": 0.9506, + "step": 2790 + }, + { + "epoch": 1.55, + "learning_rate": 2.3813081866205754e-05, + "loss": 0.9087, + "step": 2795 + }, + { + "epoch": 1.55, + "learning_rate": 2.3740768313953494e-05, + "loss": 0.923, + "step": 2800 + }, + { + "epoch": 1.55, + "learning_rate": 2.3668465322718004e-05, + "loss": 1.0212, + "step": 2805 + }, + { + "epoch": 1.55, + "learning_rate": 2.359617349889532e-05, + "loss": 0.9182, + "step": 2810 + }, + { + "epoch": 1.56, + "learning_rate": 2.3523893448787818e-05, + "loss": 0.9316, + "step": 2815 + }, + { + "epoch": 1.56, + "learning_rate": 2.3451625778599122e-05, + "loss": 0.94, + "step": 2820 + }, + { + "epoch": 1.56, + "learning_rate": 2.3379371094429038e-05, + "loss": 0.9181, + "step": 2825 + }, + { + "epoch": 1.56, + "learning_rate": 2.3307130002268457e-05, + "loss": 0.937, + "step": 2830 + }, + { + "epoch": 1.57, + "learning_rate": 2.3234903107994287e-05, + "loss": 0.9026, + "step": 2835 + }, + { + "epoch": 1.57, + "learning_rate": 2.3162691017364317e-05, + "loss": 0.954, + "step": 2840 + }, + { + "epoch": 1.57, + "learning_rate": 2.3090494336012253e-05, + "loss": 0.9661, + "step": 2845 + }, + { + "epoch": 1.58, + "learning_rate": 2.3018313669442483e-05, + "loss": 0.9127, + "step": 2850 + }, + { + "epoch": 1.58, + "learning_rate": 2.2946149623025158e-05, + "loss": 0.9317, + "step": 2855 + }, + { + "epoch": 1.58, + "learning_rate": 2.2874002801990978e-05, + "loss": 0.9856, + "step": 2860 + }, + { + "epoch": 1.58, + "learning_rate": 2.28018738114262e-05, + "loss": 1.0021, + "step": 2865 + }, + { + "epoch": 1.59, + "learning_rate": 2.272976325626755e-05, + "loss": 0.9655, + "step": 2870 + }, + { + "epoch": 1.59, + "learning_rate": 2.265767174129711e-05, + "loss": 0.9619, + "step": 2875 + }, + { + "epoch": 1.59, + "learning_rate": 2.2585599871137313e-05, + "loss": 0.9383, + "step": 2880 + }, + { + "epoch": 1.6, + "learning_rate": 2.251354825024579e-05, + "loss": 0.9332, + "step": 2885 + }, + { + "epoch": 1.6, + "learning_rate": 2.244151748291039e-05, + "loss": 0.9544, + "step": 2890 + }, + { + "epoch": 1.6, + "learning_rate": 2.236950817324401e-05, + "loss": 0.9343, + "step": 2895 + }, + { + "epoch": 1.6, + "learning_rate": 2.2297520925179647e-05, + "loss": 0.9189, + "step": 2900 + }, + { + "epoch": 1.61, + "learning_rate": 2.222555634246521e-05, + "loss": 0.9341, + "step": 2905 + }, + { + "epoch": 1.61, + "learning_rate": 2.215361502865858e-05, + "loss": 0.9567, + "step": 2910 + }, + { + "epoch": 1.61, + "learning_rate": 2.2081697587122423e-05, + "loss": 0.9047, + "step": 2915 + }, + { + "epoch": 1.61, + "learning_rate": 2.200980462101922e-05, + "loss": 0.9126, + "step": 2920 + }, + { + "epoch": 1.62, + "learning_rate": 2.1937936733306195e-05, + "loss": 0.9523, + "step": 2925 + }, + { + "epoch": 1.62, + "learning_rate": 2.18660945267302e-05, + "loss": 0.8802, + "step": 2930 + }, + { + "epoch": 1.62, + "learning_rate": 2.179427860382276e-05, + "loss": 0.9197, + "step": 2935 + }, + { + "epoch": 1.63, + "learning_rate": 2.1722489566894903e-05, + "loss": 0.9255, + "step": 2940 + }, + { + "epoch": 1.63, + "learning_rate": 2.1650728018032206e-05, + "loss": 0.8921, + "step": 2945 + }, + { + "epoch": 1.63, + "learning_rate": 2.15789945590897e-05, + "loss": 0.9607, + "step": 2950 + }, + { + "epoch": 1.63, + "learning_rate": 2.150728979168683e-05, + "loss": 0.9755, + "step": 2955 + }, + { + "epoch": 1.64, + "learning_rate": 2.1435614317202384e-05, + "loss": 0.9943, + "step": 2960 + }, + { + "epoch": 1.64, + "learning_rate": 2.1363968736769508e-05, + "loss": 0.9463, + "step": 2965 + }, + { + "epoch": 1.64, + "learning_rate": 2.1292353651270617e-05, + "loss": 0.9107, + "step": 2970 + }, + { + "epoch": 1.65, + "learning_rate": 2.1220769661332365e-05, + "loss": 0.9311, + "step": 2975 + }, + { + "epoch": 1.65, + "learning_rate": 2.1149217367320622e-05, + "loss": 0.9459, + "step": 2980 + }, + { + "epoch": 1.65, + "learning_rate": 2.107769736933541e-05, + "loss": 0.9439, + "step": 2985 + }, + { + "epoch": 1.65, + "learning_rate": 2.100621026720591e-05, + "loss": 0.9719, + "step": 2990 + }, + { + "epoch": 1.66, + "learning_rate": 2.093475666048539e-05, + "loss": 0.9569, + "step": 2995 + }, + { + "epoch": 1.66, + "learning_rate": 2.0863337148446222e-05, + "loss": 0.9308, + "step": 3000 + }, + { + "epoch": 1.66, + "learning_rate": 2.07919523300748e-05, + "loss": 0.9269, + "step": 3005 + }, + { + "epoch": 1.66, + "learning_rate": 2.0720602804066552e-05, + "loss": 0.9358, + "step": 3010 + }, + { + "epoch": 1.67, + "learning_rate": 2.0649289168820943e-05, + "loss": 0.9291, + "step": 3015 + }, + { + "epoch": 1.67, + "learning_rate": 2.0578012022436386e-05, + "loss": 0.969, + "step": 3020 + }, + { + "epoch": 1.67, + "learning_rate": 2.0506771962705304e-05, + "loss": 0.9319, + "step": 3025 + }, + { + "epoch": 1.68, + "learning_rate": 2.0435569587109042e-05, + "loss": 0.9574, + "step": 3030 + }, + { + "epoch": 1.68, + "learning_rate": 2.036440549281293e-05, + "loss": 0.9654, + "step": 3035 + }, + { + "epoch": 1.68, + "learning_rate": 2.0293280276661204e-05, + "loss": 0.9742, + "step": 3040 + }, + { + "epoch": 1.68, + "learning_rate": 2.0222194535172067e-05, + "loss": 0.9014, + "step": 3045 + }, + { + "epoch": 1.69, + "learning_rate": 2.0151148864532623e-05, + "loss": 0.9246, + "step": 3050 + }, + { + "epoch": 1.69, + "learning_rate": 2.0080143860593913e-05, + "loss": 0.9522, + "step": 3055 + }, + { + "epoch": 1.69, + "learning_rate": 2.0009180118865933e-05, + "loss": 0.9567, + "step": 3060 + }, + { + "epoch": 1.69, + "learning_rate": 1.9938258234512588e-05, + "loss": 0.983, + "step": 3065 + }, + { + "epoch": 1.7, + "learning_rate": 1.9867378802346764e-05, + "loss": 0.8722, + "step": 3070 + }, + { + "epoch": 1.7, + "learning_rate": 1.979654241682527e-05, + "loss": 0.9122, + "step": 3075 + }, + { + "epoch": 1.7, + "learning_rate": 1.972574967204391e-05, + "loss": 0.9362, + "step": 3080 + }, + { + "epoch": 1.71, + "learning_rate": 1.9655001161732478e-05, + "loss": 0.8944, + "step": 3085 + }, + { + "epoch": 1.71, + "learning_rate": 1.9584297479249774e-05, + "loss": 0.9329, + "step": 3090 + }, + { + "epoch": 1.71, + "learning_rate": 1.9513639217578636e-05, + "loss": 0.9707, + "step": 3095 + }, + { + "epoch": 1.71, + "learning_rate": 1.9443026969320955e-05, + "loss": 0.9367, + "step": 3100 + }, + { + "epoch": 1.72, + "learning_rate": 1.937246132669272e-05, + "loss": 0.896, + "step": 3105 + }, + { + "epoch": 1.72, + "learning_rate": 1.9301942881519047e-05, + "loss": 0.97, + "step": 3110 + }, + { + "epoch": 1.72, + "learning_rate": 1.9231472225229216e-05, + "loss": 0.9638, + "step": 3115 + }, + { + "epoch": 1.73, + "learning_rate": 1.9161049948851684e-05, + "loss": 0.9561, + "step": 3120 + }, + { + "epoch": 1.73, + "learning_rate": 1.9090676643009193e-05, + "loss": 0.9734, + "step": 3125 + }, + { + "epoch": 1.73, + "learning_rate": 1.902035289791373e-05, + "loss": 0.9651, + "step": 3130 + }, + { + "epoch": 1.73, + "learning_rate": 1.8950079303361658e-05, + "loss": 0.9489, + "step": 3135 + }, + { + "epoch": 1.74, + "learning_rate": 1.8879856448728723e-05, + "loss": 0.9893, + "step": 3140 + }, + { + "epoch": 1.74, + "learning_rate": 1.8809684922965097e-05, + "loss": 0.9549, + "step": 3145 + }, + { + "epoch": 1.74, + "learning_rate": 1.8739565314590507e-05, + "loss": 0.9196, + "step": 3150 + }, + { + "epoch": 1.74, + "learning_rate": 1.8669498211689216e-05, + "loss": 0.9568, + "step": 3155 + }, + { + "epoch": 1.75, + "learning_rate": 1.859948420190517e-05, + "loss": 0.904, + "step": 3160 + }, + { + "epoch": 1.75, + "learning_rate": 1.852952387243698e-05, + "loss": 0.9375, + "step": 3165 + }, + { + "epoch": 1.75, + "learning_rate": 1.8459617810033096e-05, + "loss": 0.9048, + "step": 3170 + }, + { + "epoch": 1.76, + "learning_rate": 1.83897666009868e-05, + "loss": 0.9514, + "step": 3175 + }, + { + "epoch": 1.76, + "learning_rate": 1.8319970831131363e-05, + "loss": 0.8855, + "step": 3180 + }, + { + "epoch": 1.76, + "learning_rate": 1.825023108583505e-05, + "loss": 0.9119, + "step": 3185 + }, + { + "epoch": 1.76, + "learning_rate": 1.818054794999628e-05, + "loss": 0.9733, + "step": 3190 + }, + { + "epoch": 1.77, + "learning_rate": 1.8110922008038705e-05, + "loss": 0.9597, + "step": 3195 + }, + { + "epoch": 1.77, + "learning_rate": 1.8041353843906275e-05, + "loss": 0.8952, + "step": 3200 + }, + { + "epoch": 1.77, + "learning_rate": 1.797184404105839e-05, + "loss": 0.8934, + "step": 3205 + }, + { + "epoch": 1.77, + "learning_rate": 1.7902393182464955e-05, + "loss": 0.9299, + "step": 3210 + }, + { + "epoch": 1.78, + "learning_rate": 1.7833001850601544e-05, + "loss": 0.9247, + "step": 3215 + }, + { + "epoch": 1.78, + "learning_rate": 1.7763670627444465e-05, + "loss": 0.9672, + "step": 3220 + }, + { + "epoch": 1.78, + "learning_rate": 1.7694400094465913e-05, + "loss": 0.9451, + "step": 3225 + }, + { + "epoch": 1.79, + "learning_rate": 1.7625190832629085e-05, + "loss": 0.9294, + "step": 3230 + }, + { + "epoch": 1.79, + "learning_rate": 1.7556043422383293e-05, + "loss": 0.9145, + "step": 3235 + }, + { + "epoch": 1.79, + "learning_rate": 1.7486958443659112e-05, + "loss": 0.9508, + "step": 3240 + }, + { + "epoch": 1.79, + "learning_rate": 1.7417936475863526e-05, + "loss": 0.8725, + "step": 3245 + }, + { + "epoch": 1.8, + "learning_rate": 1.7348978097875036e-05, + "loss": 0.9195, + "step": 3250 + }, + { + "epoch": 1.8, + "learning_rate": 1.728008388803883e-05, + "loss": 0.933, + "step": 3255 + }, + { + "epoch": 1.8, + "learning_rate": 1.7211254424161933e-05, + "loss": 0.9747, + "step": 3260 + }, + { + "epoch": 1.81, + "learning_rate": 1.7142490283508324e-05, + "loss": 0.9168, + "step": 3265 + }, + { + "epoch": 1.81, + "learning_rate": 1.707379204279418e-05, + "loss": 0.9844, + "step": 3270 + }, + { + "epoch": 1.81, + "learning_rate": 1.700516027818293e-05, + "loss": 0.9071, + "step": 3275 + }, + { + "epoch": 1.81, + "learning_rate": 1.6936595565280488e-05, + "loss": 0.9311, + "step": 3280 + }, + { + "epoch": 1.82, + "learning_rate": 1.686809847913045e-05, + "loss": 0.9419, + "step": 3285 + }, + { + "epoch": 1.82, + "learning_rate": 1.679966959420918e-05, + "loss": 0.9679, + "step": 3290 + }, + { + "epoch": 1.82, + "learning_rate": 1.67313094844211e-05, + "loss": 0.9601, + "step": 3295 + }, + { + "epoch": 1.82, + "learning_rate": 1.6663018723093774e-05, + "loss": 0.9022, + "step": 3300 + }, + { + "epoch": 1.83, + "learning_rate": 1.6594797882973196e-05, + "loss": 0.8925, + "step": 3305 + }, + { + "epoch": 1.83, + "learning_rate": 1.6526647536218894e-05, + "loss": 0.9238, + "step": 3310 + }, + { + "epoch": 1.83, + "learning_rate": 1.6458568254399225e-05, + "loss": 0.9632, + "step": 3315 + }, + { + "epoch": 1.84, + "learning_rate": 1.6390560608486496e-05, + "loss": 0.9164, + "step": 3320 + }, + { + "epoch": 1.84, + "learning_rate": 1.6322625168852217e-05, + "loss": 0.9505, + "step": 3325 + }, + { + "epoch": 1.84, + "learning_rate": 1.6254762505262338e-05, + "loss": 0.9622, + "step": 3330 + }, + { + "epoch": 1.84, + "learning_rate": 1.618697318687241e-05, + "loss": 0.9204, + "step": 3335 + }, + { + "epoch": 1.85, + "learning_rate": 1.6119257782222895e-05, + "loss": 0.9504, + "step": 3340 + }, + { + "epoch": 1.85, + "learning_rate": 1.6051616859234285e-05, + "loss": 0.9384, + "step": 3345 + }, + { + "epoch": 1.85, + "learning_rate": 1.5984050985202474e-05, + "loss": 0.9374, + "step": 3350 + }, + { + "epoch": 1.86, + "learning_rate": 1.591656072679387e-05, + "loss": 0.9937, + "step": 3355 + }, + { + "epoch": 1.86, + "learning_rate": 1.5849146650040737e-05, + "loss": 0.9587, + "step": 3360 + }, + { + "epoch": 1.86, + "learning_rate": 1.5781809320336412e-05, + "loss": 0.9312, + "step": 3365 + }, + { + "epoch": 1.86, + "learning_rate": 1.5714549302430536e-05, + "loss": 0.9343, + "step": 3370 + }, + { + "epoch": 1.87, + "learning_rate": 1.5647367160424393e-05, + "loss": 0.9419, + "step": 3375 + }, + { + "epoch": 1.87, + "learning_rate": 1.558026345776608e-05, + "loss": 0.9371, + "step": 3380 + }, + { + "epoch": 1.87, + "learning_rate": 1.551323875724587e-05, + "loss": 0.913, + "step": 3385 + }, + { + "epoch": 1.87, + "learning_rate": 1.5446293620991437e-05, + "loss": 0.9195, + "step": 3390 + }, + { + "epoch": 1.88, + "learning_rate": 1.5379428610463174e-05, + "loss": 0.9164, + "step": 3395 + }, + { + "epoch": 1.88, + "learning_rate": 1.531264428644945e-05, + "loss": 0.9256, + "step": 3400 + } + ], + "max_steps": 5424, + "num_train_epochs": 3, + "total_flos": 1.1048787496245658e+18, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-3400/training_args.bin b/checkpoint-3400/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..77433459daca6603950afc463ff989e817f81ac3 --- /dev/null +++ b/checkpoint-3400/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0f6a8853599a52b1f15984d75ce4b6434c335fdc67d4509d0b2ca0d8c613590a +size 3362 diff --git a/checkpoint-3500/README.md b/checkpoint-3500/README.md new file mode 100644 index 0000000000000000000000000000000000000000..53c0fe6b436905ad7a2c060e98fe1ebdeac1a2fe --- /dev/null +++ b/checkpoint-3500/README.md @@ -0,0 +1,15 @@ +--- +library_name: peft +--- +## Training procedure + + +The following `bitsandbytes` quantization config was used during training: +- load_in_8bit: True +- llm_int8_threshold: 6.0 +- llm_int8_skip_modules: None +- llm_int8_enable_fp32_cpu_offload: False +### Framework versions + + +- PEFT 0.5.0 diff --git a/checkpoint-3500/adapter_config.json b/checkpoint-3500/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..514ae99ac7471cc26016f7575e44dd8304f29b07 --- /dev/null +++ b/checkpoint-3500/adapter_config.json @@ -0,0 +1,21 @@ +{ + "auto_mapping": null, + "base_model_name_or_path": "meta-llama/Llama-2-7b-chat-hf", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "q_proj", + "v_proj" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-3500/adapter_model.bin b/checkpoint-3500/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..7442285a6295f246b8f31134194e3efe92a5b7e6 --- /dev/null +++ b/checkpoint-3500/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9e0aaa8d1d9c24e30c094474d2eea9ec21922b3469d3dde255913a1de0c82564 +size 16821197 diff --git a/checkpoint-3500/finetuning_args.json b/checkpoint-3500/finetuning_args.json new file mode 100644 index 0000000000000000000000000000000000000000..d01efc206b59c6f88548e8f3940579f2ed2af33b --- /dev/null +++ b/checkpoint-3500/finetuning_args.json @@ -0,0 +1,16 @@ +{ + "dpo_beta": 0.1, + "finetuning_type": "lora", + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "lora_rank": 8, + "lora_target": [ + "q_proj", + "v_proj" + ], + "name_module_trainable": "mlp", + "num_hidden_layers": 32, + "num_layer_trainable": 3, + "ppo_score_norm": false, + "resume_lora_training": true +} diff --git a/checkpoint-3500/optimizer.pt b/checkpoint-3500/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..2e035d8261d23a5464a74990a13d33b2ee0be404 --- /dev/null +++ b/checkpoint-3500/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:57cc277962cf48768f61c32eff030fb1a1967dd9a62bb4f58fc9a8433ffd5c2d +size 33629893 diff --git a/checkpoint-3500/rng_state.pth b/checkpoint-3500/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..73ed23d3534a5586804af70483d97e8a52bb2164 --- /dev/null +++ b/checkpoint-3500/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4ae5a3200408814c3a46da73d425342952c13941329b8819f5caf1848e29e97c +size 14575 diff --git a/checkpoint-3500/scaler.pt b/checkpoint-3500/scaler.pt new file mode 100644 index 0000000000000000000000000000000000000000..a1d2155d50ddf4f4923d06bad1821929a40c49b9 --- /dev/null +++ b/checkpoint-3500/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d4acfabbdd3bb0ebe8c930c3e8f226f5d274efb84e86fe9b7e9387a8329c6bc9 +size 557 diff --git a/checkpoint-3500/scheduler.pt b/checkpoint-3500/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..895e5f4b1b39a0aa9232ebb70e3a8719126ad4fd --- /dev/null +++ b/checkpoint-3500/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:60c0f076bba8f640cc6e1ac9de53260823a93627f5f05e653520c66068d084d8 +size 627 diff --git a/checkpoint-3500/trainer_state.json b/checkpoint-3500/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..07a2736c88085f8ba4649ebfb7d843cbcc5492e2 --- /dev/null +++ b/checkpoint-3500/trainer_state.json @@ -0,0 +1,4216 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.9353055017970693, + "global_step": 3500, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.9999895164082156e-05, + "loss": 1.6349, + "step": 5 + }, + { + "epoch": 0.01, + "learning_rate": 4.999958065720787e-05, + "loss": 1.6199, + "step": 10 + }, + { + "epoch": 0.01, + "learning_rate": 4.999905648201487e-05, + "loss": 1.4834, + "step": 15 + }, + { + "epoch": 0.01, + "learning_rate": 4.999832264289934e-05, + "loss": 1.3882, + "step": 20 + }, + { + "epoch": 0.01, + "learning_rate": 4.999737914601591e-05, + "loss": 1.3679, + "step": 25 + }, + { + "epoch": 0.02, + "learning_rate": 4.999622599927756e-05, + "loss": 1.2396, + "step": 30 + }, + { + "epoch": 0.02, + "learning_rate": 4.999486321235559e-05, + "loss": 1.321, + "step": 35 + }, + { + "epoch": 0.02, + "learning_rate": 4.9993290796679516e-05, + "loss": 1.2874, + "step": 40 + }, + { + "epoch": 0.02, + "learning_rate": 4.999150876543699e-05, + "loss": 1.2607, + "step": 45 + }, + { + "epoch": 0.03, + "learning_rate": 4.9989517133573694e-05, + "loss": 1.2454, + "step": 50 + }, + { + "epoch": 0.03, + "learning_rate": 4.9987315917793174e-05, + "loss": 1.2799, + "step": 55 + }, + { + "epoch": 0.03, + "learning_rate": 4.998490513655676e-05, + "loss": 1.2575, + "step": 60 + }, + { + "epoch": 0.04, + "learning_rate": 4.998228481008337e-05, + "loss": 1.2404, + "step": 65 + }, + { + "epoch": 0.04, + "learning_rate": 4.997945496034934e-05, + "loss": 1.2219, + "step": 70 + }, + { + "epoch": 0.04, + "learning_rate": 4.9976415611088267e-05, + "loss": 1.2241, + "step": 75 + }, + { + "epoch": 0.04, + "learning_rate": 4.997316678779079e-05, + "loss": 1.1716, + "step": 80 + }, + { + "epoch": 0.05, + "learning_rate": 4.996970851770438e-05, + "loss": 1.1883, + "step": 85 + }, + { + "epoch": 0.05, + "learning_rate": 4.9966040829833115e-05, + "loss": 1.205, + "step": 90 + }, + { + "epoch": 0.05, + "learning_rate": 4.9962163754937426e-05, + "loss": 1.1246, + "step": 95 + }, + { + "epoch": 0.06, + "learning_rate": 4.995807732553384e-05, + "loss": 1.1636, + "step": 100 + }, + { + "epoch": 0.06, + "learning_rate": 4.9953781575894723e-05, + "loss": 1.158, + "step": 105 + }, + { + "epoch": 0.06, + "learning_rate": 4.9949276542048e-05, + "loss": 1.1477, + "step": 110 + }, + { + "epoch": 0.06, + "learning_rate": 4.9944562261776805e-05, + "loss": 1.1678, + "step": 115 + }, + { + "epoch": 0.07, + "learning_rate": 4.9939638774619216e-05, + "loss": 1.1501, + "step": 120 + }, + { + "epoch": 0.07, + "learning_rate": 4.99345061218679e-05, + "loss": 1.1955, + "step": 125 + }, + { + "epoch": 0.07, + "learning_rate": 4.9929164346569756e-05, + "loss": 1.1724, + "step": 130 + }, + { + "epoch": 0.07, + "learning_rate": 4.9923613493525576e-05, + "loss": 1.177, + "step": 135 + }, + { + "epoch": 0.08, + "learning_rate": 4.991785360928968e-05, + "loss": 1.1418, + "step": 140 + }, + { + "epoch": 0.08, + "learning_rate": 4.991188474216947e-05, + "loss": 1.1898, + "step": 145 + }, + { + "epoch": 0.08, + "learning_rate": 4.9905706942225094e-05, + "loss": 1.1479, + "step": 150 + }, + { + "epoch": 0.09, + "learning_rate": 4.9899320261268966e-05, + "loss": 1.1356, + "step": 155 + }, + { + "epoch": 0.09, + "learning_rate": 4.989272475286537e-05, + "loss": 1.1397, + "step": 160 + }, + { + "epoch": 0.09, + "learning_rate": 4.9885920472330004e-05, + "loss": 1.1215, + "step": 165 + }, + { + "epoch": 0.09, + "learning_rate": 4.9878907476729516e-05, + "loss": 1.167, + "step": 170 + }, + { + "epoch": 0.1, + "learning_rate": 4.9871685824881e-05, + "loss": 1.1219, + "step": 175 + }, + { + "epoch": 0.1, + "learning_rate": 4.9864255577351534e-05, + "loss": 1.0835, + "step": 180 + }, + { + "epoch": 0.1, + "learning_rate": 4.985661679645769e-05, + "loss": 1.0721, + "step": 185 + }, + { + "epoch": 0.11, + "learning_rate": 4.9848769546264915e-05, + "loss": 1.0692, + "step": 190 + }, + { + "epoch": 0.11, + "learning_rate": 4.9840713892587146e-05, + "loss": 1.0488, + "step": 195 + }, + { + "epoch": 0.11, + "learning_rate": 4.983244990298609e-05, + "loss": 1.1285, + "step": 200 + }, + { + "epoch": 0.11, + "learning_rate": 4.982397764677081e-05, + "loss": 1.0832, + "step": 205 + }, + { + "epoch": 0.12, + "learning_rate": 4.981529719499704e-05, + "loss": 1.0652, + "step": 210 + }, + { + "epoch": 0.12, + "learning_rate": 4.980640862046663e-05, + "loss": 1.1043, + "step": 215 + }, + { + "epoch": 0.12, + "learning_rate": 4.979731199772693e-05, + "loss": 1.112, + "step": 220 + }, + { + "epoch": 0.12, + "learning_rate": 4.9788007403070146e-05, + "loss": 1.1029, + "step": 225 + }, + { + "epoch": 0.13, + "learning_rate": 4.977849491453277e-05, + "loss": 1.0869, + "step": 230 + }, + { + "epoch": 0.13, + "learning_rate": 4.976877461189481e-05, + "loss": 1.0843, + "step": 235 + }, + { + "epoch": 0.13, + "learning_rate": 4.975884657667922e-05, + "loss": 1.0789, + "step": 240 + }, + { + "epoch": 0.14, + "learning_rate": 4.974871089215118e-05, + "loss": 1.0449, + "step": 245 + }, + { + "epoch": 0.14, + "learning_rate": 4.9738367643317405e-05, + "loss": 1.1053, + "step": 250 + }, + { + "epoch": 0.14, + "learning_rate": 4.9727816916925395e-05, + "loss": 1.0651, + "step": 255 + }, + { + "epoch": 0.14, + "learning_rate": 4.971705880146276e-05, + "loss": 1.0828, + "step": 260 + }, + { + "epoch": 0.15, + "learning_rate": 4.970609338715646e-05, + "loss": 1.0932, + "step": 265 + }, + { + "epoch": 0.15, + "learning_rate": 4.969492076597203e-05, + "loss": 1.0648, + "step": 270 + }, + { + "epoch": 0.15, + "learning_rate": 4.968354103161283e-05, + "loss": 1.0948, + "step": 275 + }, + { + "epoch": 0.15, + "learning_rate": 4.967195427951926e-05, + "loss": 1.0721, + "step": 280 + }, + { + "epoch": 0.16, + "learning_rate": 4.9660160606867936e-05, + "loss": 1.124, + "step": 285 + }, + { + "epoch": 0.16, + "learning_rate": 4.9648160112570896e-05, + "loss": 1.0963, + "step": 290 + }, + { + "epoch": 0.16, + "learning_rate": 4.9635952897274773e-05, + "loss": 1.1078, + "step": 295 + }, + { + "epoch": 0.17, + "learning_rate": 4.9623539063359925e-05, + "loss": 1.1059, + "step": 300 + }, + { + "epoch": 0.17, + "learning_rate": 4.961091871493962e-05, + "loss": 1.1032, + "step": 305 + }, + { + "epoch": 0.17, + "learning_rate": 4.959809195785912e-05, + "loss": 1.0595, + "step": 310 + }, + { + "epoch": 0.17, + "learning_rate": 4.958505889969481e-05, + "loss": 1.1096, + "step": 315 + }, + { + "epoch": 0.18, + "learning_rate": 4.957181964975329e-05, + "loss": 1.0589, + "step": 320 + }, + { + "epoch": 0.18, + "learning_rate": 4.955837431907049e-05, + "loss": 1.0608, + "step": 325 + }, + { + "epoch": 0.18, + "learning_rate": 4.954472302041069e-05, + "loss": 1.0819, + "step": 330 + }, + { + "epoch": 0.19, + "learning_rate": 4.9530865868265605e-05, + "loss": 1.0759, + "step": 335 + }, + { + "epoch": 0.19, + "learning_rate": 4.951680297885342e-05, + "loss": 1.0515, + "step": 340 + }, + { + "epoch": 0.19, + "learning_rate": 4.950253447011779e-05, + "loss": 1.0371, + "step": 345 + }, + { + "epoch": 0.19, + "learning_rate": 4.948806046172691e-05, + "loss": 1.0619, + "step": 350 + }, + { + "epoch": 0.2, + "learning_rate": 4.947338107507245e-05, + "loss": 1.0757, + "step": 355 + }, + { + "epoch": 0.2, + "learning_rate": 4.945849643326857e-05, + "loss": 1.0686, + "step": 360 + }, + { + "epoch": 0.2, + "learning_rate": 4.9443406661150874e-05, + "loss": 1.0809, + "step": 365 + }, + { + "epoch": 0.2, + "learning_rate": 4.942811188527537e-05, + "loss": 1.0704, + "step": 370 + }, + { + "epoch": 0.21, + "learning_rate": 4.941261223391742e-05, + "loss": 1.0655, + "step": 375 + }, + { + "epoch": 0.21, + "learning_rate": 4.939690783707063e-05, + "loss": 1.1182, + "step": 380 + }, + { + "epoch": 0.21, + "learning_rate": 4.938099882644578e-05, + "loss": 1.081, + "step": 385 + }, + { + "epoch": 0.22, + "learning_rate": 4.9364885335469734e-05, + "loss": 1.0792, + "step": 390 + }, + { + "epoch": 0.22, + "learning_rate": 4.93485674992843e-05, + "loss": 1.0244, + "step": 395 + }, + { + "epoch": 0.22, + "learning_rate": 4.933204545474511e-05, + "loss": 1.0531, + "step": 400 + }, + { + "epoch": 0.22, + "learning_rate": 4.9315319340420465e-05, + "loss": 1.048, + "step": 405 + }, + { + "epoch": 0.23, + "learning_rate": 4.929838929659015e-05, + "loss": 1.04, + "step": 410 + }, + { + "epoch": 0.23, + "learning_rate": 4.9281255465244314e-05, + "loss": 1.0686, + "step": 415 + }, + { + "epoch": 0.23, + "learning_rate": 4.926391799008223e-05, + "loss": 1.0933, + "step": 420 + }, + { + "epoch": 0.24, + "learning_rate": 4.924637701651111e-05, + "loss": 1.0313, + "step": 425 + }, + { + "epoch": 0.24, + "learning_rate": 4.9228632691644874e-05, + "loss": 1.0811, + "step": 430 + }, + { + "epoch": 0.24, + "learning_rate": 4.921068516430293e-05, + "loss": 1.0617, + "step": 435 + }, + { + "epoch": 0.24, + "learning_rate": 4.919253458500892e-05, + "loss": 1.0482, + "step": 440 + }, + { + "epoch": 0.25, + "learning_rate": 4.9174181105989445e-05, + "loss": 1.0681, + "step": 445 + }, + { + "epoch": 0.25, + "learning_rate": 4.9155624881172834e-05, + "loss": 1.0476, + "step": 450 + }, + { + "epoch": 0.25, + "learning_rate": 4.913686606618777e-05, + "loss": 1.0463, + "step": 455 + }, + { + "epoch": 0.25, + "learning_rate": 4.911790481836208e-05, + "loss": 1.0213, + "step": 460 + }, + { + "epoch": 0.26, + "learning_rate": 4.909874129672133e-05, + "loss": 0.9855, + "step": 465 + }, + { + "epoch": 0.26, + "learning_rate": 4.907937566198757e-05, + "loss": 1.045, + "step": 470 + }, + { + "epoch": 0.26, + "learning_rate": 4.9059808076577914e-05, + "loss": 1.0442, + "step": 475 + }, + { + "epoch": 0.27, + "learning_rate": 4.904003870460323e-05, + "loss": 1.0354, + "step": 480 + }, + { + "epoch": 0.27, + "learning_rate": 4.9020067711866735e-05, + "loss": 1.0331, + "step": 485 + }, + { + "epoch": 0.27, + "learning_rate": 4.899989526586261e-05, + "loss": 1.0585, + "step": 490 + }, + { + "epoch": 0.27, + "learning_rate": 4.8979521535774636e-05, + "loss": 1.0223, + "step": 495 + }, + { + "epoch": 0.28, + "learning_rate": 4.895894669247468e-05, + "loss": 1.0118, + "step": 500 + }, + { + "epoch": 0.28, + "learning_rate": 4.8938170908521356e-05, + "loss": 1.0508, + "step": 505 + }, + { + "epoch": 0.28, + "learning_rate": 4.8917194358158534e-05, + "loss": 1.0656, + "step": 510 + }, + { + "epoch": 0.28, + "learning_rate": 4.8896017217313886e-05, + "loss": 1.0655, + "step": 515 + }, + { + "epoch": 0.29, + "learning_rate": 4.887463966359741e-05, + "loss": 1.0833, + "step": 520 + }, + { + "epoch": 0.29, + "learning_rate": 4.8853061876299956e-05, + "loss": 1.068, + "step": 525 + }, + { + "epoch": 0.29, + "learning_rate": 4.8831284036391684e-05, + "loss": 1.0487, + "step": 530 + }, + { + "epoch": 0.3, + "learning_rate": 4.880930632652058e-05, + "loss": 1.059, + "step": 535 + }, + { + "epoch": 0.3, + "learning_rate": 4.878712893101092e-05, + "loss": 1.0408, + "step": 540 + }, + { + "epoch": 0.3, + "learning_rate": 4.876475203586171e-05, + "loss": 0.9976, + "step": 545 + }, + { + "epoch": 0.3, + "learning_rate": 4.874217582874514e-05, + "loss": 1.0358, + "step": 550 + }, + { + "epoch": 0.31, + "learning_rate": 4.8719400499005e-05, + "loss": 1.0073, + "step": 555 + }, + { + "epoch": 0.31, + "learning_rate": 4.869642623765509e-05, + "loss": 1.0742, + "step": 560 + }, + { + "epoch": 0.31, + "learning_rate": 4.867325323737765e-05, + "loss": 1.0463, + "step": 565 + }, + { + "epoch": 0.32, + "learning_rate": 4.864988169252168e-05, + "loss": 1.0242, + "step": 570 + }, + { + "epoch": 0.32, + "learning_rate": 4.8626311799101375e-05, + "loss": 1.0773, + "step": 575 + }, + { + "epoch": 0.32, + "learning_rate": 4.860254375479446e-05, + "loss": 1.0371, + "step": 580 + }, + { + "epoch": 0.32, + "learning_rate": 4.8578577758940504e-05, + "loss": 1.0399, + "step": 585 + }, + { + "epoch": 0.33, + "learning_rate": 4.855441401253928e-05, + "loss": 1.0411, + "step": 590 + }, + { + "epoch": 0.33, + "learning_rate": 4.8530052718249076e-05, + "loss": 1.0476, + "step": 595 + }, + { + "epoch": 0.33, + "learning_rate": 4.850549408038498e-05, + "loss": 1.0075, + "step": 600 + }, + { + "epoch": 0.33, + "learning_rate": 4.848073830491717e-05, + "loss": 1.0045, + "step": 605 + }, + { + "epoch": 0.34, + "learning_rate": 4.845578559946923e-05, + "loss": 1.0368, + "step": 610 + }, + { + "epoch": 0.34, + "learning_rate": 4.8430636173316306e-05, + "loss": 1.0156, + "step": 615 + }, + { + "epoch": 0.34, + "learning_rate": 4.840529023738348e-05, + "loss": 1.0334, + "step": 620 + }, + { + "epoch": 0.35, + "learning_rate": 4.837974800424389e-05, + "loss": 0.9994, + "step": 625 + }, + { + "epoch": 0.35, + "learning_rate": 4.8354009688117026e-05, + "loss": 1.0467, + "step": 630 + }, + { + "epoch": 0.35, + "learning_rate": 4.8328075504866874e-05, + "loss": 0.9968, + "step": 635 + }, + { + "epoch": 0.35, + "learning_rate": 4.8301945672000164e-05, + "loss": 1.041, + "step": 640 + }, + { + "epoch": 0.36, + "learning_rate": 4.8275620408664487e-05, + "loss": 0.9811, + "step": 645 + }, + { + "epoch": 0.36, + "learning_rate": 4.8249099935646494e-05, + "loss": 1.0235, + "step": 650 + }, + { + "epoch": 0.36, + "learning_rate": 4.822238447537003e-05, + "loss": 1.0152, + "step": 655 + }, + { + "epoch": 0.36, + "learning_rate": 4.819547425189429e-05, + "loss": 1.038, + "step": 660 + }, + { + "epoch": 0.37, + "learning_rate": 4.81683694909119e-05, + "loss": 1.0387, + "step": 665 + }, + { + "epoch": 0.37, + "learning_rate": 4.814107041974707e-05, + "loss": 1.019, + "step": 670 + }, + { + "epoch": 0.37, + "learning_rate": 4.811357726735366e-05, + "loss": 1.0403, + "step": 675 + }, + { + "epoch": 0.38, + "learning_rate": 4.808589026431324e-05, + "loss": 1.0828, + "step": 680 + }, + { + "epoch": 0.38, + "learning_rate": 4.805800964283322e-05, + "loss": 1.0128, + "step": 685 + }, + { + "epoch": 0.38, + "learning_rate": 4.802993563674483e-05, + "loss": 1.0502, + "step": 690 + }, + { + "epoch": 0.38, + "learning_rate": 4.80016684815012e-05, + "loss": 1.0169, + "step": 695 + }, + { + "epoch": 0.39, + "learning_rate": 4.7973208414175406e-05, + "loss": 1.0316, + "step": 700 + }, + { + "epoch": 0.39, + "learning_rate": 4.794455567345842e-05, + "loss": 1.0263, + "step": 705 + }, + { + "epoch": 0.39, + "learning_rate": 4.791571049965714e-05, + "loss": 1.0801, + "step": 710 + }, + { + "epoch": 0.4, + "learning_rate": 4.7886673134692404e-05, + "loss": 1.0575, + "step": 715 + }, + { + "epoch": 0.4, + "learning_rate": 4.7857443822096905e-05, + "loss": 0.9767, + "step": 720 + }, + { + "epoch": 0.4, + "learning_rate": 4.782802280701319e-05, + "loss": 0.9666, + "step": 725 + }, + { + "epoch": 0.4, + "learning_rate": 4.77984103361916e-05, + "loss": 0.9983, + "step": 730 + }, + { + "epoch": 0.41, + "learning_rate": 4.776860665798816e-05, + "loss": 1.0219, + "step": 735 + }, + { + "epoch": 0.41, + "learning_rate": 4.773861202236257e-05, + "loss": 0.9963, + "step": 740 + }, + { + "epoch": 0.41, + "learning_rate": 4.770842668087602e-05, + "loss": 1.0219, + "step": 745 + }, + { + "epoch": 0.41, + "learning_rate": 4.767805088668916e-05, + "loss": 1.0029, + "step": 750 + }, + { + "epoch": 0.42, + "learning_rate": 4.7647484894559936e-05, + "loss": 1.0077, + "step": 755 + }, + { + "epoch": 0.42, + "learning_rate": 4.7616728960841444e-05, + "loss": 0.9908, + "step": 760 + }, + { + "epoch": 0.42, + "learning_rate": 4.758578334347981e-05, + "loss": 1.0092, + "step": 765 + }, + { + "epoch": 0.43, + "learning_rate": 4.7554648302012015e-05, + "loss": 0.9874, + "step": 770 + }, + { + "epoch": 0.43, + "learning_rate": 4.7523324097563706e-05, + "loss": 1.0185, + "step": 775 + }, + { + "epoch": 0.43, + "learning_rate": 4.749181099284703e-05, + "loss": 1.0533, + "step": 780 + }, + { + "epoch": 0.43, + "learning_rate": 4.746010925215839e-05, + "loss": 1.0083, + "step": 785 + }, + { + "epoch": 0.44, + "learning_rate": 4.74282191413763e-05, + "loss": 1.0191, + "step": 790 + }, + { + "epoch": 0.44, + "learning_rate": 4.7396140927959045e-05, + "loss": 0.9786, + "step": 795 + }, + { + "epoch": 0.44, + "learning_rate": 4.7363874880942574e-05, + "loss": 1.0197, + "step": 800 + }, + { + "epoch": 0.45, + "learning_rate": 4.733142127093813e-05, + "loss": 1.0471, + "step": 805 + }, + { + "epoch": 0.45, + "learning_rate": 4.7298780370130014e-05, + "loss": 1.0134, + "step": 810 + }, + { + "epoch": 0.45, + "learning_rate": 4.726595245227336e-05, + "loss": 0.9801, + "step": 815 + }, + { + "epoch": 0.45, + "learning_rate": 4.723293779269173e-05, + "loss": 0.9944, + "step": 820 + }, + { + "epoch": 0.46, + "learning_rate": 4.7199736668274924e-05, + "loss": 1.0358, + "step": 825 + }, + { + "epoch": 0.46, + "learning_rate": 4.716634935747655e-05, + "loss": 0.9907, + "step": 830 + }, + { + "epoch": 0.46, + "learning_rate": 4.713277614031177e-05, + "loss": 1.0698, + "step": 835 + }, + { + "epoch": 0.46, + "learning_rate": 4.70990172983549e-05, + "loss": 1.0466, + "step": 840 + }, + { + "epoch": 0.47, + "learning_rate": 4.706507311473707e-05, + "loss": 0.9489, + "step": 845 + }, + { + "epoch": 0.47, + "learning_rate": 4.703094387414385e-05, + "loss": 0.9936, + "step": 850 + }, + { + "epoch": 0.47, + "learning_rate": 4.699662986281288e-05, + "loss": 0.9672, + "step": 855 + }, + { + "epoch": 0.48, + "learning_rate": 4.696213136853141e-05, + "loss": 1.0164, + "step": 860 + }, + { + "epoch": 0.48, + "learning_rate": 4.6927448680633954e-05, + "loss": 0.9817, + "step": 865 + }, + { + "epoch": 0.48, + "learning_rate": 4.689258208999983e-05, + "loss": 1.028, + "step": 870 + }, + { + "epoch": 0.48, + "learning_rate": 4.6857531889050716e-05, + "loss": 1.0239, + "step": 875 + }, + { + "epoch": 0.49, + "learning_rate": 4.682229837174821e-05, + "loss": 1.0378, + "step": 880 + }, + { + "epoch": 0.49, + "learning_rate": 4.678688183359135e-05, + "loss": 0.9908, + "step": 885 + }, + { + "epoch": 0.49, + "learning_rate": 4.675128257161418e-05, + "loss": 0.9384, + "step": 890 + }, + { + "epoch": 0.49, + "learning_rate": 4.671550088438319e-05, + "loss": 0.9481, + "step": 895 + }, + { + "epoch": 0.5, + "learning_rate": 4.6679537071994874e-05, + "loss": 0.9541, + "step": 900 + }, + { + "epoch": 0.5, + "learning_rate": 4.6643391436073165e-05, + "loss": 0.9787, + "step": 905 + }, + { + "epoch": 0.5, + "learning_rate": 4.660706427976693e-05, + "loss": 0.9897, + "step": 910 + }, + { + "epoch": 0.51, + "learning_rate": 4.657055590774745e-05, + "loss": 0.9947, + "step": 915 + }, + { + "epoch": 0.51, + "learning_rate": 4.6533866626205805e-05, + "loss": 0.9614, + "step": 920 + }, + { + "epoch": 0.51, + "learning_rate": 4.649699674285036e-05, + "loss": 1.0323, + "step": 925 + }, + { + "epoch": 0.51, + "learning_rate": 4.645994656690417e-05, + "loss": 1.0345, + "step": 930 + }, + { + "epoch": 0.52, + "learning_rate": 4.642271640910235e-05, + "loss": 1.0432, + "step": 935 + }, + { + "epoch": 0.52, + "learning_rate": 4.638530658168954e-05, + "loss": 1.0102, + "step": 940 + }, + { + "epoch": 0.52, + "learning_rate": 4.6347717398417203e-05, + "loss": 1.0361, + "step": 945 + }, + { + "epoch": 0.53, + "learning_rate": 4.6309949174541096e-05, + "loss": 0.9818, + "step": 950 + }, + { + "epoch": 0.53, + "learning_rate": 4.627200222681851e-05, + "loss": 1.0293, + "step": 955 + }, + { + "epoch": 0.53, + "learning_rate": 4.6233876873505694e-05, + "loss": 1.0125, + "step": 960 + }, + { + "epoch": 0.53, + "learning_rate": 4.619557343435516e-05, + "loss": 1.0039, + "step": 965 + }, + { + "epoch": 0.54, + "learning_rate": 4.615709223061302e-05, + "loss": 1.0146, + "step": 970 + }, + { + "epoch": 0.54, + "learning_rate": 4.611843358501624e-05, + "loss": 1.0352, + "step": 975 + }, + { + "epoch": 0.54, + "learning_rate": 4.6079597821789993e-05, + "loss": 1.0405, + "step": 980 + }, + { + "epoch": 0.54, + "learning_rate": 4.604058526664491e-05, + "loss": 0.9834, + "step": 985 + }, + { + "epoch": 0.55, + "learning_rate": 4.600139624677436e-05, + "loss": 1.0252, + "step": 990 + }, + { + "epoch": 0.55, + "learning_rate": 4.596203109085168e-05, + "loss": 1.0516, + "step": 995 + }, + { + "epoch": 0.55, + "learning_rate": 4.5922490129027464e-05, + "loss": 0.9762, + "step": 1000 + }, + { + "epoch": 0.56, + "learning_rate": 4.588277369292674e-05, + "loss": 0.9801, + "step": 1005 + }, + { + "epoch": 0.56, + "learning_rate": 4.5842882115646234e-05, + "loss": 1.0127, + "step": 1010 + }, + { + "epoch": 0.56, + "learning_rate": 4.580281573175157e-05, + "loss": 1.0415, + "step": 1015 + }, + { + "epoch": 0.56, + "learning_rate": 4.576257487727442e-05, + "loss": 0.9974, + "step": 1020 + }, + { + "epoch": 0.57, + "learning_rate": 4.572215988970974e-05, + "loss": 1.0097, + "step": 1025 + }, + { + "epoch": 0.57, + "learning_rate": 4.568157110801293e-05, + "loss": 1.0004, + "step": 1030 + }, + { + "epoch": 0.57, + "learning_rate": 4.5640808872596944e-05, + "loss": 0.9932, + "step": 1035 + }, + { + "epoch": 0.58, + "learning_rate": 4.5599873525329505e-05, + "loss": 0.9437, + "step": 1040 + }, + { + "epoch": 0.58, + "learning_rate": 4.555876540953019e-05, + "loss": 1.0004, + "step": 1045 + }, + { + "epoch": 0.58, + "learning_rate": 4.551748486996755e-05, + "loss": 0.9743, + "step": 1050 + }, + { + "epoch": 0.58, + "learning_rate": 4.547603225285626e-05, + "loss": 1.0303, + "step": 1055 + }, + { + "epoch": 0.59, + "learning_rate": 4.543440790585417e-05, + "loss": 1.0225, + "step": 1060 + }, + { + "epoch": 0.59, + "learning_rate": 4.539261217805939e-05, + "loss": 1.0102, + "step": 1065 + }, + { + "epoch": 0.59, + "learning_rate": 4.535064542000743e-05, + "loss": 1.0087, + "step": 1070 + }, + { + "epoch": 0.59, + "learning_rate": 4.5308507983668165e-05, + "loss": 0.952, + "step": 1075 + }, + { + "epoch": 0.6, + "learning_rate": 4.526620022244293e-05, + "loss": 0.9767, + "step": 1080 + }, + { + "epoch": 0.6, + "learning_rate": 4.522372249116158e-05, + "loss": 1.0049, + "step": 1085 + }, + { + "epoch": 0.6, + "learning_rate": 4.5181075146079456e-05, + "loss": 1.0397, + "step": 1090 + }, + { + "epoch": 0.61, + "learning_rate": 4.5138258544874455e-05, + "loss": 1.0167, + "step": 1095 + }, + { + "epoch": 0.61, + "learning_rate": 4.5095273046643985e-05, + "loss": 0.9966, + "step": 1100 + }, + { + "epoch": 0.61, + "learning_rate": 4.5052119011901986e-05, + "loss": 0.983, + "step": 1105 + }, + { + "epoch": 0.61, + "learning_rate": 4.500879680257587e-05, + "loss": 1.0301, + "step": 1110 + }, + { + "epoch": 0.62, + "learning_rate": 4.4965306782003535e-05, + "loss": 1.0371, + "step": 1115 + }, + { + "epoch": 0.62, + "learning_rate": 4.492164931493028e-05, + "loss": 1.0083, + "step": 1120 + }, + { + "epoch": 0.62, + "learning_rate": 4.487782476750575e-05, + "loss": 0.9728, + "step": 1125 + }, + { + "epoch": 0.62, + "learning_rate": 4.4833833507280884e-05, + "loss": 0.9892, + "step": 1130 + }, + { + "epoch": 0.63, + "learning_rate": 4.4789675903204805e-05, + "loss": 0.9847, + "step": 1135 + }, + { + "epoch": 0.63, + "learning_rate": 4.474535232562176e-05, + "loss": 1.0545, + "step": 1140 + }, + { + "epoch": 0.63, + "learning_rate": 4.470086314626797e-05, + "loss": 1.0204, + "step": 1145 + }, + { + "epoch": 0.64, + "learning_rate": 4.465620873826856e-05, + "loss": 0.9648, + "step": 1150 + }, + { + "epoch": 0.64, + "learning_rate": 4.46113894761344e-05, + "loss": 0.9545, + "step": 1155 + }, + { + "epoch": 0.64, + "learning_rate": 4.456640573575896e-05, + "loss": 1.0256, + "step": 1160 + }, + { + "epoch": 0.64, + "learning_rate": 4.4521257894415183e-05, + "loss": 0.99, + "step": 1165 + }, + { + "epoch": 0.65, + "learning_rate": 4.44759463307523e-05, + "loss": 0.9906, + "step": 1170 + }, + { + "epoch": 0.65, + "learning_rate": 4.443047142479266e-05, + "loss": 0.9876, + "step": 1175 + }, + { + "epoch": 0.65, + "learning_rate": 4.4384833557928553e-05, + "loss": 1.0495, + "step": 1180 + }, + { + "epoch": 0.66, + "learning_rate": 4.4339033112918966e-05, + "loss": 0.9869, + "step": 1185 + }, + { + "epoch": 0.66, + "learning_rate": 4.4293070473886456e-05, + "loss": 1.0299, + "step": 1190 + }, + { + "epoch": 0.66, + "learning_rate": 4.424694602631385e-05, + "loss": 1.0073, + "step": 1195 + }, + { + "epoch": 0.66, + "learning_rate": 4.420066015704105e-05, + "loss": 1.0023, + "step": 1200 + }, + { + "epoch": 0.67, + "learning_rate": 4.41542132542618e-05, + "loss": 0.9462, + "step": 1205 + }, + { + "epoch": 0.67, + "learning_rate": 4.410760570752037e-05, + "loss": 1.0116, + "step": 1210 + }, + { + "epoch": 0.67, + "learning_rate": 4.4060837907708375e-05, + "loss": 0.9652, + "step": 1215 + }, + { + "epoch": 0.67, + "learning_rate": 4.401391024706142e-05, + "loss": 1.0411, + "step": 1220 + }, + { + "epoch": 0.68, + "learning_rate": 4.396682311915586e-05, + "loss": 0.9691, + "step": 1225 + }, + { + "epoch": 0.68, + "learning_rate": 4.3919576918905495e-05, + "loss": 0.961, + "step": 1230 + }, + { + "epoch": 0.68, + "learning_rate": 4.387217204255819e-05, + "loss": 0.9602, + "step": 1235 + }, + { + "epoch": 0.69, + "learning_rate": 4.3824608887692666e-05, + "loss": 1.0197, + "step": 1240 + }, + { + "epoch": 0.69, + "learning_rate": 4.377688785321507e-05, + "loss": 0.9982, + "step": 1245 + }, + { + "epoch": 0.69, + "learning_rate": 4.372900933935569e-05, + "loss": 0.986, + "step": 1250 + }, + { + "epoch": 0.69, + "learning_rate": 4.368097374766556e-05, + "loss": 0.9744, + "step": 1255 + }, + { + "epoch": 0.7, + "learning_rate": 4.3632781481013105e-05, + "loss": 1.0078, + "step": 1260 + }, + { + "epoch": 0.7, + "learning_rate": 4.358443294358077e-05, + "loss": 0.9865, + "step": 1265 + }, + { + "epoch": 0.7, + "learning_rate": 4.35359285408616e-05, + "loss": 0.976, + "step": 1270 + }, + { + "epoch": 0.71, + "learning_rate": 4.348726867965591e-05, + "loss": 1.0141, + "step": 1275 + }, + { + "epoch": 0.71, + "learning_rate": 4.343845376806777e-05, + "loss": 0.952, + "step": 1280 + }, + { + "epoch": 0.71, + "learning_rate": 4.338948421550169e-05, + "loss": 1.0239, + "step": 1285 + }, + { + "epoch": 0.71, + "learning_rate": 4.334036043265909e-05, + "loss": 0.994, + "step": 1290 + }, + { + "epoch": 0.72, + "learning_rate": 4.329108283153492e-05, + "loss": 1.0505, + "step": 1295 + }, + { + "epoch": 0.72, + "learning_rate": 4.3241651825414195e-05, + "loss": 0.9919, + "step": 1300 + }, + { + "epoch": 0.72, + "learning_rate": 4.31920678288685e-05, + "loss": 1.0169, + "step": 1305 + }, + { + "epoch": 0.72, + "learning_rate": 4.3142331257752546e-05, + "loss": 0.9938, + "step": 1310 + }, + { + "epoch": 0.73, + "learning_rate": 4.309244252920064e-05, + "loss": 0.92, + "step": 1315 + }, + { + "epoch": 0.73, + "learning_rate": 4.304240206162326e-05, + "loss": 0.9537, + "step": 1320 + }, + { + "epoch": 0.73, + "learning_rate": 4.299221027470345e-05, + "loss": 0.996, + "step": 1325 + }, + { + "epoch": 0.74, + "learning_rate": 4.294186758939339e-05, + "loss": 0.9652, + "step": 1330 + }, + { + "epoch": 0.74, + "learning_rate": 4.2891374427910795e-05, + "loss": 1.0138, + "step": 1335 + }, + { + "epoch": 0.74, + "learning_rate": 4.284073121373544e-05, + "loss": 0.9267, + "step": 1340 + }, + { + "epoch": 0.74, + "learning_rate": 4.278993837160553e-05, + "loss": 0.9899, + "step": 1345 + }, + { + "epoch": 0.75, + "learning_rate": 4.273899632751422e-05, + "loss": 0.963, + "step": 1350 + }, + { + "epoch": 0.75, + "learning_rate": 4.2687905508705974e-05, + "loss": 1.0066, + "step": 1355 + }, + { + "epoch": 0.75, + "learning_rate": 4.263666634367303e-05, + "loss": 0.9939, + "step": 1360 + }, + { + "epoch": 0.75, + "learning_rate": 4.258527926215178e-05, + "loss": 0.9667, + "step": 1365 + }, + { + "epoch": 0.76, + "learning_rate": 4.253374469511917e-05, + "loss": 0.9999, + "step": 1370 + }, + { + "epoch": 0.76, + "learning_rate": 4.248206307478909e-05, + "loss": 0.9738, + "step": 1375 + }, + { + "epoch": 0.76, + "learning_rate": 4.243023483460875e-05, + "loss": 0.9829, + "step": 1380 + }, + { + "epoch": 0.77, + "learning_rate": 4.237826040925503e-05, + "loss": 1.0107, + "step": 1385 + }, + { + "epoch": 0.77, + "learning_rate": 4.232614023463088e-05, + "loss": 0.9992, + "step": 1390 + }, + { + "epoch": 0.77, + "learning_rate": 4.227387474786159e-05, + "loss": 0.9566, + "step": 1395 + }, + { + "epoch": 0.77, + "learning_rate": 4.222146438729119e-05, + "loss": 0.9722, + "step": 1400 + }, + { + "epoch": 0.78, + "learning_rate": 4.216890959247873e-05, + "loss": 0.988, + "step": 1405 + }, + { + "epoch": 0.78, + "learning_rate": 4.211621080419463e-05, + "loss": 1.0103, + "step": 1410 + }, + { + "epoch": 0.78, + "learning_rate": 4.206336846441695e-05, + "loss": 0.9805, + "step": 1415 + }, + { + "epoch": 0.79, + "learning_rate": 4.201038301632772e-05, + "loss": 1.0177, + "step": 1420 + }, + { + "epoch": 0.79, + "learning_rate": 4.195725490430917e-05, + "loss": 0.9938, + "step": 1425 + }, + { + "epoch": 0.79, + "learning_rate": 4.190398457394007e-05, + "loss": 1.0276, + "step": 1430 + }, + { + "epoch": 0.79, + "learning_rate": 4.1850572471991924e-05, + "loss": 0.9909, + "step": 1435 + }, + { + "epoch": 0.8, + "learning_rate": 4.1797019046425264e-05, + "loss": 0.9794, + "step": 1440 + }, + { + "epoch": 0.8, + "learning_rate": 4.1743324746385914e-05, + "loss": 1.0073, + "step": 1445 + }, + { + "epoch": 0.8, + "learning_rate": 4.1689490022201154e-05, + "loss": 1.0106, + "step": 1450 + }, + { + "epoch": 0.8, + "learning_rate": 4.163551532537601e-05, + "loss": 1.0241, + "step": 1455 + }, + { + "epoch": 0.81, + "learning_rate": 4.1581401108589425e-05, + "loss": 1.0178, + "step": 1460 + }, + { + "epoch": 0.81, + "learning_rate": 4.1527147825690495e-05, + "loss": 0.9426, + "step": 1465 + }, + { + "epoch": 0.81, + "learning_rate": 4.1472755931694626e-05, + "loss": 0.9642, + "step": 1470 + }, + { + "epoch": 0.82, + "learning_rate": 4.141822588277976e-05, + "loss": 0.9678, + "step": 1475 + }, + { + "epoch": 0.82, + "learning_rate": 4.136355813628251e-05, + "loss": 0.9654, + "step": 1480 + }, + { + "epoch": 0.82, + "learning_rate": 4.130875315069435e-05, + "loss": 0.9748, + "step": 1485 + }, + { + "epoch": 0.82, + "learning_rate": 4.125381138565775e-05, + "loss": 0.9321, + "step": 1490 + }, + { + "epoch": 0.83, + "learning_rate": 4.1198733301962346e-05, + "loss": 0.9885, + "step": 1495 + }, + { + "epoch": 0.83, + "learning_rate": 4.114351936154105e-05, + "loss": 0.9385, + "step": 1500 + }, + { + "epoch": 0.83, + "learning_rate": 4.108817002746619e-05, + "loss": 0.9362, + "step": 1505 + }, + { + "epoch": 0.83, + "learning_rate": 4.1032685763945625e-05, + "loss": 0.9764, + "step": 1510 + }, + { + "epoch": 0.84, + "learning_rate": 4.097706703631886e-05, + "loss": 0.961, + "step": 1515 + }, + { + "epoch": 0.84, + "learning_rate": 4.092131431105312e-05, + "loss": 0.9818, + "step": 1520 + }, + { + "epoch": 0.84, + "learning_rate": 4.086542805573945e-05, + "loss": 0.9855, + "step": 1525 + }, + { + "epoch": 0.85, + "learning_rate": 4.080940873908881e-05, + "loss": 0.9618, + "step": 1530 + }, + { + "epoch": 0.85, + "learning_rate": 4.07532568309281e-05, + "loss": 1.0102, + "step": 1535 + }, + { + "epoch": 0.85, + "learning_rate": 4.069697280219628e-05, + "loss": 0.9773, + "step": 1540 + }, + { + "epoch": 0.85, + "learning_rate": 4.0640557124940376e-05, + "loss": 0.9687, + "step": 1545 + }, + { + "epoch": 0.86, + "learning_rate": 4.058401027231152e-05, + "loss": 0.9818, + "step": 1550 + }, + { + "epoch": 0.86, + "learning_rate": 4.052733271856103e-05, + "loss": 0.9911, + "step": 1555 + }, + { + "epoch": 0.86, + "learning_rate": 4.0470524939036355e-05, + "loss": 0.9694, + "step": 1560 + }, + { + "epoch": 0.87, + "learning_rate": 4.0413587410177155e-05, + "loss": 0.9374, + "step": 1565 + }, + { + "epoch": 0.87, + "learning_rate": 4.035652060951128e-05, + "loss": 0.956, + "step": 1570 + }, + { + "epoch": 0.87, + "learning_rate": 4.0299325015650774e-05, + "loss": 0.9491, + "step": 1575 + }, + { + "epoch": 0.87, + "learning_rate": 4.024200110828783e-05, + "loss": 0.9569, + "step": 1580 + }, + { + "epoch": 0.88, + "learning_rate": 4.018454936819082e-05, + "loss": 0.9627, + "step": 1585 + }, + { + "epoch": 0.88, + "learning_rate": 4.012697027720018e-05, + "loss": 0.9922, + "step": 1590 + }, + { + "epoch": 0.88, + "learning_rate": 4.0069264318224506e-05, + "loss": 0.9703, + "step": 1595 + }, + { + "epoch": 0.88, + "learning_rate": 4.0011431975236337e-05, + "loss": 0.9436, + "step": 1600 + }, + { + "epoch": 0.89, + "learning_rate": 3.995347373326822e-05, + "loss": 0.961, + "step": 1605 + }, + { + "epoch": 0.89, + "learning_rate": 3.989539007840861e-05, + "loss": 0.9247, + "step": 1610 + }, + { + "epoch": 0.89, + "learning_rate": 3.983718149779775e-05, + "loss": 0.9537, + "step": 1615 + }, + { + "epoch": 0.9, + "learning_rate": 3.9778848479623656e-05, + "loss": 0.9415, + "step": 1620 + }, + { + "epoch": 0.9, + "learning_rate": 3.972039151311795e-05, + "loss": 0.9954, + "step": 1625 + }, + { + "epoch": 0.9, + "learning_rate": 3.966181108855183e-05, + "loss": 0.9451, + "step": 1630 + }, + { + "epoch": 0.9, + "learning_rate": 3.960310769723189e-05, + "loss": 0.9977, + "step": 1635 + }, + { + "epoch": 0.91, + "learning_rate": 3.9544281831496034e-05, + "loss": 0.9806, + "step": 1640 + }, + { + "epoch": 0.91, + "learning_rate": 3.9485333984709374e-05, + "loss": 0.9851, + "step": 1645 + }, + { + "epoch": 0.91, + "learning_rate": 3.942626465126001e-05, + "loss": 1.0089, + "step": 1650 + }, + { + "epoch": 0.92, + "learning_rate": 3.9367074326555e-05, + "loss": 0.9562, + "step": 1655 + }, + { + "epoch": 0.92, + "learning_rate": 3.930776350701609e-05, + "loss": 0.9892, + "step": 1660 + }, + { + "epoch": 0.92, + "learning_rate": 3.92483326900756e-05, + "loss": 0.9875, + "step": 1665 + }, + { + "epoch": 0.92, + "learning_rate": 3.91887823741723e-05, + "loss": 1.0012, + "step": 1670 + }, + { + "epoch": 0.93, + "learning_rate": 3.9129113058747136e-05, + "loss": 0.97, + "step": 1675 + }, + { + "epoch": 0.93, + "learning_rate": 3.9069325244239095e-05, + "loss": 0.9793, + "step": 1680 + }, + { + "epoch": 0.93, + "learning_rate": 3.900941943208103e-05, + "loss": 0.9864, + "step": 1685 + }, + { + "epoch": 0.93, + "learning_rate": 3.894939612469539e-05, + "loss": 0.9596, + "step": 1690 + }, + { + "epoch": 0.94, + "learning_rate": 3.888925582549006e-05, + "loss": 0.9679, + "step": 1695 + }, + { + "epoch": 0.94, + "learning_rate": 3.882899903885412e-05, + "loss": 0.9395, + "step": 1700 + }, + { + "epoch": 0.94, + "learning_rate": 3.876862627015361e-05, + "loss": 0.9774, + "step": 1705 + }, + { + "epoch": 0.95, + "learning_rate": 3.87081380257273e-05, + "loss": 0.9686, + "step": 1710 + }, + { + "epoch": 0.95, + "learning_rate": 3.864753481288244e-05, + "loss": 0.953, + "step": 1715 + }, + { + "epoch": 0.95, + "learning_rate": 3.8586817139890515e-05, + "loss": 0.9719, + "step": 1720 + }, + { + "epoch": 0.95, + "learning_rate": 3.852598551598294e-05, + "loss": 0.9748, + "step": 1725 + }, + { + "epoch": 0.96, + "learning_rate": 3.8465040451346874e-05, + "loss": 0.9741, + "step": 1730 + }, + { + "epoch": 0.96, + "learning_rate": 3.8403982457120836e-05, + "loss": 0.9747, + "step": 1735 + }, + { + "epoch": 0.96, + "learning_rate": 3.834281204539051e-05, + "loss": 0.9791, + "step": 1740 + }, + { + "epoch": 0.96, + "learning_rate": 3.828152972918438e-05, + "loss": 0.9704, + "step": 1745 + }, + { + "epoch": 0.97, + "learning_rate": 3.82201360224695e-05, + "loss": 0.9556, + "step": 1750 + }, + { + "epoch": 0.97, + "learning_rate": 3.815863144014711e-05, + "loss": 0.9552, + "step": 1755 + }, + { + "epoch": 0.97, + "learning_rate": 3.809701649804834e-05, + "loss": 0.9594, + "step": 1760 + }, + { + "epoch": 0.98, + "learning_rate": 3.8035291712929926e-05, + "loss": 0.959, + "step": 1765 + }, + { + "epoch": 0.98, + "learning_rate": 3.797345760246982e-05, + "loss": 1.0254, + "step": 1770 + }, + { + "epoch": 0.98, + "learning_rate": 3.791151468526289e-05, + "loss": 0.9925, + "step": 1775 + }, + { + "epoch": 0.98, + "learning_rate": 3.784946348081654e-05, + "loss": 0.9516, + "step": 1780 + }, + { + "epoch": 0.99, + "learning_rate": 3.7787304509546365e-05, + "loss": 0.954, + "step": 1785 + }, + { + "epoch": 0.99, + "learning_rate": 3.7725038292771774e-05, + "loss": 0.9465, + "step": 1790 + }, + { + "epoch": 0.99, + "learning_rate": 3.766266535271167e-05, + "loss": 0.9792, + "step": 1795 + }, + { + "epoch": 1.0, + "learning_rate": 3.760018621248e-05, + "loss": 0.964, + "step": 1800 + }, + { + "epoch": 1.0, + "learning_rate": 3.75376013960814e-05, + "loss": 0.9419, + "step": 1805 + }, + { + "epoch": 1.0, + "learning_rate": 3.747491142840681e-05, + "loss": 0.9818, + "step": 1810 + }, + { + "epoch": 1.0, + "learning_rate": 3.741211683522904e-05, + "loss": 0.9153, + "step": 1815 + }, + { + "epoch": 1.01, + "learning_rate": 3.734921814319841e-05, + "loss": 0.955, + "step": 1820 + }, + { + "epoch": 1.01, + "learning_rate": 3.728621587983828e-05, + "loss": 0.9467, + "step": 1825 + }, + { + "epoch": 1.01, + "learning_rate": 3.722311057354067e-05, + "loss": 0.9816, + "step": 1830 + }, + { + "epoch": 1.01, + "learning_rate": 3.715990275356178e-05, + "loss": 0.9727, + "step": 1835 + }, + { + "epoch": 1.02, + "learning_rate": 3.7096592950017617e-05, + "loss": 0.9822, + "step": 1840 + }, + { + "epoch": 1.02, + "learning_rate": 3.703318169387947e-05, + "loss": 0.942, + "step": 1845 + }, + { + "epoch": 1.02, + "learning_rate": 3.696966951696952e-05, + "loss": 0.9306, + "step": 1850 + }, + { + "epoch": 1.03, + "learning_rate": 3.690605695195637e-05, + "loss": 0.9654, + "step": 1855 + }, + { + "epoch": 1.03, + "learning_rate": 3.684234453235054e-05, + "loss": 0.9592, + "step": 1860 + }, + { + "epoch": 1.03, + "learning_rate": 3.677853279250003e-05, + "loss": 0.988, + "step": 1865 + }, + { + "epoch": 1.03, + "learning_rate": 3.671462226758583e-05, + "loss": 0.9462, + "step": 1870 + }, + { + "epoch": 1.04, + "learning_rate": 3.665061349361742e-05, + "loss": 0.9685, + "step": 1875 + }, + { + "epoch": 1.04, + "learning_rate": 3.658650700742828e-05, + "loss": 0.9772, + "step": 1880 + }, + { + "epoch": 1.04, + "learning_rate": 3.6522303346671404e-05, + "loss": 0.9482, + "step": 1885 + }, + { + "epoch": 1.05, + "learning_rate": 3.645800304981477e-05, + "loss": 1.0069, + "step": 1890 + }, + { + "epoch": 1.05, + "learning_rate": 3.639360665613683e-05, + "loss": 0.9003, + "step": 1895 + }, + { + "epoch": 1.05, + "learning_rate": 3.632911470572197e-05, + "loss": 0.9279, + "step": 1900 + }, + { + "epoch": 1.05, + "learning_rate": 3.626452773945603e-05, + "loss": 0.9237, + "step": 1905 + }, + { + "epoch": 1.06, + "learning_rate": 3.619984629902172e-05, + "loss": 1.0086, + "step": 1910 + }, + { + "epoch": 1.06, + "learning_rate": 3.613507092689409e-05, + "loss": 0.9625, + "step": 1915 + }, + { + "epoch": 1.06, + "learning_rate": 3.607020216633599e-05, + "loss": 0.9297, + "step": 1920 + }, + { + "epoch": 1.06, + "learning_rate": 3.60052405613935e-05, + "loss": 0.9718, + "step": 1925 + }, + { + "epoch": 1.07, + "learning_rate": 3.594018665689139e-05, + "loss": 0.9512, + "step": 1930 + }, + { + "epoch": 1.07, + "learning_rate": 3.5875040998428513e-05, + "loss": 0.9923, + "step": 1935 + }, + { + "epoch": 1.07, + "learning_rate": 3.5809804132373253e-05, + "loss": 0.9518, + "step": 1940 + }, + { + "epoch": 1.08, + "learning_rate": 3.574447660585897e-05, + "loss": 0.9324, + "step": 1945 + }, + { + "epoch": 1.08, + "learning_rate": 3.5679058966779344e-05, + "loss": 0.9724, + "step": 1950 + }, + { + "epoch": 1.08, + "learning_rate": 3.561355176378384e-05, + "loss": 0.978, + "step": 1955 + }, + { + "epoch": 1.08, + "learning_rate": 3.554795554627307e-05, + "loss": 0.9893, + "step": 1960 + }, + { + "epoch": 1.09, + "learning_rate": 3.548227086439422e-05, + "loss": 0.967, + "step": 1965 + }, + { + "epoch": 1.09, + "learning_rate": 3.541649826903639e-05, + "loss": 0.9529, + "step": 1970 + }, + { + "epoch": 1.09, + "learning_rate": 3.535063831182602e-05, + "loss": 0.9506, + "step": 1975 + }, + { + "epoch": 1.09, + "learning_rate": 3.528469154512224e-05, + "loss": 0.9525, + "step": 1980 + }, + { + "epoch": 1.1, + "learning_rate": 3.521865852201223e-05, + "loss": 0.9258, + "step": 1985 + }, + { + "epoch": 1.1, + "learning_rate": 3.5152539796306596e-05, + "loss": 0.9417, + "step": 1990 + }, + { + "epoch": 1.1, + "learning_rate": 3.508633592253472e-05, + "loss": 0.9465, + "step": 1995 + }, + { + "epoch": 1.11, + "learning_rate": 3.502004745594011e-05, + "loss": 0.9494, + "step": 2000 + }, + { + "epoch": 1.11, + "learning_rate": 3.4953674952475755e-05, + "loss": 0.9709, + "step": 2005 + }, + { + "epoch": 1.11, + "learning_rate": 3.488721896879943e-05, + "loss": 0.9581, + "step": 2010 + }, + { + "epoch": 1.11, + "learning_rate": 3.4820680062269074e-05, + "loss": 0.974, + "step": 2015 + }, + { + "epoch": 1.12, + "learning_rate": 3.4754058790938046e-05, + "loss": 0.9768, + "step": 2020 + }, + { + "epoch": 1.12, + "learning_rate": 3.468735571355055e-05, + "loss": 0.982, + "step": 2025 + }, + { + "epoch": 1.12, + "learning_rate": 3.4620571389536825e-05, + "loss": 0.984, + "step": 2030 + }, + { + "epoch": 1.13, + "learning_rate": 3.455370637900856e-05, + "loss": 0.9604, + "step": 2035 + }, + { + "epoch": 1.13, + "learning_rate": 3.448676124275414e-05, + "loss": 0.897, + "step": 2040 + }, + { + "epoch": 1.13, + "learning_rate": 3.4419736542233925e-05, + "loss": 0.9968, + "step": 2045 + }, + { + "epoch": 1.13, + "learning_rate": 3.4352632839575616e-05, + "loss": 0.9479, + "step": 2050 + }, + { + "epoch": 1.14, + "learning_rate": 3.428545069756946e-05, + "loss": 0.9724, + "step": 2055 + }, + { + "epoch": 1.14, + "learning_rate": 3.42181906796636e-05, + "loss": 0.9493, + "step": 2060 + }, + { + "epoch": 1.14, + "learning_rate": 3.415085334995927e-05, + "loss": 0.9348, + "step": 2065 + }, + { + "epoch": 1.14, + "learning_rate": 3.408343927320613e-05, + "loss": 0.9702, + "step": 2070 + }, + { + "epoch": 1.15, + "learning_rate": 3.401594901479753e-05, + "loss": 0.9089, + "step": 2075 + }, + { + "epoch": 1.15, + "learning_rate": 3.394838314076572e-05, + "loss": 0.9606, + "step": 2080 + }, + { + "epoch": 1.15, + "learning_rate": 3.3880742217777115e-05, + "loss": 0.9743, + "step": 2085 + }, + { + "epoch": 1.16, + "learning_rate": 3.381302681312759e-05, + "loss": 0.9469, + "step": 2090 + }, + { + "epoch": 1.16, + "learning_rate": 3.374523749473767e-05, + "loss": 0.949, + "step": 2095 + }, + { + "epoch": 1.16, + "learning_rate": 3.367737483114779e-05, + "loss": 0.9567, + "step": 2100 + }, + { + "epoch": 1.16, + "learning_rate": 3.360943939151351e-05, + "loss": 0.9718, + "step": 2105 + }, + { + "epoch": 1.17, + "learning_rate": 3.354143174560078e-05, + "loss": 0.9626, + "step": 2110 + }, + { + "epoch": 1.17, + "learning_rate": 3.3473352463781105e-05, + "loss": 0.9346, + "step": 2115 + }, + { + "epoch": 1.17, + "learning_rate": 3.340520211702681e-05, + "loss": 0.9208, + "step": 2120 + }, + { + "epoch": 1.18, + "learning_rate": 3.333698127690623e-05, + "loss": 0.9856, + "step": 2125 + }, + { + "epoch": 1.18, + "learning_rate": 3.326869051557891e-05, + "loss": 0.9049, + "step": 2130 + }, + { + "epoch": 1.18, + "learning_rate": 3.320033040579082e-05, + "loss": 0.9222, + "step": 2135 + }, + { + "epoch": 1.18, + "learning_rate": 3.3131901520869565e-05, + "loss": 0.9648, + "step": 2140 + }, + { + "epoch": 1.19, + "learning_rate": 3.306340443471951e-05, + "loss": 0.9538, + "step": 2145 + }, + { + "epoch": 1.19, + "learning_rate": 3.299483972181708e-05, + "loss": 0.9314, + "step": 2150 + }, + { + "epoch": 1.19, + "learning_rate": 3.292620795720583e-05, + "loss": 0.9576, + "step": 2155 + }, + { + "epoch": 1.19, + "learning_rate": 3.285750971649167e-05, + "loss": 0.9499, + "step": 2160 + }, + { + "epoch": 1.2, + "learning_rate": 3.278874557583807e-05, + "loss": 0.9568, + "step": 2165 + }, + { + "epoch": 1.2, + "learning_rate": 3.271991611196117e-05, + "loss": 0.9642, + "step": 2170 + }, + { + "epoch": 1.2, + "learning_rate": 3.265102190212497e-05, + "loss": 0.9526, + "step": 2175 + }, + { + "epoch": 1.21, + "learning_rate": 3.258206352413648e-05, + "loss": 0.933, + "step": 2180 + }, + { + "epoch": 1.21, + "learning_rate": 3.2513041556340887e-05, + "loss": 0.9683, + "step": 2185 + }, + { + "epoch": 1.21, + "learning_rate": 3.244395657761671e-05, + "loss": 0.9155, + "step": 2190 + }, + { + "epoch": 1.21, + "learning_rate": 3.2374809167370924e-05, + "loss": 0.9262, + "step": 2195 + }, + { + "epoch": 1.22, + "learning_rate": 3.230559990553409e-05, + "loss": 0.9778, + "step": 2200 + }, + { + "epoch": 1.22, + "learning_rate": 3.2236329372555544e-05, + "loss": 0.9577, + "step": 2205 + }, + { + "epoch": 1.22, + "learning_rate": 3.2166998149398465e-05, + "loss": 0.9286, + "step": 2210 + }, + { + "epoch": 1.22, + "learning_rate": 3.209760681753505e-05, + "loss": 0.9634, + "step": 2215 + }, + { + "epoch": 1.23, + "learning_rate": 3.2028155958941615e-05, + "loss": 0.9467, + "step": 2220 + }, + { + "epoch": 1.23, + "learning_rate": 3.195864615609373e-05, + "loss": 0.9543, + "step": 2225 + }, + { + "epoch": 1.23, + "learning_rate": 3.1889077991961304e-05, + "loss": 0.9914, + "step": 2230 + }, + { + "epoch": 1.24, + "learning_rate": 3.181945205000373e-05, + "loss": 0.9309, + "step": 2235 + }, + { + "epoch": 1.24, + "learning_rate": 3.1749768914164955e-05, + "loss": 0.9299, + "step": 2240 + }, + { + "epoch": 1.24, + "learning_rate": 3.168002916886864e-05, + "loss": 0.9462, + "step": 2245 + }, + { + "epoch": 1.24, + "learning_rate": 3.1610233399013194e-05, + "loss": 0.948, + "step": 2250 + }, + { + "epoch": 1.25, + "learning_rate": 3.15403821899669e-05, + "loss": 0.9335, + "step": 2255 + }, + { + "epoch": 1.25, + "learning_rate": 3.147047612756302e-05, + "loss": 0.952, + "step": 2260 + }, + { + "epoch": 1.25, + "learning_rate": 3.140051579809484e-05, + "loss": 0.9532, + "step": 2265 + }, + { + "epoch": 1.26, + "learning_rate": 3.133050178831079e-05, + "loss": 0.9853, + "step": 2270 + }, + { + "epoch": 1.26, + "learning_rate": 3.12604346854095e-05, + "loss": 0.8925, + "step": 2275 + }, + { + "epoch": 1.26, + "learning_rate": 3.119031507703491e-05, + "loss": 0.942, + "step": 2280 + }, + { + "epoch": 1.26, + "learning_rate": 3.112014355127129e-05, + "loss": 0.9132, + "step": 2285 + }, + { + "epoch": 1.27, + "learning_rate": 3.104992069663835e-05, + "loss": 0.9335, + "step": 2290 + }, + { + "epoch": 1.27, + "learning_rate": 3.0979647102086273e-05, + "loss": 0.9403, + "step": 2295 + }, + { + "epoch": 1.27, + "learning_rate": 3.090932335699081e-05, + "loss": 0.9246, + "step": 2300 + }, + { + "epoch": 1.27, + "learning_rate": 3.083895005114831e-05, + "loss": 0.912, + "step": 2305 + }, + { + "epoch": 1.28, + "learning_rate": 3.076852777477079e-05, + "loss": 0.9334, + "step": 2310 + }, + { + "epoch": 1.28, + "learning_rate": 3.069805711848096e-05, + "loss": 0.933, + "step": 2315 + }, + { + "epoch": 1.28, + "learning_rate": 3.062753867330729e-05, + "loss": 0.9348, + "step": 2320 + }, + { + "epoch": 1.29, + "learning_rate": 3.055697303067905e-05, + "loss": 0.9997, + "step": 2325 + }, + { + "epoch": 1.29, + "learning_rate": 3.048636078242137e-05, + "loss": 0.9196, + "step": 2330 + }, + { + "epoch": 1.29, + "learning_rate": 3.0415702520750235e-05, + "loss": 0.9735, + "step": 2335 + }, + { + "epoch": 1.29, + "learning_rate": 3.0344998838267525e-05, + "loss": 0.9395, + "step": 2340 + }, + { + "epoch": 1.3, + "learning_rate": 3.0274250327956093e-05, + "loss": 0.9455, + "step": 2345 + }, + { + "epoch": 1.3, + "learning_rate": 3.020345758317474e-05, + "loss": 0.968, + "step": 2350 + }, + { + "epoch": 1.3, + "learning_rate": 3.0132621197653245e-05, + "loss": 0.9403, + "step": 2355 + }, + { + "epoch": 1.3, + "learning_rate": 3.0061741765487418e-05, + "loss": 0.9267, + "step": 2360 + }, + { + "epoch": 1.31, + "learning_rate": 2.9990819881134073e-05, + "loss": 0.9734, + "step": 2365 + }, + { + "epoch": 1.31, + "learning_rate": 2.9919856139406093e-05, + "loss": 0.949, + "step": 2370 + }, + { + "epoch": 1.31, + "learning_rate": 2.9848851135467386e-05, + "loss": 0.9464, + "step": 2375 + }, + { + "epoch": 1.32, + "learning_rate": 2.977780546482794e-05, + "loss": 0.9709, + "step": 2380 + }, + { + "epoch": 1.32, + "learning_rate": 2.9706719723338795e-05, + "loss": 0.9202, + "step": 2385 + }, + { + "epoch": 1.32, + "learning_rate": 2.9635594507187074e-05, + "loss": 0.9625, + "step": 2390 + }, + { + "epoch": 1.32, + "learning_rate": 2.956443041289096e-05, + "loss": 0.9307, + "step": 2395 + }, + { + "epoch": 1.33, + "learning_rate": 2.9493228037294702e-05, + "loss": 0.907, + "step": 2400 + }, + { + "epoch": 1.33, + "learning_rate": 2.9421987977563613e-05, + "loss": 0.9487, + "step": 2405 + }, + { + "epoch": 1.33, + "learning_rate": 2.935071083117907e-05, + "loss": 0.9259, + "step": 2410 + }, + { + "epoch": 1.34, + "learning_rate": 2.9279397195933457e-05, + "loss": 0.9357, + "step": 2415 + }, + { + "epoch": 1.34, + "learning_rate": 2.920804766992521e-05, + "loss": 0.9149, + "step": 2420 + }, + { + "epoch": 1.34, + "learning_rate": 2.9136662851553787e-05, + "loss": 0.9664, + "step": 2425 + }, + { + "epoch": 1.34, + "learning_rate": 2.906524333951461e-05, + "loss": 0.973, + "step": 2430 + }, + { + "epoch": 1.35, + "learning_rate": 2.899378973279409e-05, + "loss": 0.9797, + "step": 2435 + }, + { + "epoch": 1.35, + "learning_rate": 2.892230263066459e-05, + "loss": 0.9604, + "step": 2440 + }, + { + "epoch": 1.35, + "learning_rate": 2.885078263267938e-05, + "loss": 0.9147, + "step": 2445 + }, + { + "epoch": 1.35, + "learning_rate": 2.8779230338667634e-05, + "loss": 0.9827, + "step": 2450 + }, + { + "epoch": 1.36, + "learning_rate": 2.870764634872939e-05, + "loss": 0.9362, + "step": 2455 + }, + { + "epoch": 1.36, + "learning_rate": 2.86360312632305e-05, + "loss": 0.9373, + "step": 2460 + }, + { + "epoch": 1.36, + "learning_rate": 2.8564385682797622e-05, + "loss": 0.955, + "step": 2465 + }, + { + "epoch": 1.37, + "learning_rate": 2.8492710208313177e-05, + "loss": 0.9108, + "step": 2470 + }, + { + "epoch": 1.37, + "learning_rate": 2.8421005440910303e-05, + "loss": 0.9259, + "step": 2475 + }, + { + "epoch": 1.37, + "learning_rate": 2.8349271981967797e-05, + "loss": 0.9333, + "step": 2480 + }, + { + "epoch": 1.37, + "learning_rate": 2.8277510433105102e-05, + "loss": 0.9437, + "step": 2485 + }, + { + "epoch": 1.38, + "learning_rate": 2.820572139617725e-05, + "loss": 0.9189, + "step": 2490 + }, + { + "epoch": 1.38, + "learning_rate": 2.8133905473269802e-05, + "loss": 0.9522, + "step": 2495 + }, + { + "epoch": 1.38, + "learning_rate": 2.8062063266693818e-05, + "loss": 0.9352, + "step": 2500 + }, + { + "epoch": 1.39, + "learning_rate": 2.7990195378980784e-05, + "loss": 0.9232, + "step": 2505 + }, + { + "epoch": 1.39, + "learning_rate": 2.7918302412877583e-05, + "loss": 0.8807, + "step": 2510 + }, + { + "epoch": 1.39, + "learning_rate": 2.7846384971341427e-05, + "loss": 0.9231, + "step": 2515 + }, + { + "epoch": 1.39, + "learning_rate": 2.7774443657534788e-05, + "loss": 0.9253, + "step": 2520 + }, + { + "epoch": 1.4, + "learning_rate": 2.770247907482036e-05, + "loss": 0.9646, + "step": 2525 + }, + { + "epoch": 1.4, + "learning_rate": 2.763049182675599e-05, + "loss": 0.9575, + "step": 2530 + }, + { + "epoch": 1.4, + "learning_rate": 2.7558482517089617e-05, + "loss": 0.9234, + "step": 2535 + }, + { + "epoch": 1.4, + "learning_rate": 2.748645174975421e-05, + "loss": 0.9215, + "step": 2540 + }, + { + "epoch": 1.41, + "learning_rate": 2.74144001288627e-05, + "loss": 0.959, + "step": 2545 + }, + { + "epoch": 1.41, + "learning_rate": 2.7342328258702894e-05, + "loss": 0.9509, + "step": 2550 + }, + { + "epoch": 1.41, + "learning_rate": 2.727023674373246e-05, + "loss": 0.8989, + "step": 2555 + }, + { + "epoch": 1.42, + "learning_rate": 2.7198126188573807e-05, + "loss": 0.9653, + "step": 2560 + }, + { + "epoch": 1.42, + "learning_rate": 2.7125997198009028e-05, + "loss": 0.9046, + "step": 2565 + }, + { + "epoch": 1.42, + "learning_rate": 2.7053850376974848e-05, + "loss": 0.9318, + "step": 2570 + }, + { + "epoch": 1.42, + "learning_rate": 2.6981686330557516e-05, + "loss": 0.9292, + "step": 2575 + }, + { + "epoch": 1.43, + "learning_rate": 2.6909505663987756e-05, + "loss": 0.99, + "step": 2580 + }, + { + "epoch": 1.43, + "learning_rate": 2.6837308982635678e-05, + "loss": 0.9737, + "step": 2585 + }, + { + "epoch": 1.43, + "learning_rate": 2.6765096892005726e-05, + "loss": 0.9649, + "step": 2590 + }, + { + "epoch": 1.43, + "learning_rate": 2.6692869997731545e-05, + "loss": 0.9687, + "step": 2595 + }, + { + "epoch": 1.44, + "learning_rate": 2.6620628905570964e-05, + "loss": 0.9708, + "step": 2600 + }, + { + "epoch": 1.44, + "learning_rate": 2.6548374221400884e-05, + "loss": 0.9498, + "step": 2605 + }, + { + "epoch": 1.44, + "learning_rate": 2.6476106551212188e-05, + "loss": 0.9512, + "step": 2610 + }, + { + "epoch": 1.45, + "learning_rate": 2.6403826501104682e-05, + "loss": 0.964, + "step": 2615 + }, + { + "epoch": 1.45, + "learning_rate": 2.6331534677281998e-05, + "loss": 0.9321, + "step": 2620 + }, + { + "epoch": 1.45, + "learning_rate": 2.6259231686046508e-05, + "loss": 0.9032, + "step": 2625 + }, + { + "epoch": 1.45, + "learning_rate": 2.6186918133794252e-05, + "loss": 0.9543, + "step": 2630 + }, + { + "epoch": 1.46, + "learning_rate": 2.6114594627009847e-05, + "loss": 0.9355, + "step": 2635 + }, + { + "epoch": 1.46, + "learning_rate": 2.604226177226137e-05, + "loss": 0.9111, + "step": 2640 + }, + { + "epoch": 1.46, + "learning_rate": 2.596992017619534e-05, + "loss": 0.9896, + "step": 2645 + }, + { + "epoch": 1.47, + "learning_rate": 2.589757044553155e-05, + "loss": 0.9715, + "step": 2650 + }, + { + "epoch": 1.47, + "learning_rate": 2.5825213187058045e-05, + "loss": 0.8911, + "step": 2655 + }, + { + "epoch": 1.47, + "learning_rate": 2.5752849007625986e-05, + "loss": 0.9446, + "step": 2660 + }, + { + "epoch": 1.47, + "learning_rate": 2.568047851414459e-05, + "loss": 0.9453, + "step": 2665 + }, + { + "epoch": 1.48, + "learning_rate": 2.5608102313576027e-05, + "loss": 0.9244, + "step": 2670 + }, + { + "epoch": 1.48, + "learning_rate": 2.553572101293033e-05, + "loss": 0.9478, + "step": 2675 + }, + { + "epoch": 1.48, + "learning_rate": 2.546333521926031e-05, + "loss": 0.9708, + "step": 2680 + }, + { + "epoch": 1.48, + "learning_rate": 2.5390945539656445e-05, + "loss": 0.9266, + "step": 2685 + }, + { + "epoch": 1.49, + "learning_rate": 2.5318552581241822e-05, + "loss": 0.9199, + "step": 2690 + }, + { + "epoch": 1.49, + "learning_rate": 2.524615695116702e-05, + "loss": 0.9666, + "step": 2695 + }, + { + "epoch": 1.49, + "learning_rate": 2.5173759256605027e-05, + "loss": 0.9247, + "step": 2700 + }, + { + "epoch": 1.5, + "learning_rate": 2.510136010474614e-05, + "loss": 0.9534, + "step": 2705 + }, + { + "epoch": 1.5, + "learning_rate": 2.5028960102792887e-05, + "loss": 0.9502, + "step": 2710 + }, + { + "epoch": 1.5, + "learning_rate": 2.4971039897207112e-05, + "loss": 0.9331, + "step": 2715 + }, + { + "epoch": 1.5, + "learning_rate": 2.4898639895253865e-05, + "loss": 0.9806, + "step": 2720 + }, + { + "epoch": 1.51, + "learning_rate": 2.4826240743394982e-05, + "loss": 0.9525, + "step": 2725 + }, + { + "epoch": 1.51, + "learning_rate": 2.4753843048832985e-05, + "loss": 0.937, + "step": 2730 + }, + { + "epoch": 1.51, + "learning_rate": 2.4681447418758187e-05, + "loss": 0.935, + "step": 2735 + }, + { + "epoch": 1.52, + "learning_rate": 2.460905446034356e-05, + "loss": 0.9662, + "step": 2740 + }, + { + "epoch": 1.52, + "learning_rate": 2.45366647807397e-05, + "loss": 0.9249, + "step": 2745 + }, + { + "epoch": 1.52, + "learning_rate": 2.446427898706967e-05, + "loss": 0.9673, + "step": 2750 + }, + { + "epoch": 1.52, + "learning_rate": 2.439189768642398e-05, + "loss": 0.9316, + "step": 2755 + }, + { + "epoch": 1.53, + "learning_rate": 2.431952148585541e-05, + "loss": 0.9297, + "step": 2760 + }, + { + "epoch": 1.53, + "learning_rate": 2.424715099237402e-05, + "loss": 0.9278, + "step": 2765 + }, + { + "epoch": 1.53, + "learning_rate": 2.4174786812941968e-05, + "loss": 0.8954, + "step": 2770 + }, + { + "epoch": 1.53, + "learning_rate": 2.4102429554468456e-05, + "loss": 0.9586, + "step": 2775 + }, + { + "epoch": 1.54, + "learning_rate": 2.4030079823804673e-05, + "loss": 0.9119, + "step": 2780 + }, + { + "epoch": 1.54, + "learning_rate": 2.395773822773863e-05, + "loss": 0.8949, + "step": 2785 + }, + { + "epoch": 1.54, + "learning_rate": 2.3885405372990166e-05, + "loss": 0.9506, + "step": 2790 + }, + { + "epoch": 1.55, + "learning_rate": 2.3813081866205754e-05, + "loss": 0.9087, + "step": 2795 + }, + { + "epoch": 1.55, + "learning_rate": 2.3740768313953494e-05, + "loss": 0.923, + "step": 2800 + }, + { + "epoch": 1.55, + "learning_rate": 2.3668465322718004e-05, + "loss": 1.0212, + "step": 2805 + }, + { + "epoch": 1.55, + "learning_rate": 2.359617349889532e-05, + "loss": 0.9182, + "step": 2810 + }, + { + "epoch": 1.56, + "learning_rate": 2.3523893448787818e-05, + "loss": 0.9316, + "step": 2815 + }, + { + "epoch": 1.56, + "learning_rate": 2.3451625778599122e-05, + "loss": 0.94, + "step": 2820 + }, + { + "epoch": 1.56, + "learning_rate": 2.3379371094429038e-05, + "loss": 0.9181, + "step": 2825 + }, + { + "epoch": 1.56, + "learning_rate": 2.3307130002268457e-05, + "loss": 0.937, + "step": 2830 + }, + { + "epoch": 1.57, + "learning_rate": 2.3234903107994287e-05, + "loss": 0.9026, + "step": 2835 + }, + { + "epoch": 1.57, + "learning_rate": 2.3162691017364317e-05, + "loss": 0.954, + "step": 2840 + }, + { + "epoch": 1.57, + "learning_rate": 2.3090494336012253e-05, + "loss": 0.9661, + "step": 2845 + }, + { + "epoch": 1.58, + "learning_rate": 2.3018313669442483e-05, + "loss": 0.9127, + "step": 2850 + }, + { + "epoch": 1.58, + "learning_rate": 2.2946149623025158e-05, + "loss": 0.9317, + "step": 2855 + }, + { + "epoch": 1.58, + "learning_rate": 2.2874002801990978e-05, + "loss": 0.9856, + "step": 2860 + }, + { + "epoch": 1.58, + "learning_rate": 2.28018738114262e-05, + "loss": 1.0021, + "step": 2865 + }, + { + "epoch": 1.59, + "learning_rate": 2.272976325626755e-05, + "loss": 0.9655, + "step": 2870 + }, + { + "epoch": 1.59, + "learning_rate": 2.265767174129711e-05, + "loss": 0.9619, + "step": 2875 + }, + { + "epoch": 1.59, + "learning_rate": 2.2585599871137313e-05, + "loss": 0.9383, + "step": 2880 + }, + { + "epoch": 1.6, + "learning_rate": 2.251354825024579e-05, + "loss": 0.9332, + "step": 2885 + }, + { + "epoch": 1.6, + "learning_rate": 2.244151748291039e-05, + "loss": 0.9544, + "step": 2890 + }, + { + "epoch": 1.6, + "learning_rate": 2.236950817324401e-05, + "loss": 0.9343, + "step": 2895 + }, + { + "epoch": 1.6, + "learning_rate": 2.2297520925179647e-05, + "loss": 0.9189, + "step": 2900 + }, + { + "epoch": 1.61, + "learning_rate": 2.222555634246521e-05, + "loss": 0.9341, + "step": 2905 + }, + { + "epoch": 1.61, + "learning_rate": 2.215361502865858e-05, + "loss": 0.9567, + "step": 2910 + }, + { + "epoch": 1.61, + "learning_rate": 2.2081697587122423e-05, + "loss": 0.9047, + "step": 2915 + }, + { + "epoch": 1.61, + "learning_rate": 2.200980462101922e-05, + "loss": 0.9126, + "step": 2920 + }, + { + "epoch": 1.62, + "learning_rate": 2.1937936733306195e-05, + "loss": 0.9523, + "step": 2925 + }, + { + "epoch": 1.62, + "learning_rate": 2.18660945267302e-05, + "loss": 0.8802, + "step": 2930 + }, + { + "epoch": 1.62, + "learning_rate": 2.179427860382276e-05, + "loss": 0.9197, + "step": 2935 + }, + { + "epoch": 1.63, + "learning_rate": 2.1722489566894903e-05, + "loss": 0.9255, + "step": 2940 + }, + { + "epoch": 1.63, + "learning_rate": 2.1650728018032206e-05, + "loss": 0.8921, + "step": 2945 + }, + { + "epoch": 1.63, + "learning_rate": 2.15789945590897e-05, + "loss": 0.9607, + "step": 2950 + }, + { + "epoch": 1.63, + "learning_rate": 2.150728979168683e-05, + "loss": 0.9755, + "step": 2955 + }, + { + "epoch": 1.64, + "learning_rate": 2.1435614317202384e-05, + "loss": 0.9943, + "step": 2960 + }, + { + "epoch": 1.64, + "learning_rate": 2.1363968736769508e-05, + "loss": 0.9463, + "step": 2965 + }, + { + "epoch": 1.64, + "learning_rate": 2.1292353651270617e-05, + "loss": 0.9107, + "step": 2970 + }, + { + "epoch": 1.65, + "learning_rate": 2.1220769661332365e-05, + "loss": 0.9311, + "step": 2975 + }, + { + "epoch": 1.65, + "learning_rate": 2.1149217367320622e-05, + "loss": 0.9459, + "step": 2980 + }, + { + "epoch": 1.65, + "learning_rate": 2.107769736933541e-05, + "loss": 0.9439, + "step": 2985 + }, + { + "epoch": 1.65, + "learning_rate": 2.100621026720591e-05, + "loss": 0.9719, + "step": 2990 + }, + { + "epoch": 1.66, + "learning_rate": 2.093475666048539e-05, + "loss": 0.9569, + "step": 2995 + }, + { + "epoch": 1.66, + "learning_rate": 2.0863337148446222e-05, + "loss": 0.9308, + "step": 3000 + }, + { + "epoch": 1.66, + "learning_rate": 2.07919523300748e-05, + "loss": 0.9269, + "step": 3005 + }, + { + "epoch": 1.66, + "learning_rate": 2.0720602804066552e-05, + "loss": 0.9358, + "step": 3010 + }, + { + "epoch": 1.67, + "learning_rate": 2.0649289168820943e-05, + "loss": 0.9291, + "step": 3015 + }, + { + "epoch": 1.67, + "learning_rate": 2.0578012022436386e-05, + "loss": 0.969, + "step": 3020 + }, + { + "epoch": 1.67, + "learning_rate": 2.0506771962705304e-05, + "loss": 0.9319, + "step": 3025 + }, + { + "epoch": 1.68, + "learning_rate": 2.0435569587109042e-05, + "loss": 0.9574, + "step": 3030 + }, + { + "epoch": 1.68, + "learning_rate": 2.036440549281293e-05, + "loss": 0.9654, + "step": 3035 + }, + { + "epoch": 1.68, + "learning_rate": 2.0293280276661204e-05, + "loss": 0.9742, + "step": 3040 + }, + { + "epoch": 1.68, + "learning_rate": 2.0222194535172067e-05, + "loss": 0.9014, + "step": 3045 + }, + { + "epoch": 1.69, + "learning_rate": 2.0151148864532623e-05, + "loss": 0.9246, + "step": 3050 + }, + { + "epoch": 1.69, + "learning_rate": 2.0080143860593913e-05, + "loss": 0.9522, + "step": 3055 + }, + { + "epoch": 1.69, + "learning_rate": 2.0009180118865933e-05, + "loss": 0.9567, + "step": 3060 + }, + { + "epoch": 1.69, + "learning_rate": 1.9938258234512588e-05, + "loss": 0.983, + "step": 3065 + }, + { + "epoch": 1.7, + "learning_rate": 1.9867378802346764e-05, + "loss": 0.8722, + "step": 3070 + }, + { + "epoch": 1.7, + "learning_rate": 1.979654241682527e-05, + "loss": 0.9122, + "step": 3075 + }, + { + "epoch": 1.7, + "learning_rate": 1.972574967204391e-05, + "loss": 0.9362, + "step": 3080 + }, + { + "epoch": 1.71, + "learning_rate": 1.9655001161732478e-05, + "loss": 0.8944, + "step": 3085 + }, + { + "epoch": 1.71, + "learning_rate": 1.9584297479249774e-05, + "loss": 0.9329, + "step": 3090 + }, + { + "epoch": 1.71, + "learning_rate": 1.9513639217578636e-05, + "loss": 0.9707, + "step": 3095 + }, + { + "epoch": 1.71, + "learning_rate": 1.9443026969320955e-05, + "loss": 0.9367, + "step": 3100 + }, + { + "epoch": 1.72, + "learning_rate": 1.937246132669272e-05, + "loss": 0.896, + "step": 3105 + }, + { + "epoch": 1.72, + "learning_rate": 1.9301942881519047e-05, + "loss": 0.97, + "step": 3110 + }, + { + "epoch": 1.72, + "learning_rate": 1.9231472225229216e-05, + "loss": 0.9638, + "step": 3115 + }, + { + "epoch": 1.73, + "learning_rate": 1.9161049948851684e-05, + "loss": 0.9561, + "step": 3120 + }, + { + "epoch": 1.73, + "learning_rate": 1.9090676643009193e-05, + "loss": 0.9734, + "step": 3125 + }, + { + "epoch": 1.73, + "learning_rate": 1.902035289791373e-05, + "loss": 0.9651, + "step": 3130 + }, + { + "epoch": 1.73, + "learning_rate": 1.8950079303361658e-05, + "loss": 0.9489, + "step": 3135 + }, + { + "epoch": 1.74, + "learning_rate": 1.8879856448728723e-05, + "loss": 0.9893, + "step": 3140 + }, + { + "epoch": 1.74, + "learning_rate": 1.8809684922965097e-05, + "loss": 0.9549, + "step": 3145 + }, + { + "epoch": 1.74, + "learning_rate": 1.8739565314590507e-05, + "loss": 0.9196, + "step": 3150 + }, + { + "epoch": 1.74, + "learning_rate": 1.8669498211689216e-05, + "loss": 0.9568, + "step": 3155 + }, + { + "epoch": 1.75, + "learning_rate": 1.859948420190517e-05, + "loss": 0.904, + "step": 3160 + }, + { + "epoch": 1.75, + "learning_rate": 1.852952387243698e-05, + "loss": 0.9375, + "step": 3165 + }, + { + "epoch": 1.75, + "learning_rate": 1.8459617810033096e-05, + "loss": 0.9048, + "step": 3170 + }, + { + "epoch": 1.76, + "learning_rate": 1.83897666009868e-05, + "loss": 0.9514, + "step": 3175 + }, + { + "epoch": 1.76, + "learning_rate": 1.8319970831131363e-05, + "loss": 0.8855, + "step": 3180 + }, + { + "epoch": 1.76, + "learning_rate": 1.825023108583505e-05, + "loss": 0.9119, + "step": 3185 + }, + { + "epoch": 1.76, + "learning_rate": 1.818054794999628e-05, + "loss": 0.9733, + "step": 3190 + }, + { + "epoch": 1.77, + "learning_rate": 1.8110922008038705e-05, + "loss": 0.9597, + "step": 3195 + }, + { + "epoch": 1.77, + "learning_rate": 1.8041353843906275e-05, + "loss": 0.8952, + "step": 3200 + }, + { + "epoch": 1.77, + "learning_rate": 1.797184404105839e-05, + "loss": 0.8934, + "step": 3205 + }, + { + "epoch": 1.77, + "learning_rate": 1.7902393182464955e-05, + "loss": 0.9299, + "step": 3210 + }, + { + "epoch": 1.78, + "learning_rate": 1.7833001850601544e-05, + "loss": 0.9247, + "step": 3215 + }, + { + "epoch": 1.78, + "learning_rate": 1.7763670627444465e-05, + "loss": 0.9672, + "step": 3220 + }, + { + "epoch": 1.78, + "learning_rate": 1.7694400094465913e-05, + "loss": 0.9451, + "step": 3225 + }, + { + "epoch": 1.79, + "learning_rate": 1.7625190832629085e-05, + "loss": 0.9294, + "step": 3230 + }, + { + "epoch": 1.79, + "learning_rate": 1.7556043422383293e-05, + "loss": 0.9145, + "step": 3235 + }, + { + "epoch": 1.79, + "learning_rate": 1.7486958443659112e-05, + "loss": 0.9508, + "step": 3240 + }, + { + "epoch": 1.79, + "learning_rate": 1.7417936475863526e-05, + "loss": 0.8725, + "step": 3245 + }, + { + "epoch": 1.8, + "learning_rate": 1.7348978097875036e-05, + "loss": 0.9195, + "step": 3250 + }, + { + "epoch": 1.8, + "learning_rate": 1.728008388803883e-05, + "loss": 0.933, + "step": 3255 + }, + { + "epoch": 1.8, + "learning_rate": 1.7211254424161933e-05, + "loss": 0.9747, + "step": 3260 + }, + { + "epoch": 1.81, + "learning_rate": 1.7142490283508324e-05, + "loss": 0.9168, + "step": 3265 + }, + { + "epoch": 1.81, + "learning_rate": 1.707379204279418e-05, + "loss": 0.9844, + "step": 3270 + }, + { + "epoch": 1.81, + "learning_rate": 1.700516027818293e-05, + "loss": 0.9071, + "step": 3275 + }, + { + "epoch": 1.81, + "learning_rate": 1.6936595565280488e-05, + "loss": 0.9311, + "step": 3280 + }, + { + "epoch": 1.82, + "learning_rate": 1.686809847913045e-05, + "loss": 0.9419, + "step": 3285 + }, + { + "epoch": 1.82, + "learning_rate": 1.679966959420918e-05, + "loss": 0.9679, + "step": 3290 + }, + { + "epoch": 1.82, + "learning_rate": 1.67313094844211e-05, + "loss": 0.9601, + "step": 3295 + }, + { + "epoch": 1.82, + "learning_rate": 1.6663018723093774e-05, + "loss": 0.9022, + "step": 3300 + }, + { + "epoch": 1.83, + "learning_rate": 1.6594797882973196e-05, + "loss": 0.8925, + "step": 3305 + }, + { + "epoch": 1.83, + "learning_rate": 1.6526647536218894e-05, + "loss": 0.9238, + "step": 3310 + }, + { + "epoch": 1.83, + "learning_rate": 1.6458568254399225e-05, + "loss": 0.9632, + "step": 3315 + }, + { + "epoch": 1.84, + "learning_rate": 1.6390560608486496e-05, + "loss": 0.9164, + "step": 3320 + }, + { + "epoch": 1.84, + "learning_rate": 1.6322625168852217e-05, + "loss": 0.9505, + "step": 3325 + }, + { + "epoch": 1.84, + "learning_rate": 1.6254762505262338e-05, + "loss": 0.9622, + "step": 3330 + }, + { + "epoch": 1.84, + "learning_rate": 1.618697318687241e-05, + "loss": 0.9204, + "step": 3335 + }, + { + "epoch": 1.85, + "learning_rate": 1.6119257782222895e-05, + "loss": 0.9504, + "step": 3340 + }, + { + "epoch": 1.85, + "learning_rate": 1.6051616859234285e-05, + "loss": 0.9384, + "step": 3345 + }, + { + "epoch": 1.85, + "learning_rate": 1.5984050985202474e-05, + "loss": 0.9374, + "step": 3350 + }, + { + "epoch": 1.86, + "learning_rate": 1.591656072679387e-05, + "loss": 0.9937, + "step": 3355 + }, + { + "epoch": 1.86, + "learning_rate": 1.5849146650040737e-05, + "loss": 0.9587, + "step": 3360 + }, + { + "epoch": 1.86, + "learning_rate": 1.5781809320336412e-05, + "loss": 0.9312, + "step": 3365 + }, + { + "epoch": 1.86, + "learning_rate": 1.5714549302430536e-05, + "loss": 0.9343, + "step": 3370 + }, + { + "epoch": 1.87, + "learning_rate": 1.5647367160424393e-05, + "loss": 0.9419, + "step": 3375 + }, + { + "epoch": 1.87, + "learning_rate": 1.558026345776608e-05, + "loss": 0.9371, + "step": 3380 + }, + { + "epoch": 1.87, + "learning_rate": 1.551323875724587e-05, + "loss": 0.913, + "step": 3385 + }, + { + "epoch": 1.87, + "learning_rate": 1.5446293620991437e-05, + "loss": 0.9195, + "step": 3390 + }, + { + "epoch": 1.88, + "learning_rate": 1.5379428610463174e-05, + "loss": 0.9164, + "step": 3395 + }, + { + "epoch": 1.88, + "learning_rate": 1.531264428644945e-05, + "loss": 0.9256, + "step": 3400 + }, + { + "epoch": 1.88, + "learning_rate": 1.5245941209061953e-05, + "loss": 0.9658, + "step": 3405 + }, + { + "epoch": 1.89, + "learning_rate": 1.517931993773094e-05, + "loss": 0.9397, + "step": 3410 + }, + { + "epoch": 1.89, + "learning_rate": 1.5112781031200569e-05, + "loss": 0.915, + "step": 3415 + }, + { + "epoch": 1.89, + "learning_rate": 1.5046325047524251e-05, + "loss": 0.9116, + "step": 3420 + }, + { + "epoch": 1.89, + "learning_rate": 1.4979952544059888e-05, + "loss": 0.9192, + "step": 3425 + }, + { + "epoch": 1.9, + "learning_rate": 1.4913664077465289e-05, + "loss": 0.924, + "step": 3430 + }, + { + "epoch": 1.9, + "learning_rate": 1.4847460203693408e-05, + "loss": 0.9579, + "step": 3435 + }, + { + "epoch": 1.9, + "learning_rate": 1.4781341477987776e-05, + "loss": 0.9816, + "step": 3440 + }, + { + "epoch": 1.9, + "learning_rate": 1.4715308454877758e-05, + "loss": 0.9412, + "step": 3445 + }, + { + "epoch": 1.91, + "learning_rate": 1.4649361688173979e-05, + "loss": 0.9413, + "step": 3450 + }, + { + "epoch": 1.91, + "learning_rate": 1.458350173096361e-05, + "loss": 0.9425, + "step": 3455 + }, + { + "epoch": 1.91, + "learning_rate": 1.4517729135605795e-05, + "loss": 0.9132, + "step": 3460 + }, + { + "epoch": 1.92, + "learning_rate": 1.4452044453726942e-05, + "loss": 0.9455, + "step": 3465 + }, + { + "epoch": 1.92, + "learning_rate": 1.4386448236216174e-05, + "loss": 0.9543, + "step": 3470 + }, + { + "epoch": 1.92, + "learning_rate": 1.4320941033220667e-05, + "loss": 0.9118, + "step": 3475 + }, + { + "epoch": 1.92, + "learning_rate": 1.4255523394141041e-05, + "loss": 0.9425, + "step": 3480 + }, + { + "epoch": 1.93, + "learning_rate": 1.4190195867626749e-05, + "loss": 0.8958, + "step": 3485 + }, + { + "epoch": 1.93, + "learning_rate": 1.4124959001571497e-05, + "loss": 0.944, + "step": 3490 + }, + { + "epoch": 1.93, + "learning_rate": 1.4059813343108616e-05, + "loss": 0.9611, + "step": 3495 + }, + { + "epoch": 1.94, + "learning_rate": 1.3994759438606501e-05, + "loss": 0.9448, + "step": 3500 + } + ], + "max_steps": 5424, + "num_train_epochs": 3, + "total_flos": 1.1373757808089498e+18, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-3500/training_args.bin b/checkpoint-3500/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..77433459daca6603950afc463ff989e817f81ac3 --- /dev/null +++ b/checkpoint-3500/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0f6a8853599a52b1f15984d75ce4b6434c335fdc67d4509d0b2ca0d8c613590a +size 3362 diff --git a/checkpoint-3600/README.md b/checkpoint-3600/README.md new file mode 100644 index 0000000000000000000000000000000000000000..53c0fe6b436905ad7a2c060e98fe1ebdeac1a2fe --- /dev/null +++ b/checkpoint-3600/README.md @@ -0,0 +1,15 @@ +--- +library_name: peft +--- +## Training procedure + + +The following `bitsandbytes` quantization config was used during training: +- load_in_8bit: True +- llm_int8_threshold: 6.0 +- llm_int8_skip_modules: None +- llm_int8_enable_fp32_cpu_offload: False +### Framework versions + + +- PEFT 0.5.0 diff --git a/checkpoint-3600/adapter_config.json b/checkpoint-3600/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..514ae99ac7471cc26016f7575e44dd8304f29b07 --- /dev/null +++ b/checkpoint-3600/adapter_config.json @@ -0,0 +1,21 @@ +{ + "auto_mapping": null, + "base_model_name_or_path": "meta-llama/Llama-2-7b-chat-hf", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "q_proj", + "v_proj" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-3600/adapter_model.bin b/checkpoint-3600/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..51ecef1525af65f0a3b8087a281d6243fd8a7df2 --- /dev/null +++ b/checkpoint-3600/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4849fe35768fbb8282777348832a522ff51651a65ff91b425f8f7d3b20a88c51 +size 16821197 diff --git a/checkpoint-3600/finetuning_args.json b/checkpoint-3600/finetuning_args.json new file mode 100644 index 0000000000000000000000000000000000000000..d01efc206b59c6f88548e8f3940579f2ed2af33b --- /dev/null +++ b/checkpoint-3600/finetuning_args.json @@ -0,0 +1,16 @@ +{ + "dpo_beta": 0.1, + "finetuning_type": "lora", + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "lora_rank": 8, + "lora_target": [ + "q_proj", + "v_proj" + ], + "name_module_trainable": "mlp", + "num_hidden_layers": 32, + "num_layer_trainable": 3, + "ppo_score_norm": false, + "resume_lora_training": true +} diff --git a/checkpoint-3600/optimizer.pt b/checkpoint-3600/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..439c251666809127b2646a3cd800318cafc1b0d7 --- /dev/null +++ b/checkpoint-3600/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:87733bf6ef55073bac3bc4d83ddac317129da295315a398f50a81fd86ca54fad +size 33629893 diff --git a/checkpoint-3600/rng_state.pth b/checkpoint-3600/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..a89deb0bb06fa40c8a18901aa03fb0b493f09ac5 --- /dev/null +++ b/checkpoint-3600/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e09c20b785e019b4c54040ecd0138b03c9f518903f5c695f7560f49f3f509a80 +size 14575 diff --git a/checkpoint-3600/scaler.pt b/checkpoint-3600/scaler.pt new file mode 100644 index 0000000000000000000000000000000000000000..8843e176bcc7f391e15e61642adea33b9c3d3f1e --- /dev/null +++ b/checkpoint-3600/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:533e26a83861d3eb73b5bece6aaba2dc023f9924c236c248fa6a3f38d29733ef +size 557 diff --git a/checkpoint-3600/scheduler.pt b/checkpoint-3600/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..42e30189fec0a72553bf1f77e47bfae296775aa7 --- /dev/null +++ b/checkpoint-3600/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3a89b5aec9aaa012e4cc29b58380e457e3878ec4a8104692d6ea8349a8752162 +size 627 diff --git a/checkpoint-3600/trainer_state.json b/checkpoint-3600/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..00532eab27df23ede0e20599ceac6196514c045e --- /dev/null +++ b/checkpoint-3600/trainer_state.json @@ -0,0 +1,4336 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.9905999447055571, + "global_step": 3600, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.9999895164082156e-05, + "loss": 1.6349, + "step": 5 + }, + { + "epoch": 0.01, + "learning_rate": 4.999958065720787e-05, + "loss": 1.6199, + "step": 10 + }, + { + "epoch": 0.01, + "learning_rate": 4.999905648201487e-05, + "loss": 1.4834, + "step": 15 + }, + { + "epoch": 0.01, + "learning_rate": 4.999832264289934e-05, + "loss": 1.3882, + "step": 20 + }, + { + "epoch": 0.01, + "learning_rate": 4.999737914601591e-05, + "loss": 1.3679, + "step": 25 + }, + { + "epoch": 0.02, + "learning_rate": 4.999622599927756e-05, + "loss": 1.2396, + "step": 30 + }, + { + "epoch": 0.02, + "learning_rate": 4.999486321235559e-05, + "loss": 1.321, + "step": 35 + }, + { + "epoch": 0.02, + "learning_rate": 4.9993290796679516e-05, + "loss": 1.2874, + "step": 40 + }, + { + "epoch": 0.02, + "learning_rate": 4.999150876543699e-05, + "loss": 1.2607, + "step": 45 + }, + { + "epoch": 0.03, + "learning_rate": 4.9989517133573694e-05, + "loss": 1.2454, + "step": 50 + }, + { + "epoch": 0.03, + "learning_rate": 4.9987315917793174e-05, + "loss": 1.2799, + "step": 55 + }, + { + "epoch": 0.03, + "learning_rate": 4.998490513655676e-05, + "loss": 1.2575, + "step": 60 + }, + { + "epoch": 0.04, + "learning_rate": 4.998228481008337e-05, + "loss": 1.2404, + "step": 65 + }, + { + "epoch": 0.04, + "learning_rate": 4.997945496034934e-05, + "loss": 1.2219, + "step": 70 + }, + { + "epoch": 0.04, + "learning_rate": 4.9976415611088267e-05, + "loss": 1.2241, + "step": 75 + }, + { + "epoch": 0.04, + "learning_rate": 4.997316678779079e-05, + "loss": 1.1716, + "step": 80 + }, + { + "epoch": 0.05, + "learning_rate": 4.996970851770438e-05, + "loss": 1.1883, + "step": 85 + }, + { + "epoch": 0.05, + "learning_rate": 4.9966040829833115e-05, + "loss": 1.205, + "step": 90 + }, + { + "epoch": 0.05, + "learning_rate": 4.9962163754937426e-05, + "loss": 1.1246, + "step": 95 + }, + { + "epoch": 0.06, + "learning_rate": 4.995807732553384e-05, + "loss": 1.1636, + "step": 100 + }, + { + "epoch": 0.06, + "learning_rate": 4.9953781575894723e-05, + "loss": 1.158, + "step": 105 + }, + { + "epoch": 0.06, + "learning_rate": 4.9949276542048e-05, + "loss": 1.1477, + "step": 110 + }, + { + "epoch": 0.06, + "learning_rate": 4.9944562261776805e-05, + "loss": 1.1678, + "step": 115 + }, + { + "epoch": 0.07, + "learning_rate": 4.9939638774619216e-05, + "loss": 1.1501, + "step": 120 + }, + { + "epoch": 0.07, + "learning_rate": 4.99345061218679e-05, + "loss": 1.1955, + "step": 125 + }, + { + "epoch": 0.07, + "learning_rate": 4.9929164346569756e-05, + "loss": 1.1724, + "step": 130 + }, + { + "epoch": 0.07, + "learning_rate": 4.9923613493525576e-05, + "loss": 1.177, + "step": 135 + }, + { + "epoch": 0.08, + "learning_rate": 4.991785360928968e-05, + "loss": 1.1418, + "step": 140 + }, + { + "epoch": 0.08, + "learning_rate": 4.991188474216947e-05, + "loss": 1.1898, + "step": 145 + }, + { + "epoch": 0.08, + "learning_rate": 4.9905706942225094e-05, + "loss": 1.1479, + "step": 150 + }, + { + "epoch": 0.09, + "learning_rate": 4.9899320261268966e-05, + "loss": 1.1356, + "step": 155 + }, + { + "epoch": 0.09, + "learning_rate": 4.989272475286537e-05, + "loss": 1.1397, + "step": 160 + }, + { + "epoch": 0.09, + "learning_rate": 4.9885920472330004e-05, + "loss": 1.1215, + "step": 165 + }, + { + "epoch": 0.09, + "learning_rate": 4.9878907476729516e-05, + "loss": 1.167, + "step": 170 + }, + { + "epoch": 0.1, + "learning_rate": 4.9871685824881e-05, + "loss": 1.1219, + "step": 175 + }, + { + "epoch": 0.1, + "learning_rate": 4.9864255577351534e-05, + "loss": 1.0835, + "step": 180 + }, + { + "epoch": 0.1, + "learning_rate": 4.985661679645769e-05, + "loss": 1.0721, + "step": 185 + }, + { + "epoch": 0.11, + "learning_rate": 4.9848769546264915e-05, + "loss": 1.0692, + "step": 190 + }, + { + "epoch": 0.11, + "learning_rate": 4.9840713892587146e-05, + "loss": 1.0488, + "step": 195 + }, + { + "epoch": 0.11, + "learning_rate": 4.983244990298609e-05, + "loss": 1.1285, + "step": 200 + }, + { + "epoch": 0.11, + "learning_rate": 4.982397764677081e-05, + "loss": 1.0832, + "step": 205 + }, + { + "epoch": 0.12, + "learning_rate": 4.981529719499704e-05, + "loss": 1.0652, + "step": 210 + }, + { + "epoch": 0.12, + "learning_rate": 4.980640862046663e-05, + "loss": 1.1043, + "step": 215 + }, + { + "epoch": 0.12, + "learning_rate": 4.979731199772693e-05, + "loss": 1.112, + "step": 220 + }, + { + "epoch": 0.12, + "learning_rate": 4.9788007403070146e-05, + "loss": 1.1029, + "step": 225 + }, + { + "epoch": 0.13, + "learning_rate": 4.977849491453277e-05, + "loss": 1.0869, + "step": 230 + }, + { + "epoch": 0.13, + "learning_rate": 4.976877461189481e-05, + "loss": 1.0843, + "step": 235 + }, + { + "epoch": 0.13, + "learning_rate": 4.975884657667922e-05, + "loss": 1.0789, + "step": 240 + }, + { + "epoch": 0.14, + "learning_rate": 4.974871089215118e-05, + "loss": 1.0449, + "step": 245 + }, + { + "epoch": 0.14, + "learning_rate": 4.9738367643317405e-05, + "loss": 1.1053, + "step": 250 + }, + { + "epoch": 0.14, + "learning_rate": 4.9727816916925395e-05, + "loss": 1.0651, + "step": 255 + }, + { + "epoch": 0.14, + "learning_rate": 4.971705880146276e-05, + "loss": 1.0828, + "step": 260 + }, + { + "epoch": 0.15, + "learning_rate": 4.970609338715646e-05, + "loss": 1.0932, + "step": 265 + }, + { + "epoch": 0.15, + "learning_rate": 4.969492076597203e-05, + "loss": 1.0648, + "step": 270 + }, + { + "epoch": 0.15, + "learning_rate": 4.968354103161283e-05, + "loss": 1.0948, + "step": 275 + }, + { + "epoch": 0.15, + "learning_rate": 4.967195427951926e-05, + "loss": 1.0721, + "step": 280 + }, + { + "epoch": 0.16, + "learning_rate": 4.9660160606867936e-05, + "loss": 1.124, + "step": 285 + }, + { + "epoch": 0.16, + "learning_rate": 4.9648160112570896e-05, + "loss": 1.0963, + "step": 290 + }, + { + "epoch": 0.16, + "learning_rate": 4.9635952897274773e-05, + "loss": 1.1078, + "step": 295 + }, + { + "epoch": 0.17, + "learning_rate": 4.9623539063359925e-05, + "loss": 1.1059, + "step": 300 + }, + { + "epoch": 0.17, + "learning_rate": 4.961091871493962e-05, + "loss": 1.1032, + "step": 305 + }, + { + "epoch": 0.17, + "learning_rate": 4.959809195785912e-05, + "loss": 1.0595, + "step": 310 + }, + { + "epoch": 0.17, + "learning_rate": 4.958505889969481e-05, + "loss": 1.1096, + "step": 315 + }, + { + "epoch": 0.18, + "learning_rate": 4.957181964975329e-05, + "loss": 1.0589, + "step": 320 + }, + { + "epoch": 0.18, + "learning_rate": 4.955837431907049e-05, + "loss": 1.0608, + "step": 325 + }, + { + "epoch": 0.18, + "learning_rate": 4.954472302041069e-05, + "loss": 1.0819, + "step": 330 + }, + { + "epoch": 0.19, + "learning_rate": 4.9530865868265605e-05, + "loss": 1.0759, + "step": 335 + }, + { + "epoch": 0.19, + "learning_rate": 4.951680297885342e-05, + "loss": 1.0515, + "step": 340 + }, + { + "epoch": 0.19, + "learning_rate": 4.950253447011779e-05, + "loss": 1.0371, + "step": 345 + }, + { + "epoch": 0.19, + "learning_rate": 4.948806046172691e-05, + "loss": 1.0619, + "step": 350 + }, + { + "epoch": 0.2, + "learning_rate": 4.947338107507245e-05, + "loss": 1.0757, + "step": 355 + }, + { + "epoch": 0.2, + "learning_rate": 4.945849643326857e-05, + "loss": 1.0686, + "step": 360 + }, + { + "epoch": 0.2, + "learning_rate": 4.9443406661150874e-05, + "loss": 1.0809, + "step": 365 + }, + { + "epoch": 0.2, + "learning_rate": 4.942811188527537e-05, + "loss": 1.0704, + "step": 370 + }, + { + "epoch": 0.21, + "learning_rate": 4.941261223391742e-05, + "loss": 1.0655, + "step": 375 + }, + { + "epoch": 0.21, + "learning_rate": 4.939690783707063e-05, + "loss": 1.1182, + "step": 380 + }, + { + "epoch": 0.21, + "learning_rate": 4.938099882644578e-05, + "loss": 1.081, + "step": 385 + }, + { + "epoch": 0.22, + "learning_rate": 4.9364885335469734e-05, + "loss": 1.0792, + "step": 390 + }, + { + "epoch": 0.22, + "learning_rate": 4.93485674992843e-05, + "loss": 1.0244, + "step": 395 + }, + { + "epoch": 0.22, + "learning_rate": 4.933204545474511e-05, + "loss": 1.0531, + "step": 400 + }, + { + "epoch": 0.22, + "learning_rate": 4.9315319340420465e-05, + "loss": 1.048, + "step": 405 + }, + { + "epoch": 0.23, + "learning_rate": 4.929838929659015e-05, + "loss": 1.04, + "step": 410 + }, + { + "epoch": 0.23, + "learning_rate": 4.9281255465244314e-05, + "loss": 1.0686, + "step": 415 + }, + { + "epoch": 0.23, + "learning_rate": 4.926391799008223e-05, + "loss": 1.0933, + "step": 420 + }, + { + "epoch": 0.24, + "learning_rate": 4.924637701651111e-05, + "loss": 1.0313, + "step": 425 + }, + { + "epoch": 0.24, + "learning_rate": 4.9228632691644874e-05, + "loss": 1.0811, + "step": 430 + }, + { + "epoch": 0.24, + "learning_rate": 4.921068516430293e-05, + "loss": 1.0617, + "step": 435 + }, + { + "epoch": 0.24, + "learning_rate": 4.919253458500892e-05, + "loss": 1.0482, + "step": 440 + }, + { + "epoch": 0.25, + "learning_rate": 4.9174181105989445e-05, + "loss": 1.0681, + "step": 445 + }, + { + "epoch": 0.25, + "learning_rate": 4.9155624881172834e-05, + "loss": 1.0476, + "step": 450 + }, + { + "epoch": 0.25, + "learning_rate": 4.913686606618777e-05, + "loss": 1.0463, + "step": 455 + }, + { + "epoch": 0.25, + "learning_rate": 4.911790481836208e-05, + "loss": 1.0213, + "step": 460 + }, + { + "epoch": 0.26, + "learning_rate": 4.909874129672133e-05, + "loss": 0.9855, + "step": 465 + }, + { + "epoch": 0.26, + "learning_rate": 4.907937566198757e-05, + "loss": 1.045, + "step": 470 + }, + { + "epoch": 0.26, + "learning_rate": 4.9059808076577914e-05, + "loss": 1.0442, + "step": 475 + }, + { + "epoch": 0.27, + "learning_rate": 4.904003870460323e-05, + "loss": 1.0354, + "step": 480 + }, + { + "epoch": 0.27, + "learning_rate": 4.9020067711866735e-05, + "loss": 1.0331, + "step": 485 + }, + { + "epoch": 0.27, + "learning_rate": 4.899989526586261e-05, + "loss": 1.0585, + "step": 490 + }, + { + "epoch": 0.27, + "learning_rate": 4.8979521535774636e-05, + "loss": 1.0223, + "step": 495 + }, + { + "epoch": 0.28, + "learning_rate": 4.895894669247468e-05, + "loss": 1.0118, + "step": 500 + }, + { + "epoch": 0.28, + "learning_rate": 4.8938170908521356e-05, + "loss": 1.0508, + "step": 505 + }, + { + "epoch": 0.28, + "learning_rate": 4.8917194358158534e-05, + "loss": 1.0656, + "step": 510 + }, + { + "epoch": 0.28, + "learning_rate": 4.8896017217313886e-05, + "loss": 1.0655, + "step": 515 + }, + { + "epoch": 0.29, + "learning_rate": 4.887463966359741e-05, + "loss": 1.0833, + "step": 520 + }, + { + "epoch": 0.29, + "learning_rate": 4.8853061876299956e-05, + "loss": 1.068, + "step": 525 + }, + { + "epoch": 0.29, + "learning_rate": 4.8831284036391684e-05, + "loss": 1.0487, + "step": 530 + }, + { + "epoch": 0.3, + "learning_rate": 4.880930632652058e-05, + "loss": 1.059, + "step": 535 + }, + { + "epoch": 0.3, + "learning_rate": 4.878712893101092e-05, + "loss": 1.0408, + "step": 540 + }, + { + "epoch": 0.3, + "learning_rate": 4.876475203586171e-05, + "loss": 0.9976, + "step": 545 + }, + { + "epoch": 0.3, + "learning_rate": 4.874217582874514e-05, + "loss": 1.0358, + "step": 550 + }, + { + "epoch": 0.31, + "learning_rate": 4.8719400499005e-05, + "loss": 1.0073, + "step": 555 + }, + { + "epoch": 0.31, + "learning_rate": 4.869642623765509e-05, + "loss": 1.0742, + "step": 560 + }, + { + "epoch": 0.31, + "learning_rate": 4.867325323737765e-05, + "loss": 1.0463, + "step": 565 + }, + { + "epoch": 0.32, + "learning_rate": 4.864988169252168e-05, + "loss": 1.0242, + "step": 570 + }, + { + "epoch": 0.32, + "learning_rate": 4.8626311799101375e-05, + "loss": 1.0773, + "step": 575 + }, + { + "epoch": 0.32, + "learning_rate": 4.860254375479446e-05, + "loss": 1.0371, + "step": 580 + }, + { + "epoch": 0.32, + "learning_rate": 4.8578577758940504e-05, + "loss": 1.0399, + "step": 585 + }, + { + "epoch": 0.33, + "learning_rate": 4.855441401253928e-05, + "loss": 1.0411, + "step": 590 + }, + { + "epoch": 0.33, + "learning_rate": 4.8530052718249076e-05, + "loss": 1.0476, + "step": 595 + }, + { + "epoch": 0.33, + "learning_rate": 4.850549408038498e-05, + "loss": 1.0075, + "step": 600 + }, + { + "epoch": 0.33, + "learning_rate": 4.848073830491717e-05, + "loss": 1.0045, + "step": 605 + }, + { + "epoch": 0.34, + "learning_rate": 4.845578559946923e-05, + "loss": 1.0368, + "step": 610 + }, + { + "epoch": 0.34, + "learning_rate": 4.8430636173316306e-05, + "loss": 1.0156, + "step": 615 + }, + { + "epoch": 0.34, + "learning_rate": 4.840529023738348e-05, + "loss": 1.0334, + "step": 620 + }, + { + "epoch": 0.35, + "learning_rate": 4.837974800424389e-05, + "loss": 0.9994, + "step": 625 + }, + { + "epoch": 0.35, + "learning_rate": 4.8354009688117026e-05, + "loss": 1.0467, + "step": 630 + }, + { + "epoch": 0.35, + "learning_rate": 4.8328075504866874e-05, + "loss": 0.9968, + "step": 635 + }, + { + "epoch": 0.35, + "learning_rate": 4.8301945672000164e-05, + "loss": 1.041, + "step": 640 + }, + { + "epoch": 0.36, + "learning_rate": 4.8275620408664487e-05, + "loss": 0.9811, + "step": 645 + }, + { + "epoch": 0.36, + "learning_rate": 4.8249099935646494e-05, + "loss": 1.0235, + "step": 650 + }, + { + "epoch": 0.36, + "learning_rate": 4.822238447537003e-05, + "loss": 1.0152, + "step": 655 + }, + { + "epoch": 0.36, + "learning_rate": 4.819547425189429e-05, + "loss": 1.038, + "step": 660 + }, + { + "epoch": 0.37, + "learning_rate": 4.81683694909119e-05, + "loss": 1.0387, + "step": 665 + }, + { + "epoch": 0.37, + "learning_rate": 4.814107041974707e-05, + "loss": 1.019, + "step": 670 + }, + { + "epoch": 0.37, + "learning_rate": 4.811357726735366e-05, + "loss": 1.0403, + "step": 675 + }, + { + "epoch": 0.38, + "learning_rate": 4.808589026431324e-05, + "loss": 1.0828, + "step": 680 + }, + { + "epoch": 0.38, + "learning_rate": 4.805800964283322e-05, + "loss": 1.0128, + "step": 685 + }, + { + "epoch": 0.38, + "learning_rate": 4.802993563674483e-05, + "loss": 1.0502, + "step": 690 + }, + { + "epoch": 0.38, + "learning_rate": 4.80016684815012e-05, + "loss": 1.0169, + "step": 695 + }, + { + "epoch": 0.39, + "learning_rate": 4.7973208414175406e-05, + "loss": 1.0316, + "step": 700 + }, + { + "epoch": 0.39, + "learning_rate": 4.794455567345842e-05, + "loss": 1.0263, + "step": 705 + }, + { + "epoch": 0.39, + "learning_rate": 4.791571049965714e-05, + "loss": 1.0801, + "step": 710 + }, + { + "epoch": 0.4, + "learning_rate": 4.7886673134692404e-05, + "loss": 1.0575, + "step": 715 + }, + { + "epoch": 0.4, + "learning_rate": 4.7857443822096905e-05, + "loss": 0.9767, + "step": 720 + }, + { + "epoch": 0.4, + "learning_rate": 4.782802280701319e-05, + "loss": 0.9666, + "step": 725 + }, + { + "epoch": 0.4, + "learning_rate": 4.77984103361916e-05, + "loss": 0.9983, + "step": 730 + }, + { + "epoch": 0.41, + "learning_rate": 4.776860665798816e-05, + "loss": 1.0219, + "step": 735 + }, + { + "epoch": 0.41, + "learning_rate": 4.773861202236257e-05, + "loss": 0.9963, + "step": 740 + }, + { + "epoch": 0.41, + "learning_rate": 4.770842668087602e-05, + "loss": 1.0219, + "step": 745 + }, + { + "epoch": 0.41, + "learning_rate": 4.767805088668916e-05, + "loss": 1.0029, + "step": 750 + }, + { + "epoch": 0.42, + "learning_rate": 4.7647484894559936e-05, + "loss": 1.0077, + "step": 755 + }, + { + "epoch": 0.42, + "learning_rate": 4.7616728960841444e-05, + "loss": 0.9908, + "step": 760 + }, + { + "epoch": 0.42, + "learning_rate": 4.758578334347981e-05, + "loss": 1.0092, + "step": 765 + }, + { + "epoch": 0.43, + "learning_rate": 4.7554648302012015e-05, + "loss": 0.9874, + "step": 770 + }, + { + "epoch": 0.43, + "learning_rate": 4.7523324097563706e-05, + "loss": 1.0185, + "step": 775 + }, + { + "epoch": 0.43, + "learning_rate": 4.749181099284703e-05, + "loss": 1.0533, + "step": 780 + }, + { + "epoch": 0.43, + "learning_rate": 4.746010925215839e-05, + "loss": 1.0083, + "step": 785 + }, + { + "epoch": 0.44, + "learning_rate": 4.74282191413763e-05, + "loss": 1.0191, + "step": 790 + }, + { + "epoch": 0.44, + "learning_rate": 4.7396140927959045e-05, + "loss": 0.9786, + "step": 795 + }, + { + "epoch": 0.44, + "learning_rate": 4.7363874880942574e-05, + "loss": 1.0197, + "step": 800 + }, + { + "epoch": 0.45, + "learning_rate": 4.733142127093813e-05, + "loss": 1.0471, + "step": 805 + }, + { + "epoch": 0.45, + "learning_rate": 4.7298780370130014e-05, + "loss": 1.0134, + "step": 810 + }, + { + "epoch": 0.45, + "learning_rate": 4.726595245227336e-05, + "loss": 0.9801, + "step": 815 + }, + { + "epoch": 0.45, + "learning_rate": 4.723293779269173e-05, + "loss": 0.9944, + "step": 820 + }, + { + "epoch": 0.46, + "learning_rate": 4.7199736668274924e-05, + "loss": 1.0358, + "step": 825 + }, + { + "epoch": 0.46, + "learning_rate": 4.716634935747655e-05, + "loss": 0.9907, + "step": 830 + }, + { + "epoch": 0.46, + "learning_rate": 4.713277614031177e-05, + "loss": 1.0698, + "step": 835 + }, + { + "epoch": 0.46, + "learning_rate": 4.70990172983549e-05, + "loss": 1.0466, + "step": 840 + }, + { + "epoch": 0.47, + "learning_rate": 4.706507311473707e-05, + "loss": 0.9489, + "step": 845 + }, + { + "epoch": 0.47, + "learning_rate": 4.703094387414385e-05, + "loss": 0.9936, + "step": 850 + }, + { + "epoch": 0.47, + "learning_rate": 4.699662986281288e-05, + "loss": 0.9672, + "step": 855 + }, + { + "epoch": 0.48, + "learning_rate": 4.696213136853141e-05, + "loss": 1.0164, + "step": 860 + }, + { + "epoch": 0.48, + "learning_rate": 4.6927448680633954e-05, + "loss": 0.9817, + "step": 865 + }, + { + "epoch": 0.48, + "learning_rate": 4.689258208999983e-05, + "loss": 1.028, + "step": 870 + }, + { + "epoch": 0.48, + "learning_rate": 4.6857531889050716e-05, + "loss": 1.0239, + "step": 875 + }, + { + "epoch": 0.49, + "learning_rate": 4.682229837174821e-05, + "loss": 1.0378, + "step": 880 + }, + { + "epoch": 0.49, + "learning_rate": 4.678688183359135e-05, + "loss": 0.9908, + "step": 885 + }, + { + "epoch": 0.49, + "learning_rate": 4.675128257161418e-05, + "loss": 0.9384, + "step": 890 + }, + { + "epoch": 0.49, + "learning_rate": 4.671550088438319e-05, + "loss": 0.9481, + "step": 895 + }, + { + "epoch": 0.5, + "learning_rate": 4.6679537071994874e-05, + "loss": 0.9541, + "step": 900 + }, + { + "epoch": 0.5, + "learning_rate": 4.6643391436073165e-05, + "loss": 0.9787, + "step": 905 + }, + { + "epoch": 0.5, + "learning_rate": 4.660706427976693e-05, + "loss": 0.9897, + "step": 910 + }, + { + "epoch": 0.51, + "learning_rate": 4.657055590774745e-05, + "loss": 0.9947, + "step": 915 + }, + { + "epoch": 0.51, + "learning_rate": 4.6533866626205805e-05, + "loss": 0.9614, + "step": 920 + }, + { + "epoch": 0.51, + "learning_rate": 4.649699674285036e-05, + "loss": 1.0323, + "step": 925 + }, + { + "epoch": 0.51, + "learning_rate": 4.645994656690417e-05, + "loss": 1.0345, + "step": 930 + }, + { + "epoch": 0.52, + "learning_rate": 4.642271640910235e-05, + "loss": 1.0432, + "step": 935 + }, + { + "epoch": 0.52, + "learning_rate": 4.638530658168954e-05, + "loss": 1.0102, + "step": 940 + }, + { + "epoch": 0.52, + "learning_rate": 4.6347717398417203e-05, + "loss": 1.0361, + "step": 945 + }, + { + "epoch": 0.53, + "learning_rate": 4.6309949174541096e-05, + "loss": 0.9818, + "step": 950 + }, + { + "epoch": 0.53, + "learning_rate": 4.627200222681851e-05, + "loss": 1.0293, + "step": 955 + }, + { + "epoch": 0.53, + "learning_rate": 4.6233876873505694e-05, + "loss": 1.0125, + "step": 960 + }, + { + "epoch": 0.53, + "learning_rate": 4.619557343435516e-05, + "loss": 1.0039, + "step": 965 + }, + { + "epoch": 0.54, + "learning_rate": 4.615709223061302e-05, + "loss": 1.0146, + "step": 970 + }, + { + "epoch": 0.54, + "learning_rate": 4.611843358501624e-05, + "loss": 1.0352, + "step": 975 + }, + { + "epoch": 0.54, + "learning_rate": 4.6079597821789993e-05, + "loss": 1.0405, + "step": 980 + }, + { + "epoch": 0.54, + "learning_rate": 4.604058526664491e-05, + "loss": 0.9834, + "step": 985 + }, + { + "epoch": 0.55, + "learning_rate": 4.600139624677436e-05, + "loss": 1.0252, + "step": 990 + }, + { + "epoch": 0.55, + "learning_rate": 4.596203109085168e-05, + "loss": 1.0516, + "step": 995 + }, + { + "epoch": 0.55, + "learning_rate": 4.5922490129027464e-05, + "loss": 0.9762, + "step": 1000 + }, + { + "epoch": 0.56, + "learning_rate": 4.588277369292674e-05, + "loss": 0.9801, + "step": 1005 + }, + { + "epoch": 0.56, + "learning_rate": 4.5842882115646234e-05, + "loss": 1.0127, + "step": 1010 + }, + { + "epoch": 0.56, + "learning_rate": 4.580281573175157e-05, + "loss": 1.0415, + "step": 1015 + }, + { + "epoch": 0.56, + "learning_rate": 4.576257487727442e-05, + "loss": 0.9974, + "step": 1020 + }, + { + "epoch": 0.57, + "learning_rate": 4.572215988970974e-05, + "loss": 1.0097, + "step": 1025 + }, + { + "epoch": 0.57, + "learning_rate": 4.568157110801293e-05, + "loss": 1.0004, + "step": 1030 + }, + { + "epoch": 0.57, + "learning_rate": 4.5640808872596944e-05, + "loss": 0.9932, + "step": 1035 + }, + { + "epoch": 0.58, + "learning_rate": 4.5599873525329505e-05, + "loss": 0.9437, + "step": 1040 + }, + { + "epoch": 0.58, + "learning_rate": 4.555876540953019e-05, + "loss": 1.0004, + "step": 1045 + }, + { + "epoch": 0.58, + "learning_rate": 4.551748486996755e-05, + "loss": 0.9743, + "step": 1050 + }, + { + "epoch": 0.58, + "learning_rate": 4.547603225285626e-05, + "loss": 1.0303, + "step": 1055 + }, + { + "epoch": 0.59, + "learning_rate": 4.543440790585417e-05, + "loss": 1.0225, + "step": 1060 + }, + { + "epoch": 0.59, + "learning_rate": 4.539261217805939e-05, + "loss": 1.0102, + "step": 1065 + }, + { + "epoch": 0.59, + "learning_rate": 4.535064542000743e-05, + "loss": 1.0087, + "step": 1070 + }, + { + "epoch": 0.59, + "learning_rate": 4.5308507983668165e-05, + "loss": 0.952, + "step": 1075 + }, + { + "epoch": 0.6, + "learning_rate": 4.526620022244293e-05, + "loss": 0.9767, + "step": 1080 + }, + { + "epoch": 0.6, + "learning_rate": 4.522372249116158e-05, + "loss": 1.0049, + "step": 1085 + }, + { + "epoch": 0.6, + "learning_rate": 4.5181075146079456e-05, + "loss": 1.0397, + "step": 1090 + }, + { + "epoch": 0.61, + "learning_rate": 4.5138258544874455e-05, + "loss": 1.0167, + "step": 1095 + }, + { + "epoch": 0.61, + "learning_rate": 4.5095273046643985e-05, + "loss": 0.9966, + "step": 1100 + }, + { + "epoch": 0.61, + "learning_rate": 4.5052119011901986e-05, + "loss": 0.983, + "step": 1105 + }, + { + "epoch": 0.61, + "learning_rate": 4.500879680257587e-05, + "loss": 1.0301, + "step": 1110 + }, + { + "epoch": 0.62, + "learning_rate": 4.4965306782003535e-05, + "loss": 1.0371, + "step": 1115 + }, + { + "epoch": 0.62, + "learning_rate": 4.492164931493028e-05, + "loss": 1.0083, + "step": 1120 + }, + { + "epoch": 0.62, + "learning_rate": 4.487782476750575e-05, + "loss": 0.9728, + "step": 1125 + }, + { + "epoch": 0.62, + "learning_rate": 4.4833833507280884e-05, + "loss": 0.9892, + "step": 1130 + }, + { + "epoch": 0.63, + "learning_rate": 4.4789675903204805e-05, + "loss": 0.9847, + "step": 1135 + }, + { + "epoch": 0.63, + "learning_rate": 4.474535232562176e-05, + "loss": 1.0545, + "step": 1140 + }, + { + "epoch": 0.63, + "learning_rate": 4.470086314626797e-05, + "loss": 1.0204, + "step": 1145 + }, + { + "epoch": 0.64, + "learning_rate": 4.465620873826856e-05, + "loss": 0.9648, + "step": 1150 + }, + { + "epoch": 0.64, + "learning_rate": 4.46113894761344e-05, + "loss": 0.9545, + "step": 1155 + }, + { + "epoch": 0.64, + "learning_rate": 4.456640573575896e-05, + "loss": 1.0256, + "step": 1160 + }, + { + "epoch": 0.64, + "learning_rate": 4.4521257894415183e-05, + "loss": 0.99, + "step": 1165 + }, + { + "epoch": 0.65, + "learning_rate": 4.44759463307523e-05, + "loss": 0.9906, + "step": 1170 + }, + { + "epoch": 0.65, + "learning_rate": 4.443047142479266e-05, + "loss": 0.9876, + "step": 1175 + }, + { + "epoch": 0.65, + "learning_rate": 4.4384833557928553e-05, + "loss": 1.0495, + "step": 1180 + }, + { + "epoch": 0.66, + "learning_rate": 4.4339033112918966e-05, + "loss": 0.9869, + "step": 1185 + }, + { + "epoch": 0.66, + "learning_rate": 4.4293070473886456e-05, + "loss": 1.0299, + "step": 1190 + }, + { + "epoch": 0.66, + "learning_rate": 4.424694602631385e-05, + "loss": 1.0073, + "step": 1195 + }, + { + "epoch": 0.66, + "learning_rate": 4.420066015704105e-05, + "loss": 1.0023, + "step": 1200 + }, + { + "epoch": 0.67, + "learning_rate": 4.41542132542618e-05, + "loss": 0.9462, + "step": 1205 + }, + { + "epoch": 0.67, + "learning_rate": 4.410760570752037e-05, + "loss": 1.0116, + "step": 1210 + }, + { + "epoch": 0.67, + "learning_rate": 4.4060837907708375e-05, + "loss": 0.9652, + "step": 1215 + }, + { + "epoch": 0.67, + "learning_rate": 4.401391024706142e-05, + "loss": 1.0411, + "step": 1220 + }, + { + "epoch": 0.68, + "learning_rate": 4.396682311915586e-05, + "loss": 0.9691, + "step": 1225 + }, + { + "epoch": 0.68, + "learning_rate": 4.3919576918905495e-05, + "loss": 0.961, + "step": 1230 + }, + { + "epoch": 0.68, + "learning_rate": 4.387217204255819e-05, + "loss": 0.9602, + "step": 1235 + }, + { + "epoch": 0.69, + "learning_rate": 4.3824608887692666e-05, + "loss": 1.0197, + "step": 1240 + }, + { + "epoch": 0.69, + "learning_rate": 4.377688785321507e-05, + "loss": 0.9982, + "step": 1245 + }, + { + "epoch": 0.69, + "learning_rate": 4.372900933935569e-05, + "loss": 0.986, + "step": 1250 + }, + { + "epoch": 0.69, + "learning_rate": 4.368097374766556e-05, + "loss": 0.9744, + "step": 1255 + }, + { + "epoch": 0.7, + "learning_rate": 4.3632781481013105e-05, + "loss": 1.0078, + "step": 1260 + }, + { + "epoch": 0.7, + "learning_rate": 4.358443294358077e-05, + "loss": 0.9865, + "step": 1265 + }, + { + "epoch": 0.7, + "learning_rate": 4.35359285408616e-05, + "loss": 0.976, + "step": 1270 + }, + { + "epoch": 0.71, + "learning_rate": 4.348726867965591e-05, + "loss": 1.0141, + "step": 1275 + }, + { + "epoch": 0.71, + "learning_rate": 4.343845376806777e-05, + "loss": 0.952, + "step": 1280 + }, + { + "epoch": 0.71, + "learning_rate": 4.338948421550169e-05, + "loss": 1.0239, + "step": 1285 + }, + { + "epoch": 0.71, + "learning_rate": 4.334036043265909e-05, + "loss": 0.994, + "step": 1290 + }, + { + "epoch": 0.72, + "learning_rate": 4.329108283153492e-05, + "loss": 1.0505, + "step": 1295 + }, + { + "epoch": 0.72, + "learning_rate": 4.3241651825414195e-05, + "loss": 0.9919, + "step": 1300 + }, + { + "epoch": 0.72, + "learning_rate": 4.31920678288685e-05, + "loss": 1.0169, + "step": 1305 + }, + { + "epoch": 0.72, + "learning_rate": 4.3142331257752546e-05, + "loss": 0.9938, + "step": 1310 + }, + { + "epoch": 0.73, + "learning_rate": 4.309244252920064e-05, + "loss": 0.92, + "step": 1315 + }, + { + "epoch": 0.73, + "learning_rate": 4.304240206162326e-05, + "loss": 0.9537, + "step": 1320 + }, + { + "epoch": 0.73, + "learning_rate": 4.299221027470345e-05, + "loss": 0.996, + "step": 1325 + }, + { + "epoch": 0.74, + "learning_rate": 4.294186758939339e-05, + "loss": 0.9652, + "step": 1330 + }, + { + "epoch": 0.74, + "learning_rate": 4.2891374427910795e-05, + "loss": 1.0138, + "step": 1335 + }, + { + "epoch": 0.74, + "learning_rate": 4.284073121373544e-05, + "loss": 0.9267, + "step": 1340 + }, + { + "epoch": 0.74, + "learning_rate": 4.278993837160553e-05, + "loss": 0.9899, + "step": 1345 + }, + { + "epoch": 0.75, + "learning_rate": 4.273899632751422e-05, + "loss": 0.963, + "step": 1350 + }, + { + "epoch": 0.75, + "learning_rate": 4.2687905508705974e-05, + "loss": 1.0066, + "step": 1355 + }, + { + "epoch": 0.75, + "learning_rate": 4.263666634367303e-05, + "loss": 0.9939, + "step": 1360 + }, + { + "epoch": 0.75, + "learning_rate": 4.258527926215178e-05, + "loss": 0.9667, + "step": 1365 + }, + { + "epoch": 0.76, + "learning_rate": 4.253374469511917e-05, + "loss": 0.9999, + "step": 1370 + }, + { + "epoch": 0.76, + "learning_rate": 4.248206307478909e-05, + "loss": 0.9738, + "step": 1375 + }, + { + "epoch": 0.76, + "learning_rate": 4.243023483460875e-05, + "loss": 0.9829, + "step": 1380 + }, + { + "epoch": 0.77, + "learning_rate": 4.237826040925503e-05, + "loss": 1.0107, + "step": 1385 + }, + { + "epoch": 0.77, + "learning_rate": 4.232614023463088e-05, + "loss": 0.9992, + "step": 1390 + }, + { + "epoch": 0.77, + "learning_rate": 4.227387474786159e-05, + "loss": 0.9566, + "step": 1395 + }, + { + "epoch": 0.77, + "learning_rate": 4.222146438729119e-05, + "loss": 0.9722, + "step": 1400 + }, + { + "epoch": 0.78, + "learning_rate": 4.216890959247873e-05, + "loss": 0.988, + "step": 1405 + }, + { + "epoch": 0.78, + "learning_rate": 4.211621080419463e-05, + "loss": 1.0103, + "step": 1410 + }, + { + "epoch": 0.78, + "learning_rate": 4.206336846441695e-05, + "loss": 0.9805, + "step": 1415 + }, + { + "epoch": 0.79, + "learning_rate": 4.201038301632772e-05, + "loss": 1.0177, + "step": 1420 + }, + { + "epoch": 0.79, + "learning_rate": 4.195725490430917e-05, + "loss": 0.9938, + "step": 1425 + }, + { + "epoch": 0.79, + "learning_rate": 4.190398457394007e-05, + "loss": 1.0276, + "step": 1430 + }, + { + "epoch": 0.79, + "learning_rate": 4.1850572471991924e-05, + "loss": 0.9909, + "step": 1435 + }, + { + "epoch": 0.8, + "learning_rate": 4.1797019046425264e-05, + "loss": 0.9794, + "step": 1440 + }, + { + "epoch": 0.8, + "learning_rate": 4.1743324746385914e-05, + "loss": 1.0073, + "step": 1445 + }, + { + "epoch": 0.8, + "learning_rate": 4.1689490022201154e-05, + "loss": 1.0106, + "step": 1450 + }, + { + "epoch": 0.8, + "learning_rate": 4.163551532537601e-05, + "loss": 1.0241, + "step": 1455 + }, + { + "epoch": 0.81, + "learning_rate": 4.1581401108589425e-05, + "loss": 1.0178, + "step": 1460 + }, + { + "epoch": 0.81, + "learning_rate": 4.1527147825690495e-05, + "loss": 0.9426, + "step": 1465 + }, + { + "epoch": 0.81, + "learning_rate": 4.1472755931694626e-05, + "loss": 0.9642, + "step": 1470 + }, + { + "epoch": 0.82, + "learning_rate": 4.141822588277976e-05, + "loss": 0.9678, + "step": 1475 + }, + { + "epoch": 0.82, + "learning_rate": 4.136355813628251e-05, + "loss": 0.9654, + "step": 1480 + }, + { + "epoch": 0.82, + "learning_rate": 4.130875315069435e-05, + "loss": 0.9748, + "step": 1485 + }, + { + "epoch": 0.82, + "learning_rate": 4.125381138565775e-05, + "loss": 0.9321, + "step": 1490 + }, + { + "epoch": 0.83, + "learning_rate": 4.1198733301962346e-05, + "loss": 0.9885, + "step": 1495 + }, + { + "epoch": 0.83, + "learning_rate": 4.114351936154105e-05, + "loss": 0.9385, + "step": 1500 + }, + { + "epoch": 0.83, + "learning_rate": 4.108817002746619e-05, + "loss": 0.9362, + "step": 1505 + }, + { + "epoch": 0.83, + "learning_rate": 4.1032685763945625e-05, + "loss": 0.9764, + "step": 1510 + }, + { + "epoch": 0.84, + "learning_rate": 4.097706703631886e-05, + "loss": 0.961, + "step": 1515 + }, + { + "epoch": 0.84, + "learning_rate": 4.092131431105312e-05, + "loss": 0.9818, + "step": 1520 + }, + { + "epoch": 0.84, + "learning_rate": 4.086542805573945e-05, + "loss": 0.9855, + "step": 1525 + }, + { + "epoch": 0.85, + "learning_rate": 4.080940873908881e-05, + "loss": 0.9618, + "step": 1530 + }, + { + "epoch": 0.85, + "learning_rate": 4.07532568309281e-05, + "loss": 1.0102, + "step": 1535 + }, + { + "epoch": 0.85, + "learning_rate": 4.069697280219628e-05, + "loss": 0.9773, + "step": 1540 + }, + { + "epoch": 0.85, + "learning_rate": 4.0640557124940376e-05, + "loss": 0.9687, + "step": 1545 + }, + { + "epoch": 0.86, + "learning_rate": 4.058401027231152e-05, + "loss": 0.9818, + "step": 1550 + }, + { + "epoch": 0.86, + "learning_rate": 4.052733271856103e-05, + "loss": 0.9911, + "step": 1555 + }, + { + "epoch": 0.86, + "learning_rate": 4.0470524939036355e-05, + "loss": 0.9694, + "step": 1560 + }, + { + "epoch": 0.87, + "learning_rate": 4.0413587410177155e-05, + "loss": 0.9374, + "step": 1565 + }, + { + "epoch": 0.87, + "learning_rate": 4.035652060951128e-05, + "loss": 0.956, + "step": 1570 + }, + { + "epoch": 0.87, + "learning_rate": 4.0299325015650774e-05, + "loss": 0.9491, + "step": 1575 + }, + { + "epoch": 0.87, + "learning_rate": 4.024200110828783e-05, + "loss": 0.9569, + "step": 1580 + }, + { + "epoch": 0.88, + "learning_rate": 4.018454936819082e-05, + "loss": 0.9627, + "step": 1585 + }, + { + "epoch": 0.88, + "learning_rate": 4.012697027720018e-05, + "loss": 0.9922, + "step": 1590 + }, + { + "epoch": 0.88, + "learning_rate": 4.0069264318224506e-05, + "loss": 0.9703, + "step": 1595 + }, + { + "epoch": 0.88, + "learning_rate": 4.0011431975236337e-05, + "loss": 0.9436, + "step": 1600 + }, + { + "epoch": 0.89, + "learning_rate": 3.995347373326822e-05, + "loss": 0.961, + "step": 1605 + }, + { + "epoch": 0.89, + "learning_rate": 3.989539007840861e-05, + "loss": 0.9247, + "step": 1610 + }, + { + "epoch": 0.89, + "learning_rate": 3.983718149779775e-05, + "loss": 0.9537, + "step": 1615 + }, + { + "epoch": 0.9, + "learning_rate": 3.9778848479623656e-05, + "loss": 0.9415, + "step": 1620 + }, + { + "epoch": 0.9, + "learning_rate": 3.972039151311795e-05, + "loss": 0.9954, + "step": 1625 + }, + { + "epoch": 0.9, + "learning_rate": 3.966181108855183e-05, + "loss": 0.9451, + "step": 1630 + }, + { + "epoch": 0.9, + "learning_rate": 3.960310769723189e-05, + "loss": 0.9977, + "step": 1635 + }, + { + "epoch": 0.91, + "learning_rate": 3.9544281831496034e-05, + "loss": 0.9806, + "step": 1640 + }, + { + "epoch": 0.91, + "learning_rate": 3.9485333984709374e-05, + "loss": 0.9851, + "step": 1645 + }, + { + "epoch": 0.91, + "learning_rate": 3.942626465126001e-05, + "loss": 1.0089, + "step": 1650 + }, + { + "epoch": 0.92, + "learning_rate": 3.9367074326555e-05, + "loss": 0.9562, + "step": 1655 + }, + { + "epoch": 0.92, + "learning_rate": 3.930776350701609e-05, + "loss": 0.9892, + "step": 1660 + }, + { + "epoch": 0.92, + "learning_rate": 3.92483326900756e-05, + "loss": 0.9875, + "step": 1665 + }, + { + "epoch": 0.92, + "learning_rate": 3.91887823741723e-05, + "loss": 1.0012, + "step": 1670 + }, + { + "epoch": 0.93, + "learning_rate": 3.9129113058747136e-05, + "loss": 0.97, + "step": 1675 + }, + { + "epoch": 0.93, + "learning_rate": 3.9069325244239095e-05, + "loss": 0.9793, + "step": 1680 + }, + { + "epoch": 0.93, + "learning_rate": 3.900941943208103e-05, + "loss": 0.9864, + "step": 1685 + }, + { + "epoch": 0.93, + "learning_rate": 3.894939612469539e-05, + "loss": 0.9596, + "step": 1690 + }, + { + "epoch": 0.94, + "learning_rate": 3.888925582549006e-05, + "loss": 0.9679, + "step": 1695 + }, + { + "epoch": 0.94, + "learning_rate": 3.882899903885412e-05, + "loss": 0.9395, + "step": 1700 + }, + { + "epoch": 0.94, + "learning_rate": 3.876862627015361e-05, + "loss": 0.9774, + "step": 1705 + }, + { + "epoch": 0.95, + "learning_rate": 3.87081380257273e-05, + "loss": 0.9686, + "step": 1710 + }, + { + "epoch": 0.95, + "learning_rate": 3.864753481288244e-05, + "loss": 0.953, + "step": 1715 + }, + { + "epoch": 0.95, + "learning_rate": 3.8586817139890515e-05, + "loss": 0.9719, + "step": 1720 + }, + { + "epoch": 0.95, + "learning_rate": 3.852598551598294e-05, + "loss": 0.9748, + "step": 1725 + }, + { + "epoch": 0.96, + "learning_rate": 3.8465040451346874e-05, + "loss": 0.9741, + "step": 1730 + }, + { + "epoch": 0.96, + "learning_rate": 3.8403982457120836e-05, + "loss": 0.9747, + "step": 1735 + }, + { + "epoch": 0.96, + "learning_rate": 3.834281204539051e-05, + "loss": 0.9791, + "step": 1740 + }, + { + "epoch": 0.96, + "learning_rate": 3.828152972918438e-05, + "loss": 0.9704, + "step": 1745 + }, + { + "epoch": 0.97, + "learning_rate": 3.82201360224695e-05, + "loss": 0.9556, + "step": 1750 + }, + { + "epoch": 0.97, + "learning_rate": 3.815863144014711e-05, + "loss": 0.9552, + "step": 1755 + }, + { + "epoch": 0.97, + "learning_rate": 3.809701649804834e-05, + "loss": 0.9594, + "step": 1760 + }, + { + "epoch": 0.98, + "learning_rate": 3.8035291712929926e-05, + "loss": 0.959, + "step": 1765 + }, + { + "epoch": 0.98, + "learning_rate": 3.797345760246982e-05, + "loss": 1.0254, + "step": 1770 + }, + { + "epoch": 0.98, + "learning_rate": 3.791151468526289e-05, + "loss": 0.9925, + "step": 1775 + }, + { + "epoch": 0.98, + "learning_rate": 3.784946348081654e-05, + "loss": 0.9516, + "step": 1780 + }, + { + "epoch": 0.99, + "learning_rate": 3.7787304509546365e-05, + "loss": 0.954, + "step": 1785 + }, + { + "epoch": 0.99, + "learning_rate": 3.7725038292771774e-05, + "loss": 0.9465, + "step": 1790 + }, + { + "epoch": 0.99, + "learning_rate": 3.766266535271167e-05, + "loss": 0.9792, + "step": 1795 + }, + { + "epoch": 1.0, + "learning_rate": 3.760018621248e-05, + "loss": 0.964, + "step": 1800 + }, + { + "epoch": 1.0, + "learning_rate": 3.75376013960814e-05, + "loss": 0.9419, + "step": 1805 + }, + { + "epoch": 1.0, + "learning_rate": 3.747491142840681e-05, + "loss": 0.9818, + "step": 1810 + }, + { + "epoch": 1.0, + "learning_rate": 3.741211683522904e-05, + "loss": 0.9153, + "step": 1815 + }, + { + "epoch": 1.01, + "learning_rate": 3.734921814319841e-05, + "loss": 0.955, + "step": 1820 + }, + { + "epoch": 1.01, + "learning_rate": 3.728621587983828e-05, + "loss": 0.9467, + "step": 1825 + }, + { + "epoch": 1.01, + "learning_rate": 3.722311057354067e-05, + "loss": 0.9816, + "step": 1830 + }, + { + "epoch": 1.01, + "learning_rate": 3.715990275356178e-05, + "loss": 0.9727, + "step": 1835 + }, + { + "epoch": 1.02, + "learning_rate": 3.7096592950017617e-05, + "loss": 0.9822, + "step": 1840 + }, + { + "epoch": 1.02, + "learning_rate": 3.703318169387947e-05, + "loss": 0.942, + "step": 1845 + }, + { + "epoch": 1.02, + "learning_rate": 3.696966951696952e-05, + "loss": 0.9306, + "step": 1850 + }, + { + "epoch": 1.03, + "learning_rate": 3.690605695195637e-05, + "loss": 0.9654, + "step": 1855 + }, + { + "epoch": 1.03, + "learning_rate": 3.684234453235054e-05, + "loss": 0.9592, + "step": 1860 + }, + { + "epoch": 1.03, + "learning_rate": 3.677853279250003e-05, + "loss": 0.988, + "step": 1865 + }, + { + "epoch": 1.03, + "learning_rate": 3.671462226758583e-05, + "loss": 0.9462, + "step": 1870 + }, + { + "epoch": 1.04, + "learning_rate": 3.665061349361742e-05, + "loss": 0.9685, + "step": 1875 + }, + { + "epoch": 1.04, + "learning_rate": 3.658650700742828e-05, + "loss": 0.9772, + "step": 1880 + }, + { + "epoch": 1.04, + "learning_rate": 3.6522303346671404e-05, + "loss": 0.9482, + "step": 1885 + }, + { + "epoch": 1.05, + "learning_rate": 3.645800304981477e-05, + "loss": 1.0069, + "step": 1890 + }, + { + "epoch": 1.05, + "learning_rate": 3.639360665613683e-05, + "loss": 0.9003, + "step": 1895 + }, + { + "epoch": 1.05, + "learning_rate": 3.632911470572197e-05, + "loss": 0.9279, + "step": 1900 + }, + { + "epoch": 1.05, + "learning_rate": 3.626452773945603e-05, + "loss": 0.9237, + "step": 1905 + }, + { + "epoch": 1.06, + "learning_rate": 3.619984629902172e-05, + "loss": 1.0086, + "step": 1910 + }, + { + "epoch": 1.06, + "learning_rate": 3.613507092689409e-05, + "loss": 0.9625, + "step": 1915 + }, + { + "epoch": 1.06, + "learning_rate": 3.607020216633599e-05, + "loss": 0.9297, + "step": 1920 + }, + { + "epoch": 1.06, + "learning_rate": 3.60052405613935e-05, + "loss": 0.9718, + "step": 1925 + }, + { + "epoch": 1.07, + "learning_rate": 3.594018665689139e-05, + "loss": 0.9512, + "step": 1930 + }, + { + "epoch": 1.07, + "learning_rate": 3.5875040998428513e-05, + "loss": 0.9923, + "step": 1935 + }, + { + "epoch": 1.07, + "learning_rate": 3.5809804132373253e-05, + "loss": 0.9518, + "step": 1940 + }, + { + "epoch": 1.08, + "learning_rate": 3.574447660585897e-05, + "loss": 0.9324, + "step": 1945 + }, + { + "epoch": 1.08, + "learning_rate": 3.5679058966779344e-05, + "loss": 0.9724, + "step": 1950 + }, + { + "epoch": 1.08, + "learning_rate": 3.561355176378384e-05, + "loss": 0.978, + "step": 1955 + }, + { + "epoch": 1.08, + "learning_rate": 3.554795554627307e-05, + "loss": 0.9893, + "step": 1960 + }, + { + "epoch": 1.09, + "learning_rate": 3.548227086439422e-05, + "loss": 0.967, + "step": 1965 + }, + { + "epoch": 1.09, + "learning_rate": 3.541649826903639e-05, + "loss": 0.9529, + "step": 1970 + }, + { + "epoch": 1.09, + "learning_rate": 3.535063831182602e-05, + "loss": 0.9506, + "step": 1975 + }, + { + "epoch": 1.09, + "learning_rate": 3.528469154512224e-05, + "loss": 0.9525, + "step": 1980 + }, + { + "epoch": 1.1, + "learning_rate": 3.521865852201223e-05, + "loss": 0.9258, + "step": 1985 + }, + { + "epoch": 1.1, + "learning_rate": 3.5152539796306596e-05, + "loss": 0.9417, + "step": 1990 + }, + { + "epoch": 1.1, + "learning_rate": 3.508633592253472e-05, + "loss": 0.9465, + "step": 1995 + }, + { + "epoch": 1.11, + "learning_rate": 3.502004745594011e-05, + "loss": 0.9494, + "step": 2000 + }, + { + "epoch": 1.11, + "learning_rate": 3.4953674952475755e-05, + "loss": 0.9709, + "step": 2005 + }, + { + "epoch": 1.11, + "learning_rate": 3.488721896879943e-05, + "loss": 0.9581, + "step": 2010 + }, + { + "epoch": 1.11, + "learning_rate": 3.4820680062269074e-05, + "loss": 0.974, + "step": 2015 + }, + { + "epoch": 1.12, + "learning_rate": 3.4754058790938046e-05, + "loss": 0.9768, + "step": 2020 + }, + { + "epoch": 1.12, + "learning_rate": 3.468735571355055e-05, + "loss": 0.982, + "step": 2025 + }, + { + "epoch": 1.12, + "learning_rate": 3.4620571389536825e-05, + "loss": 0.984, + "step": 2030 + }, + { + "epoch": 1.13, + "learning_rate": 3.455370637900856e-05, + "loss": 0.9604, + "step": 2035 + }, + { + "epoch": 1.13, + "learning_rate": 3.448676124275414e-05, + "loss": 0.897, + "step": 2040 + }, + { + "epoch": 1.13, + "learning_rate": 3.4419736542233925e-05, + "loss": 0.9968, + "step": 2045 + }, + { + "epoch": 1.13, + "learning_rate": 3.4352632839575616e-05, + "loss": 0.9479, + "step": 2050 + }, + { + "epoch": 1.14, + "learning_rate": 3.428545069756946e-05, + "loss": 0.9724, + "step": 2055 + }, + { + "epoch": 1.14, + "learning_rate": 3.42181906796636e-05, + "loss": 0.9493, + "step": 2060 + }, + { + "epoch": 1.14, + "learning_rate": 3.415085334995927e-05, + "loss": 0.9348, + "step": 2065 + }, + { + "epoch": 1.14, + "learning_rate": 3.408343927320613e-05, + "loss": 0.9702, + "step": 2070 + }, + { + "epoch": 1.15, + "learning_rate": 3.401594901479753e-05, + "loss": 0.9089, + "step": 2075 + }, + { + "epoch": 1.15, + "learning_rate": 3.394838314076572e-05, + "loss": 0.9606, + "step": 2080 + }, + { + "epoch": 1.15, + "learning_rate": 3.3880742217777115e-05, + "loss": 0.9743, + "step": 2085 + }, + { + "epoch": 1.16, + "learning_rate": 3.381302681312759e-05, + "loss": 0.9469, + "step": 2090 + }, + { + "epoch": 1.16, + "learning_rate": 3.374523749473767e-05, + "loss": 0.949, + "step": 2095 + }, + { + "epoch": 1.16, + "learning_rate": 3.367737483114779e-05, + "loss": 0.9567, + "step": 2100 + }, + { + "epoch": 1.16, + "learning_rate": 3.360943939151351e-05, + "loss": 0.9718, + "step": 2105 + }, + { + "epoch": 1.17, + "learning_rate": 3.354143174560078e-05, + "loss": 0.9626, + "step": 2110 + }, + { + "epoch": 1.17, + "learning_rate": 3.3473352463781105e-05, + "loss": 0.9346, + "step": 2115 + }, + { + "epoch": 1.17, + "learning_rate": 3.340520211702681e-05, + "loss": 0.9208, + "step": 2120 + }, + { + "epoch": 1.18, + "learning_rate": 3.333698127690623e-05, + "loss": 0.9856, + "step": 2125 + }, + { + "epoch": 1.18, + "learning_rate": 3.326869051557891e-05, + "loss": 0.9049, + "step": 2130 + }, + { + "epoch": 1.18, + "learning_rate": 3.320033040579082e-05, + "loss": 0.9222, + "step": 2135 + }, + { + "epoch": 1.18, + "learning_rate": 3.3131901520869565e-05, + "loss": 0.9648, + "step": 2140 + }, + { + "epoch": 1.19, + "learning_rate": 3.306340443471951e-05, + "loss": 0.9538, + "step": 2145 + }, + { + "epoch": 1.19, + "learning_rate": 3.299483972181708e-05, + "loss": 0.9314, + "step": 2150 + }, + { + "epoch": 1.19, + "learning_rate": 3.292620795720583e-05, + "loss": 0.9576, + "step": 2155 + }, + { + "epoch": 1.19, + "learning_rate": 3.285750971649167e-05, + "loss": 0.9499, + "step": 2160 + }, + { + "epoch": 1.2, + "learning_rate": 3.278874557583807e-05, + "loss": 0.9568, + "step": 2165 + }, + { + "epoch": 1.2, + "learning_rate": 3.271991611196117e-05, + "loss": 0.9642, + "step": 2170 + }, + { + "epoch": 1.2, + "learning_rate": 3.265102190212497e-05, + "loss": 0.9526, + "step": 2175 + }, + { + "epoch": 1.21, + "learning_rate": 3.258206352413648e-05, + "loss": 0.933, + "step": 2180 + }, + { + "epoch": 1.21, + "learning_rate": 3.2513041556340887e-05, + "loss": 0.9683, + "step": 2185 + }, + { + "epoch": 1.21, + "learning_rate": 3.244395657761671e-05, + "loss": 0.9155, + "step": 2190 + }, + { + "epoch": 1.21, + "learning_rate": 3.2374809167370924e-05, + "loss": 0.9262, + "step": 2195 + }, + { + "epoch": 1.22, + "learning_rate": 3.230559990553409e-05, + "loss": 0.9778, + "step": 2200 + }, + { + "epoch": 1.22, + "learning_rate": 3.2236329372555544e-05, + "loss": 0.9577, + "step": 2205 + }, + { + "epoch": 1.22, + "learning_rate": 3.2166998149398465e-05, + "loss": 0.9286, + "step": 2210 + }, + { + "epoch": 1.22, + "learning_rate": 3.209760681753505e-05, + "loss": 0.9634, + "step": 2215 + }, + { + "epoch": 1.23, + "learning_rate": 3.2028155958941615e-05, + "loss": 0.9467, + "step": 2220 + }, + { + "epoch": 1.23, + "learning_rate": 3.195864615609373e-05, + "loss": 0.9543, + "step": 2225 + }, + { + "epoch": 1.23, + "learning_rate": 3.1889077991961304e-05, + "loss": 0.9914, + "step": 2230 + }, + { + "epoch": 1.24, + "learning_rate": 3.181945205000373e-05, + "loss": 0.9309, + "step": 2235 + }, + { + "epoch": 1.24, + "learning_rate": 3.1749768914164955e-05, + "loss": 0.9299, + "step": 2240 + }, + { + "epoch": 1.24, + "learning_rate": 3.168002916886864e-05, + "loss": 0.9462, + "step": 2245 + }, + { + "epoch": 1.24, + "learning_rate": 3.1610233399013194e-05, + "loss": 0.948, + "step": 2250 + }, + { + "epoch": 1.25, + "learning_rate": 3.15403821899669e-05, + "loss": 0.9335, + "step": 2255 + }, + { + "epoch": 1.25, + "learning_rate": 3.147047612756302e-05, + "loss": 0.952, + "step": 2260 + }, + { + "epoch": 1.25, + "learning_rate": 3.140051579809484e-05, + "loss": 0.9532, + "step": 2265 + }, + { + "epoch": 1.26, + "learning_rate": 3.133050178831079e-05, + "loss": 0.9853, + "step": 2270 + }, + { + "epoch": 1.26, + "learning_rate": 3.12604346854095e-05, + "loss": 0.8925, + "step": 2275 + }, + { + "epoch": 1.26, + "learning_rate": 3.119031507703491e-05, + "loss": 0.942, + "step": 2280 + }, + { + "epoch": 1.26, + "learning_rate": 3.112014355127129e-05, + "loss": 0.9132, + "step": 2285 + }, + { + "epoch": 1.27, + "learning_rate": 3.104992069663835e-05, + "loss": 0.9335, + "step": 2290 + }, + { + "epoch": 1.27, + "learning_rate": 3.0979647102086273e-05, + "loss": 0.9403, + "step": 2295 + }, + { + "epoch": 1.27, + "learning_rate": 3.090932335699081e-05, + "loss": 0.9246, + "step": 2300 + }, + { + "epoch": 1.27, + "learning_rate": 3.083895005114831e-05, + "loss": 0.912, + "step": 2305 + }, + { + "epoch": 1.28, + "learning_rate": 3.076852777477079e-05, + "loss": 0.9334, + "step": 2310 + }, + { + "epoch": 1.28, + "learning_rate": 3.069805711848096e-05, + "loss": 0.933, + "step": 2315 + }, + { + "epoch": 1.28, + "learning_rate": 3.062753867330729e-05, + "loss": 0.9348, + "step": 2320 + }, + { + "epoch": 1.29, + "learning_rate": 3.055697303067905e-05, + "loss": 0.9997, + "step": 2325 + }, + { + "epoch": 1.29, + "learning_rate": 3.048636078242137e-05, + "loss": 0.9196, + "step": 2330 + }, + { + "epoch": 1.29, + "learning_rate": 3.0415702520750235e-05, + "loss": 0.9735, + "step": 2335 + }, + { + "epoch": 1.29, + "learning_rate": 3.0344998838267525e-05, + "loss": 0.9395, + "step": 2340 + }, + { + "epoch": 1.3, + "learning_rate": 3.0274250327956093e-05, + "loss": 0.9455, + "step": 2345 + }, + { + "epoch": 1.3, + "learning_rate": 3.020345758317474e-05, + "loss": 0.968, + "step": 2350 + }, + { + "epoch": 1.3, + "learning_rate": 3.0132621197653245e-05, + "loss": 0.9403, + "step": 2355 + }, + { + "epoch": 1.3, + "learning_rate": 3.0061741765487418e-05, + "loss": 0.9267, + "step": 2360 + }, + { + "epoch": 1.31, + "learning_rate": 2.9990819881134073e-05, + "loss": 0.9734, + "step": 2365 + }, + { + "epoch": 1.31, + "learning_rate": 2.9919856139406093e-05, + "loss": 0.949, + "step": 2370 + }, + { + "epoch": 1.31, + "learning_rate": 2.9848851135467386e-05, + "loss": 0.9464, + "step": 2375 + }, + { + "epoch": 1.32, + "learning_rate": 2.977780546482794e-05, + "loss": 0.9709, + "step": 2380 + }, + { + "epoch": 1.32, + "learning_rate": 2.9706719723338795e-05, + "loss": 0.9202, + "step": 2385 + }, + { + "epoch": 1.32, + "learning_rate": 2.9635594507187074e-05, + "loss": 0.9625, + "step": 2390 + }, + { + "epoch": 1.32, + "learning_rate": 2.956443041289096e-05, + "loss": 0.9307, + "step": 2395 + }, + { + "epoch": 1.33, + "learning_rate": 2.9493228037294702e-05, + "loss": 0.907, + "step": 2400 + }, + { + "epoch": 1.33, + "learning_rate": 2.9421987977563613e-05, + "loss": 0.9487, + "step": 2405 + }, + { + "epoch": 1.33, + "learning_rate": 2.935071083117907e-05, + "loss": 0.9259, + "step": 2410 + }, + { + "epoch": 1.34, + "learning_rate": 2.9279397195933457e-05, + "loss": 0.9357, + "step": 2415 + }, + { + "epoch": 1.34, + "learning_rate": 2.920804766992521e-05, + "loss": 0.9149, + "step": 2420 + }, + { + "epoch": 1.34, + "learning_rate": 2.9136662851553787e-05, + "loss": 0.9664, + "step": 2425 + }, + { + "epoch": 1.34, + "learning_rate": 2.906524333951461e-05, + "loss": 0.973, + "step": 2430 + }, + { + "epoch": 1.35, + "learning_rate": 2.899378973279409e-05, + "loss": 0.9797, + "step": 2435 + }, + { + "epoch": 1.35, + "learning_rate": 2.892230263066459e-05, + "loss": 0.9604, + "step": 2440 + }, + { + "epoch": 1.35, + "learning_rate": 2.885078263267938e-05, + "loss": 0.9147, + "step": 2445 + }, + { + "epoch": 1.35, + "learning_rate": 2.8779230338667634e-05, + "loss": 0.9827, + "step": 2450 + }, + { + "epoch": 1.36, + "learning_rate": 2.870764634872939e-05, + "loss": 0.9362, + "step": 2455 + }, + { + "epoch": 1.36, + "learning_rate": 2.86360312632305e-05, + "loss": 0.9373, + "step": 2460 + }, + { + "epoch": 1.36, + "learning_rate": 2.8564385682797622e-05, + "loss": 0.955, + "step": 2465 + }, + { + "epoch": 1.37, + "learning_rate": 2.8492710208313177e-05, + "loss": 0.9108, + "step": 2470 + }, + { + "epoch": 1.37, + "learning_rate": 2.8421005440910303e-05, + "loss": 0.9259, + "step": 2475 + }, + { + "epoch": 1.37, + "learning_rate": 2.8349271981967797e-05, + "loss": 0.9333, + "step": 2480 + }, + { + "epoch": 1.37, + "learning_rate": 2.8277510433105102e-05, + "loss": 0.9437, + "step": 2485 + }, + { + "epoch": 1.38, + "learning_rate": 2.820572139617725e-05, + "loss": 0.9189, + "step": 2490 + }, + { + "epoch": 1.38, + "learning_rate": 2.8133905473269802e-05, + "loss": 0.9522, + "step": 2495 + }, + { + "epoch": 1.38, + "learning_rate": 2.8062063266693818e-05, + "loss": 0.9352, + "step": 2500 + }, + { + "epoch": 1.39, + "learning_rate": 2.7990195378980784e-05, + "loss": 0.9232, + "step": 2505 + }, + { + "epoch": 1.39, + "learning_rate": 2.7918302412877583e-05, + "loss": 0.8807, + "step": 2510 + }, + { + "epoch": 1.39, + "learning_rate": 2.7846384971341427e-05, + "loss": 0.9231, + "step": 2515 + }, + { + "epoch": 1.39, + "learning_rate": 2.7774443657534788e-05, + "loss": 0.9253, + "step": 2520 + }, + { + "epoch": 1.4, + "learning_rate": 2.770247907482036e-05, + "loss": 0.9646, + "step": 2525 + }, + { + "epoch": 1.4, + "learning_rate": 2.763049182675599e-05, + "loss": 0.9575, + "step": 2530 + }, + { + "epoch": 1.4, + "learning_rate": 2.7558482517089617e-05, + "loss": 0.9234, + "step": 2535 + }, + { + "epoch": 1.4, + "learning_rate": 2.748645174975421e-05, + "loss": 0.9215, + "step": 2540 + }, + { + "epoch": 1.41, + "learning_rate": 2.74144001288627e-05, + "loss": 0.959, + "step": 2545 + }, + { + "epoch": 1.41, + "learning_rate": 2.7342328258702894e-05, + "loss": 0.9509, + "step": 2550 + }, + { + "epoch": 1.41, + "learning_rate": 2.727023674373246e-05, + "loss": 0.8989, + "step": 2555 + }, + { + "epoch": 1.42, + "learning_rate": 2.7198126188573807e-05, + "loss": 0.9653, + "step": 2560 + }, + { + "epoch": 1.42, + "learning_rate": 2.7125997198009028e-05, + "loss": 0.9046, + "step": 2565 + }, + { + "epoch": 1.42, + "learning_rate": 2.7053850376974848e-05, + "loss": 0.9318, + "step": 2570 + }, + { + "epoch": 1.42, + "learning_rate": 2.6981686330557516e-05, + "loss": 0.9292, + "step": 2575 + }, + { + "epoch": 1.43, + "learning_rate": 2.6909505663987756e-05, + "loss": 0.99, + "step": 2580 + }, + { + "epoch": 1.43, + "learning_rate": 2.6837308982635678e-05, + "loss": 0.9737, + "step": 2585 + }, + { + "epoch": 1.43, + "learning_rate": 2.6765096892005726e-05, + "loss": 0.9649, + "step": 2590 + }, + { + "epoch": 1.43, + "learning_rate": 2.6692869997731545e-05, + "loss": 0.9687, + "step": 2595 + }, + { + "epoch": 1.44, + "learning_rate": 2.6620628905570964e-05, + "loss": 0.9708, + "step": 2600 + }, + { + "epoch": 1.44, + "learning_rate": 2.6548374221400884e-05, + "loss": 0.9498, + "step": 2605 + }, + { + "epoch": 1.44, + "learning_rate": 2.6476106551212188e-05, + "loss": 0.9512, + "step": 2610 + }, + { + "epoch": 1.45, + "learning_rate": 2.6403826501104682e-05, + "loss": 0.964, + "step": 2615 + }, + { + "epoch": 1.45, + "learning_rate": 2.6331534677281998e-05, + "loss": 0.9321, + "step": 2620 + }, + { + "epoch": 1.45, + "learning_rate": 2.6259231686046508e-05, + "loss": 0.9032, + "step": 2625 + }, + { + "epoch": 1.45, + "learning_rate": 2.6186918133794252e-05, + "loss": 0.9543, + "step": 2630 + }, + { + "epoch": 1.46, + "learning_rate": 2.6114594627009847e-05, + "loss": 0.9355, + "step": 2635 + }, + { + "epoch": 1.46, + "learning_rate": 2.604226177226137e-05, + "loss": 0.9111, + "step": 2640 + }, + { + "epoch": 1.46, + "learning_rate": 2.596992017619534e-05, + "loss": 0.9896, + "step": 2645 + }, + { + "epoch": 1.47, + "learning_rate": 2.589757044553155e-05, + "loss": 0.9715, + "step": 2650 + }, + { + "epoch": 1.47, + "learning_rate": 2.5825213187058045e-05, + "loss": 0.8911, + "step": 2655 + }, + { + "epoch": 1.47, + "learning_rate": 2.5752849007625986e-05, + "loss": 0.9446, + "step": 2660 + }, + { + "epoch": 1.47, + "learning_rate": 2.568047851414459e-05, + "loss": 0.9453, + "step": 2665 + }, + { + "epoch": 1.48, + "learning_rate": 2.5608102313576027e-05, + "loss": 0.9244, + "step": 2670 + }, + { + "epoch": 1.48, + "learning_rate": 2.553572101293033e-05, + "loss": 0.9478, + "step": 2675 + }, + { + "epoch": 1.48, + "learning_rate": 2.546333521926031e-05, + "loss": 0.9708, + "step": 2680 + }, + { + "epoch": 1.48, + "learning_rate": 2.5390945539656445e-05, + "loss": 0.9266, + "step": 2685 + }, + { + "epoch": 1.49, + "learning_rate": 2.5318552581241822e-05, + "loss": 0.9199, + "step": 2690 + }, + { + "epoch": 1.49, + "learning_rate": 2.524615695116702e-05, + "loss": 0.9666, + "step": 2695 + }, + { + "epoch": 1.49, + "learning_rate": 2.5173759256605027e-05, + "loss": 0.9247, + "step": 2700 + }, + { + "epoch": 1.5, + "learning_rate": 2.510136010474614e-05, + "loss": 0.9534, + "step": 2705 + }, + { + "epoch": 1.5, + "learning_rate": 2.5028960102792887e-05, + "loss": 0.9502, + "step": 2710 + }, + { + "epoch": 1.5, + "learning_rate": 2.4971039897207112e-05, + "loss": 0.9331, + "step": 2715 + }, + { + "epoch": 1.5, + "learning_rate": 2.4898639895253865e-05, + "loss": 0.9806, + "step": 2720 + }, + { + "epoch": 1.51, + "learning_rate": 2.4826240743394982e-05, + "loss": 0.9525, + "step": 2725 + }, + { + "epoch": 1.51, + "learning_rate": 2.4753843048832985e-05, + "loss": 0.937, + "step": 2730 + }, + { + "epoch": 1.51, + "learning_rate": 2.4681447418758187e-05, + "loss": 0.935, + "step": 2735 + }, + { + "epoch": 1.52, + "learning_rate": 2.460905446034356e-05, + "loss": 0.9662, + "step": 2740 + }, + { + "epoch": 1.52, + "learning_rate": 2.45366647807397e-05, + "loss": 0.9249, + "step": 2745 + }, + { + "epoch": 1.52, + "learning_rate": 2.446427898706967e-05, + "loss": 0.9673, + "step": 2750 + }, + { + "epoch": 1.52, + "learning_rate": 2.439189768642398e-05, + "loss": 0.9316, + "step": 2755 + }, + { + "epoch": 1.53, + "learning_rate": 2.431952148585541e-05, + "loss": 0.9297, + "step": 2760 + }, + { + "epoch": 1.53, + "learning_rate": 2.424715099237402e-05, + "loss": 0.9278, + "step": 2765 + }, + { + "epoch": 1.53, + "learning_rate": 2.4174786812941968e-05, + "loss": 0.8954, + "step": 2770 + }, + { + "epoch": 1.53, + "learning_rate": 2.4102429554468456e-05, + "loss": 0.9586, + "step": 2775 + }, + { + "epoch": 1.54, + "learning_rate": 2.4030079823804673e-05, + "loss": 0.9119, + "step": 2780 + }, + { + "epoch": 1.54, + "learning_rate": 2.395773822773863e-05, + "loss": 0.8949, + "step": 2785 + }, + { + "epoch": 1.54, + "learning_rate": 2.3885405372990166e-05, + "loss": 0.9506, + "step": 2790 + }, + { + "epoch": 1.55, + "learning_rate": 2.3813081866205754e-05, + "loss": 0.9087, + "step": 2795 + }, + { + "epoch": 1.55, + "learning_rate": 2.3740768313953494e-05, + "loss": 0.923, + "step": 2800 + }, + { + "epoch": 1.55, + "learning_rate": 2.3668465322718004e-05, + "loss": 1.0212, + "step": 2805 + }, + { + "epoch": 1.55, + "learning_rate": 2.359617349889532e-05, + "loss": 0.9182, + "step": 2810 + }, + { + "epoch": 1.56, + "learning_rate": 2.3523893448787818e-05, + "loss": 0.9316, + "step": 2815 + }, + { + "epoch": 1.56, + "learning_rate": 2.3451625778599122e-05, + "loss": 0.94, + "step": 2820 + }, + { + "epoch": 1.56, + "learning_rate": 2.3379371094429038e-05, + "loss": 0.9181, + "step": 2825 + }, + { + "epoch": 1.56, + "learning_rate": 2.3307130002268457e-05, + "loss": 0.937, + "step": 2830 + }, + { + "epoch": 1.57, + "learning_rate": 2.3234903107994287e-05, + "loss": 0.9026, + "step": 2835 + }, + { + "epoch": 1.57, + "learning_rate": 2.3162691017364317e-05, + "loss": 0.954, + "step": 2840 + }, + { + "epoch": 1.57, + "learning_rate": 2.3090494336012253e-05, + "loss": 0.9661, + "step": 2845 + }, + { + "epoch": 1.58, + "learning_rate": 2.3018313669442483e-05, + "loss": 0.9127, + "step": 2850 + }, + { + "epoch": 1.58, + "learning_rate": 2.2946149623025158e-05, + "loss": 0.9317, + "step": 2855 + }, + { + "epoch": 1.58, + "learning_rate": 2.2874002801990978e-05, + "loss": 0.9856, + "step": 2860 + }, + { + "epoch": 1.58, + "learning_rate": 2.28018738114262e-05, + "loss": 1.0021, + "step": 2865 + }, + { + "epoch": 1.59, + "learning_rate": 2.272976325626755e-05, + "loss": 0.9655, + "step": 2870 + }, + { + "epoch": 1.59, + "learning_rate": 2.265767174129711e-05, + "loss": 0.9619, + "step": 2875 + }, + { + "epoch": 1.59, + "learning_rate": 2.2585599871137313e-05, + "loss": 0.9383, + "step": 2880 + }, + { + "epoch": 1.6, + "learning_rate": 2.251354825024579e-05, + "loss": 0.9332, + "step": 2885 + }, + { + "epoch": 1.6, + "learning_rate": 2.244151748291039e-05, + "loss": 0.9544, + "step": 2890 + }, + { + "epoch": 1.6, + "learning_rate": 2.236950817324401e-05, + "loss": 0.9343, + "step": 2895 + }, + { + "epoch": 1.6, + "learning_rate": 2.2297520925179647e-05, + "loss": 0.9189, + "step": 2900 + }, + { + "epoch": 1.61, + "learning_rate": 2.222555634246521e-05, + "loss": 0.9341, + "step": 2905 + }, + { + "epoch": 1.61, + "learning_rate": 2.215361502865858e-05, + "loss": 0.9567, + "step": 2910 + }, + { + "epoch": 1.61, + "learning_rate": 2.2081697587122423e-05, + "loss": 0.9047, + "step": 2915 + }, + { + "epoch": 1.61, + "learning_rate": 2.200980462101922e-05, + "loss": 0.9126, + "step": 2920 + }, + { + "epoch": 1.62, + "learning_rate": 2.1937936733306195e-05, + "loss": 0.9523, + "step": 2925 + }, + { + "epoch": 1.62, + "learning_rate": 2.18660945267302e-05, + "loss": 0.8802, + "step": 2930 + }, + { + "epoch": 1.62, + "learning_rate": 2.179427860382276e-05, + "loss": 0.9197, + "step": 2935 + }, + { + "epoch": 1.63, + "learning_rate": 2.1722489566894903e-05, + "loss": 0.9255, + "step": 2940 + }, + { + "epoch": 1.63, + "learning_rate": 2.1650728018032206e-05, + "loss": 0.8921, + "step": 2945 + }, + { + "epoch": 1.63, + "learning_rate": 2.15789945590897e-05, + "loss": 0.9607, + "step": 2950 + }, + { + "epoch": 1.63, + "learning_rate": 2.150728979168683e-05, + "loss": 0.9755, + "step": 2955 + }, + { + "epoch": 1.64, + "learning_rate": 2.1435614317202384e-05, + "loss": 0.9943, + "step": 2960 + }, + { + "epoch": 1.64, + "learning_rate": 2.1363968736769508e-05, + "loss": 0.9463, + "step": 2965 + }, + { + "epoch": 1.64, + "learning_rate": 2.1292353651270617e-05, + "loss": 0.9107, + "step": 2970 + }, + { + "epoch": 1.65, + "learning_rate": 2.1220769661332365e-05, + "loss": 0.9311, + "step": 2975 + }, + { + "epoch": 1.65, + "learning_rate": 2.1149217367320622e-05, + "loss": 0.9459, + "step": 2980 + }, + { + "epoch": 1.65, + "learning_rate": 2.107769736933541e-05, + "loss": 0.9439, + "step": 2985 + }, + { + "epoch": 1.65, + "learning_rate": 2.100621026720591e-05, + "loss": 0.9719, + "step": 2990 + }, + { + "epoch": 1.66, + "learning_rate": 2.093475666048539e-05, + "loss": 0.9569, + "step": 2995 + }, + { + "epoch": 1.66, + "learning_rate": 2.0863337148446222e-05, + "loss": 0.9308, + "step": 3000 + }, + { + "epoch": 1.66, + "learning_rate": 2.07919523300748e-05, + "loss": 0.9269, + "step": 3005 + }, + { + "epoch": 1.66, + "learning_rate": 2.0720602804066552e-05, + "loss": 0.9358, + "step": 3010 + }, + { + "epoch": 1.67, + "learning_rate": 2.0649289168820943e-05, + "loss": 0.9291, + "step": 3015 + }, + { + "epoch": 1.67, + "learning_rate": 2.0578012022436386e-05, + "loss": 0.969, + "step": 3020 + }, + { + "epoch": 1.67, + "learning_rate": 2.0506771962705304e-05, + "loss": 0.9319, + "step": 3025 + }, + { + "epoch": 1.68, + "learning_rate": 2.0435569587109042e-05, + "loss": 0.9574, + "step": 3030 + }, + { + "epoch": 1.68, + "learning_rate": 2.036440549281293e-05, + "loss": 0.9654, + "step": 3035 + }, + { + "epoch": 1.68, + "learning_rate": 2.0293280276661204e-05, + "loss": 0.9742, + "step": 3040 + }, + { + "epoch": 1.68, + "learning_rate": 2.0222194535172067e-05, + "loss": 0.9014, + "step": 3045 + }, + { + "epoch": 1.69, + "learning_rate": 2.0151148864532623e-05, + "loss": 0.9246, + "step": 3050 + }, + { + "epoch": 1.69, + "learning_rate": 2.0080143860593913e-05, + "loss": 0.9522, + "step": 3055 + }, + { + "epoch": 1.69, + "learning_rate": 2.0009180118865933e-05, + "loss": 0.9567, + "step": 3060 + }, + { + "epoch": 1.69, + "learning_rate": 1.9938258234512588e-05, + "loss": 0.983, + "step": 3065 + }, + { + "epoch": 1.7, + "learning_rate": 1.9867378802346764e-05, + "loss": 0.8722, + "step": 3070 + }, + { + "epoch": 1.7, + "learning_rate": 1.979654241682527e-05, + "loss": 0.9122, + "step": 3075 + }, + { + "epoch": 1.7, + "learning_rate": 1.972574967204391e-05, + "loss": 0.9362, + "step": 3080 + }, + { + "epoch": 1.71, + "learning_rate": 1.9655001161732478e-05, + "loss": 0.8944, + "step": 3085 + }, + { + "epoch": 1.71, + "learning_rate": 1.9584297479249774e-05, + "loss": 0.9329, + "step": 3090 + }, + { + "epoch": 1.71, + "learning_rate": 1.9513639217578636e-05, + "loss": 0.9707, + "step": 3095 + }, + { + "epoch": 1.71, + "learning_rate": 1.9443026969320955e-05, + "loss": 0.9367, + "step": 3100 + }, + { + "epoch": 1.72, + "learning_rate": 1.937246132669272e-05, + "loss": 0.896, + "step": 3105 + }, + { + "epoch": 1.72, + "learning_rate": 1.9301942881519047e-05, + "loss": 0.97, + "step": 3110 + }, + { + "epoch": 1.72, + "learning_rate": 1.9231472225229216e-05, + "loss": 0.9638, + "step": 3115 + }, + { + "epoch": 1.73, + "learning_rate": 1.9161049948851684e-05, + "loss": 0.9561, + "step": 3120 + }, + { + "epoch": 1.73, + "learning_rate": 1.9090676643009193e-05, + "loss": 0.9734, + "step": 3125 + }, + { + "epoch": 1.73, + "learning_rate": 1.902035289791373e-05, + "loss": 0.9651, + "step": 3130 + }, + { + "epoch": 1.73, + "learning_rate": 1.8950079303361658e-05, + "loss": 0.9489, + "step": 3135 + }, + { + "epoch": 1.74, + "learning_rate": 1.8879856448728723e-05, + "loss": 0.9893, + "step": 3140 + }, + { + "epoch": 1.74, + "learning_rate": 1.8809684922965097e-05, + "loss": 0.9549, + "step": 3145 + }, + { + "epoch": 1.74, + "learning_rate": 1.8739565314590507e-05, + "loss": 0.9196, + "step": 3150 + }, + { + "epoch": 1.74, + "learning_rate": 1.8669498211689216e-05, + "loss": 0.9568, + "step": 3155 + }, + { + "epoch": 1.75, + "learning_rate": 1.859948420190517e-05, + "loss": 0.904, + "step": 3160 + }, + { + "epoch": 1.75, + "learning_rate": 1.852952387243698e-05, + "loss": 0.9375, + "step": 3165 + }, + { + "epoch": 1.75, + "learning_rate": 1.8459617810033096e-05, + "loss": 0.9048, + "step": 3170 + }, + { + "epoch": 1.76, + "learning_rate": 1.83897666009868e-05, + "loss": 0.9514, + "step": 3175 + }, + { + "epoch": 1.76, + "learning_rate": 1.8319970831131363e-05, + "loss": 0.8855, + "step": 3180 + }, + { + "epoch": 1.76, + "learning_rate": 1.825023108583505e-05, + "loss": 0.9119, + "step": 3185 + }, + { + "epoch": 1.76, + "learning_rate": 1.818054794999628e-05, + "loss": 0.9733, + "step": 3190 + }, + { + "epoch": 1.77, + "learning_rate": 1.8110922008038705e-05, + "loss": 0.9597, + "step": 3195 + }, + { + "epoch": 1.77, + "learning_rate": 1.8041353843906275e-05, + "loss": 0.8952, + "step": 3200 + }, + { + "epoch": 1.77, + "learning_rate": 1.797184404105839e-05, + "loss": 0.8934, + "step": 3205 + }, + { + "epoch": 1.77, + "learning_rate": 1.7902393182464955e-05, + "loss": 0.9299, + "step": 3210 + }, + { + "epoch": 1.78, + "learning_rate": 1.7833001850601544e-05, + "loss": 0.9247, + "step": 3215 + }, + { + "epoch": 1.78, + "learning_rate": 1.7763670627444465e-05, + "loss": 0.9672, + "step": 3220 + }, + { + "epoch": 1.78, + "learning_rate": 1.7694400094465913e-05, + "loss": 0.9451, + "step": 3225 + }, + { + "epoch": 1.79, + "learning_rate": 1.7625190832629085e-05, + "loss": 0.9294, + "step": 3230 + }, + { + "epoch": 1.79, + "learning_rate": 1.7556043422383293e-05, + "loss": 0.9145, + "step": 3235 + }, + { + "epoch": 1.79, + "learning_rate": 1.7486958443659112e-05, + "loss": 0.9508, + "step": 3240 + }, + { + "epoch": 1.79, + "learning_rate": 1.7417936475863526e-05, + "loss": 0.8725, + "step": 3245 + }, + { + "epoch": 1.8, + "learning_rate": 1.7348978097875036e-05, + "loss": 0.9195, + "step": 3250 + }, + { + "epoch": 1.8, + "learning_rate": 1.728008388803883e-05, + "loss": 0.933, + "step": 3255 + }, + { + "epoch": 1.8, + "learning_rate": 1.7211254424161933e-05, + "loss": 0.9747, + "step": 3260 + }, + { + "epoch": 1.81, + "learning_rate": 1.7142490283508324e-05, + "loss": 0.9168, + "step": 3265 + }, + { + "epoch": 1.81, + "learning_rate": 1.707379204279418e-05, + "loss": 0.9844, + "step": 3270 + }, + { + "epoch": 1.81, + "learning_rate": 1.700516027818293e-05, + "loss": 0.9071, + "step": 3275 + }, + { + "epoch": 1.81, + "learning_rate": 1.6936595565280488e-05, + "loss": 0.9311, + "step": 3280 + }, + { + "epoch": 1.82, + "learning_rate": 1.686809847913045e-05, + "loss": 0.9419, + "step": 3285 + }, + { + "epoch": 1.82, + "learning_rate": 1.679966959420918e-05, + "loss": 0.9679, + "step": 3290 + }, + { + "epoch": 1.82, + "learning_rate": 1.67313094844211e-05, + "loss": 0.9601, + "step": 3295 + }, + { + "epoch": 1.82, + "learning_rate": 1.6663018723093774e-05, + "loss": 0.9022, + "step": 3300 + }, + { + "epoch": 1.83, + "learning_rate": 1.6594797882973196e-05, + "loss": 0.8925, + "step": 3305 + }, + { + "epoch": 1.83, + "learning_rate": 1.6526647536218894e-05, + "loss": 0.9238, + "step": 3310 + }, + { + "epoch": 1.83, + "learning_rate": 1.6458568254399225e-05, + "loss": 0.9632, + "step": 3315 + }, + { + "epoch": 1.84, + "learning_rate": 1.6390560608486496e-05, + "loss": 0.9164, + "step": 3320 + }, + { + "epoch": 1.84, + "learning_rate": 1.6322625168852217e-05, + "loss": 0.9505, + "step": 3325 + }, + { + "epoch": 1.84, + "learning_rate": 1.6254762505262338e-05, + "loss": 0.9622, + "step": 3330 + }, + { + "epoch": 1.84, + "learning_rate": 1.618697318687241e-05, + "loss": 0.9204, + "step": 3335 + }, + { + "epoch": 1.85, + "learning_rate": 1.6119257782222895e-05, + "loss": 0.9504, + "step": 3340 + }, + { + "epoch": 1.85, + "learning_rate": 1.6051616859234285e-05, + "loss": 0.9384, + "step": 3345 + }, + { + "epoch": 1.85, + "learning_rate": 1.5984050985202474e-05, + "loss": 0.9374, + "step": 3350 + }, + { + "epoch": 1.86, + "learning_rate": 1.591656072679387e-05, + "loss": 0.9937, + "step": 3355 + }, + { + "epoch": 1.86, + "learning_rate": 1.5849146650040737e-05, + "loss": 0.9587, + "step": 3360 + }, + { + "epoch": 1.86, + "learning_rate": 1.5781809320336412e-05, + "loss": 0.9312, + "step": 3365 + }, + { + "epoch": 1.86, + "learning_rate": 1.5714549302430536e-05, + "loss": 0.9343, + "step": 3370 + }, + { + "epoch": 1.87, + "learning_rate": 1.5647367160424393e-05, + "loss": 0.9419, + "step": 3375 + }, + { + "epoch": 1.87, + "learning_rate": 1.558026345776608e-05, + "loss": 0.9371, + "step": 3380 + }, + { + "epoch": 1.87, + "learning_rate": 1.551323875724587e-05, + "loss": 0.913, + "step": 3385 + }, + { + "epoch": 1.87, + "learning_rate": 1.5446293620991437e-05, + "loss": 0.9195, + "step": 3390 + }, + { + "epoch": 1.88, + "learning_rate": 1.5379428610463174e-05, + "loss": 0.9164, + "step": 3395 + }, + { + "epoch": 1.88, + "learning_rate": 1.531264428644945e-05, + "loss": 0.9256, + "step": 3400 + }, + { + "epoch": 1.88, + "learning_rate": 1.5245941209061953e-05, + "loss": 0.9658, + "step": 3405 + }, + { + "epoch": 1.89, + "learning_rate": 1.517931993773094e-05, + "loss": 0.9397, + "step": 3410 + }, + { + "epoch": 1.89, + "learning_rate": 1.5112781031200569e-05, + "loss": 0.915, + "step": 3415 + }, + { + "epoch": 1.89, + "learning_rate": 1.5046325047524251e-05, + "loss": 0.9116, + "step": 3420 + }, + { + "epoch": 1.89, + "learning_rate": 1.4979952544059888e-05, + "loss": 0.9192, + "step": 3425 + }, + { + "epoch": 1.9, + "learning_rate": 1.4913664077465289e-05, + "loss": 0.924, + "step": 3430 + }, + { + "epoch": 1.9, + "learning_rate": 1.4847460203693408e-05, + "loss": 0.9579, + "step": 3435 + }, + { + "epoch": 1.9, + "learning_rate": 1.4781341477987776e-05, + "loss": 0.9816, + "step": 3440 + }, + { + "epoch": 1.9, + "learning_rate": 1.4715308454877758e-05, + "loss": 0.9412, + "step": 3445 + }, + { + "epoch": 1.91, + "learning_rate": 1.4649361688173979e-05, + "loss": 0.9413, + "step": 3450 + }, + { + "epoch": 1.91, + "learning_rate": 1.458350173096361e-05, + "loss": 0.9425, + "step": 3455 + }, + { + "epoch": 1.91, + "learning_rate": 1.4517729135605795e-05, + "loss": 0.9132, + "step": 3460 + }, + { + "epoch": 1.92, + "learning_rate": 1.4452044453726942e-05, + "loss": 0.9455, + "step": 3465 + }, + { + "epoch": 1.92, + "learning_rate": 1.4386448236216174e-05, + "loss": 0.9543, + "step": 3470 + }, + { + "epoch": 1.92, + "learning_rate": 1.4320941033220667e-05, + "loss": 0.9118, + "step": 3475 + }, + { + "epoch": 1.92, + "learning_rate": 1.4255523394141041e-05, + "loss": 0.9425, + "step": 3480 + }, + { + "epoch": 1.93, + "learning_rate": 1.4190195867626749e-05, + "loss": 0.8958, + "step": 3485 + }, + { + "epoch": 1.93, + "learning_rate": 1.4124959001571497e-05, + "loss": 0.944, + "step": 3490 + }, + { + "epoch": 1.93, + "learning_rate": 1.4059813343108616e-05, + "loss": 0.9611, + "step": 3495 + }, + { + "epoch": 1.94, + "learning_rate": 1.3994759438606501e-05, + "loss": 0.9448, + "step": 3500 + }, + { + "epoch": 1.94, + "learning_rate": 1.3929797833664013e-05, + "loss": 0.9659, + "step": 3505 + }, + { + "epoch": 1.94, + "learning_rate": 1.3864929073105922e-05, + "loss": 0.9178, + "step": 3510 + }, + { + "epoch": 1.94, + "learning_rate": 1.3800153700978282e-05, + "loss": 0.8965, + "step": 3515 + }, + { + "epoch": 1.95, + "learning_rate": 1.373547226054398e-05, + "loss": 0.9198, + "step": 3520 + }, + { + "epoch": 1.95, + "learning_rate": 1.367088529427803e-05, + "loss": 0.9398, + "step": 3525 + }, + { + "epoch": 1.95, + "learning_rate": 1.3606393343863182e-05, + "loss": 0.9423, + "step": 3530 + }, + { + "epoch": 1.95, + "learning_rate": 1.3541996950185227e-05, + "loss": 0.9592, + "step": 3535 + }, + { + "epoch": 1.96, + "learning_rate": 1.3477696653328598e-05, + "loss": 0.9489, + "step": 3540 + }, + { + "epoch": 1.96, + "learning_rate": 1.3413492992571713e-05, + "loss": 0.963, + "step": 3545 + }, + { + "epoch": 1.96, + "learning_rate": 1.3349386506382586e-05, + "loss": 0.9449, + "step": 3550 + }, + { + "epoch": 1.97, + "learning_rate": 1.3285377732414172e-05, + "loss": 0.9043, + "step": 3555 + }, + { + "epoch": 1.97, + "learning_rate": 1.3221467207499972e-05, + "loss": 0.9362, + "step": 3560 + }, + { + "epoch": 1.97, + "learning_rate": 1.3157655467649463e-05, + "loss": 0.945, + "step": 3565 + }, + { + "epoch": 1.97, + "learning_rate": 1.3093943048043634e-05, + "loss": 0.8693, + "step": 3570 + }, + { + "epoch": 1.98, + "learning_rate": 1.3030330483030479e-05, + "loss": 0.9659, + "step": 3575 + }, + { + "epoch": 1.98, + "learning_rate": 1.2966818306120535e-05, + "loss": 0.9036, + "step": 3580 + }, + { + "epoch": 1.98, + "learning_rate": 1.2903407049982386e-05, + "loss": 0.9282, + "step": 3585 + }, + { + "epoch": 1.99, + "learning_rate": 1.2840097246438215e-05, + "loss": 0.8747, + "step": 3590 + }, + { + "epoch": 1.99, + "learning_rate": 1.277688942645934e-05, + "loss": 0.936, + "step": 3595 + }, + { + "epoch": 1.99, + "learning_rate": 1.2713784120161725e-05, + "loss": 0.8945, + "step": 3600 + } + ], + "max_steps": 5424, + "num_train_epochs": 3, + "total_flos": 1.1698728119933338e+18, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-3600/training_args.bin b/checkpoint-3600/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..77433459daca6603950afc463ff989e817f81ac3 --- /dev/null +++ b/checkpoint-3600/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0f6a8853599a52b1f15984d75ce4b6434c335fdc67d4509d0b2ca0d8c613590a +size 3362 diff --git a/checkpoint-3700/README.md b/checkpoint-3700/README.md new file mode 100644 index 0000000000000000000000000000000000000000..53c0fe6b436905ad7a2c060e98fe1ebdeac1a2fe --- /dev/null +++ b/checkpoint-3700/README.md @@ -0,0 +1,15 @@ +--- +library_name: peft +--- +## Training procedure + + +The following `bitsandbytes` quantization config was used during training: +- load_in_8bit: True +- llm_int8_threshold: 6.0 +- llm_int8_skip_modules: None +- llm_int8_enable_fp32_cpu_offload: False +### Framework versions + + +- PEFT 0.5.0 diff --git a/checkpoint-3700/adapter_config.json b/checkpoint-3700/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..514ae99ac7471cc26016f7575e44dd8304f29b07 --- /dev/null +++ b/checkpoint-3700/adapter_config.json @@ -0,0 +1,21 @@ +{ + "auto_mapping": null, + "base_model_name_or_path": "meta-llama/Llama-2-7b-chat-hf", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "q_proj", + "v_proj" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-3700/adapter_model.bin b/checkpoint-3700/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..325c6327205b1d76898e53e7f8dd9c1e4551b788 --- /dev/null +++ b/checkpoint-3700/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b1fba7ab96c118b426b49d620ddfb3af195d771f7eb01aa493aea313e699ed2a +size 16821197 diff --git a/checkpoint-3700/finetuning_args.json b/checkpoint-3700/finetuning_args.json new file mode 100644 index 0000000000000000000000000000000000000000..d01efc206b59c6f88548e8f3940579f2ed2af33b --- /dev/null +++ b/checkpoint-3700/finetuning_args.json @@ -0,0 +1,16 @@ +{ + "dpo_beta": 0.1, + "finetuning_type": "lora", + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "lora_rank": 8, + "lora_target": [ + "q_proj", + "v_proj" + ], + "name_module_trainable": "mlp", + "num_hidden_layers": 32, + "num_layer_trainable": 3, + "ppo_score_norm": false, + "resume_lora_training": true +} diff --git a/checkpoint-3700/optimizer.pt b/checkpoint-3700/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..9f5ecda450e6a9b528a1293dc40669cd273f134d --- /dev/null +++ b/checkpoint-3700/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:66a7150fbeafce86bab7cd6d8b46c8acb66c89ef59b1b12dd7e753598831647e +size 33629893 diff --git a/checkpoint-3700/rng_state.pth b/checkpoint-3700/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..a8c0313a161b410af00ddcc63633501f838ed5e4 --- /dev/null +++ b/checkpoint-3700/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1c27daf062b1f1322d0caa45284aa3d9e18d941fdaea8f76dd6ca508ef9c101e +size 14575 diff --git a/checkpoint-3700/scaler.pt b/checkpoint-3700/scaler.pt new file mode 100644 index 0000000000000000000000000000000000000000..8b238a289262df1da7a2c0b3c05312b38ede3591 --- /dev/null +++ b/checkpoint-3700/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:24b6f8e729a612797ebc65b220b1baf518efd325d1059b642662e01842d8d672 +size 557 diff --git a/checkpoint-3700/scheduler.pt b/checkpoint-3700/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..c87308f0555cb3a8a78ff6fcdbb3f8f3cc28ecfb --- /dev/null +++ b/checkpoint-3700/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8af7dfe785f930b634de12e144318d2031058f837e5c86b7358bb932f2763f63 +size 627 diff --git a/checkpoint-3700/trainer_state.json b/checkpoint-3700/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..c6f75837503a19c2f3b59a4b4b7472285a897605 --- /dev/null +++ b/checkpoint-3700/trainer_state.json @@ -0,0 +1,4456 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 2.045894387614045, + "global_step": 3700, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.9999895164082156e-05, + "loss": 1.6349, + "step": 5 + }, + { + "epoch": 0.01, + "learning_rate": 4.999958065720787e-05, + "loss": 1.6199, + "step": 10 + }, + { + "epoch": 0.01, + "learning_rate": 4.999905648201487e-05, + "loss": 1.4834, + "step": 15 + }, + { + "epoch": 0.01, + "learning_rate": 4.999832264289934e-05, + "loss": 1.3882, + "step": 20 + }, + { + "epoch": 0.01, + "learning_rate": 4.999737914601591e-05, + "loss": 1.3679, + "step": 25 + }, + { + "epoch": 0.02, + "learning_rate": 4.999622599927756e-05, + "loss": 1.2396, + "step": 30 + }, + { + "epoch": 0.02, + "learning_rate": 4.999486321235559e-05, + "loss": 1.321, + "step": 35 + }, + { + "epoch": 0.02, + "learning_rate": 4.9993290796679516e-05, + "loss": 1.2874, + "step": 40 + }, + { + "epoch": 0.02, + "learning_rate": 4.999150876543699e-05, + "loss": 1.2607, + "step": 45 + }, + { + "epoch": 0.03, + "learning_rate": 4.9989517133573694e-05, + "loss": 1.2454, + "step": 50 + }, + { + "epoch": 0.03, + "learning_rate": 4.9987315917793174e-05, + "loss": 1.2799, + "step": 55 + }, + { + "epoch": 0.03, + "learning_rate": 4.998490513655676e-05, + "loss": 1.2575, + "step": 60 + }, + { + "epoch": 0.04, + "learning_rate": 4.998228481008337e-05, + "loss": 1.2404, + "step": 65 + }, + { + "epoch": 0.04, + "learning_rate": 4.997945496034934e-05, + "loss": 1.2219, + "step": 70 + }, + { + "epoch": 0.04, + "learning_rate": 4.9976415611088267e-05, + "loss": 1.2241, + "step": 75 + }, + { + "epoch": 0.04, + "learning_rate": 4.997316678779079e-05, + "loss": 1.1716, + "step": 80 + }, + { + "epoch": 0.05, + "learning_rate": 4.996970851770438e-05, + "loss": 1.1883, + "step": 85 + }, + { + "epoch": 0.05, + "learning_rate": 4.9966040829833115e-05, + "loss": 1.205, + "step": 90 + }, + { + "epoch": 0.05, + "learning_rate": 4.9962163754937426e-05, + "loss": 1.1246, + "step": 95 + }, + { + "epoch": 0.06, + "learning_rate": 4.995807732553384e-05, + "loss": 1.1636, + "step": 100 + }, + { + "epoch": 0.06, + "learning_rate": 4.9953781575894723e-05, + "loss": 1.158, + "step": 105 + }, + { + "epoch": 0.06, + "learning_rate": 4.9949276542048e-05, + "loss": 1.1477, + "step": 110 + }, + { + "epoch": 0.06, + "learning_rate": 4.9944562261776805e-05, + "loss": 1.1678, + "step": 115 + }, + { + "epoch": 0.07, + "learning_rate": 4.9939638774619216e-05, + "loss": 1.1501, + "step": 120 + }, + { + "epoch": 0.07, + "learning_rate": 4.99345061218679e-05, + "loss": 1.1955, + "step": 125 + }, + { + "epoch": 0.07, + "learning_rate": 4.9929164346569756e-05, + "loss": 1.1724, + "step": 130 + }, + { + "epoch": 0.07, + "learning_rate": 4.9923613493525576e-05, + "loss": 1.177, + "step": 135 + }, + { + "epoch": 0.08, + "learning_rate": 4.991785360928968e-05, + "loss": 1.1418, + "step": 140 + }, + { + "epoch": 0.08, + "learning_rate": 4.991188474216947e-05, + "loss": 1.1898, + "step": 145 + }, + { + "epoch": 0.08, + "learning_rate": 4.9905706942225094e-05, + "loss": 1.1479, + "step": 150 + }, + { + "epoch": 0.09, + "learning_rate": 4.9899320261268966e-05, + "loss": 1.1356, + "step": 155 + }, + { + "epoch": 0.09, + "learning_rate": 4.989272475286537e-05, + "loss": 1.1397, + "step": 160 + }, + { + "epoch": 0.09, + "learning_rate": 4.9885920472330004e-05, + "loss": 1.1215, + "step": 165 + }, + { + "epoch": 0.09, + "learning_rate": 4.9878907476729516e-05, + "loss": 1.167, + "step": 170 + }, + { + "epoch": 0.1, + "learning_rate": 4.9871685824881e-05, + "loss": 1.1219, + "step": 175 + }, + { + "epoch": 0.1, + "learning_rate": 4.9864255577351534e-05, + "loss": 1.0835, + "step": 180 + }, + { + "epoch": 0.1, + "learning_rate": 4.985661679645769e-05, + "loss": 1.0721, + "step": 185 + }, + { + "epoch": 0.11, + "learning_rate": 4.9848769546264915e-05, + "loss": 1.0692, + "step": 190 + }, + { + "epoch": 0.11, + "learning_rate": 4.9840713892587146e-05, + "loss": 1.0488, + "step": 195 + }, + { + "epoch": 0.11, + "learning_rate": 4.983244990298609e-05, + "loss": 1.1285, + "step": 200 + }, + { + "epoch": 0.11, + "learning_rate": 4.982397764677081e-05, + "loss": 1.0832, + "step": 205 + }, + { + "epoch": 0.12, + "learning_rate": 4.981529719499704e-05, + "loss": 1.0652, + "step": 210 + }, + { + "epoch": 0.12, + "learning_rate": 4.980640862046663e-05, + "loss": 1.1043, + "step": 215 + }, + { + "epoch": 0.12, + "learning_rate": 4.979731199772693e-05, + "loss": 1.112, + "step": 220 + }, + { + "epoch": 0.12, + "learning_rate": 4.9788007403070146e-05, + "loss": 1.1029, + "step": 225 + }, + { + "epoch": 0.13, + "learning_rate": 4.977849491453277e-05, + "loss": 1.0869, + "step": 230 + }, + { + "epoch": 0.13, + "learning_rate": 4.976877461189481e-05, + "loss": 1.0843, + "step": 235 + }, + { + "epoch": 0.13, + "learning_rate": 4.975884657667922e-05, + "loss": 1.0789, + "step": 240 + }, + { + "epoch": 0.14, + "learning_rate": 4.974871089215118e-05, + "loss": 1.0449, + "step": 245 + }, + { + "epoch": 0.14, + "learning_rate": 4.9738367643317405e-05, + "loss": 1.1053, + "step": 250 + }, + { + "epoch": 0.14, + "learning_rate": 4.9727816916925395e-05, + "loss": 1.0651, + "step": 255 + }, + { + "epoch": 0.14, + "learning_rate": 4.971705880146276e-05, + "loss": 1.0828, + "step": 260 + }, + { + "epoch": 0.15, + "learning_rate": 4.970609338715646e-05, + "loss": 1.0932, + "step": 265 + }, + { + "epoch": 0.15, + "learning_rate": 4.969492076597203e-05, + "loss": 1.0648, + "step": 270 + }, + { + "epoch": 0.15, + "learning_rate": 4.968354103161283e-05, + "loss": 1.0948, + "step": 275 + }, + { + "epoch": 0.15, + "learning_rate": 4.967195427951926e-05, + "loss": 1.0721, + "step": 280 + }, + { + "epoch": 0.16, + "learning_rate": 4.9660160606867936e-05, + "loss": 1.124, + "step": 285 + }, + { + "epoch": 0.16, + "learning_rate": 4.9648160112570896e-05, + "loss": 1.0963, + "step": 290 + }, + { + "epoch": 0.16, + "learning_rate": 4.9635952897274773e-05, + "loss": 1.1078, + "step": 295 + }, + { + "epoch": 0.17, + "learning_rate": 4.9623539063359925e-05, + "loss": 1.1059, + "step": 300 + }, + { + "epoch": 0.17, + "learning_rate": 4.961091871493962e-05, + "loss": 1.1032, + "step": 305 + }, + { + "epoch": 0.17, + "learning_rate": 4.959809195785912e-05, + "loss": 1.0595, + "step": 310 + }, + { + "epoch": 0.17, + "learning_rate": 4.958505889969481e-05, + "loss": 1.1096, + "step": 315 + }, + { + "epoch": 0.18, + "learning_rate": 4.957181964975329e-05, + "loss": 1.0589, + "step": 320 + }, + { + "epoch": 0.18, + "learning_rate": 4.955837431907049e-05, + "loss": 1.0608, + "step": 325 + }, + { + "epoch": 0.18, + "learning_rate": 4.954472302041069e-05, + "loss": 1.0819, + "step": 330 + }, + { + "epoch": 0.19, + "learning_rate": 4.9530865868265605e-05, + "loss": 1.0759, + "step": 335 + }, + { + "epoch": 0.19, + "learning_rate": 4.951680297885342e-05, + "loss": 1.0515, + "step": 340 + }, + { + "epoch": 0.19, + "learning_rate": 4.950253447011779e-05, + "loss": 1.0371, + "step": 345 + }, + { + "epoch": 0.19, + "learning_rate": 4.948806046172691e-05, + "loss": 1.0619, + "step": 350 + }, + { + "epoch": 0.2, + "learning_rate": 4.947338107507245e-05, + "loss": 1.0757, + "step": 355 + }, + { + "epoch": 0.2, + "learning_rate": 4.945849643326857e-05, + "loss": 1.0686, + "step": 360 + }, + { + "epoch": 0.2, + "learning_rate": 4.9443406661150874e-05, + "loss": 1.0809, + "step": 365 + }, + { + "epoch": 0.2, + "learning_rate": 4.942811188527537e-05, + "loss": 1.0704, + "step": 370 + }, + { + "epoch": 0.21, + "learning_rate": 4.941261223391742e-05, + "loss": 1.0655, + "step": 375 + }, + { + "epoch": 0.21, + "learning_rate": 4.939690783707063e-05, + "loss": 1.1182, + "step": 380 + }, + { + "epoch": 0.21, + "learning_rate": 4.938099882644578e-05, + "loss": 1.081, + "step": 385 + }, + { + "epoch": 0.22, + "learning_rate": 4.9364885335469734e-05, + "loss": 1.0792, + "step": 390 + }, + { + "epoch": 0.22, + "learning_rate": 4.93485674992843e-05, + "loss": 1.0244, + "step": 395 + }, + { + "epoch": 0.22, + "learning_rate": 4.933204545474511e-05, + "loss": 1.0531, + "step": 400 + }, + { + "epoch": 0.22, + "learning_rate": 4.9315319340420465e-05, + "loss": 1.048, + "step": 405 + }, + { + "epoch": 0.23, + "learning_rate": 4.929838929659015e-05, + "loss": 1.04, + "step": 410 + }, + { + "epoch": 0.23, + "learning_rate": 4.9281255465244314e-05, + "loss": 1.0686, + "step": 415 + }, + { + "epoch": 0.23, + "learning_rate": 4.926391799008223e-05, + "loss": 1.0933, + "step": 420 + }, + { + "epoch": 0.24, + "learning_rate": 4.924637701651111e-05, + "loss": 1.0313, + "step": 425 + }, + { + "epoch": 0.24, + "learning_rate": 4.9228632691644874e-05, + "loss": 1.0811, + "step": 430 + }, + { + "epoch": 0.24, + "learning_rate": 4.921068516430293e-05, + "loss": 1.0617, + "step": 435 + }, + { + "epoch": 0.24, + "learning_rate": 4.919253458500892e-05, + "loss": 1.0482, + "step": 440 + }, + { + "epoch": 0.25, + "learning_rate": 4.9174181105989445e-05, + "loss": 1.0681, + "step": 445 + }, + { + "epoch": 0.25, + "learning_rate": 4.9155624881172834e-05, + "loss": 1.0476, + "step": 450 + }, + { + "epoch": 0.25, + "learning_rate": 4.913686606618777e-05, + "loss": 1.0463, + "step": 455 + }, + { + "epoch": 0.25, + "learning_rate": 4.911790481836208e-05, + "loss": 1.0213, + "step": 460 + }, + { + "epoch": 0.26, + "learning_rate": 4.909874129672133e-05, + "loss": 0.9855, + "step": 465 + }, + { + "epoch": 0.26, + "learning_rate": 4.907937566198757e-05, + "loss": 1.045, + "step": 470 + }, + { + "epoch": 0.26, + "learning_rate": 4.9059808076577914e-05, + "loss": 1.0442, + "step": 475 + }, + { + "epoch": 0.27, + "learning_rate": 4.904003870460323e-05, + "loss": 1.0354, + "step": 480 + }, + { + "epoch": 0.27, + "learning_rate": 4.9020067711866735e-05, + "loss": 1.0331, + "step": 485 + }, + { + "epoch": 0.27, + "learning_rate": 4.899989526586261e-05, + "loss": 1.0585, + "step": 490 + }, + { + "epoch": 0.27, + "learning_rate": 4.8979521535774636e-05, + "loss": 1.0223, + "step": 495 + }, + { + "epoch": 0.28, + "learning_rate": 4.895894669247468e-05, + "loss": 1.0118, + "step": 500 + }, + { + "epoch": 0.28, + "learning_rate": 4.8938170908521356e-05, + "loss": 1.0508, + "step": 505 + }, + { + "epoch": 0.28, + "learning_rate": 4.8917194358158534e-05, + "loss": 1.0656, + "step": 510 + }, + { + "epoch": 0.28, + "learning_rate": 4.8896017217313886e-05, + "loss": 1.0655, + "step": 515 + }, + { + "epoch": 0.29, + "learning_rate": 4.887463966359741e-05, + "loss": 1.0833, + "step": 520 + }, + { + "epoch": 0.29, + "learning_rate": 4.8853061876299956e-05, + "loss": 1.068, + "step": 525 + }, + { + "epoch": 0.29, + "learning_rate": 4.8831284036391684e-05, + "loss": 1.0487, + "step": 530 + }, + { + "epoch": 0.3, + "learning_rate": 4.880930632652058e-05, + "loss": 1.059, + "step": 535 + }, + { + "epoch": 0.3, + "learning_rate": 4.878712893101092e-05, + "loss": 1.0408, + "step": 540 + }, + { + "epoch": 0.3, + "learning_rate": 4.876475203586171e-05, + "loss": 0.9976, + "step": 545 + }, + { + "epoch": 0.3, + "learning_rate": 4.874217582874514e-05, + "loss": 1.0358, + "step": 550 + }, + { + "epoch": 0.31, + "learning_rate": 4.8719400499005e-05, + "loss": 1.0073, + "step": 555 + }, + { + "epoch": 0.31, + "learning_rate": 4.869642623765509e-05, + "loss": 1.0742, + "step": 560 + }, + { + "epoch": 0.31, + "learning_rate": 4.867325323737765e-05, + "loss": 1.0463, + "step": 565 + }, + { + "epoch": 0.32, + "learning_rate": 4.864988169252168e-05, + "loss": 1.0242, + "step": 570 + }, + { + "epoch": 0.32, + "learning_rate": 4.8626311799101375e-05, + "loss": 1.0773, + "step": 575 + }, + { + "epoch": 0.32, + "learning_rate": 4.860254375479446e-05, + "loss": 1.0371, + "step": 580 + }, + { + "epoch": 0.32, + "learning_rate": 4.8578577758940504e-05, + "loss": 1.0399, + "step": 585 + }, + { + "epoch": 0.33, + "learning_rate": 4.855441401253928e-05, + "loss": 1.0411, + "step": 590 + }, + { + "epoch": 0.33, + "learning_rate": 4.8530052718249076e-05, + "loss": 1.0476, + "step": 595 + }, + { + "epoch": 0.33, + "learning_rate": 4.850549408038498e-05, + "loss": 1.0075, + "step": 600 + }, + { + "epoch": 0.33, + "learning_rate": 4.848073830491717e-05, + "loss": 1.0045, + "step": 605 + }, + { + "epoch": 0.34, + "learning_rate": 4.845578559946923e-05, + "loss": 1.0368, + "step": 610 + }, + { + "epoch": 0.34, + "learning_rate": 4.8430636173316306e-05, + "loss": 1.0156, + "step": 615 + }, + { + "epoch": 0.34, + "learning_rate": 4.840529023738348e-05, + "loss": 1.0334, + "step": 620 + }, + { + "epoch": 0.35, + "learning_rate": 4.837974800424389e-05, + "loss": 0.9994, + "step": 625 + }, + { + "epoch": 0.35, + "learning_rate": 4.8354009688117026e-05, + "loss": 1.0467, + "step": 630 + }, + { + "epoch": 0.35, + "learning_rate": 4.8328075504866874e-05, + "loss": 0.9968, + "step": 635 + }, + { + "epoch": 0.35, + "learning_rate": 4.8301945672000164e-05, + "loss": 1.041, + "step": 640 + }, + { + "epoch": 0.36, + "learning_rate": 4.8275620408664487e-05, + "loss": 0.9811, + "step": 645 + }, + { + "epoch": 0.36, + "learning_rate": 4.8249099935646494e-05, + "loss": 1.0235, + "step": 650 + }, + { + "epoch": 0.36, + "learning_rate": 4.822238447537003e-05, + "loss": 1.0152, + "step": 655 + }, + { + "epoch": 0.36, + "learning_rate": 4.819547425189429e-05, + "loss": 1.038, + "step": 660 + }, + { + "epoch": 0.37, + "learning_rate": 4.81683694909119e-05, + "loss": 1.0387, + "step": 665 + }, + { + "epoch": 0.37, + "learning_rate": 4.814107041974707e-05, + "loss": 1.019, + "step": 670 + }, + { + "epoch": 0.37, + "learning_rate": 4.811357726735366e-05, + "loss": 1.0403, + "step": 675 + }, + { + "epoch": 0.38, + "learning_rate": 4.808589026431324e-05, + "loss": 1.0828, + "step": 680 + }, + { + "epoch": 0.38, + "learning_rate": 4.805800964283322e-05, + "loss": 1.0128, + "step": 685 + }, + { + "epoch": 0.38, + "learning_rate": 4.802993563674483e-05, + "loss": 1.0502, + "step": 690 + }, + { + "epoch": 0.38, + "learning_rate": 4.80016684815012e-05, + "loss": 1.0169, + "step": 695 + }, + { + "epoch": 0.39, + "learning_rate": 4.7973208414175406e-05, + "loss": 1.0316, + "step": 700 + }, + { + "epoch": 0.39, + "learning_rate": 4.794455567345842e-05, + "loss": 1.0263, + "step": 705 + }, + { + "epoch": 0.39, + "learning_rate": 4.791571049965714e-05, + "loss": 1.0801, + "step": 710 + }, + { + "epoch": 0.4, + "learning_rate": 4.7886673134692404e-05, + "loss": 1.0575, + "step": 715 + }, + { + "epoch": 0.4, + "learning_rate": 4.7857443822096905e-05, + "loss": 0.9767, + "step": 720 + }, + { + "epoch": 0.4, + "learning_rate": 4.782802280701319e-05, + "loss": 0.9666, + "step": 725 + }, + { + "epoch": 0.4, + "learning_rate": 4.77984103361916e-05, + "loss": 0.9983, + "step": 730 + }, + { + "epoch": 0.41, + "learning_rate": 4.776860665798816e-05, + "loss": 1.0219, + "step": 735 + }, + { + "epoch": 0.41, + "learning_rate": 4.773861202236257e-05, + "loss": 0.9963, + "step": 740 + }, + { + "epoch": 0.41, + "learning_rate": 4.770842668087602e-05, + "loss": 1.0219, + "step": 745 + }, + { + "epoch": 0.41, + "learning_rate": 4.767805088668916e-05, + "loss": 1.0029, + "step": 750 + }, + { + "epoch": 0.42, + "learning_rate": 4.7647484894559936e-05, + "loss": 1.0077, + "step": 755 + }, + { + "epoch": 0.42, + "learning_rate": 4.7616728960841444e-05, + "loss": 0.9908, + "step": 760 + }, + { + "epoch": 0.42, + "learning_rate": 4.758578334347981e-05, + "loss": 1.0092, + "step": 765 + }, + { + "epoch": 0.43, + "learning_rate": 4.7554648302012015e-05, + "loss": 0.9874, + "step": 770 + }, + { + "epoch": 0.43, + "learning_rate": 4.7523324097563706e-05, + "loss": 1.0185, + "step": 775 + }, + { + "epoch": 0.43, + "learning_rate": 4.749181099284703e-05, + "loss": 1.0533, + "step": 780 + }, + { + "epoch": 0.43, + "learning_rate": 4.746010925215839e-05, + "loss": 1.0083, + "step": 785 + }, + { + "epoch": 0.44, + "learning_rate": 4.74282191413763e-05, + "loss": 1.0191, + "step": 790 + }, + { + "epoch": 0.44, + "learning_rate": 4.7396140927959045e-05, + "loss": 0.9786, + "step": 795 + }, + { + "epoch": 0.44, + "learning_rate": 4.7363874880942574e-05, + "loss": 1.0197, + "step": 800 + }, + { + "epoch": 0.45, + "learning_rate": 4.733142127093813e-05, + "loss": 1.0471, + "step": 805 + }, + { + "epoch": 0.45, + "learning_rate": 4.7298780370130014e-05, + "loss": 1.0134, + "step": 810 + }, + { + "epoch": 0.45, + "learning_rate": 4.726595245227336e-05, + "loss": 0.9801, + "step": 815 + }, + { + "epoch": 0.45, + "learning_rate": 4.723293779269173e-05, + "loss": 0.9944, + "step": 820 + }, + { + "epoch": 0.46, + "learning_rate": 4.7199736668274924e-05, + "loss": 1.0358, + "step": 825 + }, + { + "epoch": 0.46, + "learning_rate": 4.716634935747655e-05, + "loss": 0.9907, + "step": 830 + }, + { + "epoch": 0.46, + "learning_rate": 4.713277614031177e-05, + "loss": 1.0698, + "step": 835 + }, + { + "epoch": 0.46, + "learning_rate": 4.70990172983549e-05, + "loss": 1.0466, + "step": 840 + }, + { + "epoch": 0.47, + "learning_rate": 4.706507311473707e-05, + "loss": 0.9489, + "step": 845 + }, + { + "epoch": 0.47, + "learning_rate": 4.703094387414385e-05, + "loss": 0.9936, + "step": 850 + }, + { + "epoch": 0.47, + "learning_rate": 4.699662986281288e-05, + "loss": 0.9672, + "step": 855 + }, + { + "epoch": 0.48, + "learning_rate": 4.696213136853141e-05, + "loss": 1.0164, + "step": 860 + }, + { + "epoch": 0.48, + "learning_rate": 4.6927448680633954e-05, + "loss": 0.9817, + "step": 865 + }, + { + "epoch": 0.48, + "learning_rate": 4.689258208999983e-05, + "loss": 1.028, + "step": 870 + }, + { + "epoch": 0.48, + "learning_rate": 4.6857531889050716e-05, + "loss": 1.0239, + "step": 875 + }, + { + "epoch": 0.49, + "learning_rate": 4.682229837174821e-05, + "loss": 1.0378, + "step": 880 + }, + { + "epoch": 0.49, + "learning_rate": 4.678688183359135e-05, + "loss": 0.9908, + "step": 885 + }, + { + "epoch": 0.49, + "learning_rate": 4.675128257161418e-05, + "loss": 0.9384, + "step": 890 + }, + { + "epoch": 0.49, + "learning_rate": 4.671550088438319e-05, + "loss": 0.9481, + "step": 895 + }, + { + "epoch": 0.5, + "learning_rate": 4.6679537071994874e-05, + "loss": 0.9541, + "step": 900 + }, + { + "epoch": 0.5, + "learning_rate": 4.6643391436073165e-05, + "loss": 0.9787, + "step": 905 + }, + { + "epoch": 0.5, + "learning_rate": 4.660706427976693e-05, + "loss": 0.9897, + "step": 910 + }, + { + "epoch": 0.51, + "learning_rate": 4.657055590774745e-05, + "loss": 0.9947, + "step": 915 + }, + { + "epoch": 0.51, + "learning_rate": 4.6533866626205805e-05, + "loss": 0.9614, + "step": 920 + }, + { + "epoch": 0.51, + "learning_rate": 4.649699674285036e-05, + "loss": 1.0323, + "step": 925 + }, + { + "epoch": 0.51, + "learning_rate": 4.645994656690417e-05, + "loss": 1.0345, + "step": 930 + }, + { + "epoch": 0.52, + "learning_rate": 4.642271640910235e-05, + "loss": 1.0432, + "step": 935 + }, + { + "epoch": 0.52, + "learning_rate": 4.638530658168954e-05, + "loss": 1.0102, + "step": 940 + }, + { + "epoch": 0.52, + "learning_rate": 4.6347717398417203e-05, + "loss": 1.0361, + "step": 945 + }, + { + "epoch": 0.53, + "learning_rate": 4.6309949174541096e-05, + "loss": 0.9818, + "step": 950 + }, + { + "epoch": 0.53, + "learning_rate": 4.627200222681851e-05, + "loss": 1.0293, + "step": 955 + }, + { + "epoch": 0.53, + "learning_rate": 4.6233876873505694e-05, + "loss": 1.0125, + "step": 960 + }, + { + "epoch": 0.53, + "learning_rate": 4.619557343435516e-05, + "loss": 1.0039, + "step": 965 + }, + { + "epoch": 0.54, + "learning_rate": 4.615709223061302e-05, + "loss": 1.0146, + "step": 970 + }, + { + "epoch": 0.54, + "learning_rate": 4.611843358501624e-05, + "loss": 1.0352, + "step": 975 + }, + { + "epoch": 0.54, + "learning_rate": 4.6079597821789993e-05, + "loss": 1.0405, + "step": 980 + }, + { + "epoch": 0.54, + "learning_rate": 4.604058526664491e-05, + "loss": 0.9834, + "step": 985 + }, + { + "epoch": 0.55, + "learning_rate": 4.600139624677436e-05, + "loss": 1.0252, + "step": 990 + }, + { + "epoch": 0.55, + "learning_rate": 4.596203109085168e-05, + "loss": 1.0516, + "step": 995 + }, + { + "epoch": 0.55, + "learning_rate": 4.5922490129027464e-05, + "loss": 0.9762, + "step": 1000 + }, + { + "epoch": 0.56, + "learning_rate": 4.588277369292674e-05, + "loss": 0.9801, + "step": 1005 + }, + { + "epoch": 0.56, + "learning_rate": 4.5842882115646234e-05, + "loss": 1.0127, + "step": 1010 + }, + { + "epoch": 0.56, + "learning_rate": 4.580281573175157e-05, + "loss": 1.0415, + "step": 1015 + }, + { + "epoch": 0.56, + "learning_rate": 4.576257487727442e-05, + "loss": 0.9974, + "step": 1020 + }, + { + "epoch": 0.57, + "learning_rate": 4.572215988970974e-05, + "loss": 1.0097, + "step": 1025 + }, + { + "epoch": 0.57, + "learning_rate": 4.568157110801293e-05, + "loss": 1.0004, + "step": 1030 + }, + { + "epoch": 0.57, + "learning_rate": 4.5640808872596944e-05, + "loss": 0.9932, + "step": 1035 + }, + { + "epoch": 0.58, + "learning_rate": 4.5599873525329505e-05, + "loss": 0.9437, + "step": 1040 + }, + { + "epoch": 0.58, + "learning_rate": 4.555876540953019e-05, + "loss": 1.0004, + "step": 1045 + }, + { + "epoch": 0.58, + "learning_rate": 4.551748486996755e-05, + "loss": 0.9743, + "step": 1050 + }, + { + "epoch": 0.58, + "learning_rate": 4.547603225285626e-05, + "loss": 1.0303, + "step": 1055 + }, + { + "epoch": 0.59, + "learning_rate": 4.543440790585417e-05, + "loss": 1.0225, + "step": 1060 + }, + { + "epoch": 0.59, + "learning_rate": 4.539261217805939e-05, + "loss": 1.0102, + "step": 1065 + }, + { + "epoch": 0.59, + "learning_rate": 4.535064542000743e-05, + "loss": 1.0087, + "step": 1070 + }, + { + "epoch": 0.59, + "learning_rate": 4.5308507983668165e-05, + "loss": 0.952, + "step": 1075 + }, + { + "epoch": 0.6, + "learning_rate": 4.526620022244293e-05, + "loss": 0.9767, + "step": 1080 + }, + { + "epoch": 0.6, + "learning_rate": 4.522372249116158e-05, + "loss": 1.0049, + "step": 1085 + }, + { + "epoch": 0.6, + "learning_rate": 4.5181075146079456e-05, + "loss": 1.0397, + "step": 1090 + }, + { + "epoch": 0.61, + "learning_rate": 4.5138258544874455e-05, + "loss": 1.0167, + "step": 1095 + }, + { + "epoch": 0.61, + "learning_rate": 4.5095273046643985e-05, + "loss": 0.9966, + "step": 1100 + }, + { + "epoch": 0.61, + "learning_rate": 4.5052119011901986e-05, + "loss": 0.983, + "step": 1105 + }, + { + "epoch": 0.61, + "learning_rate": 4.500879680257587e-05, + "loss": 1.0301, + "step": 1110 + }, + { + "epoch": 0.62, + "learning_rate": 4.4965306782003535e-05, + "loss": 1.0371, + "step": 1115 + }, + { + "epoch": 0.62, + "learning_rate": 4.492164931493028e-05, + "loss": 1.0083, + "step": 1120 + }, + { + "epoch": 0.62, + "learning_rate": 4.487782476750575e-05, + "loss": 0.9728, + "step": 1125 + }, + { + "epoch": 0.62, + "learning_rate": 4.4833833507280884e-05, + "loss": 0.9892, + "step": 1130 + }, + { + "epoch": 0.63, + "learning_rate": 4.4789675903204805e-05, + "loss": 0.9847, + "step": 1135 + }, + { + "epoch": 0.63, + "learning_rate": 4.474535232562176e-05, + "loss": 1.0545, + "step": 1140 + }, + { + "epoch": 0.63, + "learning_rate": 4.470086314626797e-05, + "loss": 1.0204, + "step": 1145 + }, + { + "epoch": 0.64, + "learning_rate": 4.465620873826856e-05, + "loss": 0.9648, + "step": 1150 + }, + { + "epoch": 0.64, + "learning_rate": 4.46113894761344e-05, + "loss": 0.9545, + "step": 1155 + }, + { + "epoch": 0.64, + "learning_rate": 4.456640573575896e-05, + "loss": 1.0256, + "step": 1160 + }, + { + "epoch": 0.64, + "learning_rate": 4.4521257894415183e-05, + "loss": 0.99, + "step": 1165 + }, + { + "epoch": 0.65, + "learning_rate": 4.44759463307523e-05, + "loss": 0.9906, + "step": 1170 + }, + { + "epoch": 0.65, + "learning_rate": 4.443047142479266e-05, + "loss": 0.9876, + "step": 1175 + }, + { + "epoch": 0.65, + "learning_rate": 4.4384833557928553e-05, + "loss": 1.0495, + "step": 1180 + }, + { + "epoch": 0.66, + "learning_rate": 4.4339033112918966e-05, + "loss": 0.9869, + "step": 1185 + }, + { + "epoch": 0.66, + "learning_rate": 4.4293070473886456e-05, + "loss": 1.0299, + "step": 1190 + }, + { + "epoch": 0.66, + "learning_rate": 4.424694602631385e-05, + "loss": 1.0073, + "step": 1195 + }, + { + "epoch": 0.66, + "learning_rate": 4.420066015704105e-05, + "loss": 1.0023, + "step": 1200 + }, + { + "epoch": 0.67, + "learning_rate": 4.41542132542618e-05, + "loss": 0.9462, + "step": 1205 + }, + { + "epoch": 0.67, + "learning_rate": 4.410760570752037e-05, + "loss": 1.0116, + "step": 1210 + }, + { + "epoch": 0.67, + "learning_rate": 4.4060837907708375e-05, + "loss": 0.9652, + "step": 1215 + }, + { + "epoch": 0.67, + "learning_rate": 4.401391024706142e-05, + "loss": 1.0411, + "step": 1220 + }, + { + "epoch": 0.68, + "learning_rate": 4.396682311915586e-05, + "loss": 0.9691, + "step": 1225 + }, + { + "epoch": 0.68, + "learning_rate": 4.3919576918905495e-05, + "loss": 0.961, + "step": 1230 + }, + { + "epoch": 0.68, + "learning_rate": 4.387217204255819e-05, + "loss": 0.9602, + "step": 1235 + }, + { + "epoch": 0.69, + "learning_rate": 4.3824608887692666e-05, + "loss": 1.0197, + "step": 1240 + }, + { + "epoch": 0.69, + "learning_rate": 4.377688785321507e-05, + "loss": 0.9982, + "step": 1245 + }, + { + "epoch": 0.69, + "learning_rate": 4.372900933935569e-05, + "loss": 0.986, + "step": 1250 + }, + { + "epoch": 0.69, + "learning_rate": 4.368097374766556e-05, + "loss": 0.9744, + "step": 1255 + }, + { + "epoch": 0.7, + "learning_rate": 4.3632781481013105e-05, + "loss": 1.0078, + "step": 1260 + }, + { + "epoch": 0.7, + "learning_rate": 4.358443294358077e-05, + "loss": 0.9865, + "step": 1265 + }, + { + "epoch": 0.7, + "learning_rate": 4.35359285408616e-05, + "loss": 0.976, + "step": 1270 + }, + { + "epoch": 0.71, + "learning_rate": 4.348726867965591e-05, + "loss": 1.0141, + "step": 1275 + }, + { + "epoch": 0.71, + "learning_rate": 4.343845376806777e-05, + "loss": 0.952, + "step": 1280 + }, + { + "epoch": 0.71, + "learning_rate": 4.338948421550169e-05, + "loss": 1.0239, + "step": 1285 + }, + { + "epoch": 0.71, + "learning_rate": 4.334036043265909e-05, + "loss": 0.994, + "step": 1290 + }, + { + "epoch": 0.72, + "learning_rate": 4.329108283153492e-05, + "loss": 1.0505, + "step": 1295 + }, + { + "epoch": 0.72, + "learning_rate": 4.3241651825414195e-05, + "loss": 0.9919, + "step": 1300 + }, + { + "epoch": 0.72, + "learning_rate": 4.31920678288685e-05, + "loss": 1.0169, + "step": 1305 + }, + { + "epoch": 0.72, + "learning_rate": 4.3142331257752546e-05, + "loss": 0.9938, + "step": 1310 + }, + { + "epoch": 0.73, + "learning_rate": 4.309244252920064e-05, + "loss": 0.92, + "step": 1315 + }, + { + "epoch": 0.73, + "learning_rate": 4.304240206162326e-05, + "loss": 0.9537, + "step": 1320 + }, + { + "epoch": 0.73, + "learning_rate": 4.299221027470345e-05, + "loss": 0.996, + "step": 1325 + }, + { + "epoch": 0.74, + "learning_rate": 4.294186758939339e-05, + "loss": 0.9652, + "step": 1330 + }, + { + "epoch": 0.74, + "learning_rate": 4.2891374427910795e-05, + "loss": 1.0138, + "step": 1335 + }, + { + "epoch": 0.74, + "learning_rate": 4.284073121373544e-05, + "loss": 0.9267, + "step": 1340 + }, + { + "epoch": 0.74, + "learning_rate": 4.278993837160553e-05, + "loss": 0.9899, + "step": 1345 + }, + { + "epoch": 0.75, + "learning_rate": 4.273899632751422e-05, + "loss": 0.963, + "step": 1350 + }, + { + "epoch": 0.75, + "learning_rate": 4.2687905508705974e-05, + "loss": 1.0066, + "step": 1355 + }, + { + "epoch": 0.75, + "learning_rate": 4.263666634367303e-05, + "loss": 0.9939, + "step": 1360 + }, + { + "epoch": 0.75, + "learning_rate": 4.258527926215178e-05, + "loss": 0.9667, + "step": 1365 + }, + { + "epoch": 0.76, + "learning_rate": 4.253374469511917e-05, + "loss": 0.9999, + "step": 1370 + }, + { + "epoch": 0.76, + "learning_rate": 4.248206307478909e-05, + "loss": 0.9738, + "step": 1375 + }, + { + "epoch": 0.76, + "learning_rate": 4.243023483460875e-05, + "loss": 0.9829, + "step": 1380 + }, + { + "epoch": 0.77, + "learning_rate": 4.237826040925503e-05, + "loss": 1.0107, + "step": 1385 + }, + { + "epoch": 0.77, + "learning_rate": 4.232614023463088e-05, + "loss": 0.9992, + "step": 1390 + }, + { + "epoch": 0.77, + "learning_rate": 4.227387474786159e-05, + "loss": 0.9566, + "step": 1395 + }, + { + "epoch": 0.77, + "learning_rate": 4.222146438729119e-05, + "loss": 0.9722, + "step": 1400 + }, + { + "epoch": 0.78, + "learning_rate": 4.216890959247873e-05, + "loss": 0.988, + "step": 1405 + }, + { + "epoch": 0.78, + "learning_rate": 4.211621080419463e-05, + "loss": 1.0103, + "step": 1410 + }, + { + "epoch": 0.78, + "learning_rate": 4.206336846441695e-05, + "loss": 0.9805, + "step": 1415 + }, + { + "epoch": 0.79, + "learning_rate": 4.201038301632772e-05, + "loss": 1.0177, + "step": 1420 + }, + { + "epoch": 0.79, + "learning_rate": 4.195725490430917e-05, + "loss": 0.9938, + "step": 1425 + }, + { + "epoch": 0.79, + "learning_rate": 4.190398457394007e-05, + "loss": 1.0276, + "step": 1430 + }, + { + "epoch": 0.79, + "learning_rate": 4.1850572471991924e-05, + "loss": 0.9909, + "step": 1435 + }, + { + "epoch": 0.8, + "learning_rate": 4.1797019046425264e-05, + "loss": 0.9794, + "step": 1440 + }, + { + "epoch": 0.8, + "learning_rate": 4.1743324746385914e-05, + "loss": 1.0073, + "step": 1445 + }, + { + "epoch": 0.8, + "learning_rate": 4.1689490022201154e-05, + "loss": 1.0106, + "step": 1450 + }, + { + "epoch": 0.8, + "learning_rate": 4.163551532537601e-05, + "loss": 1.0241, + "step": 1455 + }, + { + "epoch": 0.81, + "learning_rate": 4.1581401108589425e-05, + "loss": 1.0178, + "step": 1460 + }, + { + "epoch": 0.81, + "learning_rate": 4.1527147825690495e-05, + "loss": 0.9426, + "step": 1465 + }, + { + "epoch": 0.81, + "learning_rate": 4.1472755931694626e-05, + "loss": 0.9642, + "step": 1470 + }, + { + "epoch": 0.82, + "learning_rate": 4.141822588277976e-05, + "loss": 0.9678, + "step": 1475 + }, + { + "epoch": 0.82, + "learning_rate": 4.136355813628251e-05, + "loss": 0.9654, + "step": 1480 + }, + { + "epoch": 0.82, + "learning_rate": 4.130875315069435e-05, + "loss": 0.9748, + "step": 1485 + }, + { + "epoch": 0.82, + "learning_rate": 4.125381138565775e-05, + "loss": 0.9321, + "step": 1490 + }, + { + "epoch": 0.83, + "learning_rate": 4.1198733301962346e-05, + "loss": 0.9885, + "step": 1495 + }, + { + "epoch": 0.83, + "learning_rate": 4.114351936154105e-05, + "loss": 0.9385, + "step": 1500 + }, + { + "epoch": 0.83, + "learning_rate": 4.108817002746619e-05, + "loss": 0.9362, + "step": 1505 + }, + { + "epoch": 0.83, + "learning_rate": 4.1032685763945625e-05, + "loss": 0.9764, + "step": 1510 + }, + { + "epoch": 0.84, + "learning_rate": 4.097706703631886e-05, + "loss": 0.961, + "step": 1515 + }, + { + "epoch": 0.84, + "learning_rate": 4.092131431105312e-05, + "loss": 0.9818, + "step": 1520 + }, + { + "epoch": 0.84, + "learning_rate": 4.086542805573945e-05, + "loss": 0.9855, + "step": 1525 + }, + { + "epoch": 0.85, + "learning_rate": 4.080940873908881e-05, + "loss": 0.9618, + "step": 1530 + }, + { + "epoch": 0.85, + "learning_rate": 4.07532568309281e-05, + "loss": 1.0102, + "step": 1535 + }, + { + "epoch": 0.85, + "learning_rate": 4.069697280219628e-05, + "loss": 0.9773, + "step": 1540 + }, + { + "epoch": 0.85, + "learning_rate": 4.0640557124940376e-05, + "loss": 0.9687, + "step": 1545 + }, + { + "epoch": 0.86, + "learning_rate": 4.058401027231152e-05, + "loss": 0.9818, + "step": 1550 + }, + { + "epoch": 0.86, + "learning_rate": 4.052733271856103e-05, + "loss": 0.9911, + "step": 1555 + }, + { + "epoch": 0.86, + "learning_rate": 4.0470524939036355e-05, + "loss": 0.9694, + "step": 1560 + }, + { + "epoch": 0.87, + "learning_rate": 4.0413587410177155e-05, + "loss": 0.9374, + "step": 1565 + }, + { + "epoch": 0.87, + "learning_rate": 4.035652060951128e-05, + "loss": 0.956, + "step": 1570 + }, + { + "epoch": 0.87, + "learning_rate": 4.0299325015650774e-05, + "loss": 0.9491, + "step": 1575 + }, + { + "epoch": 0.87, + "learning_rate": 4.024200110828783e-05, + "loss": 0.9569, + "step": 1580 + }, + { + "epoch": 0.88, + "learning_rate": 4.018454936819082e-05, + "loss": 0.9627, + "step": 1585 + }, + { + "epoch": 0.88, + "learning_rate": 4.012697027720018e-05, + "loss": 0.9922, + "step": 1590 + }, + { + "epoch": 0.88, + "learning_rate": 4.0069264318224506e-05, + "loss": 0.9703, + "step": 1595 + }, + { + "epoch": 0.88, + "learning_rate": 4.0011431975236337e-05, + "loss": 0.9436, + "step": 1600 + }, + { + "epoch": 0.89, + "learning_rate": 3.995347373326822e-05, + "loss": 0.961, + "step": 1605 + }, + { + "epoch": 0.89, + "learning_rate": 3.989539007840861e-05, + "loss": 0.9247, + "step": 1610 + }, + { + "epoch": 0.89, + "learning_rate": 3.983718149779775e-05, + "loss": 0.9537, + "step": 1615 + }, + { + "epoch": 0.9, + "learning_rate": 3.9778848479623656e-05, + "loss": 0.9415, + "step": 1620 + }, + { + "epoch": 0.9, + "learning_rate": 3.972039151311795e-05, + "loss": 0.9954, + "step": 1625 + }, + { + "epoch": 0.9, + "learning_rate": 3.966181108855183e-05, + "loss": 0.9451, + "step": 1630 + }, + { + "epoch": 0.9, + "learning_rate": 3.960310769723189e-05, + "loss": 0.9977, + "step": 1635 + }, + { + "epoch": 0.91, + "learning_rate": 3.9544281831496034e-05, + "loss": 0.9806, + "step": 1640 + }, + { + "epoch": 0.91, + "learning_rate": 3.9485333984709374e-05, + "loss": 0.9851, + "step": 1645 + }, + { + "epoch": 0.91, + "learning_rate": 3.942626465126001e-05, + "loss": 1.0089, + "step": 1650 + }, + { + "epoch": 0.92, + "learning_rate": 3.9367074326555e-05, + "loss": 0.9562, + "step": 1655 + }, + { + "epoch": 0.92, + "learning_rate": 3.930776350701609e-05, + "loss": 0.9892, + "step": 1660 + }, + { + "epoch": 0.92, + "learning_rate": 3.92483326900756e-05, + "loss": 0.9875, + "step": 1665 + }, + { + "epoch": 0.92, + "learning_rate": 3.91887823741723e-05, + "loss": 1.0012, + "step": 1670 + }, + { + "epoch": 0.93, + "learning_rate": 3.9129113058747136e-05, + "loss": 0.97, + "step": 1675 + }, + { + "epoch": 0.93, + "learning_rate": 3.9069325244239095e-05, + "loss": 0.9793, + "step": 1680 + }, + { + "epoch": 0.93, + "learning_rate": 3.900941943208103e-05, + "loss": 0.9864, + "step": 1685 + }, + { + "epoch": 0.93, + "learning_rate": 3.894939612469539e-05, + "loss": 0.9596, + "step": 1690 + }, + { + "epoch": 0.94, + "learning_rate": 3.888925582549006e-05, + "loss": 0.9679, + "step": 1695 + }, + { + "epoch": 0.94, + "learning_rate": 3.882899903885412e-05, + "loss": 0.9395, + "step": 1700 + }, + { + "epoch": 0.94, + "learning_rate": 3.876862627015361e-05, + "loss": 0.9774, + "step": 1705 + }, + { + "epoch": 0.95, + "learning_rate": 3.87081380257273e-05, + "loss": 0.9686, + "step": 1710 + }, + { + "epoch": 0.95, + "learning_rate": 3.864753481288244e-05, + "loss": 0.953, + "step": 1715 + }, + { + "epoch": 0.95, + "learning_rate": 3.8586817139890515e-05, + "loss": 0.9719, + "step": 1720 + }, + { + "epoch": 0.95, + "learning_rate": 3.852598551598294e-05, + "loss": 0.9748, + "step": 1725 + }, + { + "epoch": 0.96, + "learning_rate": 3.8465040451346874e-05, + "loss": 0.9741, + "step": 1730 + }, + { + "epoch": 0.96, + "learning_rate": 3.8403982457120836e-05, + "loss": 0.9747, + "step": 1735 + }, + { + "epoch": 0.96, + "learning_rate": 3.834281204539051e-05, + "loss": 0.9791, + "step": 1740 + }, + { + "epoch": 0.96, + "learning_rate": 3.828152972918438e-05, + "loss": 0.9704, + "step": 1745 + }, + { + "epoch": 0.97, + "learning_rate": 3.82201360224695e-05, + "loss": 0.9556, + "step": 1750 + }, + { + "epoch": 0.97, + "learning_rate": 3.815863144014711e-05, + "loss": 0.9552, + "step": 1755 + }, + { + "epoch": 0.97, + "learning_rate": 3.809701649804834e-05, + "loss": 0.9594, + "step": 1760 + }, + { + "epoch": 0.98, + "learning_rate": 3.8035291712929926e-05, + "loss": 0.959, + "step": 1765 + }, + { + "epoch": 0.98, + "learning_rate": 3.797345760246982e-05, + "loss": 1.0254, + "step": 1770 + }, + { + "epoch": 0.98, + "learning_rate": 3.791151468526289e-05, + "loss": 0.9925, + "step": 1775 + }, + { + "epoch": 0.98, + "learning_rate": 3.784946348081654e-05, + "loss": 0.9516, + "step": 1780 + }, + { + "epoch": 0.99, + "learning_rate": 3.7787304509546365e-05, + "loss": 0.954, + "step": 1785 + }, + { + "epoch": 0.99, + "learning_rate": 3.7725038292771774e-05, + "loss": 0.9465, + "step": 1790 + }, + { + "epoch": 0.99, + "learning_rate": 3.766266535271167e-05, + "loss": 0.9792, + "step": 1795 + }, + { + "epoch": 1.0, + "learning_rate": 3.760018621248e-05, + "loss": 0.964, + "step": 1800 + }, + { + "epoch": 1.0, + "learning_rate": 3.75376013960814e-05, + "loss": 0.9419, + "step": 1805 + }, + { + "epoch": 1.0, + "learning_rate": 3.747491142840681e-05, + "loss": 0.9818, + "step": 1810 + }, + { + "epoch": 1.0, + "learning_rate": 3.741211683522904e-05, + "loss": 0.9153, + "step": 1815 + }, + { + "epoch": 1.01, + "learning_rate": 3.734921814319841e-05, + "loss": 0.955, + "step": 1820 + }, + { + "epoch": 1.01, + "learning_rate": 3.728621587983828e-05, + "loss": 0.9467, + "step": 1825 + }, + { + "epoch": 1.01, + "learning_rate": 3.722311057354067e-05, + "loss": 0.9816, + "step": 1830 + }, + { + "epoch": 1.01, + "learning_rate": 3.715990275356178e-05, + "loss": 0.9727, + "step": 1835 + }, + { + "epoch": 1.02, + "learning_rate": 3.7096592950017617e-05, + "loss": 0.9822, + "step": 1840 + }, + { + "epoch": 1.02, + "learning_rate": 3.703318169387947e-05, + "loss": 0.942, + "step": 1845 + }, + { + "epoch": 1.02, + "learning_rate": 3.696966951696952e-05, + "loss": 0.9306, + "step": 1850 + }, + { + "epoch": 1.03, + "learning_rate": 3.690605695195637e-05, + "loss": 0.9654, + "step": 1855 + }, + { + "epoch": 1.03, + "learning_rate": 3.684234453235054e-05, + "loss": 0.9592, + "step": 1860 + }, + { + "epoch": 1.03, + "learning_rate": 3.677853279250003e-05, + "loss": 0.988, + "step": 1865 + }, + { + "epoch": 1.03, + "learning_rate": 3.671462226758583e-05, + "loss": 0.9462, + "step": 1870 + }, + { + "epoch": 1.04, + "learning_rate": 3.665061349361742e-05, + "loss": 0.9685, + "step": 1875 + }, + { + "epoch": 1.04, + "learning_rate": 3.658650700742828e-05, + "loss": 0.9772, + "step": 1880 + }, + { + "epoch": 1.04, + "learning_rate": 3.6522303346671404e-05, + "loss": 0.9482, + "step": 1885 + }, + { + "epoch": 1.05, + "learning_rate": 3.645800304981477e-05, + "loss": 1.0069, + "step": 1890 + }, + { + "epoch": 1.05, + "learning_rate": 3.639360665613683e-05, + "loss": 0.9003, + "step": 1895 + }, + { + "epoch": 1.05, + "learning_rate": 3.632911470572197e-05, + "loss": 0.9279, + "step": 1900 + }, + { + "epoch": 1.05, + "learning_rate": 3.626452773945603e-05, + "loss": 0.9237, + "step": 1905 + }, + { + "epoch": 1.06, + "learning_rate": 3.619984629902172e-05, + "loss": 1.0086, + "step": 1910 + }, + { + "epoch": 1.06, + "learning_rate": 3.613507092689409e-05, + "loss": 0.9625, + "step": 1915 + }, + { + "epoch": 1.06, + "learning_rate": 3.607020216633599e-05, + "loss": 0.9297, + "step": 1920 + }, + { + "epoch": 1.06, + "learning_rate": 3.60052405613935e-05, + "loss": 0.9718, + "step": 1925 + }, + { + "epoch": 1.07, + "learning_rate": 3.594018665689139e-05, + "loss": 0.9512, + "step": 1930 + }, + { + "epoch": 1.07, + "learning_rate": 3.5875040998428513e-05, + "loss": 0.9923, + "step": 1935 + }, + { + "epoch": 1.07, + "learning_rate": 3.5809804132373253e-05, + "loss": 0.9518, + "step": 1940 + }, + { + "epoch": 1.08, + "learning_rate": 3.574447660585897e-05, + "loss": 0.9324, + "step": 1945 + }, + { + "epoch": 1.08, + "learning_rate": 3.5679058966779344e-05, + "loss": 0.9724, + "step": 1950 + }, + { + "epoch": 1.08, + "learning_rate": 3.561355176378384e-05, + "loss": 0.978, + "step": 1955 + }, + { + "epoch": 1.08, + "learning_rate": 3.554795554627307e-05, + "loss": 0.9893, + "step": 1960 + }, + { + "epoch": 1.09, + "learning_rate": 3.548227086439422e-05, + "loss": 0.967, + "step": 1965 + }, + { + "epoch": 1.09, + "learning_rate": 3.541649826903639e-05, + "loss": 0.9529, + "step": 1970 + }, + { + "epoch": 1.09, + "learning_rate": 3.535063831182602e-05, + "loss": 0.9506, + "step": 1975 + }, + { + "epoch": 1.09, + "learning_rate": 3.528469154512224e-05, + "loss": 0.9525, + "step": 1980 + }, + { + "epoch": 1.1, + "learning_rate": 3.521865852201223e-05, + "loss": 0.9258, + "step": 1985 + }, + { + "epoch": 1.1, + "learning_rate": 3.5152539796306596e-05, + "loss": 0.9417, + "step": 1990 + }, + { + "epoch": 1.1, + "learning_rate": 3.508633592253472e-05, + "loss": 0.9465, + "step": 1995 + }, + { + "epoch": 1.11, + "learning_rate": 3.502004745594011e-05, + "loss": 0.9494, + "step": 2000 + }, + { + "epoch": 1.11, + "learning_rate": 3.4953674952475755e-05, + "loss": 0.9709, + "step": 2005 + }, + { + "epoch": 1.11, + "learning_rate": 3.488721896879943e-05, + "loss": 0.9581, + "step": 2010 + }, + { + "epoch": 1.11, + "learning_rate": 3.4820680062269074e-05, + "loss": 0.974, + "step": 2015 + }, + { + "epoch": 1.12, + "learning_rate": 3.4754058790938046e-05, + "loss": 0.9768, + "step": 2020 + }, + { + "epoch": 1.12, + "learning_rate": 3.468735571355055e-05, + "loss": 0.982, + "step": 2025 + }, + { + "epoch": 1.12, + "learning_rate": 3.4620571389536825e-05, + "loss": 0.984, + "step": 2030 + }, + { + "epoch": 1.13, + "learning_rate": 3.455370637900856e-05, + "loss": 0.9604, + "step": 2035 + }, + { + "epoch": 1.13, + "learning_rate": 3.448676124275414e-05, + "loss": 0.897, + "step": 2040 + }, + { + "epoch": 1.13, + "learning_rate": 3.4419736542233925e-05, + "loss": 0.9968, + "step": 2045 + }, + { + "epoch": 1.13, + "learning_rate": 3.4352632839575616e-05, + "loss": 0.9479, + "step": 2050 + }, + { + "epoch": 1.14, + "learning_rate": 3.428545069756946e-05, + "loss": 0.9724, + "step": 2055 + }, + { + "epoch": 1.14, + "learning_rate": 3.42181906796636e-05, + "loss": 0.9493, + "step": 2060 + }, + { + "epoch": 1.14, + "learning_rate": 3.415085334995927e-05, + "loss": 0.9348, + "step": 2065 + }, + { + "epoch": 1.14, + "learning_rate": 3.408343927320613e-05, + "loss": 0.9702, + "step": 2070 + }, + { + "epoch": 1.15, + "learning_rate": 3.401594901479753e-05, + "loss": 0.9089, + "step": 2075 + }, + { + "epoch": 1.15, + "learning_rate": 3.394838314076572e-05, + "loss": 0.9606, + "step": 2080 + }, + { + "epoch": 1.15, + "learning_rate": 3.3880742217777115e-05, + "loss": 0.9743, + "step": 2085 + }, + { + "epoch": 1.16, + "learning_rate": 3.381302681312759e-05, + "loss": 0.9469, + "step": 2090 + }, + { + "epoch": 1.16, + "learning_rate": 3.374523749473767e-05, + "loss": 0.949, + "step": 2095 + }, + { + "epoch": 1.16, + "learning_rate": 3.367737483114779e-05, + "loss": 0.9567, + "step": 2100 + }, + { + "epoch": 1.16, + "learning_rate": 3.360943939151351e-05, + "loss": 0.9718, + "step": 2105 + }, + { + "epoch": 1.17, + "learning_rate": 3.354143174560078e-05, + "loss": 0.9626, + "step": 2110 + }, + { + "epoch": 1.17, + "learning_rate": 3.3473352463781105e-05, + "loss": 0.9346, + "step": 2115 + }, + { + "epoch": 1.17, + "learning_rate": 3.340520211702681e-05, + "loss": 0.9208, + "step": 2120 + }, + { + "epoch": 1.18, + "learning_rate": 3.333698127690623e-05, + "loss": 0.9856, + "step": 2125 + }, + { + "epoch": 1.18, + "learning_rate": 3.326869051557891e-05, + "loss": 0.9049, + "step": 2130 + }, + { + "epoch": 1.18, + "learning_rate": 3.320033040579082e-05, + "loss": 0.9222, + "step": 2135 + }, + { + "epoch": 1.18, + "learning_rate": 3.3131901520869565e-05, + "loss": 0.9648, + "step": 2140 + }, + { + "epoch": 1.19, + "learning_rate": 3.306340443471951e-05, + "loss": 0.9538, + "step": 2145 + }, + { + "epoch": 1.19, + "learning_rate": 3.299483972181708e-05, + "loss": 0.9314, + "step": 2150 + }, + { + "epoch": 1.19, + "learning_rate": 3.292620795720583e-05, + "loss": 0.9576, + "step": 2155 + }, + { + "epoch": 1.19, + "learning_rate": 3.285750971649167e-05, + "loss": 0.9499, + "step": 2160 + }, + { + "epoch": 1.2, + "learning_rate": 3.278874557583807e-05, + "loss": 0.9568, + "step": 2165 + }, + { + "epoch": 1.2, + "learning_rate": 3.271991611196117e-05, + "loss": 0.9642, + "step": 2170 + }, + { + "epoch": 1.2, + "learning_rate": 3.265102190212497e-05, + "loss": 0.9526, + "step": 2175 + }, + { + "epoch": 1.21, + "learning_rate": 3.258206352413648e-05, + "loss": 0.933, + "step": 2180 + }, + { + "epoch": 1.21, + "learning_rate": 3.2513041556340887e-05, + "loss": 0.9683, + "step": 2185 + }, + { + "epoch": 1.21, + "learning_rate": 3.244395657761671e-05, + "loss": 0.9155, + "step": 2190 + }, + { + "epoch": 1.21, + "learning_rate": 3.2374809167370924e-05, + "loss": 0.9262, + "step": 2195 + }, + { + "epoch": 1.22, + "learning_rate": 3.230559990553409e-05, + "loss": 0.9778, + "step": 2200 + }, + { + "epoch": 1.22, + "learning_rate": 3.2236329372555544e-05, + "loss": 0.9577, + "step": 2205 + }, + { + "epoch": 1.22, + "learning_rate": 3.2166998149398465e-05, + "loss": 0.9286, + "step": 2210 + }, + { + "epoch": 1.22, + "learning_rate": 3.209760681753505e-05, + "loss": 0.9634, + "step": 2215 + }, + { + "epoch": 1.23, + "learning_rate": 3.2028155958941615e-05, + "loss": 0.9467, + "step": 2220 + }, + { + "epoch": 1.23, + "learning_rate": 3.195864615609373e-05, + "loss": 0.9543, + "step": 2225 + }, + { + "epoch": 1.23, + "learning_rate": 3.1889077991961304e-05, + "loss": 0.9914, + "step": 2230 + }, + { + "epoch": 1.24, + "learning_rate": 3.181945205000373e-05, + "loss": 0.9309, + "step": 2235 + }, + { + "epoch": 1.24, + "learning_rate": 3.1749768914164955e-05, + "loss": 0.9299, + "step": 2240 + }, + { + "epoch": 1.24, + "learning_rate": 3.168002916886864e-05, + "loss": 0.9462, + "step": 2245 + }, + { + "epoch": 1.24, + "learning_rate": 3.1610233399013194e-05, + "loss": 0.948, + "step": 2250 + }, + { + "epoch": 1.25, + "learning_rate": 3.15403821899669e-05, + "loss": 0.9335, + "step": 2255 + }, + { + "epoch": 1.25, + "learning_rate": 3.147047612756302e-05, + "loss": 0.952, + "step": 2260 + }, + { + "epoch": 1.25, + "learning_rate": 3.140051579809484e-05, + "loss": 0.9532, + "step": 2265 + }, + { + "epoch": 1.26, + "learning_rate": 3.133050178831079e-05, + "loss": 0.9853, + "step": 2270 + }, + { + "epoch": 1.26, + "learning_rate": 3.12604346854095e-05, + "loss": 0.8925, + "step": 2275 + }, + { + "epoch": 1.26, + "learning_rate": 3.119031507703491e-05, + "loss": 0.942, + "step": 2280 + }, + { + "epoch": 1.26, + "learning_rate": 3.112014355127129e-05, + "loss": 0.9132, + "step": 2285 + }, + { + "epoch": 1.27, + "learning_rate": 3.104992069663835e-05, + "loss": 0.9335, + "step": 2290 + }, + { + "epoch": 1.27, + "learning_rate": 3.0979647102086273e-05, + "loss": 0.9403, + "step": 2295 + }, + { + "epoch": 1.27, + "learning_rate": 3.090932335699081e-05, + "loss": 0.9246, + "step": 2300 + }, + { + "epoch": 1.27, + "learning_rate": 3.083895005114831e-05, + "loss": 0.912, + "step": 2305 + }, + { + "epoch": 1.28, + "learning_rate": 3.076852777477079e-05, + "loss": 0.9334, + "step": 2310 + }, + { + "epoch": 1.28, + "learning_rate": 3.069805711848096e-05, + "loss": 0.933, + "step": 2315 + }, + { + "epoch": 1.28, + "learning_rate": 3.062753867330729e-05, + "loss": 0.9348, + "step": 2320 + }, + { + "epoch": 1.29, + "learning_rate": 3.055697303067905e-05, + "loss": 0.9997, + "step": 2325 + }, + { + "epoch": 1.29, + "learning_rate": 3.048636078242137e-05, + "loss": 0.9196, + "step": 2330 + }, + { + "epoch": 1.29, + "learning_rate": 3.0415702520750235e-05, + "loss": 0.9735, + "step": 2335 + }, + { + "epoch": 1.29, + "learning_rate": 3.0344998838267525e-05, + "loss": 0.9395, + "step": 2340 + }, + { + "epoch": 1.3, + "learning_rate": 3.0274250327956093e-05, + "loss": 0.9455, + "step": 2345 + }, + { + "epoch": 1.3, + "learning_rate": 3.020345758317474e-05, + "loss": 0.968, + "step": 2350 + }, + { + "epoch": 1.3, + "learning_rate": 3.0132621197653245e-05, + "loss": 0.9403, + "step": 2355 + }, + { + "epoch": 1.3, + "learning_rate": 3.0061741765487418e-05, + "loss": 0.9267, + "step": 2360 + }, + { + "epoch": 1.31, + "learning_rate": 2.9990819881134073e-05, + "loss": 0.9734, + "step": 2365 + }, + { + "epoch": 1.31, + "learning_rate": 2.9919856139406093e-05, + "loss": 0.949, + "step": 2370 + }, + { + "epoch": 1.31, + "learning_rate": 2.9848851135467386e-05, + "loss": 0.9464, + "step": 2375 + }, + { + "epoch": 1.32, + "learning_rate": 2.977780546482794e-05, + "loss": 0.9709, + "step": 2380 + }, + { + "epoch": 1.32, + "learning_rate": 2.9706719723338795e-05, + "loss": 0.9202, + "step": 2385 + }, + { + "epoch": 1.32, + "learning_rate": 2.9635594507187074e-05, + "loss": 0.9625, + "step": 2390 + }, + { + "epoch": 1.32, + "learning_rate": 2.956443041289096e-05, + "loss": 0.9307, + "step": 2395 + }, + { + "epoch": 1.33, + "learning_rate": 2.9493228037294702e-05, + "loss": 0.907, + "step": 2400 + }, + { + "epoch": 1.33, + "learning_rate": 2.9421987977563613e-05, + "loss": 0.9487, + "step": 2405 + }, + { + "epoch": 1.33, + "learning_rate": 2.935071083117907e-05, + "loss": 0.9259, + "step": 2410 + }, + { + "epoch": 1.34, + "learning_rate": 2.9279397195933457e-05, + "loss": 0.9357, + "step": 2415 + }, + { + "epoch": 1.34, + "learning_rate": 2.920804766992521e-05, + "loss": 0.9149, + "step": 2420 + }, + { + "epoch": 1.34, + "learning_rate": 2.9136662851553787e-05, + "loss": 0.9664, + "step": 2425 + }, + { + "epoch": 1.34, + "learning_rate": 2.906524333951461e-05, + "loss": 0.973, + "step": 2430 + }, + { + "epoch": 1.35, + "learning_rate": 2.899378973279409e-05, + "loss": 0.9797, + "step": 2435 + }, + { + "epoch": 1.35, + "learning_rate": 2.892230263066459e-05, + "loss": 0.9604, + "step": 2440 + }, + { + "epoch": 1.35, + "learning_rate": 2.885078263267938e-05, + "loss": 0.9147, + "step": 2445 + }, + { + "epoch": 1.35, + "learning_rate": 2.8779230338667634e-05, + "loss": 0.9827, + "step": 2450 + }, + { + "epoch": 1.36, + "learning_rate": 2.870764634872939e-05, + "loss": 0.9362, + "step": 2455 + }, + { + "epoch": 1.36, + "learning_rate": 2.86360312632305e-05, + "loss": 0.9373, + "step": 2460 + }, + { + "epoch": 1.36, + "learning_rate": 2.8564385682797622e-05, + "loss": 0.955, + "step": 2465 + }, + { + "epoch": 1.37, + "learning_rate": 2.8492710208313177e-05, + "loss": 0.9108, + "step": 2470 + }, + { + "epoch": 1.37, + "learning_rate": 2.8421005440910303e-05, + "loss": 0.9259, + "step": 2475 + }, + { + "epoch": 1.37, + "learning_rate": 2.8349271981967797e-05, + "loss": 0.9333, + "step": 2480 + }, + { + "epoch": 1.37, + "learning_rate": 2.8277510433105102e-05, + "loss": 0.9437, + "step": 2485 + }, + { + "epoch": 1.38, + "learning_rate": 2.820572139617725e-05, + "loss": 0.9189, + "step": 2490 + }, + { + "epoch": 1.38, + "learning_rate": 2.8133905473269802e-05, + "loss": 0.9522, + "step": 2495 + }, + { + "epoch": 1.38, + "learning_rate": 2.8062063266693818e-05, + "loss": 0.9352, + "step": 2500 + }, + { + "epoch": 1.39, + "learning_rate": 2.7990195378980784e-05, + "loss": 0.9232, + "step": 2505 + }, + { + "epoch": 1.39, + "learning_rate": 2.7918302412877583e-05, + "loss": 0.8807, + "step": 2510 + }, + { + "epoch": 1.39, + "learning_rate": 2.7846384971341427e-05, + "loss": 0.9231, + "step": 2515 + }, + { + "epoch": 1.39, + "learning_rate": 2.7774443657534788e-05, + "loss": 0.9253, + "step": 2520 + }, + { + "epoch": 1.4, + "learning_rate": 2.770247907482036e-05, + "loss": 0.9646, + "step": 2525 + }, + { + "epoch": 1.4, + "learning_rate": 2.763049182675599e-05, + "loss": 0.9575, + "step": 2530 + }, + { + "epoch": 1.4, + "learning_rate": 2.7558482517089617e-05, + "loss": 0.9234, + "step": 2535 + }, + { + "epoch": 1.4, + "learning_rate": 2.748645174975421e-05, + "loss": 0.9215, + "step": 2540 + }, + { + "epoch": 1.41, + "learning_rate": 2.74144001288627e-05, + "loss": 0.959, + "step": 2545 + }, + { + "epoch": 1.41, + "learning_rate": 2.7342328258702894e-05, + "loss": 0.9509, + "step": 2550 + }, + { + "epoch": 1.41, + "learning_rate": 2.727023674373246e-05, + "loss": 0.8989, + "step": 2555 + }, + { + "epoch": 1.42, + "learning_rate": 2.7198126188573807e-05, + "loss": 0.9653, + "step": 2560 + }, + { + "epoch": 1.42, + "learning_rate": 2.7125997198009028e-05, + "loss": 0.9046, + "step": 2565 + }, + { + "epoch": 1.42, + "learning_rate": 2.7053850376974848e-05, + "loss": 0.9318, + "step": 2570 + }, + { + "epoch": 1.42, + "learning_rate": 2.6981686330557516e-05, + "loss": 0.9292, + "step": 2575 + }, + { + "epoch": 1.43, + "learning_rate": 2.6909505663987756e-05, + "loss": 0.99, + "step": 2580 + }, + { + "epoch": 1.43, + "learning_rate": 2.6837308982635678e-05, + "loss": 0.9737, + "step": 2585 + }, + { + "epoch": 1.43, + "learning_rate": 2.6765096892005726e-05, + "loss": 0.9649, + "step": 2590 + }, + { + "epoch": 1.43, + "learning_rate": 2.6692869997731545e-05, + "loss": 0.9687, + "step": 2595 + }, + { + "epoch": 1.44, + "learning_rate": 2.6620628905570964e-05, + "loss": 0.9708, + "step": 2600 + }, + { + "epoch": 1.44, + "learning_rate": 2.6548374221400884e-05, + "loss": 0.9498, + "step": 2605 + }, + { + "epoch": 1.44, + "learning_rate": 2.6476106551212188e-05, + "loss": 0.9512, + "step": 2610 + }, + { + "epoch": 1.45, + "learning_rate": 2.6403826501104682e-05, + "loss": 0.964, + "step": 2615 + }, + { + "epoch": 1.45, + "learning_rate": 2.6331534677281998e-05, + "loss": 0.9321, + "step": 2620 + }, + { + "epoch": 1.45, + "learning_rate": 2.6259231686046508e-05, + "loss": 0.9032, + "step": 2625 + }, + { + "epoch": 1.45, + "learning_rate": 2.6186918133794252e-05, + "loss": 0.9543, + "step": 2630 + }, + { + "epoch": 1.46, + "learning_rate": 2.6114594627009847e-05, + "loss": 0.9355, + "step": 2635 + }, + { + "epoch": 1.46, + "learning_rate": 2.604226177226137e-05, + "loss": 0.9111, + "step": 2640 + }, + { + "epoch": 1.46, + "learning_rate": 2.596992017619534e-05, + "loss": 0.9896, + "step": 2645 + }, + { + "epoch": 1.47, + "learning_rate": 2.589757044553155e-05, + "loss": 0.9715, + "step": 2650 + }, + { + "epoch": 1.47, + "learning_rate": 2.5825213187058045e-05, + "loss": 0.8911, + "step": 2655 + }, + { + "epoch": 1.47, + "learning_rate": 2.5752849007625986e-05, + "loss": 0.9446, + "step": 2660 + }, + { + "epoch": 1.47, + "learning_rate": 2.568047851414459e-05, + "loss": 0.9453, + "step": 2665 + }, + { + "epoch": 1.48, + "learning_rate": 2.5608102313576027e-05, + "loss": 0.9244, + "step": 2670 + }, + { + "epoch": 1.48, + "learning_rate": 2.553572101293033e-05, + "loss": 0.9478, + "step": 2675 + }, + { + "epoch": 1.48, + "learning_rate": 2.546333521926031e-05, + "loss": 0.9708, + "step": 2680 + }, + { + "epoch": 1.48, + "learning_rate": 2.5390945539656445e-05, + "loss": 0.9266, + "step": 2685 + }, + { + "epoch": 1.49, + "learning_rate": 2.5318552581241822e-05, + "loss": 0.9199, + "step": 2690 + }, + { + "epoch": 1.49, + "learning_rate": 2.524615695116702e-05, + "loss": 0.9666, + "step": 2695 + }, + { + "epoch": 1.49, + "learning_rate": 2.5173759256605027e-05, + "loss": 0.9247, + "step": 2700 + }, + { + "epoch": 1.5, + "learning_rate": 2.510136010474614e-05, + "loss": 0.9534, + "step": 2705 + }, + { + "epoch": 1.5, + "learning_rate": 2.5028960102792887e-05, + "loss": 0.9502, + "step": 2710 + }, + { + "epoch": 1.5, + "learning_rate": 2.4971039897207112e-05, + "loss": 0.9331, + "step": 2715 + }, + { + "epoch": 1.5, + "learning_rate": 2.4898639895253865e-05, + "loss": 0.9806, + "step": 2720 + }, + { + "epoch": 1.51, + "learning_rate": 2.4826240743394982e-05, + "loss": 0.9525, + "step": 2725 + }, + { + "epoch": 1.51, + "learning_rate": 2.4753843048832985e-05, + "loss": 0.937, + "step": 2730 + }, + { + "epoch": 1.51, + "learning_rate": 2.4681447418758187e-05, + "loss": 0.935, + "step": 2735 + }, + { + "epoch": 1.52, + "learning_rate": 2.460905446034356e-05, + "loss": 0.9662, + "step": 2740 + }, + { + "epoch": 1.52, + "learning_rate": 2.45366647807397e-05, + "loss": 0.9249, + "step": 2745 + }, + { + "epoch": 1.52, + "learning_rate": 2.446427898706967e-05, + "loss": 0.9673, + "step": 2750 + }, + { + "epoch": 1.52, + "learning_rate": 2.439189768642398e-05, + "loss": 0.9316, + "step": 2755 + }, + { + "epoch": 1.53, + "learning_rate": 2.431952148585541e-05, + "loss": 0.9297, + "step": 2760 + }, + { + "epoch": 1.53, + "learning_rate": 2.424715099237402e-05, + "loss": 0.9278, + "step": 2765 + }, + { + "epoch": 1.53, + "learning_rate": 2.4174786812941968e-05, + "loss": 0.8954, + "step": 2770 + }, + { + "epoch": 1.53, + "learning_rate": 2.4102429554468456e-05, + "loss": 0.9586, + "step": 2775 + }, + { + "epoch": 1.54, + "learning_rate": 2.4030079823804673e-05, + "loss": 0.9119, + "step": 2780 + }, + { + "epoch": 1.54, + "learning_rate": 2.395773822773863e-05, + "loss": 0.8949, + "step": 2785 + }, + { + "epoch": 1.54, + "learning_rate": 2.3885405372990166e-05, + "loss": 0.9506, + "step": 2790 + }, + { + "epoch": 1.55, + "learning_rate": 2.3813081866205754e-05, + "loss": 0.9087, + "step": 2795 + }, + { + "epoch": 1.55, + "learning_rate": 2.3740768313953494e-05, + "loss": 0.923, + "step": 2800 + }, + { + "epoch": 1.55, + "learning_rate": 2.3668465322718004e-05, + "loss": 1.0212, + "step": 2805 + }, + { + "epoch": 1.55, + "learning_rate": 2.359617349889532e-05, + "loss": 0.9182, + "step": 2810 + }, + { + "epoch": 1.56, + "learning_rate": 2.3523893448787818e-05, + "loss": 0.9316, + "step": 2815 + }, + { + "epoch": 1.56, + "learning_rate": 2.3451625778599122e-05, + "loss": 0.94, + "step": 2820 + }, + { + "epoch": 1.56, + "learning_rate": 2.3379371094429038e-05, + "loss": 0.9181, + "step": 2825 + }, + { + "epoch": 1.56, + "learning_rate": 2.3307130002268457e-05, + "loss": 0.937, + "step": 2830 + }, + { + "epoch": 1.57, + "learning_rate": 2.3234903107994287e-05, + "loss": 0.9026, + "step": 2835 + }, + { + "epoch": 1.57, + "learning_rate": 2.3162691017364317e-05, + "loss": 0.954, + "step": 2840 + }, + { + "epoch": 1.57, + "learning_rate": 2.3090494336012253e-05, + "loss": 0.9661, + "step": 2845 + }, + { + "epoch": 1.58, + "learning_rate": 2.3018313669442483e-05, + "loss": 0.9127, + "step": 2850 + }, + { + "epoch": 1.58, + "learning_rate": 2.2946149623025158e-05, + "loss": 0.9317, + "step": 2855 + }, + { + "epoch": 1.58, + "learning_rate": 2.2874002801990978e-05, + "loss": 0.9856, + "step": 2860 + }, + { + "epoch": 1.58, + "learning_rate": 2.28018738114262e-05, + "loss": 1.0021, + "step": 2865 + }, + { + "epoch": 1.59, + "learning_rate": 2.272976325626755e-05, + "loss": 0.9655, + "step": 2870 + }, + { + "epoch": 1.59, + "learning_rate": 2.265767174129711e-05, + "loss": 0.9619, + "step": 2875 + }, + { + "epoch": 1.59, + "learning_rate": 2.2585599871137313e-05, + "loss": 0.9383, + "step": 2880 + }, + { + "epoch": 1.6, + "learning_rate": 2.251354825024579e-05, + "loss": 0.9332, + "step": 2885 + }, + { + "epoch": 1.6, + "learning_rate": 2.244151748291039e-05, + "loss": 0.9544, + "step": 2890 + }, + { + "epoch": 1.6, + "learning_rate": 2.236950817324401e-05, + "loss": 0.9343, + "step": 2895 + }, + { + "epoch": 1.6, + "learning_rate": 2.2297520925179647e-05, + "loss": 0.9189, + "step": 2900 + }, + { + "epoch": 1.61, + "learning_rate": 2.222555634246521e-05, + "loss": 0.9341, + "step": 2905 + }, + { + "epoch": 1.61, + "learning_rate": 2.215361502865858e-05, + "loss": 0.9567, + "step": 2910 + }, + { + "epoch": 1.61, + "learning_rate": 2.2081697587122423e-05, + "loss": 0.9047, + "step": 2915 + }, + { + "epoch": 1.61, + "learning_rate": 2.200980462101922e-05, + "loss": 0.9126, + "step": 2920 + }, + { + "epoch": 1.62, + "learning_rate": 2.1937936733306195e-05, + "loss": 0.9523, + "step": 2925 + }, + { + "epoch": 1.62, + "learning_rate": 2.18660945267302e-05, + "loss": 0.8802, + "step": 2930 + }, + { + "epoch": 1.62, + "learning_rate": 2.179427860382276e-05, + "loss": 0.9197, + "step": 2935 + }, + { + "epoch": 1.63, + "learning_rate": 2.1722489566894903e-05, + "loss": 0.9255, + "step": 2940 + }, + { + "epoch": 1.63, + "learning_rate": 2.1650728018032206e-05, + "loss": 0.8921, + "step": 2945 + }, + { + "epoch": 1.63, + "learning_rate": 2.15789945590897e-05, + "loss": 0.9607, + "step": 2950 + }, + { + "epoch": 1.63, + "learning_rate": 2.150728979168683e-05, + "loss": 0.9755, + "step": 2955 + }, + { + "epoch": 1.64, + "learning_rate": 2.1435614317202384e-05, + "loss": 0.9943, + "step": 2960 + }, + { + "epoch": 1.64, + "learning_rate": 2.1363968736769508e-05, + "loss": 0.9463, + "step": 2965 + }, + { + "epoch": 1.64, + "learning_rate": 2.1292353651270617e-05, + "loss": 0.9107, + "step": 2970 + }, + { + "epoch": 1.65, + "learning_rate": 2.1220769661332365e-05, + "loss": 0.9311, + "step": 2975 + }, + { + "epoch": 1.65, + "learning_rate": 2.1149217367320622e-05, + "loss": 0.9459, + "step": 2980 + }, + { + "epoch": 1.65, + "learning_rate": 2.107769736933541e-05, + "loss": 0.9439, + "step": 2985 + }, + { + "epoch": 1.65, + "learning_rate": 2.100621026720591e-05, + "loss": 0.9719, + "step": 2990 + }, + { + "epoch": 1.66, + "learning_rate": 2.093475666048539e-05, + "loss": 0.9569, + "step": 2995 + }, + { + "epoch": 1.66, + "learning_rate": 2.0863337148446222e-05, + "loss": 0.9308, + "step": 3000 + }, + { + "epoch": 1.66, + "learning_rate": 2.07919523300748e-05, + "loss": 0.9269, + "step": 3005 + }, + { + "epoch": 1.66, + "learning_rate": 2.0720602804066552e-05, + "loss": 0.9358, + "step": 3010 + }, + { + "epoch": 1.67, + "learning_rate": 2.0649289168820943e-05, + "loss": 0.9291, + "step": 3015 + }, + { + "epoch": 1.67, + "learning_rate": 2.0578012022436386e-05, + "loss": 0.969, + "step": 3020 + }, + { + "epoch": 1.67, + "learning_rate": 2.0506771962705304e-05, + "loss": 0.9319, + "step": 3025 + }, + { + "epoch": 1.68, + "learning_rate": 2.0435569587109042e-05, + "loss": 0.9574, + "step": 3030 + }, + { + "epoch": 1.68, + "learning_rate": 2.036440549281293e-05, + "loss": 0.9654, + "step": 3035 + }, + { + "epoch": 1.68, + "learning_rate": 2.0293280276661204e-05, + "loss": 0.9742, + "step": 3040 + }, + { + "epoch": 1.68, + "learning_rate": 2.0222194535172067e-05, + "loss": 0.9014, + "step": 3045 + }, + { + "epoch": 1.69, + "learning_rate": 2.0151148864532623e-05, + "loss": 0.9246, + "step": 3050 + }, + { + "epoch": 1.69, + "learning_rate": 2.0080143860593913e-05, + "loss": 0.9522, + "step": 3055 + }, + { + "epoch": 1.69, + "learning_rate": 2.0009180118865933e-05, + "loss": 0.9567, + "step": 3060 + }, + { + "epoch": 1.69, + "learning_rate": 1.9938258234512588e-05, + "loss": 0.983, + "step": 3065 + }, + { + "epoch": 1.7, + "learning_rate": 1.9867378802346764e-05, + "loss": 0.8722, + "step": 3070 + }, + { + "epoch": 1.7, + "learning_rate": 1.979654241682527e-05, + "loss": 0.9122, + "step": 3075 + }, + { + "epoch": 1.7, + "learning_rate": 1.972574967204391e-05, + "loss": 0.9362, + "step": 3080 + }, + { + "epoch": 1.71, + "learning_rate": 1.9655001161732478e-05, + "loss": 0.8944, + "step": 3085 + }, + { + "epoch": 1.71, + "learning_rate": 1.9584297479249774e-05, + "loss": 0.9329, + "step": 3090 + }, + { + "epoch": 1.71, + "learning_rate": 1.9513639217578636e-05, + "loss": 0.9707, + "step": 3095 + }, + { + "epoch": 1.71, + "learning_rate": 1.9443026969320955e-05, + "loss": 0.9367, + "step": 3100 + }, + { + "epoch": 1.72, + "learning_rate": 1.937246132669272e-05, + "loss": 0.896, + "step": 3105 + }, + { + "epoch": 1.72, + "learning_rate": 1.9301942881519047e-05, + "loss": 0.97, + "step": 3110 + }, + { + "epoch": 1.72, + "learning_rate": 1.9231472225229216e-05, + "loss": 0.9638, + "step": 3115 + }, + { + "epoch": 1.73, + "learning_rate": 1.9161049948851684e-05, + "loss": 0.9561, + "step": 3120 + }, + { + "epoch": 1.73, + "learning_rate": 1.9090676643009193e-05, + "loss": 0.9734, + "step": 3125 + }, + { + "epoch": 1.73, + "learning_rate": 1.902035289791373e-05, + "loss": 0.9651, + "step": 3130 + }, + { + "epoch": 1.73, + "learning_rate": 1.8950079303361658e-05, + "loss": 0.9489, + "step": 3135 + }, + { + "epoch": 1.74, + "learning_rate": 1.8879856448728723e-05, + "loss": 0.9893, + "step": 3140 + }, + { + "epoch": 1.74, + "learning_rate": 1.8809684922965097e-05, + "loss": 0.9549, + "step": 3145 + }, + { + "epoch": 1.74, + "learning_rate": 1.8739565314590507e-05, + "loss": 0.9196, + "step": 3150 + }, + { + "epoch": 1.74, + "learning_rate": 1.8669498211689216e-05, + "loss": 0.9568, + "step": 3155 + }, + { + "epoch": 1.75, + "learning_rate": 1.859948420190517e-05, + "loss": 0.904, + "step": 3160 + }, + { + "epoch": 1.75, + "learning_rate": 1.852952387243698e-05, + "loss": 0.9375, + "step": 3165 + }, + { + "epoch": 1.75, + "learning_rate": 1.8459617810033096e-05, + "loss": 0.9048, + "step": 3170 + }, + { + "epoch": 1.76, + "learning_rate": 1.83897666009868e-05, + "loss": 0.9514, + "step": 3175 + }, + { + "epoch": 1.76, + "learning_rate": 1.8319970831131363e-05, + "loss": 0.8855, + "step": 3180 + }, + { + "epoch": 1.76, + "learning_rate": 1.825023108583505e-05, + "loss": 0.9119, + "step": 3185 + }, + { + "epoch": 1.76, + "learning_rate": 1.818054794999628e-05, + "loss": 0.9733, + "step": 3190 + }, + { + "epoch": 1.77, + "learning_rate": 1.8110922008038705e-05, + "loss": 0.9597, + "step": 3195 + }, + { + "epoch": 1.77, + "learning_rate": 1.8041353843906275e-05, + "loss": 0.8952, + "step": 3200 + }, + { + "epoch": 1.77, + "learning_rate": 1.797184404105839e-05, + "loss": 0.8934, + "step": 3205 + }, + { + "epoch": 1.77, + "learning_rate": 1.7902393182464955e-05, + "loss": 0.9299, + "step": 3210 + }, + { + "epoch": 1.78, + "learning_rate": 1.7833001850601544e-05, + "loss": 0.9247, + "step": 3215 + }, + { + "epoch": 1.78, + "learning_rate": 1.7763670627444465e-05, + "loss": 0.9672, + "step": 3220 + }, + { + "epoch": 1.78, + "learning_rate": 1.7694400094465913e-05, + "loss": 0.9451, + "step": 3225 + }, + { + "epoch": 1.79, + "learning_rate": 1.7625190832629085e-05, + "loss": 0.9294, + "step": 3230 + }, + { + "epoch": 1.79, + "learning_rate": 1.7556043422383293e-05, + "loss": 0.9145, + "step": 3235 + }, + { + "epoch": 1.79, + "learning_rate": 1.7486958443659112e-05, + "loss": 0.9508, + "step": 3240 + }, + { + "epoch": 1.79, + "learning_rate": 1.7417936475863526e-05, + "loss": 0.8725, + "step": 3245 + }, + { + "epoch": 1.8, + "learning_rate": 1.7348978097875036e-05, + "loss": 0.9195, + "step": 3250 + }, + { + "epoch": 1.8, + "learning_rate": 1.728008388803883e-05, + "loss": 0.933, + "step": 3255 + }, + { + "epoch": 1.8, + "learning_rate": 1.7211254424161933e-05, + "loss": 0.9747, + "step": 3260 + }, + { + "epoch": 1.81, + "learning_rate": 1.7142490283508324e-05, + "loss": 0.9168, + "step": 3265 + }, + { + "epoch": 1.81, + "learning_rate": 1.707379204279418e-05, + "loss": 0.9844, + "step": 3270 + }, + { + "epoch": 1.81, + "learning_rate": 1.700516027818293e-05, + "loss": 0.9071, + "step": 3275 + }, + { + "epoch": 1.81, + "learning_rate": 1.6936595565280488e-05, + "loss": 0.9311, + "step": 3280 + }, + { + "epoch": 1.82, + "learning_rate": 1.686809847913045e-05, + "loss": 0.9419, + "step": 3285 + }, + { + "epoch": 1.82, + "learning_rate": 1.679966959420918e-05, + "loss": 0.9679, + "step": 3290 + }, + { + "epoch": 1.82, + "learning_rate": 1.67313094844211e-05, + "loss": 0.9601, + "step": 3295 + }, + { + "epoch": 1.82, + "learning_rate": 1.6663018723093774e-05, + "loss": 0.9022, + "step": 3300 + }, + { + "epoch": 1.83, + "learning_rate": 1.6594797882973196e-05, + "loss": 0.8925, + "step": 3305 + }, + { + "epoch": 1.83, + "learning_rate": 1.6526647536218894e-05, + "loss": 0.9238, + "step": 3310 + }, + { + "epoch": 1.83, + "learning_rate": 1.6458568254399225e-05, + "loss": 0.9632, + "step": 3315 + }, + { + "epoch": 1.84, + "learning_rate": 1.6390560608486496e-05, + "loss": 0.9164, + "step": 3320 + }, + { + "epoch": 1.84, + "learning_rate": 1.6322625168852217e-05, + "loss": 0.9505, + "step": 3325 + }, + { + "epoch": 1.84, + "learning_rate": 1.6254762505262338e-05, + "loss": 0.9622, + "step": 3330 + }, + { + "epoch": 1.84, + "learning_rate": 1.618697318687241e-05, + "loss": 0.9204, + "step": 3335 + }, + { + "epoch": 1.85, + "learning_rate": 1.6119257782222895e-05, + "loss": 0.9504, + "step": 3340 + }, + { + "epoch": 1.85, + "learning_rate": 1.6051616859234285e-05, + "loss": 0.9384, + "step": 3345 + }, + { + "epoch": 1.85, + "learning_rate": 1.5984050985202474e-05, + "loss": 0.9374, + "step": 3350 + }, + { + "epoch": 1.86, + "learning_rate": 1.591656072679387e-05, + "loss": 0.9937, + "step": 3355 + }, + { + "epoch": 1.86, + "learning_rate": 1.5849146650040737e-05, + "loss": 0.9587, + "step": 3360 + }, + { + "epoch": 1.86, + "learning_rate": 1.5781809320336412e-05, + "loss": 0.9312, + "step": 3365 + }, + { + "epoch": 1.86, + "learning_rate": 1.5714549302430536e-05, + "loss": 0.9343, + "step": 3370 + }, + { + "epoch": 1.87, + "learning_rate": 1.5647367160424393e-05, + "loss": 0.9419, + "step": 3375 + }, + { + "epoch": 1.87, + "learning_rate": 1.558026345776608e-05, + "loss": 0.9371, + "step": 3380 + }, + { + "epoch": 1.87, + "learning_rate": 1.551323875724587e-05, + "loss": 0.913, + "step": 3385 + }, + { + "epoch": 1.87, + "learning_rate": 1.5446293620991437e-05, + "loss": 0.9195, + "step": 3390 + }, + { + "epoch": 1.88, + "learning_rate": 1.5379428610463174e-05, + "loss": 0.9164, + "step": 3395 + }, + { + "epoch": 1.88, + "learning_rate": 1.531264428644945e-05, + "loss": 0.9256, + "step": 3400 + }, + { + "epoch": 1.88, + "learning_rate": 1.5245941209061953e-05, + "loss": 0.9658, + "step": 3405 + }, + { + "epoch": 1.89, + "learning_rate": 1.517931993773094e-05, + "loss": 0.9397, + "step": 3410 + }, + { + "epoch": 1.89, + "learning_rate": 1.5112781031200569e-05, + "loss": 0.915, + "step": 3415 + }, + { + "epoch": 1.89, + "learning_rate": 1.5046325047524251e-05, + "loss": 0.9116, + "step": 3420 + }, + { + "epoch": 1.89, + "learning_rate": 1.4979952544059888e-05, + "loss": 0.9192, + "step": 3425 + }, + { + "epoch": 1.9, + "learning_rate": 1.4913664077465289e-05, + "loss": 0.924, + "step": 3430 + }, + { + "epoch": 1.9, + "learning_rate": 1.4847460203693408e-05, + "loss": 0.9579, + "step": 3435 + }, + { + "epoch": 1.9, + "learning_rate": 1.4781341477987776e-05, + "loss": 0.9816, + "step": 3440 + }, + { + "epoch": 1.9, + "learning_rate": 1.4715308454877758e-05, + "loss": 0.9412, + "step": 3445 + }, + { + "epoch": 1.91, + "learning_rate": 1.4649361688173979e-05, + "loss": 0.9413, + "step": 3450 + }, + { + "epoch": 1.91, + "learning_rate": 1.458350173096361e-05, + "loss": 0.9425, + "step": 3455 + }, + { + "epoch": 1.91, + "learning_rate": 1.4517729135605795e-05, + "loss": 0.9132, + "step": 3460 + }, + { + "epoch": 1.92, + "learning_rate": 1.4452044453726942e-05, + "loss": 0.9455, + "step": 3465 + }, + { + "epoch": 1.92, + "learning_rate": 1.4386448236216174e-05, + "loss": 0.9543, + "step": 3470 + }, + { + "epoch": 1.92, + "learning_rate": 1.4320941033220667e-05, + "loss": 0.9118, + "step": 3475 + }, + { + "epoch": 1.92, + "learning_rate": 1.4255523394141041e-05, + "loss": 0.9425, + "step": 3480 + }, + { + "epoch": 1.93, + "learning_rate": 1.4190195867626749e-05, + "loss": 0.8958, + "step": 3485 + }, + { + "epoch": 1.93, + "learning_rate": 1.4124959001571497e-05, + "loss": 0.944, + "step": 3490 + }, + { + "epoch": 1.93, + "learning_rate": 1.4059813343108616e-05, + "loss": 0.9611, + "step": 3495 + }, + { + "epoch": 1.94, + "learning_rate": 1.3994759438606501e-05, + "loss": 0.9448, + "step": 3500 + }, + { + "epoch": 1.94, + "learning_rate": 1.3929797833664013e-05, + "loss": 0.9659, + "step": 3505 + }, + { + "epoch": 1.94, + "learning_rate": 1.3864929073105922e-05, + "loss": 0.9178, + "step": 3510 + }, + { + "epoch": 1.94, + "learning_rate": 1.3800153700978282e-05, + "loss": 0.8965, + "step": 3515 + }, + { + "epoch": 1.95, + "learning_rate": 1.373547226054398e-05, + "loss": 0.9198, + "step": 3520 + }, + { + "epoch": 1.95, + "learning_rate": 1.367088529427803e-05, + "loss": 0.9398, + "step": 3525 + }, + { + "epoch": 1.95, + "learning_rate": 1.3606393343863182e-05, + "loss": 0.9423, + "step": 3530 + }, + { + "epoch": 1.95, + "learning_rate": 1.3541996950185227e-05, + "loss": 0.9592, + "step": 3535 + }, + { + "epoch": 1.96, + "learning_rate": 1.3477696653328598e-05, + "loss": 0.9489, + "step": 3540 + }, + { + "epoch": 1.96, + "learning_rate": 1.3413492992571713e-05, + "loss": 0.963, + "step": 3545 + }, + { + "epoch": 1.96, + "learning_rate": 1.3349386506382586e-05, + "loss": 0.9449, + "step": 3550 + }, + { + "epoch": 1.97, + "learning_rate": 1.3285377732414172e-05, + "loss": 0.9043, + "step": 3555 + }, + { + "epoch": 1.97, + "learning_rate": 1.3221467207499972e-05, + "loss": 0.9362, + "step": 3560 + }, + { + "epoch": 1.97, + "learning_rate": 1.3157655467649463e-05, + "loss": 0.945, + "step": 3565 + }, + { + "epoch": 1.97, + "learning_rate": 1.3093943048043634e-05, + "loss": 0.8693, + "step": 3570 + }, + { + "epoch": 1.98, + "learning_rate": 1.3030330483030479e-05, + "loss": 0.9659, + "step": 3575 + }, + { + "epoch": 1.98, + "learning_rate": 1.2966818306120535e-05, + "loss": 0.9036, + "step": 3580 + }, + { + "epoch": 1.98, + "learning_rate": 1.2903407049982386e-05, + "loss": 0.9282, + "step": 3585 + }, + { + "epoch": 1.99, + "learning_rate": 1.2840097246438215e-05, + "loss": 0.8747, + "step": 3590 + }, + { + "epoch": 1.99, + "learning_rate": 1.277688942645934e-05, + "loss": 0.936, + "step": 3595 + }, + { + "epoch": 1.99, + "learning_rate": 1.2713784120161725e-05, + "loss": 0.8945, + "step": 3600 + }, + { + "epoch": 1.99, + "learning_rate": 1.2650781856801598e-05, + "loss": 0.9273, + "step": 3605 + }, + { + "epoch": 2.0, + "learning_rate": 1.258788316477097e-05, + "loss": 0.9632, + "step": 3610 + }, + { + "epoch": 2.0, + "learning_rate": 1.2525088571593202e-05, + "loss": 0.8641, + "step": 3615 + }, + { + "epoch": 2.0, + "learning_rate": 1.2462398603918607e-05, + "loss": 0.9314, + "step": 3620 + }, + { + "epoch": 2.0, + "learning_rate": 1.2399813787520006e-05, + "loss": 0.9169, + "step": 3625 + }, + { + "epoch": 2.01, + "learning_rate": 1.2337334647288334e-05, + "loss": 0.9089, + "step": 3630 + }, + { + "epoch": 2.01, + "learning_rate": 1.2274961707228228e-05, + "loss": 0.9039, + "step": 3635 + }, + { + "epoch": 2.01, + "learning_rate": 1.2212695490453646e-05, + "loss": 0.9378, + "step": 3640 + }, + { + "epoch": 2.02, + "learning_rate": 1.2150536519183475e-05, + "loss": 0.9297, + "step": 3645 + }, + { + "epoch": 2.02, + "learning_rate": 1.2088485314737108e-05, + "loss": 0.9488, + "step": 3650 + }, + { + "epoch": 2.02, + "learning_rate": 1.2026542397530186e-05, + "loss": 0.9625, + "step": 3655 + }, + { + "epoch": 2.02, + "learning_rate": 1.1964708287070073e-05, + "loss": 0.8874, + "step": 3660 + }, + { + "epoch": 2.03, + "learning_rate": 1.1902983501951666e-05, + "loss": 0.9224, + "step": 3665 + }, + { + "epoch": 2.03, + "learning_rate": 1.1841368559852892e-05, + "loss": 0.9442, + "step": 3670 + }, + { + "epoch": 2.03, + "learning_rate": 1.17798639775305e-05, + "loss": 0.8688, + "step": 3675 + }, + { + "epoch": 2.03, + "learning_rate": 1.1718470270815608e-05, + "loss": 0.8912, + "step": 3680 + }, + { + "epoch": 2.04, + "learning_rate": 1.1657187954609496e-05, + "loss": 0.9131, + "step": 3685 + }, + { + "epoch": 2.04, + "learning_rate": 1.1596017542879168e-05, + "loss": 0.9827, + "step": 3690 + }, + { + "epoch": 2.04, + "learning_rate": 1.1534959548653132e-05, + "loss": 0.8588, + "step": 3695 + }, + { + "epoch": 2.05, + "learning_rate": 1.147401448401706e-05, + "loss": 0.9359, + "step": 3700 + } + ], + "max_steps": 5424, + "num_train_epochs": 3, + "total_flos": 1.2023495325332275e+18, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-3700/training_args.bin b/checkpoint-3700/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..77433459daca6603950afc463ff989e817f81ac3 --- /dev/null +++ b/checkpoint-3700/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0f6a8853599a52b1f15984d75ce4b6434c335fdc67d4509d0b2ca0d8c613590a +size 3362 diff --git a/checkpoint-3800/README.md b/checkpoint-3800/README.md new file mode 100644 index 0000000000000000000000000000000000000000..53c0fe6b436905ad7a2c060e98fe1ebdeac1a2fe --- /dev/null +++ b/checkpoint-3800/README.md @@ -0,0 +1,15 @@ +--- +library_name: peft +--- +## Training procedure + + +The following `bitsandbytes` quantization config was used during training: +- load_in_8bit: True +- llm_int8_threshold: 6.0 +- llm_int8_skip_modules: None +- llm_int8_enable_fp32_cpu_offload: False +### Framework versions + + +- PEFT 0.5.0 diff --git a/checkpoint-3800/adapter_config.json b/checkpoint-3800/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..514ae99ac7471cc26016f7575e44dd8304f29b07 --- /dev/null +++ b/checkpoint-3800/adapter_config.json @@ -0,0 +1,21 @@ +{ + "auto_mapping": null, + "base_model_name_or_path": "meta-llama/Llama-2-7b-chat-hf", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "q_proj", + "v_proj" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-3800/adapter_model.bin b/checkpoint-3800/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..ae56fdd70fd8eccca449099f5ef243f3692520df --- /dev/null +++ b/checkpoint-3800/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9d7916085b920409417879716d8b11eb09ca380f336ad8ae54429c7701c22e61 +size 16821197 diff --git a/checkpoint-3800/finetuning_args.json b/checkpoint-3800/finetuning_args.json new file mode 100644 index 0000000000000000000000000000000000000000..d01efc206b59c6f88548e8f3940579f2ed2af33b --- /dev/null +++ b/checkpoint-3800/finetuning_args.json @@ -0,0 +1,16 @@ +{ + "dpo_beta": 0.1, + "finetuning_type": "lora", + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "lora_rank": 8, + "lora_target": [ + "q_proj", + "v_proj" + ], + "name_module_trainable": "mlp", + "num_hidden_layers": 32, + "num_layer_trainable": 3, + "ppo_score_norm": false, + "resume_lora_training": true +} diff --git a/checkpoint-3800/optimizer.pt b/checkpoint-3800/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..7f3b0cf9688d1bd949cc9bb30499149d15104e32 --- /dev/null +++ b/checkpoint-3800/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:da6a79f591a3b82409149b828711dc4b708e678e635641f12a8f65cbb78e24db +size 33629893 diff --git a/checkpoint-3800/rng_state.pth b/checkpoint-3800/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..25259e4c54217361aa942700e5a859af5e4e5597 --- /dev/null +++ b/checkpoint-3800/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:46e288ed1afffabcdc2493965fe99b39867dbdfd5c62df7c4bf83464098c6fd2 +size 14575 diff --git a/checkpoint-3800/scaler.pt b/checkpoint-3800/scaler.pt new file mode 100644 index 0000000000000000000000000000000000000000..d0b4111abbacd191d4d2184158a5dd62f49497b1 --- /dev/null +++ b/checkpoint-3800/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:af3c6997748f6e7a2d25d25e222d97de21736c74a85cd39cfd89ac085d227ede +size 557 diff --git a/checkpoint-3800/scheduler.pt b/checkpoint-3800/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..d110ac3ad62e02a5e996e65ef1e742cc6bed6406 --- /dev/null +++ b/checkpoint-3800/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6f693de062b37faae6110783bcb36f18fd09b5cceee99a0849d9ca51a008b416 +size 627 diff --git a/checkpoint-3800/trainer_state.json b/checkpoint-3800/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..f99fdf12bde6d19590f8206662af3a0d50781cef --- /dev/null +++ b/checkpoint-3800/trainer_state.json @@ -0,0 +1,4576 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 2.1011888305225326, + "global_step": 3800, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.9999895164082156e-05, + "loss": 1.6349, + "step": 5 + }, + { + "epoch": 0.01, + "learning_rate": 4.999958065720787e-05, + "loss": 1.6199, + "step": 10 + }, + { + "epoch": 0.01, + "learning_rate": 4.999905648201487e-05, + "loss": 1.4834, + "step": 15 + }, + { + "epoch": 0.01, + "learning_rate": 4.999832264289934e-05, + "loss": 1.3882, + "step": 20 + }, + { + "epoch": 0.01, + "learning_rate": 4.999737914601591e-05, + "loss": 1.3679, + "step": 25 + }, + { + "epoch": 0.02, + "learning_rate": 4.999622599927756e-05, + "loss": 1.2396, + "step": 30 + }, + { + "epoch": 0.02, + "learning_rate": 4.999486321235559e-05, + "loss": 1.321, + "step": 35 + }, + { + "epoch": 0.02, + "learning_rate": 4.9993290796679516e-05, + "loss": 1.2874, + "step": 40 + }, + { + "epoch": 0.02, + "learning_rate": 4.999150876543699e-05, + "loss": 1.2607, + "step": 45 + }, + { + "epoch": 0.03, + "learning_rate": 4.9989517133573694e-05, + "loss": 1.2454, + "step": 50 + }, + { + "epoch": 0.03, + "learning_rate": 4.9987315917793174e-05, + "loss": 1.2799, + "step": 55 + }, + { + "epoch": 0.03, + "learning_rate": 4.998490513655676e-05, + "loss": 1.2575, + "step": 60 + }, + { + "epoch": 0.04, + "learning_rate": 4.998228481008337e-05, + "loss": 1.2404, + "step": 65 + }, + { + "epoch": 0.04, + "learning_rate": 4.997945496034934e-05, + "loss": 1.2219, + "step": 70 + }, + { + "epoch": 0.04, + "learning_rate": 4.9976415611088267e-05, + "loss": 1.2241, + "step": 75 + }, + { + "epoch": 0.04, + "learning_rate": 4.997316678779079e-05, + "loss": 1.1716, + "step": 80 + }, + { + "epoch": 0.05, + "learning_rate": 4.996970851770438e-05, + "loss": 1.1883, + "step": 85 + }, + { + "epoch": 0.05, + "learning_rate": 4.9966040829833115e-05, + "loss": 1.205, + "step": 90 + }, + { + "epoch": 0.05, + "learning_rate": 4.9962163754937426e-05, + "loss": 1.1246, + "step": 95 + }, + { + "epoch": 0.06, + "learning_rate": 4.995807732553384e-05, + "loss": 1.1636, + "step": 100 + }, + { + "epoch": 0.06, + "learning_rate": 4.9953781575894723e-05, + "loss": 1.158, + "step": 105 + }, + { + "epoch": 0.06, + "learning_rate": 4.9949276542048e-05, + "loss": 1.1477, + "step": 110 + }, + { + "epoch": 0.06, + "learning_rate": 4.9944562261776805e-05, + "loss": 1.1678, + "step": 115 + }, + { + "epoch": 0.07, + "learning_rate": 4.9939638774619216e-05, + "loss": 1.1501, + "step": 120 + }, + { + "epoch": 0.07, + "learning_rate": 4.99345061218679e-05, + "loss": 1.1955, + "step": 125 + }, + { + "epoch": 0.07, + "learning_rate": 4.9929164346569756e-05, + "loss": 1.1724, + "step": 130 + }, + { + "epoch": 0.07, + "learning_rate": 4.9923613493525576e-05, + "loss": 1.177, + "step": 135 + }, + { + "epoch": 0.08, + "learning_rate": 4.991785360928968e-05, + "loss": 1.1418, + "step": 140 + }, + { + "epoch": 0.08, + "learning_rate": 4.991188474216947e-05, + "loss": 1.1898, + "step": 145 + }, + { + "epoch": 0.08, + "learning_rate": 4.9905706942225094e-05, + "loss": 1.1479, + "step": 150 + }, + { + "epoch": 0.09, + "learning_rate": 4.9899320261268966e-05, + "loss": 1.1356, + "step": 155 + }, + { + "epoch": 0.09, + "learning_rate": 4.989272475286537e-05, + "loss": 1.1397, + "step": 160 + }, + { + "epoch": 0.09, + "learning_rate": 4.9885920472330004e-05, + "loss": 1.1215, + "step": 165 + }, + { + "epoch": 0.09, + "learning_rate": 4.9878907476729516e-05, + "loss": 1.167, + "step": 170 + }, + { + "epoch": 0.1, + "learning_rate": 4.9871685824881e-05, + "loss": 1.1219, + "step": 175 + }, + { + "epoch": 0.1, + "learning_rate": 4.9864255577351534e-05, + "loss": 1.0835, + "step": 180 + }, + { + "epoch": 0.1, + "learning_rate": 4.985661679645769e-05, + "loss": 1.0721, + "step": 185 + }, + { + "epoch": 0.11, + "learning_rate": 4.9848769546264915e-05, + "loss": 1.0692, + "step": 190 + }, + { + "epoch": 0.11, + "learning_rate": 4.9840713892587146e-05, + "loss": 1.0488, + "step": 195 + }, + { + "epoch": 0.11, + "learning_rate": 4.983244990298609e-05, + "loss": 1.1285, + "step": 200 + }, + { + "epoch": 0.11, + "learning_rate": 4.982397764677081e-05, + "loss": 1.0832, + "step": 205 + }, + { + "epoch": 0.12, + "learning_rate": 4.981529719499704e-05, + "loss": 1.0652, + "step": 210 + }, + { + "epoch": 0.12, + "learning_rate": 4.980640862046663e-05, + "loss": 1.1043, + "step": 215 + }, + { + "epoch": 0.12, + "learning_rate": 4.979731199772693e-05, + "loss": 1.112, + "step": 220 + }, + { + "epoch": 0.12, + "learning_rate": 4.9788007403070146e-05, + "loss": 1.1029, + "step": 225 + }, + { + "epoch": 0.13, + "learning_rate": 4.977849491453277e-05, + "loss": 1.0869, + "step": 230 + }, + { + "epoch": 0.13, + "learning_rate": 4.976877461189481e-05, + "loss": 1.0843, + "step": 235 + }, + { + "epoch": 0.13, + "learning_rate": 4.975884657667922e-05, + "loss": 1.0789, + "step": 240 + }, + { + "epoch": 0.14, + "learning_rate": 4.974871089215118e-05, + "loss": 1.0449, + "step": 245 + }, + { + "epoch": 0.14, + "learning_rate": 4.9738367643317405e-05, + "loss": 1.1053, + "step": 250 + }, + { + "epoch": 0.14, + "learning_rate": 4.9727816916925395e-05, + "loss": 1.0651, + "step": 255 + }, + { + "epoch": 0.14, + "learning_rate": 4.971705880146276e-05, + "loss": 1.0828, + "step": 260 + }, + { + "epoch": 0.15, + "learning_rate": 4.970609338715646e-05, + "loss": 1.0932, + "step": 265 + }, + { + "epoch": 0.15, + "learning_rate": 4.969492076597203e-05, + "loss": 1.0648, + "step": 270 + }, + { + "epoch": 0.15, + "learning_rate": 4.968354103161283e-05, + "loss": 1.0948, + "step": 275 + }, + { + "epoch": 0.15, + "learning_rate": 4.967195427951926e-05, + "loss": 1.0721, + "step": 280 + }, + { + "epoch": 0.16, + "learning_rate": 4.9660160606867936e-05, + "loss": 1.124, + "step": 285 + }, + { + "epoch": 0.16, + "learning_rate": 4.9648160112570896e-05, + "loss": 1.0963, + "step": 290 + }, + { + "epoch": 0.16, + "learning_rate": 4.9635952897274773e-05, + "loss": 1.1078, + "step": 295 + }, + { + "epoch": 0.17, + "learning_rate": 4.9623539063359925e-05, + "loss": 1.1059, + "step": 300 + }, + { + "epoch": 0.17, + "learning_rate": 4.961091871493962e-05, + "loss": 1.1032, + "step": 305 + }, + { + "epoch": 0.17, + "learning_rate": 4.959809195785912e-05, + "loss": 1.0595, + "step": 310 + }, + { + "epoch": 0.17, + "learning_rate": 4.958505889969481e-05, + "loss": 1.1096, + "step": 315 + }, + { + "epoch": 0.18, + "learning_rate": 4.957181964975329e-05, + "loss": 1.0589, + "step": 320 + }, + { + "epoch": 0.18, + "learning_rate": 4.955837431907049e-05, + "loss": 1.0608, + "step": 325 + }, + { + "epoch": 0.18, + "learning_rate": 4.954472302041069e-05, + "loss": 1.0819, + "step": 330 + }, + { + "epoch": 0.19, + "learning_rate": 4.9530865868265605e-05, + "loss": 1.0759, + "step": 335 + }, + { + "epoch": 0.19, + "learning_rate": 4.951680297885342e-05, + "loss": 1.0515, + "step": 340 + }, + { + "epoch": 0.19, + "learning_rate": 4.950253447011779e-05, + "loss": 1.0371, + "step": 345 + }, + { + "epoch": 0.19, + "learning_rate": 4.948806046172691e-05, + "loss": 1.0619, + "step": 350 + }, + { + "epoch": 0.2, + "learning_rate": 4.947338107507245e-05, + "loss": 1.0757, + "step": 355 + }, + { + "epoch": 0.2, + "learning_rate": 4.945849643326857e-05, + "loss": 1.0686, + "step": 360 + }, + { + "epoch": 0.2, + "learning_rate": 4.9443406661150874e-05, + "loss": 1.0809, + "step": 365 + }, + { + "epoch": 0.2, + "learning_rate": 4.942811188527537e-05, + "loss": 1.0704, + "step": 370 + }, + { + "epoch": 0.21, + "learning_rate": 4.941261223391742e-05, + "loss": 1.0655, + "step": 375 + }, + { + "epoch": 0.21, + "learning_rate": 4.939690783707063e-05, + "loss": 1.1182, + "step": 380 + }, + { + "epoch": 0.21, + "learning_rate": 4.938099882644578e-05, + "loss": 1.081, + "step": 385 + }, + { + "epoch": 0.22, + "learning_rate": 4.9364885335469734e-05, + "loss": 1.0792, + "step": 390 + }, + { + "epoch": 0.22, + "learning_rate": 4.93485674992843e-05, + "loss": 1.0244, + "step": 395 + }, + { + "epoch": 0.22, + "learning_rate": 4.933204545474511e-05, + "loss": 1.0531, + "step": 400 + }, + { + "epoch": 0.22, + "learning_rate": 4.9315319340420465e-05, + "loss": 1.048, + "step": 405 + }, + { + "epoch": 0.23, + "learning_rate": 4.929838929659015e-05, + "loss": 1.04, + "step": 410 + }, + { + "epoch": 0.23, + "learning_rate": 4.9281255465244314e-05, + "loss": 1.0686, + "step": 415 + }, + { + "epoch": 0.23, + "learning_rate": 4.926391799008223e-05, + "loss": 1.0933, + "step": 420 + }, + { + "epoch": 0.24, + "learning_rate": 4.924637701651111e-05, + "loss": 1.0313, + "step": 425 + }, + { + "epoch": 0.24, + "learning_rate": 4.9228632691644874e-05, + "loss": 1.0811, + "step": 430 + }, + { + "epoch": 0.24, + "learning_rate": 4.921068516430293e-05, + "loss": 1.0617, + "step": 435 + }, + { + "epoch": 0.24, + "learning_rate": 4.919253458500892e-05, + "loss": 1.0482, + "step": 440 + }, + { + "epoch": 0.25, + "learning_rate": 4.9174181105989445e-05, + "loss": 1.0681, + "step": 445 + }, + { + "epoch": 0.25, + "learning_rate": 4.9155624881172834e-05, + "loss": 1.0476, + "step": 450 + }, + { + "epoch": 0.25, + "learning_rate": 4.913686606618777e-05, + "loss": 1.0463, + "step": 455 + }, + { + "epoch": 0.25, + "learning_rate": 4.911790481836208e-05, + "loss": 1.0213, + "step": 460 + }, + { + "epoch": 0.26, + "learning_rate": 4.909874129672133e-05, + "loss": 0.9855, + "step": 465 + }, + { + "epoch": 0.26, + "learning_rate": 4.907937566198757e-05, + "loss": 1.045, + "step": 470 + }, + { + "epoch": 0.26, + "learning_rate": 4.9059808076577914e-05, + "loss": 1.0442, + "step": 475 + }, + { + "epoch": 0.27, + "learning_rate": 4.904003870460323e-05, + "loss": 1.0354, + "step": 480 + }, + { + "epoch": 0.27, + "learning_rate": 4.9020067711866735e-05, + "loss": 1.0331, + "step": 485 + }, + { + "epoch": 0.27, + "learning_rate": 4.899989526586261e-05, + "loss": 1.0585, + "step": 490 + }, + { + "epoch": 0.27, + "learning_rate": 4.8979521535774636e-05, + "loss": 1.0223, + "step": 495 + }, + { + "epoch": 0.28, + "learning_rate": 4.895894669247468e-05, + "loss": 1.0118, + "step": 500 + }, + { + "epoch": 0.28, + "learning_rate": 4.8938170908521356e-05, + "loss": 1.0508, + "step": 505 + }, + { + "epoch": 0.28, + "learning_rate": 4.8917194358158534e-05, + "loss": 1.0656, + "step": 510 + }, + { + "epoch": 0.28, + "learning_rate": 4.8896017217313886e-05, + "loss": 1.0655, + "step": 515 + }, + { + "epoch": 0.29, + "learning_rate": 4.887463966359741e-05, + "loss": 1.0833, + "step": 520 + }, + { + "epoch": 0.29, + "learning_rate": 4.8853061876299956e-05, + "loss": 1.068, + "step": 525 + }, + { + "epoch": 0.29, + "learning_rate": 4.8831284036391684e-05, + "loss": 1.0487, + "step": 530 + }, + { + "epoch": 0.3, + "learning_rate": 4.880930632652058e-05, + "loss": 1.059, + "step": 535 + }, + { + "epoch": 0.3, + "learning_rate": 4.878712893101092e-05, + "loss": 1.0408, + "step": 540 + }, + { + "epoch": 0.3, + "learning_rate": 4.876475203586171e-05, + "loss": 0.9976, + "step": 545 + }, + { + "epoch": 0.3, + "learning_rate": 4.874217582874514e-05, + "loss": 1.0358, + "step": 550 + }, + { + "epoch": 0.31, + "learning_rate": 4.8719400499005e-05, + "loss": 1.0073, + "step": 555 + }, + { + "epoch": 0.31, + "learning_rate": 4.869642623765509e-05, + "loss": 1.0742, + "step": 560 + }, + { + "epoch": 0.31, + "learning_rate": 4.867325323737765e-05, + "loss": 1.0463, + "step": 565 + }, + { + "epoch": 0.32, + "learning_rate": 4.864988169252168e-05, + "loss": 1.0242, + "step": 570 + }, + { + "epoch": 0.32, + "learning_rate": 4.8626311799101375e-05, + "loss": 1.0773, + "step": 575 + }, + { + "epoch": 0.32, + "learning_rate": 4.860254375479446e-05, + "loss": 1.0371, + "step": 580 + }, + { + "epoch": 0.32, + "learning_rate": 4.8578577758940504e-05, + "loss": 1.0399, + "step": 585 + }, + { + "epoch": 0.33, + "learning_rate": 4.855441401253928e-05, + "loss": 1.0411, + "step": 590 + }, + { + "epoch": 0.33, + "learning_rate": 4.8530052718249076e-05, + "loss": 1.0476, + "step": 595 + }, + { + "epoch": 0.33, + "learning_rate": 4.850549408038498e-05, + "loss": 1.0075, + "step": 600 + }, + { + "epoch": 0.33, + "learning_rate": 4.848073830491717e-05, + "loss": 1.0045, + "step": 605 + }, + { + "epoch": 0.34, + "learning_rate": 4.845578559946923e-05, + "loss": 1.0368, + "step": 610 + }, + { + "epoch": 0.34, + "learning_rate": 4.8430636173316306e-05, + "loss": 1.0156, + "step": 615 + }, + { + "epoch": 0.34, + "learning_rate": 4.840529023738348e-05, + "loss": 1.0334, + "step": 620 + }, + { + "epoch": 0.35, + "learning_rate": 4.837974800424389e-05, + "loss": 0.9994, + "step": 625 + }, + { + "epoch": 0.35, + "learning_rate": 4.8354009688117026e-05, + "loss": 1.0467, + "step": 630 + }, + { + "epoch": 0.35, + "learning_rate": 4.8328075504866874e-05, + "loss": 0.9968, + "step": 635 + }, + { + "epoch": 0.35, + "learning_rate": 4.8301945672000164e-05, + "loss": 1.041, + "step": 640 + }, + { + "epoch": 0.36, + "learning_rate": 4.8275620408664487e-05, + "loss": 0.9811, + "step": 645 + }, + { + "epoch": 0.36, + "learning_rate": 4.8249099935646494e-05, + "loss": 1.0235, + "step": 650 + }, + { + "epoch": 0.36, + "learning_rate": 4.822238447537003e-05, + "loss": 1.0152, + "step": 655 + }, + { + "epoch": 0.36, + "learning_rate": 4.819547425189429e-05, + "loss": 1.038, + "step": 660 + }, + { + "epoch": 0.37, + "learning_rate": 4.81683694909119e-05, + "loss": 1.0387, + "step": 665 + }, + { + "epoch": 0.37, + "learning_rate": 4.814107041974707e-05, + "loss": 1.019, + "step": 670 + }, + { + "epoch": 0.37, + "learning_rate": 4.811357726735366e-05, + "loss": 1.0403, + "step": 675 + }, + { + "epoch": 0.38, + "learning_rate": 4.808589026431324e-05, + "loss": 1.0828, + "step": 680 + }, + { + "epoch": 0.38, + "learning_rate": 4.805800964283322e-05, + "loss": 1.0128, + "step": 685 + }, + { + "epoch": 0.38, + "learning_rate": 4.802993563674483e-05, + "loss": 1.0502, + "step": 690 + }, + { + "epoch": 0.38, + "learning_rate": 4.80016684815012e-05, + "loss": 1.0169, + "step": 695 + }, + { + "epoch": 0.39, + "learning_rate": 4.7973208414175406e-05, + "loss": 1.0316, + "step": 700 + }, + { + "epoch": 0.39, + "learning_rate": 4.794455567345842e-05, + "loss": 1.0263, + "step": 705 + }, + { + "epoch": 0.39, + "learning_rate": 4.791571049965714e-05, + "loss": 1.0801, + "step": 710 + }, + { + "epoch": 0.4, + "learning_rate": 4.7886673134692404e-05, + "loss": 1.0575, + "step": 715 + }, + { + "epoch": 0.4, + "learning_rate": 4.7857443822096905e-05, + "loss": 0.9767, + "step": 720 + }, + { + "epoch": 0.4, + "learning_rate": 4.782802280701319e-05, + "loss": 0.9666, + "step": 725 + }, + { + "epoch": 0.4, + "learning_rate": 4.77984103361916e-05, + "loss": 0.9983, + "step": 730 + }, + { + "epoch": 0.41, + "learning_rate": 4.776860665798816e-05, + "loss": 1.0219, + "step": 735 + }, + { + "epoch": 0.41, + "learning_rate": 4.773861202236257e-05, + "loss": 0.9963, + "step": 740 + }, + { + "epoch": 0.41, + "learning_rate": 4.770842668087602e-05, + "loss": 1.0219, + "step": 745 + }, + { + "epoch": 0.41, + "learning_rate": 4.767805088668916e-05, + "loss": 1.0029, + "step": 750 + }, + { + "epoch": 0.42, + "learning_rate": 4.7647484894559936e-05, + "loss": 1.0077, + "step": 755 + }, + { + "epoch": 0.42, + "learning_rate": 4.7616728960841444e-05, + "loss": 0.9908, + "step": 760 + }, + { + "epoch": 0.42, + "learning_rate": 4.758578334347981e-05, + "loss": 1.0092, + "step": 765 + }, + { + "epoch": 0.43, + "learning_rate": 4.7554648302012015e-05, + "loss": 0.9874, + "step": 770 + }, + { + "epoch": 0.43, + "learning_rate": 4.7523324097563706e-05, + "loss": 1.0185, + "step": 775 + }, + { + "epoch": 0.43, + "learning_rate": 4.749181099284703e-05, + "loss": 1.0533, + "step": 780 + }, + { + "epoch": 0.43, + "learning_rate": 4.746010925215839e-05, + "loss": 1.0083, + "step": 785 + }, + { + "epoch": 0.44, + "learning_rate": 4.74282191413763e-05, + "loss": 1.0191, + "step": 790 + }, + { + "epoch": 0.44, + "learning_rate": 4.7396140927959045e-05, + "loss": 0.9786, + "step": 795 + }, + { + "epoch": 0.44, + "learning_rate": 4.7363874880942574e-05, + "loss": 1.0197, + "step": 800 + }, + { + "epoch": 0.45, + "learning_rate": 4.733142127093813e-05, + "loss": 1.0471, + "step": 805 + }, + { + "epoch": 0.45, + "learning_rate": 4.7298780370130014e-05, + "loss": 1.0134, + "step": 810 + }, + { + "epoch": 0.45, + "learning_rate": 4.726595245227336e-05, + "loss": 0.9801, + "step": 815 + }, + { + "epoch": 0.45, + "learning_rate": 4.723293779269173e-05, + "loss": 0.9944, + "step": 820 + }, + { + "epoch": 0.46, + "learning_rate": 4.7199736668274924e-05, + "loss": 1.0358, + "step": 825 + }, + { + "epoch": 0.46, + "learning_rate": 4.716634935747655e-05, + "loss": 0.9907, + "step": 830 + }, + { + "epoch": 0.46, + "learning_rate": 4.713277614031177e-05, + "loss": 1.0698, + "step": 835 + }, + { + "epoch": 0.46, + "learning_rate": 4.70990172983549e-05, + "loss": 1.0466, + "step": 840 + }, + { + "epoch": 0.47, + "learning_rate": 4.706507311473707e-05, + "loss": 0.9489, + "step": 845 + }, + { + "epoch": 0.47, + "learning_rate": 4.703094387414385e-05, + "loss": 0.9936, + "step": 850 + }, + { + "epoch": 0.47, + "learning_rate": 4.699662986281288e-05, + "loss": 0.9672, + "step": 855 + }, + { + "epoch": 0.48, + "learning_rate": 4.696213136853141e-05, + "loss": 1.0164, + "step": 860 + }, + { + "epoch": 0.48, + "learning_rate": 4.6927448680633954e-05, + "loss": 0.9817, + "step": 865 + }, + { + "epoch": 0.48, + "learning_rate": 4.689258208999983e-05, + "loss": 1.028, + "step": 870 + }, + { + "epoch": 0.48, + "learning_rate": 4.6857531889050716e-05, + "loss": 1.0239, + "step": 875 + }, + { + "epoch": 0.49, + "learning_rate": 4.682229837174821e-05, + "loss": 1.0378, + "step": 880 + }, + { + "epoch": 0.49, + "learning_rate": 4.678688183359135e-05, + "loss": 0.9908, + "step": 885 + }, + { + "epoch": 0.49, + "learning_rate": 4.675128257161418e-05, + "loss": 0.9384, + "step": 890 + }, + { + "epoch": 0.49, + "learning_rate": 4.671550088438319e-05, + "loss": 0.9481, + "step": 895 + }, + { + "epoch": 0.5, + "learning_rate": 4.6679537071994874e-05, + "loss": 0.9541, + "step": 900 + }, + { + "epoch": 0.5, + "learning_rate": 4.6643391436073165e-05, + "loss": 0.9787, + "step": 905 + }, + { + "epoch": 0.5, + "learning_rate": 4.660706427976693e-05, + "loss": 0.9897, + "step": 910 + }, + { + "epoch": 0.51, + "learning_rate": 4.657055590774745e-05, + "loss": 0.9947, + "step": 915 + }, + { + "epoch": 0.51, + "learning_rate": 4.6533866626205805e-05, + "loss": 0.9614, + "step": 920 + }, + { + "epoch": 0.51, + "learning_rate": 4.649699674285036e-05, + "loss": 1.0323, + "step": 925 + }, + { + "epoch": 0.51, + "learning_rate": 4.645994656690417e-05, + "loss": 1.0345, + "step": 930 + }, + { + "epoch": 0.52, + "learning_rate": 4.642271640910235e-05, + "loss": 1.0432, + "step": 935 + }, + { + "epoch": 0.52, + "learning_rate": 4.638530658168954e-05, + "loss": 1.0102, + "step": 940 + }, + { + "epoch": 0.52, + "learning_rate": 4.6347717398417203e-05, + "loss": 1.0361, + "step": 945 + }, + { + "epoch": 0.53, + "learning_rate": 4.6309949174541096e-05, + "loss": 0.9818, + "step": 950 + }, + { + "epoch": 0.53, + "learning_rate": 4.627200222681851e-05, + "loss": 1.0293, + "step": 955 + }, + { + "epoch": 0.53, + "learning_rate": 4.6233876873505694e-05, + "loss": 1.0125, + "step": 960 + }, + { + "epoch": 0.53, + "learning_rate": 4.619557343435516e-05, + "loss": 1.0039, + "step": 965 + }, + { + "epoch": 0.54, + "learning_rate": 4.615709223061302e-05, + "loss": 1.0146, + "step": 970 + }, + { + "epoch": 0.54, + "learning_rate": 4.611843358501624e-05, + "loss": 1.0352, + "step": 975 + }, + { + "epoch": 0.54, + "learning_rate": 4.6079597821789993e-05, + "loss": 1.0405, + "step": 980 + }, + { + "epoch": 0.54, + "learning_rate": 4.604058526664491e-05, + "loss": 0.9834, + "step": 985 + }, + { + "epoch": 0.55, + "learning_rate": 4.600139624677436e-05, + "loss": 1.0252, + "step": 990 + }, + { + "epoch": 0.55, + "learning_rate": 4.596203109085168e-05, + "loss": 1.0516, + "step": 995 + }, + { + "epoch": 0.55, + "learning_rate": 4.5922490129027464e-05, + "loss": 0.9762, + "step": 1000 + }, + { + "epoch": 0.56, + "learning_rate": 4.588277369292674e-05, + "loss": 0.9801, + "step": 1005 + }, + { + "epoch": 0.56, + "learning_rate": 4.5842882115646234e-05, + "loss": 1.0127, + "step": 1010 + }, + { + "epoch": 0.56, + "learning_rate": 4.580281573175157e-05, + "loss": 1.0415, + "step": 1015 + }, + { + "epoch": 0.56, + "learning_rate": 4.576257487727442e-05, + "loss": 0.9974, + "step": 1020 + }, + { + "epoch": 0.57, + "learning_rate": 4.572215988970974e-05, + "loss": 1.0097, + "step": 1025 + }, + { + "epoch": 0.57, + "learning_rate": 4.568157110801293e-05, + "loss": 1.0004, + "step": 1030 + }, + { + "epoch": 0.57, + "learning_rate": 4.5640808872596944e-05, + "loss": 0.9932, + "step": 1035 + }, + { + "epoch": 0.58, + "learning_rate": 4.5599873525329505e-05, + "loss": 0.9437, + "step": 1040 + }, + { + "epoch": 0.58, + "learning_rate": 4.555876540953019e-05, + "loss": 1.0004, + "step": 1045 + }, + { + "epoch": 0.58, + "learning_rate": 4.551748486996755e-05, + "loss": 0.9743, + "step": 1050 + }, + { + "epoch": 0.58, + "learning_rate": 4.547603225285626e-05, + "loss": 1.0303, + "step": 1055 + }, + { + "epoch": 0.59, + "learning_rate": 4.543440790585417e-05, + "loss": 1.0225, + "step": 1060 + }, + { + "epoch": 0.59, + "learning_rate": 4.539261217805939e-05, + "loss": 1.0102, + "step": 1065 + }, + { + "epoch": 0.59, + "learning_rate": 4.535064542000743e-05, + "loss": 1.0087, + "step": 1070 + }, + { + "epoch": 0.59, + "learning_rate": 4.5308507983668165e-05, + "loss": 0.952, + "step": 1075 + }, + { + "epoch": 0.6, + "learning_rate": 4.526620022244293e-05, + "loss": 0.9767, + "step": 1080 + }, + { + "epoch": 0.6, + "learning_rate": 4.522372249116158e-05, + "loss": 1.0049, + "step": 1085 + }, + { + "epoch": 0.6, + "learning_rate": 4.5181075146079456e-05, + "loss": 1.0397, + "step": 1090 + }, + { + "epoch": 0.61, + "learning_rate": 4.5138258544874455e-05, + "loss": 1.0167, + "step": 1095 + }, + { + "epoch": 0.61, + "learning_rate": 4.5095273046643985e-05, + "loss": 0.9966, + "step": 1100 + }, + { + "epoch": 0.61, + "learning_rate": 4.5052119011901986e-05, + "loss": 0.983, + "step": 1105 + }, + { + "epoch": 0.61, + "learning_rate": 4.500879680257587e-05, + "loss": 1.0301, + "step": 1110 + }, + { + "epoch": 0.62, + "learning_rate": 4.4965306782003535e-05, + "loss": 1.0371, + "step": 1115 + }, + { + "epoch": 0.62, + "learning_rate": 4.492164931493028e-05, + "loss": 1.0083, + "step": 1120 + }, + { + "epoch": 0.62, + "learning_rate": 4.487782476750575e-05, + "loss": 0.9728, + "step": 1125 + }, + { + "epoch": 0.62, + "learning_rate": 4.4833833507280884e-05, + "loss": 0.9892, + "step": 1130 + }, + { + "epoch": 0.63, + "learning_rate": 4.4789675903204805e-05, + "loss": 0.9847, + "step": 1135 + }, + { + "epoch": 0.63, + "learning_rate": 4.474535232562176e-05, + "loss": 1.0545, + "step": 1140 + }, + { + "epoch": 0.63, + "learning_rate": 4.470086314626797e-05, + "loss": 1.0204, + "step": 1145 + }, + { + "epoch": 0.64, + "learning_rate": 4.465620873826856e-05, + "loss": 0.9648, + "step": 1150 + }, + { + "epoch": 0.64, + "learning_rate": 4.46113894761344e-05, + "loss": 0.9545, + "step": 1155 + }, + { + "epoch": 0.64, + "learning_rate": 4.456640573575896e-05, + "loss": 1.0256, + "step": 1160 + }, + { + "epoch": 0.64, + "learning_rate": 4.4521257894415183e-05, + "loss": 0.99, + "step": 1165 + }, + { + "epoch": 0.65, + "learning_rate": 4.44759463307523e-05, + "loss": 0.9906, + "step": 1170 + }, + { + "epoch": 0.65, + "learning_rate": 4.443047142479266e-05, + "loss": 0.9876, + "step": 1175 + }, + { + "epoch": 0.65, + "learning_rate": 4.4384833557928553e-05, + "loss": 1.0495, + "step": 1180 + }, + { + "epoch": 0.66, + "learning_rate": 4.4339033112918966e-05, + "loss": 0.9869, + "step": 1185 + }, + { + "epoch": 0.66, + "learning_rate": 4.4293070473886456e-05, + "loss": 1.0299, + "step": 1190 + }, + { + "epoch": 0.66, + "learning_rate": 4.424694602631385e-05, + "loss": 1.0073, + "step": 1195 + }, + { + "epoch": 0.66, + "learning_rate": 4.420066015704105e-05, + "loss": 1.0023, + "step": 1200 + }, + { + "epoch": 0.67, + "learning_rate": 4.41542132542618e-05, + "loss": 0.9462, + "step": 1205 + }, + { + "epoch": 0.67, + "learning_rate": 4.410760570752037e-05, + "loss": 1.0116, + "step": 1210 + }, + { + "epoch": 0.67, + "learning_rate": 4.4060837907708375e-05, + "loss": 0.9652, + "step": 1215 + }, + { + "epoch": 0.67, + "learning_rate": 4.401391024706142e-05, + "loss": 1.0411, + "step": 1220 + }, + { + "epoch": 0.68, + "learning_rate": 4.396682311915586e-05, + "loss": 0.9691, + "step": 1225 + }, + { + "epoch": 0.68, + "learning_rate": 4.3919576918905495e-05, + "loss": 0.961, + "step": 1230 + }, + { + "epoch": 0.68, + "learning_rate": 4.387217204255819e-05, + "loss": 0.9602, + "step": 1235 + }, + { + "epoch": 0.69, + "learning_rate": 4.3824608887692666e-05, + "loss": 1.0197, + "step": 1240 + }, + { + "epoch": 0.69, + "learning_rate": 4.377688785321507e-05, + "loss": 0.9982, + "step": 1245 + }, + { + "epoch": 0.69, + "learning_rate": 4.372900933935569e-05, + "loss": 0.986, + "step": 1250 + }, + { + "epoch": 0.69, + "learning_rate": 4.368097374766556e-05, + "loss": 0.9744, + "step": 1255 + }, + { + "epoch": 0.7, + "learning_rate": 4.3632781481013105e-05, + "loss": 1.0078, + "step": 1260 + }, + { + "epoch": 0.7, + "learning_rate": 4.358443294358077e-05, + "loss": 0.9865, + "step": 1265 + }, + { + "epoch": 0.7, + "learning_rate": 4.35359285408616e-05, + "loss": 0.976, + "step": 1270 + }, + { + "epoch": 0.71, + "learning_rate": 4.348726867965591e-05, + "loss": 1.0141, + "step": 1275 + }, + { + "epoch": 0.71, + "learning_rate": 4.343845376806777e-05, + "loss": 0.952, + "step": 1280 + }, + { + "epoch": 0.71, + "learning_rate": 4.338948421550169e-05, + "loss": 1.0239, + "step": 1285 + }, + { + "epoch": 0.71, + "learning_rate": 4.334036043265909e-05, + "loss": 0.994, + "step": 1290 + }, + { + "epoch": 0.72, + "learning_rate": 4.329108283153492e-05, + "loss": 1.0505, + "step": 1295 + }, + { + "epoch": 0.72, + "learning_rate": 4.3241651825414195e-05, + "loss": 0.9919, + "step": 1300 + }, + { + "epoch": 0.72, + "learning_rate": 4.31920678288685e-05, + "loss": 1.0169, + "step": 1305 + }, + { + "epoch": 0.72, + "learning_rate": 4.3142331257752546e-05, + "loss": 0.9938, + "step": 1310 + }, + { + "epoch": 0.73, + "learning_rate": 4.309244252920064e-05, + "loss": 0.92, + "step": 1315 + }, + { + "epoch": 0.73, + "learning_rate": 4.304240206162326e-05, + "loss": 0.9537, + "step": 1320 + }, + { + "epoch": 0.73, + "learning_rate": 4.299221027470345e-05, + "loss": 0.996, + "step": 1325 + }, + { + "epoch": 0.74, + "learning_rate": 4.294186758939339e-05, + "loss": 0.9652, + "step": 1330 + }, + { + "epoch": 0.74, + "learning_rate": 4.2891374427910795e-05, + "loss": 1.0138, + "step": 1335 + }, + { + "epoch": 0.74, + "learning_rate": 4.284073121373544e-05, + "loss": 0.9267, + "step": 1340 + }, + { + "epoch": 0.74, + "learning_rate": 4.278993837160553e-05, + "loss": 0.9899, + "step": 1345 + }, + { + "epoch": 0.75, + "learning_rate": 4.273899632751422e-05, + "loss": 0.963, + "step": 1350 + }, + { + "epoch": 0.75, + "learning_rate": 4.2687905508705974e-05, + "loss": 1.0066, + "step": 1355 + }, + { + "epoch": 0.75, + "learning_rate": 4.263666634367303e-05, + "loss": 0.9939, + "step": 1360 + }, + { + "epoch": 0.75, + "learning_rate": 4.258527926215178e-05, + "loss": 0.9667, + "step": 1365 + }, + { + "epoch": 0.76, + "learning_rate": 4.253374469511917e-05, + "loss": 0.9999, + "step": 1370 + }, + { + "epoch": 0.76, + "learning_rate": 4.248206307478909e-05, + "loss": 0.9738, + "step": 1375 + }, + { + "epoch": 0.76, + "learning_rate": 4.243023483460875e-05, + "loss": 0.9829, + "step": 1380 + }, + { + "epoch": 0.77, + "learning_rate": 4.237826040925503e-05, + "loss": 1.0107, + "step": 1385 + }, + { + "epoch": 0.77, + "learning_rate": 4.232614023463088e-05, + "loss": 0.9992, + "step": 1390 + }, + { + "epoch": 0.77, + "learning_rate": 4.227387474786159e-05, + "loss": 0.9566, + "step": 1395 + }, + { + "epoch": 0.77, + "learning_rate": 4.222146438729119e-05, + "loss": 0.9722, + "step": 1400 + }, + { + "epoch": 0.78, + "learning_rate": 4.216890959247873e-05, + "loss": 0.988, + "step": 1405 + }, + { + "epoch": 0.78, + "learning_rate": 4.211621080419463e-05, + "loss": 1.0103, + "step": 1410 + }, + { + "epoch": 0.78, + "learning_rate": 4.206336846441695e-05, + "loss": 0.9805, + "step": 1415 + }, + { + "epoch": 0.79, + "learning_rate": 4.201038301632772e-05, + "loss": 1.0177, + "step": 1420 + }, + { + "epoch": 0.79, + "learning_rate": 4.195725490430917e-05, + "loss": 0.9938, + "step": 1425 + }, + { + "epoch": 0.79, + "learning_rate": 4.190398457394007e-05, + "loss": 1.0276, + "step": 1430 + }, + { + "epoch": 0.79, + "learning_rate": 4.1850572471991924e-05, + "loss": 0.9909, + "step": 1435 + }, + { + "epoch": 0.8, + "learning_rate": 4.1797019046425264e-05, + "loss": 0.9794, + "step": 1440 + }, + { + "epoch": 0.8, + "learning_rate": 4.1743324746385914e-05, + "loss": 1.0073, + "step": 1445 + }, + { + "epoch": 0.8, + "learning_rate": 4.1689490022201154e-05, + "loss": 1.0106, + "step": 1450 + }, + { + "epoch": 0.8, + "learning_rate": 4.163551532537601e-05, + "loss": 1.0241, + "step": 1455 + }, + { + "epoch": 0.81, + "learning_rate": 4.1581401108589425e-05, + "loss": 1.0178, + "step": 1460 + }, + { + "epoch": 0.81, + "learning_rate": 4.1527147825690495e-05, + "loss": 0.9426, + "step": 1465 + }, + { + "epoch": 0.81, + "learning_rate": 4.1472755931694626e-05, + "loss": 0.9642, + "step": 1470 + }, + { + "epoch": 0.82, + "learning_rate": 4.141822588277976e-05, + "loss": 0.9678, + "step": 1475 + }, + { + "epoch": 0.82, + "learning_rate": 4.136355813628251e-05, + "loss": 0.9654, + "step": 1480 + }, + { + "epoch": 0.82, + "learning_rate": 4.130875315069435e-05, + "loss": 0.9748, + "step": 1485 + }, + { + "epoch": 0.82, + "learning_rate": 4.125381138565775e-05, + "loss": 0.9321, + "step": 1490 + }, + { + "epoch": 0.83, + "learning_rate": 4.1198733301962346e-05, + "loss": 0.9885, + "step": 1495 + }, + { + "epoch": 0.83, + "learning_rate": 4.114351936154105e-05, + "loss": 0.9385, + "step": 1500 + }, + { + "epoch": 0.83, + "learning_rate": 4.108817002746619e-05, + "loss": 0.9362, + "step": 1505 + }, + { + "epoch": 0.83, + "learning_rate": 4.1032685763945625e-05, + "loss": 0.9764, + "step": 1510 + }, + { + "epoch": 0.84, + "learning_rate": 4.097706703631886e-05, + "loss": 0.961, + "step": 1515 + }, + { + "epoch": 0.84, + "learning_rate": 4.092131431105312e-05, + "loss": 0.9818, + "step": 1520 + }, + { + "epoch": 0.84, + "learning_rate": 4.086542805573945e-05, + "loss": 0.9855, + "step": 1525 + }, + { + "epoch": 0.85, + "learning_rate": 4.080940873908881e-05, + "loss": 0.9618, + "step": 1530 + }, + { + "epoch": 0.85, + "learning_rate": 4.07532568309281e-05, + "loss": 1.0102, + "step": 1535 + }, + { + "epoch": 0.85, + "learning_rate": 4.069697280219628e-05, + "loss": 0.9773, + "step": 1540 + }, + { + "epoch": 0.85, + "learning_rate": 4.0640557124940376e-05, + "loss": 0.9687, + "step": 1545 + }, + { + "epoch": 0.86, + "learning_rate": 4.058401027231152e-05, + "loss": 0.9818, + "step": 1550 + }, + { + "epoch": 0.86, + "learning_rate": 4.052733271856103e-05, + "loss": 0.9911, + "step": 1555 + }, + { + "epoch": 0.86, + "learning_rate": 4.0470524939036355e-05, + "loss": 0.9694, + "step": 1560 + }, + { + "epoch": 0.87, + "learning_rate": 4.0413587410177155e-05, + "loss": 0.9374, + "step": 1565 + }, + { + "epoch": 0.87, + "learning_rate": 4.035652060951128e-05, + "loss": 0.956, + "step": 1570 + }, + { + "epoch": 0.87, + "learning_rate": 4.0299325015650774e-05, + "loss": 0.9491, + "step": 1575 + }, + { + "epoch": 0.87, + "learning_rate": 4.024200110828783e-05, + "loss": 0.9569, + "step": 1580 + }, + { + "epoch": 0.88, + "learning_rate": 4.018454936819082e-05, + "loss": 0.9627, + "step": 1585 + }, + { + "epoch": 0.88, + "learning_rate": 4.012697027720018e-05, + "loss": 0.9922, + "step": 1590 + }, + { + "epoch": 0.88, + "learning_rate": 4.0069264318224506e-05, + "loss": 0.9703, + "step": 1595 + }, + { + "epoch": 0.88, + "learning_rate": 4.0011431975236337e-05, + "loss": 0.9436, + "step": 1600 + }, + { + "epoch": 0.89, + "learning_rate": 3.995347373326822e-05, + "loss": 0.961, + "step": 1605 + }, + { + "epoch": 0.89, + "learning_rate": 3.989539007840861e-05, + "loss": 0.9247, + "step": 1610 + }, + { + "epoch": 0.89, + "learning_rate": 3.983718149779775e-05, + "loss": 0.9537, + "step": 1615 + }, + { + "epoch": 0.9, + "learning_rate": 3.9778848479623656e-05, + "loss": 0.9415, + "step": 1620 + }, + { + "epoch": 0.9, + "learning_rate": 3.972039151311795e-05, + "loss": 0.9954, + "step": 1625 + }, + { + "epoch": 0.9, + "learning_rate": 3.966181108855183e-05, + "loss": 0.9451, + "step": 1630 + }, + { + "epoch": 0.9, + "learning_rate": 3.960310769723189e-05, + "loss": 0.9977, + "step": 1635 + }, + { + "epoch": 0.91, + "learning_rate": 3.9544281831496034e-05, + "loss": 0.9806, + "step": 1640 + }, + { + "epoch": 0.91, + "learning_rate": 3.9485333984709374e-05, + "loss": 0.9851, + "step": 1645 + }, + { + "epoch": 0.91, + "learning_rate": 3.942626465126001e-05, + "loss": 1.0089, + "step": 1650 + }, + { + "epoch": 0.92, + "learning_rate": 3.9367074326555e-05, + "loss": 0.9562, + "step": 1655 + }, + { + "epoch": 0.92, + "learning_rate": 3.930776350701609e-05, + "loss": 0.9892, + "step": 1660 + }, + { + "epoch": 0.92, + "learning_rate": 3.92483326900756e-05, + "loss": 0.9875, + "step": 1665 + }, + { + "epoch": 0.92, + "learning_rate": 3.91887823741723e-05, + "loss": 1.0012, + "step": 1670 + }, + { + "epoch": 0.93, + "learning_rate": 3.9129113058747136e-05, + "loss": 0.97, + "step": 1675 + }, + { + "epoch": 0.93, + "learning_rate": 3.9069325244239095e-05, + "loss": 0.9793, + "step": 1680 + }, + { + "epoch": 0.93, + "learning_rate": 3.900941943208103e-05, + "loss": 0.9864, + "step": 1685 + }, + { + "epoch": 0.93, + "learning_rate": 3.894939612469539e-05, + "loss": 0.9596, + "step": 1690 + }, + { + "epoch": 0.94, + "learning_rate": 3.888925582549006e-05, + "loss": 0.9679, + "step": 1695 + }, + { + "epoch": 0.94, + "learning_rate": 3.882899903885412e-05, + "loss": 0.9395, + "step": 1700 + }, + { + "epoch": 0.94, + "learning_rate": 3.876862627015361e-05, + "loss": 0.9774, + "step": 1705 + }, + { + "epoch": 0.95, + "learning_rate": 3.87081380257273e-05, + "loss": 0.9686, + "step": 1710 + }, + { + "epoch": 0.95, + "learning_rate": 3.864753481288244e-05, + "loss": 0.953, + "step": 1715 + }, + { + "epoch": 0.95, + "learning_rate": 3.8586817139890515e-05, + "loss": 0.9719, + "step": 1720 + }, + { + "epoch": 0.95, + "learning_rate": 3.852598551598294e-05, + "loss": 0.9748, + "step": 1725 + }, + { + "epoch": 0.96, + "learning_rate": 3.8465040451346874e-05, + "loss": 0.9741, + "step": 1730 + }, + { + "epoch": 0.96, + "learning_rate": 3.8403982457120836e-05, + "loss": 0.9747, + "step": 1735 + }, + { + "epoch": 0.96, + "learning_rate": 3.834281204539051e-05, + "loss": 0.9791, + "step": 1740 + }, + { + "epoch": 0.96, + "learning_rate": 3.828152972918438e-05, + "loss": 0.9704, + "step": 1745 + }, + { + "epoch": 0.97, + "learning_rate": 3.82201360224695e-05, + "loss": 0.9556, + "step": 1750 + }, + { + "epoch": 0.97, + "learning_rate": 3.815863144014711e-05, + "loss": 0.9552, + "step": 1755 + }, + { + "epoch": 0.97, + "learning_rate": 3.809701649804834e-05, + "loss": 0.9594, + "step": 1760 + }, + { + "epoch": 0.98, + "learning_rate": 3.8035291712929926e-05, + "loss": 0.959, + "step": 1765 + }, + { + "epoch": 0.98, + "learning_rate": 3.797345760246982e-05, + "loss": 1.0254, + "step": 1770 + }, + { + "epoch": 0.98, + "learning_rate": 3.791151468526289e-05, + "loss": 0.9925, + "step": 1775 + }, + { + "epoch": 0.98, + "learning_rate": 3.784946348081654e-05, + "loss": 0.9516, + "step": 1780 + }, + { + "epoch": 0.99, + "learning_rate": 3.7787304509546365e-05, + "loss": 0.954, + "step": 1785 + }, + { + "epoch": 0.99, + "learning_rate": 3.7725038292771774e-05, + "loss": 0.9465, + "step": 1790 + }, + { + "epoch": 0.99, + "learning_rate": 3.766266535271167e-05, + "loss": 0.9792, + "step": 1795 + }, + { + "epoch": 1.0, + "learning_rate": 3.760018621248e-05, + "loss": 0.964, + "step": 1800 + }, + { + "epoch": 1.0, + "learning_rate": 3.75376013960814e-05, + "loss": 0.9419, + "step": 1805 + }, + { + "epoch": 1.0, + "learning_rate": 3.747491142840681e-05, + "loss": 0.9818, + "step": 1810 + }, + { + "epoch": 1.0, + "learning_rate": 3.741211683522904e-05, + "loss": 0.9153, + "step": 1815 + }, + { + "epoch": 1.01, + "learning_rate": 3.734921814319841e-05, + "loss": 0.955, + "step": 1820 + }, + { + "epoch": 1.01, + "learning_rate": 3.728621587983828e-05, + "loss": 0.9467, + "step": 1825 + }, + { + "epoch": 1.01, + "learning_rate": 3.722311057354067e-05, + "loss": 0.9816, + "step": 1830 + }, + { + "epoch": 1.01, + "learning_rate": 3.715990275356178e-05, + "loss": 0.9727, + "step": 1835 + }, + { + "epoch": 1.02, + "learning_rate": 3.7096592950017617e-05, + "loss": 0.9822, + "step": 1840 + }, + { + "epoch": 1.02, + "learning_rate": 3.703318169387947e-05, + "loss": 0.942, + "step": 1845 + }, + { + "epoch": 1.02, + "learning_rate": 3.696966951696952e-05, + "loss": 0.9306, + "step": 1850 + }, + { + "epoch": 1.03, + "learning_rate": 3.690605695195637e-05, + "loss": 0.9654, + "step": 1855 + }, + { + "epoch": 1.03, + "learning_rate": 3.684234453235054e-05, + "loss": 0.9592, + "step": 1860 + }, + { + "epoch": 1.03, + "learning_rate": 3.677853279250003e-05, + "loss": 0.988, + "step": 1865 + }, + { + "epoch": 1.03, + "learning_rate": 3.671462226758583e-05, + "loss": 0.9462, + "step": 1870 + }, + { + "epoch": 1.04, + "learning_rate": 3.665061349361742e-05, + "loss": 0.9685, + "step": 1875 + }, + { + "epoch": 1.04, + "learning_rate": 3.658650700742828e-05, + "loss": 0.9772, + "step": 1880 + }, + { + "epoch": 1.04, + "learning_rate": 3.6522303346671404e-05, + "loss": 0.9482, + "step": 1885 + }, + { + "epoch": 1.05, + "learning_rate": 3.645800304981477e-05, + "loss": 1.0069, + "step": 1890 + }, + { + "epoch": 1.05, + "learning_rate": 3.639360665613683e-05, + "loss": 0.9003, + "step": 1895 + }, + { + "epoch": 1.05, + "learning_rate": 3.632911470572197e-05, + "loss": 0.9279, + "step": 1900 + }, + { + "epoch": 1.05, + "learning_rate": 3.626452773945603e-05, + "loss": 0.9237, + "step": 1905 + }, + { + "epoch": 1.06, + "learning_rate": 3.619984629902172e-05, + "loss": 1.0086, + "step": 1910 + }, + { + "epoch": 1.06, + "learning_rate": 3.613507092689409e-05, + "loss": 0.9625, + "step": 1915 + }, + { + "epoch": 1.06, + "learning_rate": 3.607020216633599e-05, + "loss": 0.9297, + "step": 1920 + }, + { + "epoch": 1.06, + "learning_rate": 3.60052405613935e-05, + "loss": 0.9718, + "step": 1925 + }, + { + "epoch": 1.07, + "learning_rate": 3.594018665689139e-05, + "loss": 0.9512, + "step": 1930 + }, + { + "epoch": 1.07, + "learning_rate": 3.5875040998428513e-05, + "loss": 0.9923, + "step": 1935 + }, + { + "epoch": 1.07, + "learning_rate": 3.5809804132373253e-05, + "loss": 0.9518, + "step": 1940 + }, + { + "epoch": 1.08, + "learning_rate": 3.574447660585897e-05, + "loss": 0.9324, + "step": 1945 + }, + { + "epoch": 1.08, + "learning_rate": 3.5679058966779344e-05, + "loss": 0.9724, + "step": 1950 + }, + { + "epoch": 1.08, + "learning_rate": 3.561355176378384e-05, + "loss": 0.978, + "step": 1955 + }, + { + "epoch": 1.08, + "learning_rate": 3.554795554627307e-05, + "loss": 0.9893, + "step": 1960 + }, + { + "epoch": 1.09, + "learning_rate": 3.548227086439422e-05, + "loss": 0.967, + "step": 1965 + }, + { + "epoch": 1.09, + "learning_rate": 3.541649826903639e-05, + "loss": 0.9529, + "step": 1970 + }, + { + "epoch": 1.09, + "learning_rate": 3.535063831182602e-05, + "loss": 0.9506, + "step": 1975 + }, + { + "epoch": 1.09, + "learning_rate": 3.528469154512224e-05, + "loss": 0.9525, + "step": 1980 + }, + { + "epoch": 1.1, + "learning_rate": 3.521865852201223e-05, + "loss": 0.9258, + "step": 1985 + }, + { + "epoch": 1.1, + "learning_rate": 3.5152539796306596e-05, + "loss": 0.9417, + "step": 1990 + }, + { + "epoch": 1.1, + "learning_rate": 3.508633592253472e-05, + "loss": 0.9465, + "step": 1995 + }, + { + "epoch": 1.11, + "learning_rate": 3.502004745594011e-05, + "loss": 0.9494, + "step": 2000 + }, + { + "epoch": 1.11, + "learning_rate": 3.4953674952475755e-05, + "loss": 0.9709, + "step": 2005 + }, + { + "epoch": 1.11, + "learning_rate": 3.488721896879943e-05, + "loss": 0.9581, + "step": 2010 + }, + { + "epoch": 1.11, + "learning_rate": 3.4820680062269074e-05, + "loss": 0.974, + "step": 2015 + }, + { + "epoch": 1.12, + "learning_rate": 3.4754058790938046e-05, + "loss": 0.9768, + "step": 2020 + }, + { + "epoch": 1.12, + "learning_rate": 3.468735571355055e-05, + "loss": 0.982, + "step": 2025 + }, + { + "epoch": 1.12, + "learning_rate": 3.4620571389536825e-05, + "loss": 0.984, + "step": 2030 + }, + { + "epoch": 1.13, + "learning_rate": 3.455370637900856e-05, + "loss": 0.9604, + "step": 2035 + }, + { + "epoch": 1.13, + "learning_rate": 3.448676124275414e-05, + "loss": 0.897, + "step": 2040 + }, + { + "epoch": 1.13, + "learning_rate": 3.4419736542233925e-05, + "loss": 0.9968, + "step": 2045 + }, + { + "epoch": 1.13, + "learning_rate": 3.4352632839575616e-05, + "loss": 0.9479, + "step": 2050 + }, + { + "epoch": 1.14, + "learning_rate": 3.428545069756946e-05, + "loss": 0.9724, + "step": 2055 + }, + { + "epoch": 1.14, + "learning_rate": 3.42181906796636e-05, + "loss": 0.9493, + "step": 2060 + }, + { + "epoch": 1.14, + "learning_rate": 3.415085334995927e-05, + "loss": 0.9348, + "step": 2065 + }, + { + "epoch": 1.14, + "learning_rate": 3.408343927320613e-05, + "loss": 0.9702, + "step": 2070 + }, + { + "epoch": 1.15, + "learning_rate": 3.401594901479753e-05, + "loss": 0.9089, + "step": 2075 + }, + { + "epoch": 1.15, + "learning_rate": 3.394838314076572e-05, + "loss": 0.9606, + "step": 2080 + }, + { + "epoch": 1.15, + "learning_rate": 3.3880742217777115e-05, + "loss": 0.9743, + "step": 2085 + }, + { + "epoch": 1.16, + "learning_rate": 3.381302681312759e-05, + "loss": 0.9469, + "step": 2090 + }, + { + "epoch": 1.16, + "learning_rate": 3.374523749473767e-05, + "loss": 0.949, + "step": 2095 + }, + { + "epoch": 1.16, + "learning_rate": 3.367737483114779e-05, + "loss": 0.9567, + "step": 2100 + }, + { + "epoch": 1.16, + "learning_rate": 3.360943939151351e-05, + "loss": 0.9718, + "step": 2105 + }, + { + "epoch": 1.17, + "learning_rate": 3.354143174560078e-05, + "loss": 0.9626, + "step": 2110 + }, + { + "epoch": 1.17, + "learning_rate": 3.3473352463781105e-05, + "loss": 0.9346, + "step": 2115 + }, + { + "epoch": 1.17, + "learning_rate": 3.340520211702681e-05, + "loss": 0.9208, + "step": 2120 + }, + { + "epoch": 1.18, + "learning_rate": 3.333698127690623e-05, + "loss": 0.9856, + "step": 2125 + }, + { + "epoch": 1.18, + "learning_rate": 3.326869051557891e-05, + "loss": 0.9049, + "step": 2130 + }, + { + "epoch": 1.18, + "learning_rate": 3.320033040579082e-05, + "loss": 0.9222, + "step": 2135 + }, + { + "epoch": 1.18, + "learning_rate": 3.3131901520869565e-05, + "loss": 0.9648, + "step": 2140 + }, + { + "epoch": 1.19, + "learning_rate": 3.306340443471951e-05, + "loss": 0.9538, + "step": 2145 + }, + { + "epoch": 1.19, + "learning_rate": 3.299483972181708e-05, + "loss": 0.9314, + "step": 2150 + }, + { + "epoch": 1.19, + "learning_rate": 3.292620795720583e-05, + "loss": 0.9576, + "step": 2155 + }, + { + "epoch": 1.19, + "learning_rate": 3.285750971649167e-05, + "loss": 0.9499, + "step": 2160 + }, + { + "epoch": 1.2, + "learning_rate": 3.278874557583807e-05, + "loss": 0.9568, + "step": 2165 + }, + { + "epoch": 1.2, + "learning_rate": 3.271991611196117e-05, + "loss": 0.9642, + "step": 2170 + }, + { + "epoch": 1.2, + "learning_rate": 3.265102190212497e-05, + "loss": 0.9526, + "step": 2175 + }, + { + "epoch": 1.21, + "learning_rate": 3.258206352413648e-05, + "loss": 0.933, + "step": 2180 + }, + { + "epoch": 1.21, + "learning_rate": 3.2513041556340887e-05, + "loss": 0.9683, + "step": 2185 + }, + { + "epoch": 1.21, + "learning_rate": 3.244395657761671e-05, + "loss": 0.9155, + "step": 2190 + }, + { + "epoch": 1.21, + "learning_rate": 3.2374809167370924e-05, + "loss": 0.9262, + "step": 2195 + }, + { + "epoch": 1.22, + "learning_rate": 3.230559990553409e-05, + "loss": 0.9778, + "step": 2200 + }, + { + "epoch": 1.22, + "learning_rate": 3.2236329372555544e-05, + "loss": 0.9577, + "step": 2205 + }, + { + "epoch": 1.22, + "learning_rate": 3.2166998149398465e-05, + "loss": 0.9286, + "step": 2210 + }, + { + "epoch": 1.22, + "learning_rate": 3.209760681753505e-05, + "loss": 0.9634, + "step": 2215 + }, + { + "epoch": 1.23, + "learning_rate": 3.2028155958941615e-05, + "loss": 0.9467, + "step": 2220 + }, + { + "epoch": 1.23, + "learning_rate": 3.195864615609373e-05, + "loss": 0.9543, + "step": 2225 + }, + { + "epoch": 1.23, + "learning_rate": 3.1889077991961304e-05, + "loss": 0.9914, + "step": 2230 + }, + { + "epoch": 1.24, + "learning_rate": 3.181945205000373e-05, + "loss": 0.9309, + "step": 2235 + }, + { + "epoch": 1.24, + "learning_rate": 3.1749768914164955e-05, + "loss": 0.9299, + "step": 2240 + }, + { + "epoch": 1.24, + "learning_rate": 3.168002916886864e-05, + "loss": 0.9462, + "step": 2245 + }, + { + "epoch": 1.24, + "learning_rate": 3.1610233399013194e-05, + "loss": 0.948, + "step": 2250 + }, + { + "epoch": 1.25, + "learning_rate": 3.15403821899669e-05, + "loss": 0.9335, + "step": 2255 + }, + { + "epoch": 1.25, + "learning_rate": 3.147047612756302e-05, + "loss": 0.952, + "step": 2260 + }, + { + "epoch": 1.25, + "learning_rate": 3.140051579809484e-05, + "loss": 0.9532, + "step": 2265 + }, + { + "epoch": 1.26, + "learning_rate": 3.133050178831079e-05, + "loss": 0.9853, + "step": 2270 + }, + { + "epoch": 1.26, + "learning_rate": 3.12604346854095e-05, + "loss": 0.8925, + "step": 2275 + }, + { + "epoch": 1.26, + "learning_rate": 3.119031507703491e-05, + "loss": 0.942, + "step": 2280 + }, + { + "epoch": 1.26, + "learning_rate": 3.112014355127129e-05, + "loss": 0.9132, + "step": 2285 + }, + { + "epoch": 1.27, + "learning_rate": 3.104992069663835e-05, + "loss": 0.9335, + "step": 2290 + }, + { + "epoch": 1.27, + "learning_rate": 3.0979647102086273e-05, + "loss": 0.9403, + "step": 2295 + }, + { + "epoch": 1.27, + "learning_rate": 3.090932335699081e-05, + "loss": 0.9246, + "step": 2300 + }, + { + "epoch": 1.27, + "learning_rate": 3.083895005114831e-05, + "loss": 0.912, + "step": 2305 + }, + { + "epoch": 1.28, + "learning_rate": 3.076852777477079e-05, + "loss": 0.9334, + "step": 2310 + }, + { + "epoch": 1.28, + "learning_rate": 3.069805711848096e-05, + "loss": 0.933, + "step": 2315 + }, + { + "epoch": 1.28, + "learning_rate": 3.062753867330729e-05, + "loss": 0.9348, + "step": 2320 + }, + { + "epoch": 1.29, + "learning_rate": 3.055697303067905e-05, + "loss": 0.9997, + "step": 2325 + }, + { + "epoch": 1.29, + "learning_rate": 3.048636078242137e-05, + "loss": 0.9196, + "step": 2330 + }, + { + "epoch": 1.29, + "learning_rate": 3.0415702520750235e-05, + "loss": 0.9735, + "step": 2335 + }, + { + "epoch": 1.29, + "learning_rate": 3.0344998838267525e-05, + "loss": 0.9395, + "step": 2340 + }, + { + "epoch": 1.3, + "learning_rate": 3.0274250327956093e-05, + "loss": 0.9455, + "step": 2345 + }, + { + "epoch": 1.3, + "learning_rate": 3.020345758317474e-05, + "loss": 0.968, + "step": 2350 + }, + { + "epoch": 1.3, + "learning_rate": 3.0132621197653245e-05, + "loss": 0.9403, + "step": 2355 + }, + { + "epoch": 1.3, + "learning_rate": 3.0061741765487418e-05, + "loss": 0.9267, + "step": 2360 + }, + { + "epoch": 1.31, + "learning_rate": 2.9990819881134073e-05, + "loss": 0.9734, + "step": 2365 + }, + { + "epoch": 1.31, + "learning_rate": 2.9919856139406093e-05, + "loss": 0.949, + "step": 2370 + }, + { + "epoch": 1.31, + "learning_rate": 2.9848851135467386e-05, + "loss": 0.9464, + "step": 2375 + }, + { + "epoch": 1.32, + "learning_rate": 2.977780546482794e-05, + "loss": 0.9709, + "step": 2380 + }, + { + "epoch": 1.32, + "learning_rate": 2.9706719723338795e-05, + "loss": 0.9202, + "step": 2385 + }, + { + "epoch": 1.32, + "learning_rate": 2.9635594507187074e-05, + "loss": 0.9625, + "step": 2390 + }, + { + "epoch": 1.32, + "learning_rate": 2.956443041289096e-05, + "loss": 0.9307, + "step": 2395 + }, + { + "epoch": 1.33, + "learning_rate": 2.9493228037294702e-05, + "loss": 0.907, + "step": 2400 + }, + { + "epoch": 1.33, + "learning_rate": 2.9421987977563613e-05, + "loss": 0.9487, + "step": 2405 + }, + { + "epoch": 1.33, + "learning_rate": 2.935071083117907e-05, + "loss": 0.9259, + "step": 2410 + }, + { + "epoch": 1.34, + "learning_rate": 2.9279397195933457e-05, + "loss": 0.9357, + "step": 2415 + }, + { + "epoch": 1.34, + "learning_rate": 2.920804766992521e-05, + "loss": 0.9149, + "step": 2420 + }, + { + "epoch": 1.34, + "learning_rate": 2.9136662851553787e-05, + "loss": 0.9664, + "step": 2425 + }, + { + "epoch": 1.34, + "learning_rate": 2.906524333951461e-05, + "loss": 0.973, + "step": 2430 + }, + { + "epoch": 1.35, + "learning_rate": 2.899378973279409e-05, + "loss": 0.9797, + "step": 2435 + }, + { + "epoch": 1.35, + "learning_rate": 2.892230263066459e-05, + "loss": 0.9604, + "step": 2440 + }, + { + "epoch": 1.35, + "learning_rate": 2.885078263267938e-05, + "loss": 0.9147, + "step": 2445 + }, + { + "epoch": 1.35, + "learning_rate": 2.8779230338667634e-05, + "loss": 0.9827, + "step": 2450 + }, + { + "epoch": 1.36, + "learning_rate": 2.870764634872939e-05, + "loss": 0.9362, + "step": 2455 + }, + { + "epoch": 1.36, + "learning_rate": 2.86360312632305e-05, + "loss": 0.9373, + "step": 2460 + }, + { + "epoch": 1.36, + "learning_rate": 2.8564385682797622e-05, + "loss": 0.955, + "step": 2465 + }, + { + "epoch": 1.37, + "learning_rate": 2.8492710208313177e-05, + "loss": 0.9108, + "step": 2470 + }, + { + "epoch": 1.37, + "learning_rate": 2.8421005440910303e-05, + "loss": 0.9259, + "step": 2475 + }, + { + "epoch": 1.37, + "learning_rate": 2.8349271981967797e-05, + "loss": 0.9333, + "step": 2480 + }, + { + "epoch": 1.37, + "learning_rate": 2.8277510433105102e-05, + "loss": 0.9437, + "step": 2485 + }, + { + "epoch": 1.38, + "learning_rate": 2.820572139617725e-05, + "loss": 0.9189, + "step": 2490 + }, + { + "epoch": 1.38, + "learning_rate": 2.8133905473269802e-05, + "loss": 0.9522, + "step": 2495 + }, + { + "epoch": 1.38, + "learning_rate": 2.8062063266693818e-05, + "loss": 0.9352, + "step": 2500 + }, + { + "epoch": 1.39, + "learning_rate": 2.7990195378980784e-05, + "loss": 0.9232, + "step": 2505 + }, + { + "epoch": 1.39, + "learning_rate": 2.7918302412877583e-05, + "loss": 0.8807, + "step": 2510 + }, + { + "epoch": 1.39, + "learning_rate": 2.7846384971341427e-05, + "loss": 0.9231, + "step": 2515 + }, + { + "epoch": 1.39, + "learning_rate": 2.7774443657534788e-05, + "loss": 0.9253, + "step": 2520 + }, + { + "epoch": 1.4, + "learning_rate": 2.770247907482036e-05, + "loss": 0.9646, + "step": 2525 + }, + { + "epoch": 1.4, + "learning_rate": 2.763049182675599e-05, + "loss": 0.9575, + "step": 2530 + }, + { + "epoch": 1.4, + "learning_rate": 2.7558482517089617e-05, + "loss": 0.9234, + "step": 2535 + }, + { + "epoch": 1.4, + "learning_rate": 2.748645174975421e-05, + "loss": 0.9215, + "step": 2540 + }, + { + "epoch": 1.41, + "learning_rate": 2.74144001288627e-05, + "loss": 0.959, + "step": 2545 + }, + { + "epoch": 1.41, + "learning_rate": 2.7342328258702894e-05, + "loss": 0.9509, + "step": 2550 + }, + { + "epoch": 1.41, + "learning_rate": 2.727023674373246e-05, + "loss": 0.8989, + "step": 2555 + }, + { + "epoch": 1.42, + "learning_rate": 2.7198126188573807e-05, + "loss": 0.9653, + "step": 2560 + }, + { + "epoch": 1.42, + "learning_rate": 2.7125997198009028e-05, + "loss": 0.9046, + "step": 2565 + }, + { + "epoch": 1.42, + "learning_rate": 2.7053850376974848e-05, + "loss": 0.9318, + "step": 2570 + }, + { + "epoch": 1.42, + "learning_rate": 2.6981686330557516e-05, + "loss": 0.9292, + "step": 2575 + }, + { + "epoch": 1.43, + "learning_rate": 2.6909505663987756e-05, + "loss": 0.99, + "step": 2580 + }, + { + "epoch": 1.43, + "learning_rate": 2.6837308982635678e-05, + "loss": 0.9737, + "step": 2585 + }, + { + "epoch": 1.43, + "learning_rate": 2.6765096892005726e-05, + "loss": 0.9649, + "step": 2590 + }, + { + "epoch": 1.43, + "learning_rate": 2.6692869997731545e-05, + "loss": 0.9687, + "step": 2595 + }, + { + "epoch": 1.44, + "learning_rate": 2.6620628905570964e-05, + "loss": 0.9708, + "step": 2600 + }, + { + "epoch": 1.44, + "learning_rate": 2.6548374221400884e-05, + "loss": 0.9498, + "step": 2605 + }, + { + "epoch": 1.44, + "learning_rate": 2.6476106551212188e-05, + "loss": 0.9512, + "step": 2610 + }, + { + "epoch": 1.45, + "learning_rate": 2.6403826501104682e-05, + "loss": 0.964, + "step": 2615 + }, + { + "epoch": 1.45, + "learning_rate": 2.6331534677281998e-05, + "loss": 0.9321, + "step": 2620 + }, + { + "epoch": 1.45, + "learning_rate": 2.6259231686046508e-05, + "loss": 0.9032, + "step": 2625 + }, + { + "epoch": 1.45, + "learning_rate": 2.6186918133794252e-05, + "loss": 0.9543, + "step": 2630 + }, + { + "epoch": 1.46, + "learning_rate": 2.6114594627009847e-05, + "loss": 0.9355, + "step": 2635 + }, + { + "epoch": 1.46, + "learning_rate": 2.604226177226137e-05, + "loss": 0.9111, + "step": 2640 + }, + { + "epoch": 1.46, + "learning_rate": 2.596992017619534e-05, + "loss": 0.9896, + "step": 2645 + }, + { + "epoch": 1.47, + "learning_rate": 2.589757044553155e-05, + "loss": 0.9715, + "step": 2650 + }, + { + "epoch": 1.47, + "learning_rate": 2.5825213187058045e-05, + "loss": 0.8911, + "step": 2655 + }, + { + "epoch": 1.47, + "learning_rate": 2.5752849007625986e-05, + "loss": 0.9446, + "step": 2660 + }, + { + "epoch": 1.47, + "learning_rate": 2.568047851414459e-05, + "loss": 0.9453, + "step": 2665 + }, + { + "epoch": 1.48, + "learning_rate": 2.5608102313576027e-05, + "loss": 0.9244, + "step": 2670 + }, + { + "epoch": 1.48, + "learning_rate": 2.553572101293033e-05, + "loss": 0.9478, + "step": 2675 + }, + { + "epoch": 1.48, + "learning_rate": 2.546333521926031e-05, + "loss": 0.9708, + "step": 2680 + }, + { + "epoch": 1.48, + "learning_rate": 2.5390945539656445e-05, + "loss": 0.9266, + "step": 2685 + }, + { + "epoch": 1.49, + "learning_rate": 2.5318552581241822e-05, + "loss": 0.9199, + "step": 2690 + }, + { + "epoch": 1.49, + "learning_rate": 2.524615695116702e-05, + "loss": 0.9666, + "step": 2695 + }, + { + "epoch": 1.49, + "learning_rate": 2.5173759256605027e-05, + "loss": 0.9247, + "step": 2700 + }, + { + "epoch": 1.5, + "learning_rate": 2.510136010474614e-05, + "loss": 0.9534, + "step": 2705 + }, + { + "epoch": 1.5, + "learning_rate": 2.5028960102792887e-05, + "loss": 0.9502, + "step": 2710 + }, + { + "epoch": 1.5, + "learning_rate": 2.4971039897207112e-05, + "loss": 0.9331, + "step": 2715 + }, + { + "epoch": 1.5, + "learning_rate": 2.4898639895253865e-05, + "loss": 0.9806, + "step": 2720 + }, + { + "epoch": 1.51, + "learning_rate": 2.4826240743394982e-05, + "loss": 0.9525, + "step": 2725 + }, + { + "epoch": 1.51, + "learning_rate": 2.4753843048832985e-05, + "loss": 0.937, + "step": 2730 + }, + { + "epoch": 1.51, + "learning_rate": 2.4681447418758187e-05, + "loss": 0.935, + "step": 2735 + }, + { + "epoch": 1.52, + "learning_rate": 2.460905446034356e-05, + "loss": 0.9662, + "step": 2740 + }, + { + "epoch": 1.52, + "learning_rate": 2.45366647807397e-05, + "loss": 0.9249, + "step": 2745 + }, + { + "epoch": 1.52, + "learning_rate": 2.446427898706967e-05, + "loss": 0.9673, + "step": 2750 + }, + { + "epoch": 1.52, + "learning_rate": 2.439189768642398e-05, + "loss": 0.9316, + "step": 2755 + }, + { + "epoch": 1.53, + "learning_rate": 2.431952148585541e-05, + "loss": 0.9297, + "step": 2760 + }, + { + "epoch": 1.53, + "learning_rate": 2.424715099237402e-05, + "loss": 0.9278, + "step": 2765 + }, + { + "epoch": 1.53, + "learning_rate": 2.4174786812941968e-05, + "loss": 0.8954, + "step": 2770 + }, + { + "epoch": 1.53, + "learning_rate": 2.4102429554468456e-05, + "loss": 0.9586, + "step": 2775 + }, + { + "epoch": 1.54, + "learning_rate": 2.4030079823804673e-05, + "loss": 0.9119, + "step": 2780 + }, + { + "epoch": 1.54, + "learning_rate": 2.395773822773863e-05, + "loss": 0.8949, + "step": 2785 + }, + { + "epoch": 1.54, + "learning_rate": 2.3885405372990166e-05, + "loss": 0.9506, + "step": 2790 + }, + { + "epoch": 1.55, + "learning_rate": 2.3813081866205754e-05, + "loss": 0.9087, + "step": 2795 + }, + { + "epoch": 1.55, + "learning_rate": 2.3740768313953494e-05, + "loss": 0.923, + "step": 2800 + }, + { + "epoch": 1.55, + "learning_rate": 2.3668465322718004e-05, + "loss": 1.0212, + "step": 2805 + }, + { + "epoch": 1.55, + "learning_rate": 2.359617349889532e-05, + "loss": 0.9182, + "step": 2810 + }, + { + "epoch": 1.56, + "learning_rate": 2.3523893448787818e-05, + "loss": 0.9316, + "step": 2815 + }, + { + "epoch": 1.56, + "learning_rate": 2.3451625778599122e-05, + "loss": 0.94, + "step": 2820 + }, + { + "epoch": 1.56, + "learning_rate": 2.3379371094429038e-05, + "loss": 0.9181, + "step": 2825 + }, + { + "epoch": 1.56, + "learning_rate": 2.3307130002268457e-05, + "loss": 0.937, + "step": 2830 + }, + { + "epoch": 1.57, + "learning_rate": 2.3234903107994287e-05, + "loss": 0.9026, + "step": 2835 + }, + { + "epoch": 1.57, + "learning_rate": 2.3162691017364317e-05, + "loss": 0.954, + "step": 2840 + }, + { + "epoch": 1.57, + "learning_rate": 2.3090494336012253e-05, + "loss": 0.9661, + "step": 2845 + }, + { + "epoch": 1.58, + "learning_rate": 2.3018313669442483e-05, + "loss": 0.9127, + "step": 2850 + }, + { + "epoch": 1.58, + "learning_rate": 2.2946149623025158e-05, + "loss": 0.9317, + "step": 2855 + }, + { + "epoch": 1.58, + "learning_rate": 2.2874002801990978e-05, + "loss": 0.9856, + "step": 2860 + }, + { + "epoch": 1.58, + "learning_rate": 2.28018738114262e-05, + "loss": 1.0021, + "step": 2865 + }, + { + "epoch": 1.59, + "learning_rate": 2.272976325626755e-05, + "loss": 0.9655, + "step": 2870 + }, + { + "epoch": 1.59, + "learning_rate": 2.265767174129711e-05, + "loss": 0.9619, + "step": 2875 + }, + { + "epoch": 1.59, + "learning_rate": 2.2585599871137313e-05, + "loss": 0.9383, + "step": 2880 + }, + { + "epoch": 1.6, + "learning_rate": 2.251354825024579e-05, + "loss": 0.9332, + "step": 2885 + }, + { + "epoch": 1.6, + "learning_rate": 2.244151748291039e-05, + "loss": 0.9544, + "step": 2890 + }, + { + "epoch": 1.6, + "learning_rate": 2.236950817324401e-05, + "loss": 0.9343, + "step": 2895 + }, + { + "epoch": 1.6, + "learning_rate": 2.2297520925179647e-05, + "loss": 0.9189, + "step": 2900 + }, + { + "epoch": 1.61, + "learning_rate": 2.222555634246521e-05, + "loss": 0.9341, + "step": 2905 + }, + { + "epoch": 1.61, + "learning_rate": 2.215361502865858e-05, + "loss": 0.9567, + "step": 2910 + }, + { + "epoch": 1.61, + "learning_rate": 2.2081697587122423e-05, + "loss": 0.9047, + "step": 2915 + }, + { + "epoch": 1.61, + "learning_rate": 2.200980462101922e-05, + "loss": 0.9126, + "step": 2920 + }, + { + "epoch": 1.62, + "learning_rate": 2.1937936733306195e-05, + "loss": 0.9523, + "step": 2925 + }, + { + "epoch": 1.62, + "learning_rate": 2.18660945267302e-05, + "loss": 0.8802, + "step": 2930 + }, + { + "epoch": 1.62, + "learning_rate": 2.179427860382276e-05, + "loss": 0.9197, + "step": 2935 + }, + { + "epoch": 1.63, + "learning_rate": 2.1722489566894903e-05, + "loss": 0.9255, + "step": 2940 + }, + { + "epoch": 1.63, + "learning_rate": 2.1650728018032206e-05, + "loss": 0.8921, + "step": 2945 + }, + { + "epoch": 1.63, + "learning_rate": 2.15789945590897e-05, + "loss": 0.9607, + "step": 2950 + }, + { + "epoch": 1.63, + "learning_rate": 2.150728979168683e-05, + "loss": 0.9755, + "step": 2955 + }, + { + "epoch": 1.64, + "learning_rate": 2.1435614317202384e-05, + "loss": 0.9943, + "step": 2960 + }, + { + "epoch": 1.64, + "learning_rate": 2.1363968736769508e-05, + "loss": 0.9463, + "step": 2965 + }, + { + "epoch": 1.64, + "learning_rate": 2.1292353651270617e-05, + "loss": 0.9107, + "step": 2970 + }, + { + "epoch": 1.65, + "learning_rate": 2.1220769661332365e-05, + "loss": 0.9311, + "step": 2975 + }, + { + "epoch": 1.65, + "learning_rate": 2.1149217367320622e-05, + "loss": 0.9459, + "step": 2980 + }, + { + "epoch": 1.65, + "learning_rate": 2.107769736933541e-05, + "loss": 0.9439, + "step": 2985 + }, + { + "epoch": 1.65, + "learning_rate": 2.100621026720591e-05, + "loss": 0.9719, + "step": 2990 + }, + { + "epoch": 1.66, + "learning_rate": 2.093475666048539e-05, + "loss": 0.9569, + "step": 2995 + }, + { + "epoch": 1.66, + "learning_rate": 2.0863337148446222e-05, + "loss": 0.9308, + "step": 3000 + }, + { + "epoch": 1.66, + "learning_rate": 2.07919523300748e-05, + "loss": 0.9269, + "step": 3005 + }, + { + "epoch": 1.66, + "learning_rate": 2.0720602804066552e-05, + "loss": 0.9358, + "step": 3010 + }, + { + "epoch": 1.67, + "learning_rate": 2.0649289168820943e-05, + "loss": 0.9291, + "step": 3015 + }, + { + "epoch": 1.67, + "learning_rate": 2.0578012022436386e-05, + "loss": 0.969, + "step": 3020 + }, + { + "epoch": 1.67, + "learning_rate": 2.0506771962705304e-05, + "loss": 0.9319, + "step": 3025 + }, + { + "epoch": 1.68, + "learning_rate": 2.0435569587109042e-05, + "loss": 0.9574, + "step": 3030 + }, + { + "epoch": 1.68, + "learning_rate": 2.036440549281293e-05, + "loss": 0.9654, + "step": 3035 + }, + { + "epoch": 1.68, + "learning_rate": 2.0293280276661204e-05, + "loss": 0.9742, + "step": 3040 + }, + { + "epoch": 1.68, + "learning_rate": 2.0222194535172067e-05, + "loss": 0.9014, + "step": 3045 + }, + { + "epoch": 1.69, + "learning_rate": 2.0151148864532623e-05, + "loss": 0.9246, + "step": 3050 + }, + { + "epoch": 1.69, + "learning_rate": 2.0080143860593913e-05, + "loss": 0.9522, + "step": 3055 + }, + { + "epoch": 1.69, + "learning_rate": 2.0009180118865933e-05, + "loss": 0.9567, + "step": 3060 + }, + { + "epoch": 1.69, + "learning_rate": 1.9938258234512588e-05, + "loss": 0.983, + "step": 3065 + }, + { + "epoch": 1.7, + "learning_rate": 1.9867378802346764e-05, + "loss": 0.8722, + "step": 3070 + }, + { + "epoch": 1.7, + "learning_rate": 1.979654241682527e-05, + "loss": 0.9122, + "step": 3075 + }, + { + "epoch": 1.7, + "learning_rate": 1.972574967204391e-05, + "loss": 0.9362, + "step": 3080 + }, + { + "epoch": 1.71, + "learning_rate": 1.9655001161732478e-05, + "loss": 0.8944, + "step": 3085 + }, + { + "epoch": 1.71, + "learning_rate": 1.9584297479249774e-05, + "loss": 0.9329, + "step": 3090 + }, + { + "epoch": 1.71, + "learning_rate": 1.9513639217578636e-05, + "loss": 0.9707, + "step": 3095 + }, + { + "epoch": 1.71, + "learning_rate": 1.9443026969320955e-05, + "loss": 0.9367, + "step": 3100 + }, + { + "epoch": 1.72, + "learning_rate": 1.937246132669272e-05, + "loss": 0.896, + "step": 3105 + }, + { + "epoch": 1.72, + "learning_rate": 1.9301942881519047e-05, + "loss": 0.97, + "step": 3110 + }, + { + "epoch": 1.72, + "learning_rate": 1.9231472225229216e-05, + "loss": 0.9638, + "step": 3115 + }, + { + "epoch": 1.73, + "learning_rate": 1.9161049948851684e-05, + "loss": 0.9561, + "step": 3120 + }, + { + "epoch": 1.73, + "learning_rate": 1.9090676643009193e-05, + "loss": 0.9734, + "step": 3125 + }, + { + "epoch": 1.73, + "learning_rate": 1.902035289791373e-05, + "loss": 0.9651, + "step": 3130 + }, + { + "epoch": 1.73, + "learning_rate": 1.8950079303361658e-05, + "loss": 0.9489, + "step": 3135 + }, + { + "epoch": 1.74, + "learning_rate": 1.8879856448728723e-05, + "loss": 0.9893, + "step": 3140 + }, + { + "epoch": 1.74, + "learning_rate": 1.8809684922965097e-05, + "loss": 0.9549, + "step": 3145 + }, + { + "epoch": 1.74, + "learning_rate": 1.8739565314590507e-05, + "loss": 0.9196, + "step": 3150 + }, + { + "epoch": 1.74, + "learning_rate": 1.8669498211689216e-05, + "loss": 0.9568, + "step": 3155 + }, + { + "epoch": 1.75, + "learning_rate": 1.859948420190517e-05, + "loss": 0.904, + "step": 3160 + }, + { + "epoch": 1.75, + "learning_rate": 1.852952387243698e-05, + "loss": 0.9375, + "step": 3165 + }, + { + "epoch": 1.75, + "learning_rate": 1.8459617810033096e-05, + "loss": 0.9048, + "step": 3170 + }, + { + "epoch": 1.76, + "learning_rate": 1.83897666009868e-05, + "loss": 0.9514, + "step": 3175 + }, + { + "epoch": 1.76, + "learning_rate": 1.8319970831131363e-05, + "loss": 0.8855, + "step": 3180 + }, + { + "epoch": 1.76, + "learning_rate": 1.825023108583505e-05, + "loss": 0.9119, + "step": 3185 + }, + { + "epoch": 1.76, + "learning_rate": 1.818054794999628e-05, + "loss": 0.9733, + "step": 3190 + }, + { + "epoch": 1.77, + "learning_rate": 1.8110922008038705e-05, + "loss": 0.9597, + "step": 3195 + }, + { + "epoch": 1.77, + "learning_rate": 1.8041353843906275e-05, + "loss": 0.8952, + "step": 3200 + }, + { + "epoch": 1.77, + "learning_rate": 1.797184404105839e-05, + "loss": 0.8934, + "step": 3205 + }, + { + "epoch": 1.77, + "learning_rate": 1.7902393182464955e-05, + "loss": 0.9299, + "step": 3210 + }, + { + "epoch": 1.78, + "learning_rate": 1.7833001850601544e-05, + "loss": 0.9247, + "step": 3215 + }, + { + "epoch": 1.78, + "learning_rate": 1.7763670627444465e-05, + "loss": 0.9672, + "step": 3220 + }, + { + "epoch": 1.78, + "learning_rate": 1.7694400094465913e-05, + "loss": 0.9451, + "step": 3225 + }, + { + "epoch": 1.79, + "learning_rate": 1.7625190832629085e-05, + "loss": 0.9294, + "step": 3230 + }, + { + "epoch": 1.79, + "learning_rate": 1.7556043422383293e-05, + "loss": 0.9145, + "step": 3235 + }, + { + "epoch": 1.79, + "learning_rate": 1.7486958443659112e-05, + "loss": 0.9508, + "step": 3240 + }, + { + "epoch": 1.79, + "learning_rate": 1.7417936475863526e-05, + "loss": 0.8725, + "step": 3245 + }, + { + "epoch": 1.8, + "learning_rate": 1.7348978097875036e-05, + "loss": 0.9195, + "step": 3250 + }, + { + "epoch": 1.8, + "learning_rate": 1.728008388803883e-05, + "loss": 0.933, + "step": 3255 + }, + { + "epoch": 1.8, + "learning_rate": 1.7211254424161933e-05, + "loss": 0.9747, + "step": 3260 + }, + { + "epoch": 1.81, + "learning_rate": 1.7142490283508324e-05, + "loss": 0.9168, + "step": 3265 + }, + { + "epoch": 1.81, + "learning_rate": 1.707379204279418e-05, + "loss": 0.9844, + "step": 3270 + }, + { + "epoch": 1.81, + "learning_rate": 1.700516027818293e-05, + "loss": 0.9071, + "step": 3275 + }, + { + "epoch": 1.81, + "learning_rate": 1.6936595565280488e-05, + "loss": 0.9311, + "step": 3280 + }, + { + "epoch": 1.82, + "learning_rate": 1.686809847913045e-05, + "loss": 0.9419, + "step": 3285 + }, + { + "epoch": 1.82, + "learning_rate": 1.679966959420918e-05, + "loss": 0.9679, + "step": 3290 + }, + { + "epoch": 1.82, + "learning_rate": 1.67313094844211e-05, + "loss": 0.9601, + "step": 3295 + }, + { + "epoch": 1.82, + "learning_rate": 1.6663018723093774e-05, + "loss": 0.9022, + "step": 3300 + }, + { + "epoch": 1.83, + "learning_rate": 1.6594797882973196e-05, + "loss": 0.8925, + "step": 3305 + }, + { + "epoch": 1.83, + "learning_rate": 1.6526647536218894e-05, + "loss": 0.9238, + "step": 3310 + }, + { + "epoch": 1.83, + "learning_rate": 1.6458568254399225e-05, + "loss": 0.9632, + "step": 3315 + }, + { + "epoch": 1.84, + "learning_rate": 1.6390560608486496e-05, + "loss": 0.9164, + "step": 3320 + }, + { + "epoch": 1.84, + "learning_rate": 1.6322625168852217e-05, + "loss": 0.9505, + "step": 3325 + }, + { + "epoch": 1.84, + "learning_rate": 1.6254762505262338e-05, + "loss": 0.9622, + "step": 3330 + }, + { + "epoch": 1.84, + "learning_rate": 1.618697318687241e-05, + "loss": 0.9204, + "step": 3335 + }, + { + "epoch": 1.85, + "learning_rate": 1.6119257782222895e-05, + "loss": 0.9504, + "step": 3340 + }, + { + "epoch": 1.85, + "learning_rate": 1.6051616859234285e-05, + "loss": 0.9384, + "step": 3345 + }, + { + "epoch": 1.85, + "learning_rate": 1.5984050985202474e-05, + "loss": 0.9374, + "step": 3350 + }, + { + "epoch": 1.86, + "learning_rate": 1.591656072679387e-05, + "loss": 0.9937, + "step": 3355 + }, + { + "epoch": 1.86, + "learning_rate": 1.5849146650040737e-05, + "loss": 0.9587, + "step": 3360 + }, + { + "epoch": 1.86, + "learning_rate": 1.5781809320336412e-05, + "loss": 0.9312, + "step": 3365 + }, + { + "epoch": 1.86, + "learning_rate": 1.5714549302430536e-05, + "loss": 0.9343, + "step": 3370 + }, + { + "epoch": 1.87, + "learning_rate": 1.5647367160424393e-05, + "loss": 0.9419, + "step": 3375 + }, + { + "epoch": 1.87, + "learning_rate": 1.558026345776608e-05, + "loss": 0.9371, + "step": 3380 + }, + { + "epoch": 1.87, + "learning_rate": 1.551323875724587e-05, + "loss": 0.913, + "step": 3385 + }, + { + "epoch": 1.87, + "learning_rate": 1.5446293620991437e-05, + "loss": 0.9195, + "step": 3390 + }, + { + "epoch": 1.88, + "learning_rate": 1.5379428610463174e-05, + "loss": 0.9164, + "step": 3395 + }, + { + "epoch": 1.88, + "learning_rate": 1.531264428644945e-05, + "loss": 0.9256, + "step": 3400 + }, + { + "epoch": 1.88, + "learning_rate": 1.5245941209061953e-05, + "loss": 0.9658, + "step": 3405 + }, + { + "epoch": 1.89, + "learning_rate": 1.517931993773094e-05, + "loss": 0.9397, + "step": 3410 + }, + { + "epoch": 1.89, + "learning_rate": 1.5112781031200569e-05, + "loss": 0.915, + "step": 3415 + }, + { + "epoch": 1.89, + "learning_rate": 1.5046325047524251e-05, + "loss": 0.9116, + "step": 3420 + }, + { + "epoch": 1.89, + "learning_rate": 1.4979952544059888e-05, + "loss": 0.9192, + "step": 3425 + }, + { + "epoch": 1.9, + "learning_rate": 1.4913664077465289e-05, + "loss": 0.924, + "step": 3430 + }, + { + "epoch": 1.9, + "learning_rate": 1.4847460203693408e-05, + "loss": 0.9579, + "step": 3435 + }, + { + "epoch": 1.9, + "learning_rate": 1.4781341477987776e-05, + "loss": 0.9816, + "step": 3440 + }, + { + "epoch": 1.9, + "learning_rate": 1.4715308454877758e-05, + "loss": 0.9412, + "step": 3445 + }, + { + "epoch": 1.91, + "learning_rate": 1.4649361688173979e-05, + "loss": 0.9413, + "step": 3450 + }, + { + "epoch": 1.91, + "learning_rate": 1.458350173096361e-05, + "loss": 0.9425, + "step": 3455 + }, + { + "epoch": 1.91, + "learning_rate": 1.4517729135605795e-05, + "loss": 0.9132, + "step": 3460 + }, + { + "epoch": 1.92, + "learning_rate": 1.4452044453726942e-05, + "loss": 0.9455, + "step": 3465 + }, + { + "epoch": 1.92, + "learning_rate": 1.4386448236216174e-05, + "loss": 0.9543, + "step": 3470 + }, + { + "epoch": 1.92, + "learning_rate": 1.4320941033220667e-05, + "loss": 0.9118, + "step": 3475 + }, + { + "epoch": 1.92, + "learning_rate": 1.4255523394141041e-05, + "loss": 0.9425, + "step": 3480 + }, + { + "epoch": 1.93, + "learning_rate": 1.4190195867626749e-05, + "loss": 0.8958, + "step": 3485 + }, + { + "epoch": 1.93, + "learning_rate": 1.4124959001571497e-05, + "loss": 0.944, + "step": 3490 + }, + { + "epoch": 1.93, + "learning_rate": 1.4059813343108616e-05, + "loss": 0.9611, + "step": 3495 + }, + { + "epoch": 1.94, + "learning_rate": 1.3994759438606501e-05, + "loss": 0.9448, + "step": 3500 + }, + { + "epoch": 1.94, + "learning_rate": 1.3929797833664013e-05, + "loss": 0.9659, + "step": 3505 + }, + { + "epoch": 1.94, + "learning_rate": 1.3864929073105922e-05, + "loss": 0.9178, + "step": 3510 + }, + { + "epoch": 1.94, + "learning_rate": 1.3800153700978282e-05, + "loss": 0.8965, + "step": 3515 + }, + { + "epoch": 1.95, + "learning_rate": 1.373547226054398e-05, + "loss": 0.9198, + "step": 3520 + }, + { + "epoch": 1.95, + "learning_rate": 1.367088529427803e-05, + "loss": 0.9398, + "step": 3525 + }, + { + "epoch": 1.95, + "learning_rate": 1.3606393343863182e-05, + "loss": 0.9423, + "step": 3530 + }, + { + "epoch": 1.95, + "learning_rate": 1.3541996950185227e-05, + "loss": 0.9592, + "step": 3535 + }, + { + "epoch": 1.96, + "learning_rate": 1.3477696653328598e-05, + "loss": 0.9489, + "step": 3540 + }, + { + "epoch": 1.96, + "learning_rate": 1.3413492992571713e-05, + "loss": 0.963, + "step": 3545 + }, + { + "epoch": 1.96, + "learning_rate": 1.3349386506382586e-05, + "loss": 0.9449, + "step": 3550 + }, + { + "epoch": 1.97, + "learning_rate": 1.3285377732414172e-05, + "loss": 0.9043, + "step": 3555 + }, + { + "epoch": 1.97, + "learning_rate": 1.3221467207499972e-05, + "loss": 0.9362, + "step": 3560 + }, + { + "epoch": 1.97, + "learning_rate": 1.3157655467649463e-05, + "loss": 0.945, + "step": 3565 + }, + { + "epoch": 1.97, + "learning_rate": 1.3093943048043634e-05, + "loss": 0.8693, + "step": 3570 + }, + { + "epoch": 1.98, + "learning_rate": 1.3030330483030479e-05, + "loss": 0.9659, + "step": 3575 + }, + { + "epoch": 1.98, + "learning_rate": 1.2966818306120535e-05, + "loss": 0.9036, + "step": 3580 + }, + { + "epoch": 1.98, + "learning_rate": 1.2903407049982386e-05, + "loss": 0.9282, + "step": 3585 + }, + { + "epoch": 1.99, + "learning_rate": 1.2840097246438215e-05, + "loss": 0.8747, + "step": 3590 + }, + { + "epoch": 1.99, + "learning_rate": 1.277688942645934e-05, + "loss": 0.936, + "step": 3595 + }, + { + "epoch": 1.99, + "learning_rate": 1.2713784120161725e-05, + "loss": 0.8945, + "step": 3600 + }, + { + "epoch": 1.99, + "learning_rate": 1.2650781856801598e-05, + "loss": 0.9273, + "step": 3605 + }, + { + "epoch": 2.0, + "learning_rate": 1.258788316477097e-05, + "loss": 0.9632, + "step": 3610 + }, + { + "epoch": 2.0, + "learning_rate": 1.2525088571593202e-05, + "loss": 0.8641, + "step": 3615 + }, + { + "epoch": 2.0, + "learning_rate": 1.2462398603918607e-05, + "loss": 0.9314, + "step": 3620 + }, + { + "epoch": 2.0, + "learning_rate": 1.2399813787520006e-05, + "loss": 0.9169, + "step": 3625 + }, + { + "epoch": 2.01, + "learning_rate": 1.2337334647288334e-05, + "loss": 0.9089, + "step": 3630 + }, + { + "epoch": 2.01, + "learning_rate": 1.2274961707228228e-05, + "loss": 0.9039, + "step": 3635 + }, + { + "epoch": 2.01, + "learning_rate": 1.2212695490453646e-05, + "loss": 0.9378, + "step": 3640 + }, + { + "epoch": 2.02, + "learning_rate": 1.2150536519183475e-05, + "loss": 0.9297, + "step": 3645 + }, + { + "epoch": 2.02, + "learning_rate": 1.2088485314737108e-05, + "loss": 0.9488, + "step": 3650 + }, + { + "epoch": 2.02, + "learning_rate": 1.2026542397530186e-05, + "loss": 0.9625, + "step": 3655 + }, + { + "epoch": 2.02, + "learning_rate": 1.1964708287070073e-05, + "loss": 0.8874, + "step": 3660 + }, + { + "epoch": 2.03, + "learning_rate": 1.1902983501951666e-05, + "loss": 0.9224, + "step": 3665 + }, + { + "epoch": 2.03, + "learning_rate": 1.1841368559852892e-05, + "loss": 0.9442, + "step": 3670 + }, + { + "epoch": 2.03, + "learning_rate": 1.17798639775305e-05, + "loss": 0.8688, + "step": 3675 + }, + { + "epoch": 2.03, + "learning_rate": 1.1718470270815608e-05, + "loss": 0.8912, + "step": 3680 + }, + { + "epoch": 2.04, + "learning_rate": 1.1657187954609496e-05, + "loss": 0.9131, + "step": 3685 + }, + { + "epoch": 2.04, + "learning_rate": 1.1596017542879168e-05, + "loss": 0.9827, + "step": 3690 + }, + { + "epoch": 2.04, + "learning_rate": 1.1534959548653132e-05, + "loss": 0.8588, + "step": 3695 + }, + { + "epoch": 2.05, + "learning_rate": 1.147401448401706e-05, + "loss": 0.9359, + "step": 3700 + }, + { + "epoch": 2.05, + "learning_rate": 1.1413182860109491e-05, + "loss": 0.9375, + "step": 3705 + }, + { + "epoch": 2.05, + "learning_rate": 1.1352465187117562e-05, + "loss": 0.9242, + "step": 3710 + }, + { + "epoch": 2.05, + "learning_rate": 1.1291861974272703e-05, + "loss": 0.9132, + "step": 3715 + }, + { + "epoch": 2.06, + "learning_rate": 1.1231373729846393e-05, + "loss": 0.9142, + "step": 3720 + }, + { + "epoch": 2.06, + "learning_rate": 1.1171000961145883e-05, + "loss": 0.9132, + "step": 3725 + }, + { + "epoch": 2.06, + "learning_rate": 1.1110744174509952e-05, + "loss": 0.9603, + "step": 3730 + }, + { + "epoch": 2.07, + "learning_rate": 1.1050603875304622e-05, + "loss": 0.9442, + "step": 3735 + }, + { + "epoch": 2.07, + "learning_rate": 1.0990580567918979e-05, + "loss": 0.9392, + "step": 3740 + }, + { + "epoch": 2.07, + "learning_rate": 1.0930674755760908e-05, + "loss": 0.9203, + "step": 3745 + }, + { + "epoch": 2.07, + "learning_rate": 1.0870886941252872e-05, + "loss": 0.9468, + "step": 3750 + }, + { + "epoch": 2.08, + "learning_rate": 1.0811217625827705e-05, + "loss": 0.9605, + "step": 3755 + }, + { + "epoch": 2.08, + "learning_rate": 1.0751667309924399e-05, + "loss": 0.9166, + "step": 3760 + }, + { + "epoch": 2.08, + "learning_rate": 1.0692236492983918e-05, + "loss": 0.939, + "step": 3765 + }, + { + "epoch": 2.08, + "learning_rate": 1.0632925673445001e-05, + "loss": 0.9409, + "step": 3770 + }, + { + "epoch": 2.09, + "learning_rate": 1.0573735348739987e-05, + "loss": 0.9523, + "step": 3775 + }, + { + "epoch": 2.09, + "learning_rate": 1.0514666015290645e-05, + "loss": 0.8867, + "step": 3780 + }, + { + "epoch": 2.09, + "learning_rate": 1.0455718168503967e-05, + "loss": 0.9816, + "step": 3785 + }, + { + "epoch": 2.1, + "learning_rate": 1.0396892302768127e-05, + "loss": 0.9465, + "step": 3790 + }, + { + "epoch": 2.1, + "learning_rate": 1.033818891144817e-05, + "loss": 0.9352, + "step": 3795 + }, + { + "epoch": 2.1, + "learning_rate": 1.0279608486882054e-05, + "loss": 0.8877, + "step": 3800 + } + ], + "max_steps": 5424, + "num_train_epochs": 3, + "total_flos": 1.2348465637176115e+18, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-3800/training_args.bin b/checkpoint-3800/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..77433459daca6603950afc463ff989e817f81ac3 --- /dev/null +++ b/checkpoint-3800/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0f6a8853599a52b1f15984d75ce4b6434c335fdc67d4509d0b2ca0d8c613590a +size 3362 diff --git a/checkpoint-3900/README.md b/checkpoint-3900/README.md new file mode 100644 index 0000000000000000000000000000000000000000..53c0fe6b436905ad7a2c060e98fe1ebdeac1a2fe --- /dev/null +++ b/checkpoint-3900/README.md @@ -0,0 +1,15 @@ +--- +library_name: peft +--- +## Training procedure + + +The following `bitsandbytes` quantization config was used during training: +- load_in_8bit: True +- llm_int8_threshold: 6.0 +- llm_int8_skip_modules: None +- llm_int8_enable_fp32_cpu_offload: False +### Framework versions + + +- PEFT 0.5.0 diff --git a/checkpoint-3900/adapter_config.json b/checkpoint-3900/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..514ae99ac7471cc26016f7575e44dd8304f29b07 --- /dev/null +++ b/checkpoint-3900/adapter_config.json @@ -0,0 +1,21 @@ +{ + "auto_mapping": null, + "base_model_name_or_path": "meta-llama/Llama-2-7b-chat-hf", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "q_proj", + "v_proj" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-3900/adapter_model.bin b/checkpoint-3900/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..5ec137f9e581d4be2512d975b260607e2bab4b32 --- /dev/null +++ b/checkpoint-3900/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cce9ab9e2de938572e1f36505c37f85a01eeaee9548b6971ef1ac6fd971ffece +size 16821197 diff --git a/checkpoint-3900/finetuning_args.json b/checkpoint-3900/finetuning_args.json new file mode 100644 index 0000000000000000000000000000000000000000..d01efc206b59c6f88548e8f3940579f2ed2af33b --- /dev/null +++ b/checkpoint-3900/finetuning_args.json @@ -0,0 +1,16 @@ +{ + "dpo_beta": 0.1, + "finetuning_type": "lora", + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "lora_rank": 8, + "lora_target": [ + "q_proj", + "v_proj" + ], + "name_module_trainable": "mlp", + "num_hidden_layers": 32, + "num_layer_trainable": 3, + "ppo_score_norm": false, + "resume_lora_training": true +} diff --git a/checkpoint-3900/optimizer.pt b/checkpoint-3900/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..574d88bca50dc340ad00a25f81a81d5872852a62 --- /dev/null +++ b/checkpoint-3900/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:644e27b60dfdde5dd0ae642279ae96e62f8810ca6f7169a1e869fb4bca1774d3 +size 33629893 diff --git a/checkpoint-3900/rng_state.pth b/checkpoint-3900/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..5e13f61ead0e43b323e6b2725006ba432c6d1b6b --- /dev/null +++ b/checkpoint-3900/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1c216f51399c0b56ed916b658feba509a4b62bcef7ec119088840d872a30ab67 +size 14575 diff --git a/checkpoint-3900/scaler.pt b/checkpoint-3900/scaler.pt new file mode 100644 index 0000000000000000000000000000000000000000..7cd96d2a30d5a9c4b0647e3e9c61318caf9fa425 --- /dev/null +++ b/checkpoint-3900/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:24408fc97d3e3823085540d3e11e10782052b43913d464a65d8b13e09684ed6c +size 557 diff --git a/checkpoint-3900/scheduler.pt b/checkpoint-3900/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..6f2426ad1634227b69533a2cdc0242a115aad09c --- /dev/null +++ b/checkpoint-3900/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:992722d474520eca9ca33ef53f66dfa8971d0bc1c2679da463e30ba146e8d982 +size 627 diff --git a/checkpoint-3900/trainer_state.json b/checkpoint-3900/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..6c56f6b1ebdeefa6c18cab8c18cee608c38e48af --- /dev/null +++ b/checkpoint-3900/trainer_state.json @@ -0,0 +1,4696 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 2.15648327343102, + "global_step": 3900, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.9999895164082156e-05, + "loss": 1.6349, + "step": 5 + }, + { + "epoch": 0.01, + "learning_rate": 4.999958065720787e-05, + "loss": 1.6199, + "step": 10 + }, + { + "epoch": 0.01, + "learning_rate": 4.999905648201487e-05, + "loss": 1.4834, + "step": 15 + }, + { + "epoch": 0.01, + "learning_rate": 4.999832264289934e-05, + "loss": 1.3882, + "step": 20 + }, + { + "epoch": 0.01, + "learning_rate": 4.999737914601591e-05, + "loss": 1.3679, + "step": 25 + }, + { + "epoch": 0.02, + "learning_rate": 4.999622599927756e-05, + "loss": 1.2396, + "step": 30 + }, + { + "epoch": 0.02, + "learning_rate": 4.999486321235559e-05, + "loss": 1.321, + "step": 35 + }, + { + "epoch": 0.02, + "learning_rate": 4.9993290796679516e-05, + "loss": 1.2874, + "step": 40 + }, + { + "epoch": 0.02, + "learning_rate": 4.999150876543699e-05, + "loss": 1.2607, + "step": 45 + }, + { + "epoch": 0.03, + "learning_rate": 4.9989517133573694e-05, + "loss": 1.2454, + "step": 50 + }, + { + "epoch": 0.03, + "learning_rate": 4.9987315917793174e-05, + "loss": 1.2799, + "step": 55 + }, + { + "epoch": 0.03, + "learning_rate": 4.998490513655676e-05, + "loss": 1.2575, + "step": 60 + }, + { + "epoch": 0.04, + "learning_rate": 4.998228481008337e-05, + "loss": 1.2404, + "step": 65 + }, + { + "epoch": 0.04, + "learning_rate": 4.997945496034934e-05, + "loss": 1.2219, + "step": 70 + }, + { + "epoch": 0.04, + "learning_rate": 4.9976415611088267e-05, + "loss": 1.2241, + "step": 75 + }, + { + "epoch": 0.04, + "learning_rate": 4.997316678779079e-05, + "loss": 1.1716, + "step": 80 + }, + { + "epoch": 0.05, + "learning_rate": 4.996970851770438e-05, + "loss": 1.1883, + "step": 85 + }, + { + "epoch": 0.05, + "learning_rate": 4.9966040829833115e-05, + "loss": 1.205, + "step": 90 + }, + { + "epoch": 0.05, + "learning_rate": 4.9962163754937426e-05, + "loss": 1.1246, + "step": 95 + }, + { + "epoch": 0.06, + "learning_rate": 4.995807732553384e-05, + "loss": 1.1636, + "step": 100 + }, + { + "epoch": 0.06, + "learning_rate": 4.9953781575894723e-05, + "loss": 1.158, + "step": 105 + }, + { + "epoch": 0.06, + "learning_rate": 4.9949276542048e-05, + "loss": 1.1477, + "step": 110 + }, + { + "epoch": 0.06, + "learning_rate": 4.9944562261776805e-05, + "loss": 1.1678, + "step": 115 + }, + { + "epoch": 0.07, + "learning_rate": 4.9939638774619216e-05, + "loss": 1.1501, + "step": 120 + }, + { + "epoch": 0.07, + "learning_rate": 4.99345061218679e-05, + "loss": 1.1955, + "step": 125 + }, + { + "epoch": 0.07, + "learning_rate": 4.9929164346569756e-05, + "loss": 1.1724, + "step": 130 + }, + { + "epoch": 0.07, + "learning_rate": 4.9923613493525576e-05, + "loss": 1.177, + "step": 135 + }, + { + "epoch": 0.08, + "learning_rate": 4.991785360928968e-05, + "loss": 1.1418, + "step": 140 + }, + { + "epoch": 0.08, + "learning_rate": 4.991188474216947e-05, + "loss": 1.1898, + "step": 145 + }, + { + "epoch": 0.08, + "learning_rate": 4.9905706942225094e-05, + "loss": 1.1479, + "step": 150 + }, + { + "epoch": 0.09, + "learning_rate": 4.9899320261268966e-05, + "loss": 1.1356, + "step": 155 + }, + { + "epoch": 0.09, + "learning_rate": 4.989272475286537e-05, + "loss": 1.1397, + "step": 160 + }, + { + "epoch": 0.09, + "learning_rate": 4.9885920472330004e-05, + "loss": 1.1215, + "step": 165 + }, + { + "epoch": 0.09, + "learning_rate": 4.9878907476729516e-05, + "loss": 1.167, + "step": 170 + }, + { + "epoch": 0.1, + "learning_rate": 4.9871685824881e-05, + "loss": 1.1219, + "step": 175 + }, + { + "epoch": 0.1, + "learning_rate": 4.9864255577351534e-05, + "loss": 1.0835, + "step": 180 + }, + { + "epoch": 0.1, + "learning_rate": 4.985661679645769e-05, + "loss": 1.0721, + "step": 185 + }, + { + "epoch": 0.11, + "learning_rate": 4.9848769546264915e-05, + "loss": 1.0692, + "step": 190 + }, + { + "epoch": 0.11, + "learning_rate": 4.9840713892587146e-05, + "loss": 1.0488, + "step": 195 + }, + { + "epoch": 0.11, + "learning_rate": 4.983244990298609e-05, + "loss": 1.1285, + "step": 200 + }, + { + "epoch": 0.11, + "learning_rate": 4.982397764677081e-05, + "loss": 1.0832, + "step": 205 + }, + { + "epoch": 0.12, + "learning_rate": 4.981529719499704e-05, + "loss": 1.0652, + "step": 210 + }, + { + "epoch": 0.12, + "learning_rate": 4.980640862046663e-05, + "loss": 1.1043, + "step": 215 + }, + { + "epoch": 0.12, + "learning_rate": 4.979731199772693e-05, + "loss": 1.112, + "step": 220 + }, + { + "epoch": 0.12, + "learning_rate": 4.9788007403070146e-05, + "loss": 1.1029, + "step": 225 + }, + { + "epoch": 0.13, + "learning_rate": 4.977849491453277e-05, + "loss": 1.0869, + "step": 230 + }, + { + "epoch": 0.13, + "learning_rate": 4.976877461189481e-05, + "loss": 1.0843, + "step": 235 + }, + { + "epoch": 0.13, + "learning_rate": 4.975884657667922e-05, + "loss": 1.0789, + "step": 240 + }, + { + "epoch": 0.14, + "learning_rate": 4.974871089215118e-05, + "loss": 1.0449, + "step": 245 + }, + { + "epoch": 0.14, + "learning_rate": 4.9738367643317405e-05, + "loss": 1.1053, + "step": 250 + }, + { + "epoch": 0.14, + "learning_rate": 4.9727816916925395e-05, + "loss": 1.0651, + "step": 255 + }, + { + "epoch": 0.14, + "learning_rate": 4.971705880146276e-05, + "loss": 1.0828, + "step": 260 + }, + { + "epoch": 0.15, + "learning_rate": 4.970609338715646e-05, + "loss": 1.0932, + "step": 265 + }, + { + "epoch": 0.15, + "learning_rate": 4.969492076597203e-05, + "loss": 1.0648, + "step": 270 + }, + { + "epoch": 0.15, + "learning_rate": 4.968354103161283e-05, + "loss": 1.0948, + "step": 275 + }, + { + "epoch": 0.15, + "learning_rate": 4.967195427951926e-05, + "loss": 1.0721, + "step": 280 + }, + { + "epoch": 0.16, + "learning_rate": 4.9660160606867936e-05, + "loss": 1.124, + "step": 285 + }, + { + "epoch": 0.16, + "learning_rate": 4.9648160112570896e-05, + "loss": 1.0963, + "step": 290 + }, + { + "epoch": 0.16, + "learning_rate": 4.9635952897274773e-05, + "loss": 1.1078, + "step": 295 + }, + { + "epoch": 0.17, + "learning_rate": 4.9623539063359925e-05, + "loss": 1.1059, + "step": 300 + }, + { + "epoch": 0.17, + "learning_rate": 4.961091871493962e-05, + "loss": 1.1032, + "step": 305 + }, + { + "epoch": 0.17, + "learning_rate": 4.959809195785912e-05, + "loss": 1.0595, + "step": 310 + }, + { + "epoch": 0.17, + "learning_rate": 4.958505889969481e-05, + "loss": 1.1096, + "step": 315 + }, + { + "epoch": 0.18, + "learning_rate": 4.957181964975329e-05, + "loss": 1.0589, + "step": 320 + }, + { + "epoch": 0.18, + "learning_rate": 4.955837431907049e-05, + "loss": 1.0608, + "step": 325 + }, + { + "epoch": 0.18, + "learning_rate": 4.954472302041069e-05, + "loss": 1.0819, + "step": 330 + }, + { + "epoch": 0.19, + "learning_rate": 4.9530865868265605e-05, + "loss": 1.0759, + "step": 335 + }, + { + "epoch": 0.19, + "learning_rate": 4.951680297885342e-05, + "loss": 1.0515, + "step": 340 + }, + { + "epoch": 0.19, + "learning_rate": 4.950253447011779e-05, + "loss": 1.0371, + "step": 345 + }, + { + "epoch": 0.19, + "learning_rate": 4.948806046172691e-05, + "loss": 1.0619, + "step": 350 + }, + { + "epoch": 0.2, + "learning_rate": 4.947338107507245e-05, + "loss": 1.0757, + "step": 355 + }, + { + "epoch": 0.2, + "learning_rate": 4.945849643326857e-05, + "loss": 1.0686, + "step": 360 + }, + { + "epoch": 0.2, + "learning_rate": 4.9443406661150874e-05, + "loss": 1.0809, + "step": 365 + }, + { + "epoch": 0.2, + "learning_rate": 4.942811188527537e-05, + "loss": 1.0704, + "step": 370 + }, + { + "epoch": 0.21, + "learning_rate": 4.941261223391742e-05, + "loss": 1.0655, + "step": 375 + }, + { + "epoch": 0.21, + "learning_rate": 4.939690783707063e-05, + "loss": 1.1182, + "step": 380 + }, + { + "epoch": 0.21, + "learning_rate": 4.938099882644578e-05, + "loss": 1.081, + "step": 385 + }, + { + "epoch": 0.22, + "learning_rate": 4.9364885335469734e-05, + "loss": 1.0792, + "step": 390 + }, + { + "epoch": 0.22, + "learning_rate": 4.93485674992843e-05, + "loss": 1.0244, + "step": 395 + }, + { + "epoch": 0.22, + "learning_rate": 4.933204545474511e-05, + "loss": 1.0531, + "step": 400 + }, + { + "epoch": 0.22, + "learning_rate": 4.9315319340420465e-05, + "loss": 1.048, + "step": 405 + }, + { + "epoch": 0.23, + "learning_rate": 4.929838929659015e-05, + "loss": 1.04, + "step": 410 + }, + { + "epoch": 0.23, + "learning_rate": 4.9281255465244314e-05, + "loss": 1.0686, + "step": 415 + }, + { + "epoch": 0.23, + "learning_rate": 4.926391799008223e-05, + "loss": 1.0933, + "step": 420 + }, + { + "epoch": 0.24, + "learning_rate": 4.924637701651111e-05, + "loss": 1.0313, + "step": 425 + }, + { + "epoch": 0.24, + "learning_rate": 4.9228632691644874e-05, + "loss": 1.0811, + "step": 430 + }, + { + "epoch": 0.24, + "learning_rate": 4.921068516430293e-05, + "loss": 1.0617, + "step": 435 + }, + { + "epoch": 0.24, + "learning_rate": 4.919253458500892e-05, + "loss": 1.0482, + "step": 440 + }, + { + "epoch": 0.25, + "learning_rate": 4.9174181105989445e-05, + "loss": 1.0681, + "step": 445 + }, + { + "epoch": 0.25, + "learning_rate": 4.9155624881172834e-05, + "loss": 1.0476, + "step": 450 + }, + { + "epoch": 0.25, + "learning_rate": 4.913686606618777e-05, + "loss": 1.0463, + "step": 455 + }, + { + "epoch": 0.25, + "learning_rate": 4.911790481836208e-05, + "loss": 1.0213, + "step": 460 + }, + { + "epoch": 0.26, + "learning_rate": 4.909874129672133e-05, + "loss": 0.9855, + "step": 465 + }, + { + "epoch": 0.26, + "learning_rate": 4.907937566198757e-05, + "loss": 1.045, + "step": 470 + }, + { + "epoch": 0.26, + "learning_rate": 4.9059808076577914e-05, + "loss": 1.0442, + "step": 475 + }, + { + "epoch": 0.27, + "learning_rate": 4.904003870460323e-05, + "loss": 1.0354, + "step": 480 + }, + { + "epoch": 0.27, + "learning_rate": 4.9020067711866735e-05, + "loss": 1.0331, + "step": 485 + }, + { + "epoch": 0.27, + "learning_rate": 4.899989526586261e-05, + "loss": 1.0585, + "step": 490 + }, + { + "epoch": 0.27, + "learning_rate": 4.8979521535774636e-05, + "loss": 1.0223, + "step": 495 + }, + { + "epoch": 0.28, + "learning_rate": 4.895894669247468e-05, + "loss": 1.0118, + "step": 500 + }, + { + "epoch": 0.28, + "learning_rate": 4.8938170908521356e-05, + "loss": 1.0508, + "step": 505 + }, + { + "epoch": 0.28, + "learning_rate": 4.8917194358158534e-05, + "loss": 1.0656, + "step": 510 + }, + { + "epoch": 0.28, + "learning_rate": 4.8896017217313886e-05, + "loss": 1.0655, + "step": 515 + }, + { + "epoch": 0.29, + "learning_rate": 4.887463966359741e-05, + "loss": 1.0833, + "step": 520 + }, + { + "epoch": 0.29, + "learning_rate": 4.8853061876299956e-05, + "loss": 1.068, + "step": 525 + }, + { + "epoch": 0.29, + "learning_rate": 4.8831284036391684e-05, + "loss": 1.0487, + "step": 530 + }, + { + "epoch": 0.3, + "learning_rate": 4.880930632652058e-05, + "loss": 1.059, + "step": 535 + }, + { + "epoch": 0.3, + "learning_rate": 4.878712893101092e-05, + "loss": 1.0408, + "step": 540 + }, + { + "epoch": 0.3, + "learning_rate": 4.876475203586171e-05, + "loss": 0.9976, + "step": 545 + }, + { + "epoch": 0.3, + "learning_rate": 4.874217582874514e-05, + "loss": 1.0358, + "step": 550 + }, + { + "epoch": 0.31, + "learning_rate": 4.8719400499005e-05, + "loss": 1.0073, + "step": 555 + }, + { + "epoch": 0.31, + "learning_rate": 4.869642623765509e-05, + "loss": 1.0742, + "step": 560 + }, + { + "epoch": 0.31, + "learning_rate": 4.867325323737765e-05, + "loss": 1.0463, + "step": 565 + }, + { + "epoch": 0.32, + "learning_rate": 4.864988169252168e-05, + "loss": 1.0242, + "step": 570 + }, + { + "epoch": 0.32, + "learning_rate": 4.8626311799101375e-05, + "loss": 1.0773, + "step": 575 + }, + { + "epoch": 0.32, + "learning_rate": 4.860254375479446e-05, + "loss": 1.0371, + "step": 580 + }, + { + "epoch": 0.32, + "learning_rate": 4.8578577758940504e-05, + "loss": 1.0399, + "step": 585 + }, + { + "epoch": 0.33, + "learning_rate": 4.855441401253928e-05, + "loss": 1.0411, + "step": 590 + }, + { + "epoch": 0.33, + "learning_rate": 4.8530052718249076e-05, + "loss": 1.0476, + "step": 595 + }, + { + "epoch": 0.33, + "learning_rate": 4.850549408038498e-05, + "loss": 1.0075, + "step": 600 + }, + { + "epoch": 0.33, + "learning_rate": 4.848073830491717e-05, + "loss": 1.0045, + "step": 605 + }, + { + "epoch": 0.34, + "learning_rate": 4.845578559946923e-05, + "loss": 1.0368, + "step": 610 + }, + { + "epoch": 0.34, + "learning_rate": 4.8430636173316306e-05, + "loss": 1.0156, + "step": 615 + }, + { + "epoch": 0.34, + "learning_rate": 4.840529023738348e-05, + "loss": 1.0334, + "step": 620 + }, + { + "epoch": 0.35, + "learning_rate": 4.837974800424389e-05, + "loss": 0.9994, + "step": 625 + }, + { + "epoch": 0.35, + "learning_rate": 4.8354009688117026e-05, + "loss": 1.0467, + "step": 630 + }, + { + "epoch": 0.35, + "learning_rate": 4.8328075504866874e-05, + "loss": 0.9968, + "step": 635 + }, + { + "epoch": 0.35, + "learning_rate": 4.8301945672000164e-05, + "loss": 1.041, + "step": 640 + }, + { + "epoch": 0.36, + "learning_rate": 4.8275620408664487e-05, + "loss": 0.9811, + "step": 645 + }, + { + "epoch": 0.36, + "learning_rate": 4.8249099935646494e-05, + "loss": 1.0235, + "step": 650 + }, + { + "epoch": 0.36, + "learning_rate": 4.822238447537003e-05, + "loss": 1.0152, + "step": 655 + }, + { + "epoch": 0.36, + "learning_rate": 4.819547425189429e-05, + "loss": 1.038, + "step": 660 + }, + { + "epoch": 0.37, + "learning_rate": 4.81683694909119e-05, + "loss": 1.0387, + "step": 665 + }, + { + "epoch": 0.37, + "learning_rate": 4.814107041974707e-05, + "loss": 1.019, + "step": 670 + }, + { + "epoch": 0.37, + "learning_rate": 4.811357726735366e-05, + "loss": 1.0403, + "step": 675 + }, + { + "epoch": 0.38, + "learning_rate": 4.808589026431324e-05, + "loss": 1.0828, + "step": 680 + }, + { + "epoch": 0.38, + "learning_rate": 4.805800964283322e-05, + "loss": 1.0128, + "step": 685 + }, + { + "epoch": 0.38, + "learning_rate": 4.802993563674483e-05, + "loss": 1.0502, + "step": 690 + }, + { + "epoch": 0.38, + "learning_rate": 4.80016684815012e-05, + "loss": 1.0169, + "step": 695 + }, + { + "epoch": 0.39, + "learning_rate": 4.7973208414175406e-05, + "loss": 1.0316, + "step": 700 + }, + { + "epoch": 0.39, + "learning_rate": 4.794455567345842e-05, + "loss": 1.0263, + "step": 705 + }, + { + "epoch": 0.39, + "learning_rate": 4.791571049965714e-05, + "loss": 1.0801, + "step": 710 + }, + { + "epoch": 0.4, + "learning_rate": 4.7886673134692404e-05, + "loss": 1.0575, + "step": 715 + }, + { + "epoch": 0.4, + "learning_rate": 4.7857443822096905e-05, + "loss": 0.9767, + "step": 720 + }, + { + "epoch": 0.4, + "learning_rate": 4.782802280701319e-05, + "loss": 0.9666, + "step": 725 + }, + { + "epoch": 0.4, + "learning_rate": 4.77984103361916e-05, + "loss": 0.9983, + "step": 730 + }, + { + "epoch": 0.41, + "learning_rate": 4.776860665798816e-05, + "loss": 1.0219, + "step": 735 + }, + { + "epoch": 0.41, + "learning_rate": 4.773861202236257e-05, + "loss": 0.9963, + "step": 740 + }, + { + "epoch": 0.41, + "learning_rate": 4.770842668087602e-05, + "loss": 1.0219, + "step": 745 + }, + { + "epoch": 0.41, + "learning_rate": 4.767805088668916e-05, + "loss": 1.0029, + "step": 750 + }, + { + "epoch": 0.42, + "learning_rate": 4.7647484894559936e-05, + "loss": 1.0077, + "step": 755 + }, + { + "epoch": 0.42, + "learning_rate": 4.7616728960841444e-05, + "loss": 0.9908, + "step": 760 + }, + { + "epoch": 0.42, + "learning_rate": 4.758578334347981e-05, + "loss": 1.0092, + "step": 765 + }, + { + "epoch": 0.43, + "learning_rate": 4.7554648302012015e-05, + "loss": 0.9874, + "step": 770 + }, + { + "epoch": 0.43, + "learning_rate": 4.7523324097563706e-05, + "loss": 1.0185, + "step": 775 + }, + { + "epoch": 0.43, + "learning_rate": 4.749181099284703e-05, + "loss": 1.0533, + "step": 780 + }, + { + "epoch": 0.43, + "learning_rate": 4.746010925215839e-05, + "loss": 1.0083, + "step": 785 + }, + { + "epoch": 0.44, + "learning_rate": 4.74282191413763e-05, + "loss": 1.0191, + "step": 790 + }, + { + "epoch": 0.44, + "learning_rate": 4.7396140927959045e-05, + "loss": 0.9786, + "step": 795 + }, + { + "epoch": 0.44, + "learning_rate": 4.7363874880942574e-05, + "loss": 1.0197, + "step": 800 + }, + { + "epoch": 0.45, + "learning_rate": 4.733142127093813e-05, + "loss": 1.0471, + "step": 805 + }, + { + "epoch": 0.45, + "learning_rate": 4.7298780370130014e-05, + "loss": 1.0134, + "step": 810 + }, + { + "epoch": 0.45, + "learning_rate": 4.726595245227336e-05, + "loss": 0.9801, + "step": 815 + }, + { + "epoch": 0.45, + "learning_rate": 4.723293779269173e-05, + "loss": 0.9944, + "step": 820 + }, + { + "epoch": 0.46, + "learning_rate": 4.7199736668274924e-05, + "loss": 1.0358, + "step": 825 + }, + { + "epoch": 0.46, + "learning_rate": 4.716634935747655e-05, + "loss": 0.9907, + "step": 830 + }, + { + "epoch": 0.46, + "learning_rate": 4.713277614031177e-05, + "loss": 1.0698, + "step": 835 + }, + { + "epoch": 0.46, + "learning_rate": 4.70990172983549e-05, + "loss": 1.0466, + "step": 840 + }, + { + "epoch": 0.47, + "learning_rate": 4.706507311473707e-05, + "loss": 0.9489, + "step": 845 + }, + { + "epoch": 0.47, + "learning_rate": 4.703094387414385e-05, + "loss": 0.9936, + "step": 850 + }, + { + "epoch": 0.47, + "learning_rate": 4.699662986281288e-05, + "loss": 0.9672, + "step": 855 + }, + { + "epoch": 0.48, + "learning_rate": 4.696213136853141e-05, + "loss": 1.0164, + "step": 860 + }, + { + "epoch": 0.48, + "learning_rate": 4.6927448680633954e-05, + "loss": 0.9817, + "step": 865 + }, + { + "epoch": 0.48, + "learning_rate": 4.689258208999983e-05, + "loss": 1.028, + "step": 870 + }, + { + "epoch": 0.48, + "learning_rate": 4.6857531889050716e-05, + "loss": 1.0239, + "step": 875 + }, + { + "epoch": 0.49, + "learning_rate": 4.682229837174821e-05, + "loss": 1.0378, + "step": 880 + }, + { + "epoch": 0.49, + "learning_rate": 4.678688183359135e-05, + "loss": 0.9908, + "step": 885 + }, + { + "epoch": 0.49, + "learning_rate": 4.675128257161418e-05, + "loss": 0.9384, + "step": 890 + }, + { + "epoch": 0.49, + "learning_rate": 4.671550088438319e-05, + "loss": 0.9481, + "step": 895 + }, + { + "epoch": 0.5, + "learning_rate": 4.6679537071994874e-05, + "loss": 0.9541, + "step": 900 + }, + { + "epoch": 0.5, + "learning_rate": 4.6643391436073165e-05, + "loss": 0.9787, + "step": 905 + }, + { + "epoch": 0.5, + "learning_rate": 4.660706427976693e-05, + "loss": 0.9897, + "step": 910 + }, + { + "epoch": 0.51, + "learning_rate": 4.657055590774745e-05, + "loss": 0.9947, + "step": 915 + }, + { + "epoch": 0.51, + "learning_rate": 4.6533866626205805e-05, + "loss": 0.9614, + "step": 920 + }, + { + "epoch": 0.51, + "learning_rate": 4.649699674285036e-05, + "loss": 1.0323, + "step": 925 + }, + { + "epoch": 0.51, + "learning_rate": 4.645994656690417e-05, + "loss": 1.0345, + "step": 930 + }, + { + "epoch": 0.52, + "learning_rate": 4.642271640910235e-05, + "loss": 1.0432, + "step": 935 + }, + { + "epoch": 0.52, + "learning_rate": 4.638530658168954e-05, + "loss": 1.0102, + "step": 940 + }, + { + "epoch": 0.52, + "learning_rate": 4.6347717398417203e-05, + "loss": 1.0361, + "step": 945 + }, + { + "epoch": 0.53, + "learning_rate": 4.6309949174541096e-05, + "loss": 0.9818, + "step": 950 + }, + { + "epoch": 0.53, + "learning_rate": 4.627200222681851e-05, + "loss": 1.0293, + "step": 955 + }, + { + "epoch": 0.53, + "learning_rate": 4.6233876873505694e-05, + "loss": 1.0125, + "step": 960 + }, + { + "epoch": 0.53, + "learning_rate": 4.619557343435516e-05, + "loss": 1.0039, + "step": 965 + }, + { + "epoch": 0.54, + "learning_rate": 4.615709223061302e-05, + "loss": 1.0146, + "step": 970 + }, + { + "epoch": 0.54, + "learning_rate": 4.611843358501624e-05, + "loss": 1.0352, + "step": 975 + }, + { + "epoch": 0.54, + "learning_rate": 4.6079597821789993e-05, + "loss": 1.0405, + "step": 980 + }, + { + "epoch": 0.54, + "learning_rate": 4.604058526664491e-05, + "loss": 0.9834, + "step": 985 + }, + { + "epoch": 0.55, + "learning_rate": 4.600139624677436e-05, + "loss": 1.0252, + "step": 990 + }, + { + "epoch": 0.55, + "learning_rate": 4.596203109085168e-05, + "loss": 1.0516, + "step": 995 + }, + { + "epoch": 0.55, + "learning_rate": 4.5922490129027464e-05, + "loss": 0.9762, + "step": 1000 + }, + { + "epoch": 0.56, + "learning_rate": 4.588277369292674e-05, + "loss": 0.9801, + "step": 1005 + }, + { + "epoch": 0.56, + "learning_rate": 4.5842882115646234e-05, + "loss": 1.0127, + "step": 1010 + }, + { + "epoch": 0.56, + "learning_rate": 4.580281573175157e-05, + "loss": 1.0415, + "step": 1015 + }, + { + "epoch": 0.56, + "learning_rate": 4.576257487727442e-05, + "loss": 0.9974, + "step": 1020 + }, + { + "epoch": 0.57, + "learning_rate": 4.572215988970974e-05, + "loss": 1.0097, + "step": 1025 + }, + { + "epoch": 0.57, + "learning_rate": 4.568157110801293e-05, + "loss": 1.0004, + "step": 1030 + }, + { + "epoch": 0.57, + "learning_rate": 4.5640808872596944e-05, + "loss": 0.9932, + "step": 1035 + }, + { + "epoch": 0.58, + "learning_rate": 4.5599873525329505e-05, + "loss": 0.9437, + "step": 1040 + }, + { + "epoch": 0.58, + "learning_rate": 4.555876540953019e-05, + "loss": 1.0004, + "step": 1045 + }, + { + "epoch": 0.58, + "learning_rate": 4.551748486996755e-05, + "loss": 0.9743, + "step": 1050 + }, + { + "epoch": 0.58, + "learning_rate": 4.547603225285626e-05, + "loss": 1.0303, + "step": 1055 + }, + { + "epoch": 0.59, + "learning_rate": 4.543440790585417e-05, + "loss": 1.0225, + "step": 1060 + }, + { + "epoch": 0.59, + "learning_rate": 4.539261217805939e-05, + "loss": 1.0102, + "step": 1065 + }, + { + "epoch": 0.59, + "learning_rate": 4.535064542000743e-05, + "loss": 1.0087, + "step": 1070 + }, + { + "epoch": 0.59, + "learning_rate": 4.5308507983668165e-05, + "loss": 0.952, + "step": 1075 + }, + { + "epoch": 0.6, + "learning_rate": 4.526620022244293e-05, + "loss": 0.9767, + "step": 1080 + }, + { + "epoch": 0.6, + "learning_rate": 4.522372249116158e-05, + "loss": 1.0049, + "step": 1085 + }, + { + "epoch": 0.6, + "learning_rate": 4.5181075146079456e-05, + "loss": 1.0397, + "step": 1090 + }, + { + "epoch": 0.61, + "learning_rate": 4.5138258544874455e-05, + "loss": 1.0167, + "step": 1095 + }, + { + "epoch": 0.61, + "learning_rate": 4.5095273046643985e-05, + "loss": 0.9966, + "step": 1100 + }, + { + "epoch": 0.61, + "learning_rate": 4.5052119011901986e-05, + "loss": 0.983, + "step": 1105 + }, + { + "epoch": 0.61, + "learning_rate": 4.500879680257587e-05, + "loss": 1.0301, + "step": 1110 + }, + { + "epoch": 0.62, + "learning_rate": 4.4965306782003535e-05, + "loss": 1.0371, + "step": 1115 + }, + { + "epoch": 0.62, + "learning_rate": 4.492164931493028e-05, + "loss": 1.0083, + "step": 1120 + }, + { + "epoch": 0.62, + "learning_rate": 4.487782476750575e-05, + "loss": 0.9728, + "step": 1125 + }, + { + "epoch": 0.62, + "learning_rate": 4.4833833507280884e-05, + "loss": 0.9892, + "step": 1130 + }, + { + "epoch": 0.63, + "learning_rate": 4.4789675903204805e-05, + "loss": 0.9847, + "step": 1135 + }, + { + "epoch": 0.63, + "learning_rate": 4.474535232562176e-05, + "loss": 1.0545, + "step": 1140 + }, + { + "epoch": 0.63, + "learning_rate": 4.470086314626797e-05, + "loss": 1.0204, + "step": 1145 + }, + { + "epoch": 0.64, + "learning_rate": 4.465620873826856e-05, + "loss": 0.9648, + "step": 1150 + }, + { + "epoch": 0.64, + "learning_rate": 4.46113894761344e-05, + "loss": 0.9545, + "step": 1155 + }, + { + "epoch": 0.64, + "learning_rate": 4.456640573575896e-05, + "loss": 1.0256, + "step": 1160 + }, + { + "epoch": 0.64, + "learning_rate": 4.4521257894415183e-05, + "loss": 0.99, + "step": 1165 + }, + { + "epoch": 0.65, + "learning_rate": 4.44759463307523e-05, + "loss": 0.9906, + "step": 1170 + }, + { + "epoch": 0.65, + "learning_rate": 4.443047142479266e-05, + "loss": 0.9876, + "step": 1175 + }, + { + "epoch": 0.65, + "learning_rate": 4.4384833557928553e-05, + "loss": 1.0495, + "step": 1180 + }, + { + "epoch": 0.66, + "learning_rate": 4.4339033112918966e-05, + "loss": 0.9869, + "step": 1185 + }, + { + "epoch": 0.66, + "learning_rate": 4.4293070473886456e-05, + "loss": 1.0299, + "step": 1190 + }, + { + "epoch": 0.66, + "learning_rate": 4.424694602631385e-05, + "loss": 1.0073, + "step": 1195 + }, + { + "epoch": 0.66, + "learning_rate": 4.420066015704105e-05, + "loss": 1.0023, + "step": 1200 + }, + { + "epoch": 0.67, + "learning_rate": 4.41542132542618e-05, + "loss": 0.9462, + "step": 1205 + }, + { + "epoch": 0.67, + "learning_rate": 4.410760570752037e-05, + "loss": 1.0116, + "step": 1210 + }, + { + "epoch": 0.67, + "learning_rate": 4.4060837907708375e-05, + "loss": 0.9652, + "step": 1215 + }, + { + "epoch": 0.67, + "learning_rate": 4.401391024706142e-05, + "loss": 1.0411, + "step": 1220 + }, + { + "epoch": 0.68, + "learning_rate": 4.396682311915586e-05, + "loss": 0.9691, + "step": 1225 + }, + { + "epoch": 0.68, + "learning_rate": 4.3919576918905495e-05, + "loss": 0.961, + "step": 1230 + }, + { + "epoch": 0.68, + "learning_rate": 4.387217204255819e-05, + "loss": 0.9602, + "step": 1235 + }, + { + "epoch": 0.69, + "learning_rate": 4.3824608887692666e-05, + "loss": 1.0197, + "step": 1240 + }, + { + "epoch": 0.69, + "learning_rate": 4.377688785321507e-05, + "loss": 0.9982, + "step": 1245 + }, + { + "epoch": 0.69, + "learning_rate": 4.372900933935569e-05, + "loss": 0.986, + "step": 1250 + }, + { + "epoch": 0.69, + "learning_rate": 4.368097374766556e-05, + "loss": 0.9744, + "step": 1255 + }, + { + "epoch": 0.7, + "learning_rate": 4.3632781481013105e-05, + "loss": 1.0078, + "step": 1260 + }, + { + "epoch": 0.7, + "learning_rate": 4.358443294358077e-05, + "loss": 0.9865, + "step": 1265 + }, + { + "epoch": 0.7, + "learning_rate": 4.35359285408616e-05, + "loss": 0.976, + "step": 1270 + }, + { + "epoch": 0.71, + "learning_rate": 4.348726867965591e-05, + "loss": 1.0141, + "step": 1275 + }, + { + "epoch": 0.71, + "learning_rate": 4.343845376806777e-05, + "loss": 0.952, + "step": 1280 + }, + { + "epoch": 0.71, + "learning_rate": 4.338948421550169e-05, + "loss": 1.0239, + "step": 1285 + }, + { + "epoch": 0.71, + "learning_rate": 4.334036043265909e-05, + "loss": 0.994, + "step": 1290 + }, + { + "epoch": 0.72, + "learning_rate": 4.329108283153492e-05, + "loss": 1.0505, + "step": 1295 + }, + { + "epoch": 0.72, + "learning_rate": 4.3241651825414195e-05, + "loss": 0.9919, + "step": 1300 + }, + { + "epoch": 0.72, + "learning_rate": 4.31920678288685e-05, + "loss": 1.0169, + "step": 1305 + }, + { + "epoch": 0.72, + "learning_rate": 4.3142331257752546e-05, + "loss": 0.9938, + "step": 1310 + }, + { + "epoch": 0.73, + "learning_rate": 4.309244252920064e-05, + "loss": 0.92, + "step": 1315 + }, + { + "epoch": 0.73, + "learning_rate": 4.304240206162326e-05, + "loss": 0.9537, + "step": 1320 + }, + { + "epoch": 0.73, + "learning_rate": 4.299221027470345e-05, + "loss": 0.996, + "step": 1325 + }, + { + "epoch": 0.74, + "learning_rate": 4.294186758939339e-05, + "loss": 0.9652, + "step": 1330 + }, + { + "epoch": 0.74, + "learning_rate": 4.2891374427910795e-05, + "loss": 1.0138, + "step": 1335 + }, + { + "epoch": 0.74, + "learning_rate": 4.284073121373544e-05, + "loss": 0.9267, + "step": 1340 + }, + { + "epoch": 0.74, + "learning_rate": 4.278993837160553e-05, + "loss": 0.9899, + "step": 1345 + }, + { + "epoch": 0.75, + "learning_rate": 4.273899632751422e-05, + "loss": 0.963, + "step": 1350 + }, + { + "epoch": 0.75, + "learning_rate": 4.2687905508705974e-05, + "loss": 1.0066, + "step": 1355 + }, + { + "epoch": 0.75, + "learning_rate": 4.263666634367303e-05, + "loss": 0.9939, + "step": 1360 + }, + { + "epoch": 0.75, + "learning_rate": 4.258527926215178e-05, + "loss": 0.9667, + "step": 1365 + }, + { + "epoch": 0.76, + "learning_rate": 4.253374469511917e-05, + "loss": 0.9999, + "step": 1370 + }, + { + "epoch": 0.76, + "learning_rate": 4.248206307478909e-05, + "loss": 0.9738, + "step": 1375 + }, + { + "epoch": 0.76, + "learning_rate": 4.243023483460875e-05, + "loss": 0.9829, + "step": 1380 + }, + { + "epoch": 0.77, + "learning_rate": 4.237826040925503e-05, + "loss": 1.0107, + "step": 1385 + }, + { + "epoch": 0.77, + "learning_rate": 4.232614023463088e-05, + "loss": 0.9992, + "step": 1390 + }, + { + "epoch": 0.77, + "learning_rate": 4.227387474786159e-05, + "loss": 0.9566, + "step": 1395 + }, + { + "epoch": 0.77, + "learning_rate": 4.222146438729119e-05, + "loss": 0.9722, + "step": 1400 + }, + { + "epoch": 0.78, + "learning_rate": 4.216890959247873e-05, + "loss": 0.988, + "step": 1405 + }, + { + "epoch": 0.78, + "learning_rate": 4.211621080419463e-05, + "loss": 1.0103, + "step": 1410 + }, + { + "epoch": 0.78, + "learning_rate": 4.206336846441695e-05, + "loss": 0.9805, + "step": 1415 + }, + { + "epoch": 0.79, + "learning_rate": 4.201038301632772e-05, + "loss": 1.0177, + "step": 1420 + }, + { + "epoch": 0.79, + "learning_rate": 4.195725490430917e-05, + "loss": 0.9938, + "step": 1425 + }, + { + "epoch": 0.79, + "learning_rate": 4.190398457394007e-05, + "loss": 1.0276, + "step": 1430 + }, + { + "epoch": 0.79, + "learning_rate": 4.1850572471991924e-05, + "loss": 0.9909, + "step": 1435 + }, + { + "epoch": 0.8, + "learning_rate": 4.1797019046425264e-05, + "loss": 0.9794, + "step": 1440 + }, + { + "epoch": 0.8, + "learning_rate": 4.1743324746385914e-05, + "loss": 1.0073, + "step": 1445 + }, + { + "epoch": 0.8, + "learning_rate": 4.1689490022201154e-05, + "loss": 1.0106, + "step": 1450 + }, + { + "epoch": 0.8, + "learning_rate": 4.163551532537601e-05, + "loss": 1.0241, + "step": 1455 + }, + { + "epoch": 0.81, + "learning_rate": 4.1581401108589425e-05, + "loss": 1.0178, + "step": 1460 + }, + { + "epoch": 0.81, + "learning_rate": 4.1527147825690495e-05, + "loss": 0.9426, + "step": 1465 + }, + { + "epoch": 0.81, + "learning_rate": 4.1472755931694626e-05, + "loss": 0.9642, + "step": 1470 + }, + { + "epoch": 0.82, + "learning_rate": 4.141822588277976e-05, + "loss": 0.9678, + "step": 1475 + }, + { + "epoch": 0.82, + "learning_rate": 4.136355813628251e-05, + "loss": 0.9654, + "step": 1480 + }, + { + "epoch": 0.82, + "learning_rate": 4.130875315069435e-05, + "loss": 0.9748, + "step": 1485 + }, + { + "epoch": 0.82, + "learning_rate": 4.125381138565775e-05, + "loss": 0.9321, + "step": 1490 + }, + { + "epoch": 0.83, + "learning_rate": 4.1198733301962346e-05, + "loss": 0.9885, + "step": 1495 + }, + { + "epoch": 0.83, + "learning_rate": 4.114351936154105e-05, + "loss": 0.9385, + "step": 1500 + }, + { + "epoch": 0.83, + "learning_rate": 4.108817002746619e-05, + "loss": 0.9362, + "step": 1505 + }, + { + "epoch": 0.83, + "learning_rate": 4.1032685763945625e-05, + "loss": 0.9764, + "step": 1510 + }, + { + "epoch": 0.84, + "learning_rate": 4.097706703631886e-05, + "loss": 0.961, + "step": 1515 + }, + { + "epoch": 0.84, + "learning_rate": 4.092131431105312e-05, + "loss": 0.9818, + "step": 1520 + }, + { + "epoch": 0.84, + "learning_rate": 4.086542805573945e-05, + "loss": 0.9855, + "step": 1525 + }, + { + "epoch": 0.85, + "learning_rate": 4.080940873908881e-05, + "loss": 0.9618, + "step": 1530 + }, + { + "epoch": 0.85, + "learning_rate": 4.07532568309281e-05, + "loss": 1.0102, + "step": 1535 + }, + { + "epoch": 0.85, + "learning_rate": 4.069697280219628e-05, + "loss": 0.9773, + "step": 1540 + }, + { + "epoch": 0.85, + "learning_rate": 4.0640557124940376e-05, + "loss": 0.9687, + "step": 1545 + }, + { + "epoch": 0.86, + "learning_rate": 4.058401027231152e-05, + "loss": 0.9818, + "step": 1550 + }, + { + "epoch": 0.86, + "learning_rate": 4.052733271856103e-05, + "loss": 0.9911, + "step": 1555 + }, + { + "epoch": 0.86, + "learning_rate": 4.0470524939036355e-05, + "loss": 0.9694, + "step": 1560 + }, + { + "epoch": 0.87, + "learning_rate": 4.0413587410177155e-05, + "loss": 0.9374, + "step": 1565 + }, + { + "epoch": 0.87, + "learning_rate": 4.035652060951128e-05, + "loss": 0.956, + "step": 1570 + }, + { + "epoch": 0.87, + "learning_rate": 4.0299325015650774e-05, + "loss": 0.9491, + "step": 1575 + }, + { + "epoch": 0.87, + "learning_rate": 4.024200110828783e-05, + "loss": 0.9569, + "step": 1580 + }, + { + "epoch": 0.88, + "learning_rate": 4.018454936819082e-05, + "loss": 0.9627, + "step": 1585 + }, + { + "epoch": 0.88, + "learning_rate": 4.012697027720018e-05, + "loss": 0.9922, + "step": 1590 + }, + { + "epoch": 0.88, + "learning_rate": 4.0069264318224506e-05, + "loss": 0.9703, + "step": 1595 + }, + { + "epoch": 0.88, + "learning_rate": 4.0011431975236337e-05, + "loss": 0.9436, + "step": 1600 + }, + { + "epoch": 0.89, + "learning_rate": 3.995347373326822e-05, + "loss": 0.961, + "step": 1605 + }, + { + "epoch": 0.89, + "learning_rate": 3.989539007840861e-05, + "loss": 0.9247, + "step": 1610 + }, + { + "epoch": 0.89, + "learning_rate": 3.983718149779775e-05, + "loss": 0.9537, + "step": 1615 + }, + { + "epoch": 0.9, + "learning_rate": 3.9778848479623656e-05, + "loss": 0.9415, + "step": 1620 + }, + { + "epoch": 0.9, + "learning_rate": 3.972039151311795e-05, + "loss": 0.9954, + "step": 1625 + }, + { + "epoch": 0.9, + "learning_rate": 3.966181108855183e-05, + "loss": 0.9451, + "step": 1630 + }, + { + "epoch": 0.9, + "learning_rate": 3.960310769723189e-05, + "loss": 0.9977, + "step": 1635 + }, + { + "epoch": 0.91, + "learning_rate": 3.9544281831496034e-05, + "loss": 0.9806, + "step": 1640 + }, + { + "epoch": 0.91, + "learning_rate": 3.9485333984709374e-05, + "loss": 0.9851, + "step": 1645 + }, + { + "epoch": 0.91, + "learning_rate": 3.942626465126001e-05, + "loss": 1.0089, + "step": 1650 + }, + { + "epoch": 0.92, + "learning_rate": 3.9367074326555e-05, + "loss": 0.9562, + "step": 1655 + }, + { + "epoch": 0.92, + "learning_rate": 3.930776350701609e-05, + "loss": 0.9892, + "step": 1660 + }, + { + "epoch": 0.92, + "learning_rate": 3.92483326900756e-05, + "loss": 0.9875, + "step": 1665 + }, + { + "epoch": 0.92, + "learning_rate": 3.91887823741723e-05, + "loss": 1.0012, + "step": 1670 + }, + { + "epoch": 0.93, + "learning_rate": 3.9129113058747136e-05, + "loss": 0.97, + "step": 1675 + }, + { + "epoch": 0.93, + "learning_rate": 3.9069325244239095e-05, + "loss": 0.9793, + "step": 1680 + }, + { + "epoch": 0.93, + "learning_rate": 3.900941943208103e-05, + "loss": 0.9864, + "step": 1685 + }, + { + "epoch": 0.93, + "learning_rate": 3.894939612469539e-05, + "loss": 0.9596, + "step": 1690 + }, + { + "epoch": 0.94, + "learning_rate": 3.888925582549006e-05, + "loss": 0.9679, + "step": 1695 + }, + { + "epoch": 0.94, + "learning_rate": 3.882899903885412e-05, + "loss": 0.9395, + "step": 1700 + }, + { + "epoch": 0.94, + "learning_rate": 3.876862627015361e-05, + "loss": 0.9774, + "step": 1705 + }, + { + "epoch": 0.95, + "learning_rate": 3.87081380257273e-05, + "loss": 0.9686, + "step": 1710 + }, + { + "epoch": 0.95, + "learning_rate": 3.864753481288244e-05, + "loss": 0.953, + "step": 1715 + }, + { + "epoch": 0.95, + "learning_rate": 3.8586817139890515e-05, + "loss": 0.9719, + "step": 1720 + }, + { + "epoch": 0.95, + "learning_rate": 3.852598551598294e-05, + "loss": 0.9748, + "step": 1725 + }, + { + "epoch": 0.96, + "learning_rate": 3.8465040451346874e-05, + "loss": 0.9741, + "step": 1730 + }, + { + "epoch": 0.96, + "learning_rate": 3.8403982457120836e-05, + "loss": 0.9747, + "step": 1735 + }, + { + "epoch": 0.96, + "learning_rate": 3.834281204539051e-05, + "loss": 0.9791, + "step": 1740 + }, + { + "epoch": 0.96, + "learning_rate": 3.828152972918438e-05, + "loss": 0.9704, + "step": 1745 + }, + { + "epoch": 0.97, + "learning_rate": 3.82201360224695e-05, + "loss": 0.9556, + "step": 1750 + }, + { + "epoch": 0.97, + "learning_rate": 3.815863144014711e-05, + "loss": 0.9552, + "step": 1755 + }, + { + "epoch": 0.97, + "learning_rate": 3.809701649804834e-05, + "loss": 0.9594, + "step": 1760 + }, + { + "epoch": 0.98, + "learning_rate": 3.8035291712929926e-05, + "loss": 0.959, + "step": 1765 + }, + { + "epoch": 0.98, + "learning_rate": 3.797345760246982e-05, + "loss": 1.0254, + "step": 1770 + }, + { + "epoch": 0.98, + "learning_rate": 3.791151468526289e-05, + "loss": 0.9925, + "step": 1775 + }, + { + "epoch": 0.98, + "learning_rate": 3.784946348081654e-05, + "loss": 0.9516, + "step": 1780 + }, + { + "epoch": 0.99, + "learning_rate": 3.7787304509546365e-05, + "loss": 0.954, + "step": 1785 + }, + { + "epoch": 0.99, + "learning_rate": 3.7725038292771774e-05, + "loss": 0.9465, + "step": 1790 + }, + { + "epoch": 0.99, + "learning_rate": 3.766266535271167e-05, + "loss": 0.9792, + "step": 1795 + }, + { + "epoch": 1.0, + "learning_rate": 3.760018621248e-05, + "loss": 0.964, + "step": 1800 + }, + { + "epoch": 1.0, + "learning_rate": 3.75376013960814e-05, + "loss": 0.9419, + "step": 1805 + }, + { + "epoch": 1.0, + "learning_rate": 3.747491142840681e-05, + "loss": 0.9818, + "step": 1810 + }, + { + "epoch": 1.0, + "learning_rate": 3.741211683522904e-05, + "loss": 0.9153, + "step": 1815 + }, + { + "epoch": 1.01, + "learning_rate": 3.734921814319841e-05, + "loss": 0.955, + "step": 1820 + }, + { + "epoch": 1.01, + "learning_rate": 3.728621587983828e-05, + "loss": 0.9467, + "step": 1825 + }, + { + "epoch": 1.01, + "learning_rate": 3.722311057354067e-05, + "loss": 0.9816, + "step": 1830 + }, + { + "epoch": 1.01, + "learning_rate": 3.715990275356178e-05, + "loss": 0.9727, + "step": 1835 + }, + { + "epoch": 1.02, + "learning_rate": 3.7096592950017617e-05, + "loss": 0.9822, + "step": 1840 + }, + { + "epoch": 1.02, + "learning_rate": 3.703318169387947e-05, + "loss": 0.942, + "step": 1845 + }, + { + "epoch": 1.02, + "learning_rate": 3.696966951696952e-05, + "loss": 0.9306, + "step": 1850 + }, + { + "epoch": 1.03, + "learning_rate": 3.690605695195637e-05, + "loss": 0.9654, + "step": 1855 + }, + { + "epoch": 1.03, + "learning_rate": 3.684234453235054e-05, + "loss": 0.9592, + "step": 1860 + }, + { + "epoch": 1.03, + "learning_rate": 3.677853279250003e-05, + "loss": 0.988, + "step": 1865 + }, + { + "epoch": 1.03, + "learning_rate": 3.671462226758583e-05, + "loss": 0.9462, + "step": 1870 + }, + { + "epoch": 1.04, + "learning_rate": 3.665061349361742e-05, + "loss": 0.9685, + "step": 1875 + }, + { + "epoch": 1.04, + "learning_rate": 3.658650700742828e-05, + "loss": 0.9772, + "step": 1880 + }, + { + "epoch": 1.04, + "learning_rate": 3.6522303346671404e-05, + "loss": 0.9482, + "step": 1885 + }, + { + "epoch": 1.05, + "learning_rate": 3.645800304981477e-05, + "loss": 1.0069, + "step": 1890 + }, + { + "epoch": 1.05, + "learning_rate": 3.639360665613683e-05, + "loss": 0.9003, + "step": 1895 + }, + { + "epoch": 1.05, + "learning_rate": 3.632911470572197e-05, + "loss": 0.9279, + "step": 1900 + }, + { + "epoch": 1.05, + "learning_rate": 3.626452773945603e-05, + "loss": 0.9237, + "step": 1905 + }, + { + "epoch": 1.06, + "learning_rate": 3.619984629902172e-05, + "loss": 1.0086, + "step": 1910 + }, + { + "epoch": 1.06, + "learning_rate": 3.613507092689409e-05, + "loss": 0.9625, + "step": 1915 + }, + { + "epoch": 1.06, + "learning_rate": 3.607020216633599e-05, + "loss": 0.9297, + "step": 1920 + }, + { + "epoch": 1.06, + "learning_rate": 3.60052405613935e-05, + "loss": 0.9718, + "step": 1925 + }, + { + "epoch": 1.07, + "learning_rate": 3.594018665689139e-05, + "loss": 0.9512, + "step": 1930 + }, + { + "epoch": 1.07, + "learning_rate": 3.5875040998428513e-05, + "loss": 0.9923, + "step": 1935 + }, + { + "epoch": 1.07, + "learning_rate": 3.5809804132373253e-05, + "loss": 0.9518, + "step": 1940 + }, + { + "epoch": 1.08, + "learning_rate": 3.574447660585897e-05, + "loss": 0.9324, + "step": 1945 + }, + { + "epoch": 1.08, + "learning_rate": 3.5679058966779344e-05, + "loss": 0.9724, + "step": 1950 + }, + { + "epoch": 1.08, + "learning_rate": 3.561355176378384e-05, + "loss": 0.978, + "step": 1955 + }, + { + "epoch": 1.08, + "learning_rate": 3.554795554627307e-05, + "loss": 0.9893, + "step": 1960 + }, + { + "epoch": 1.09, + "learning_rate": 3.548227086439422e-05, + "loss": 0.967, + "step": 1965 + }, + { + "epoch": 1.09, + "learning_rate": 3.541649826903639e-05, + "loss": 0.9529, + "step": 1970 + }, + { + "epoch": 1.09, + "learning_rate": 3.535063831182602e-05, + "loss": 0.9506, + "step": 1975 + }, + { + "epoch": 1.09, + "learning_rate": 3.528469154512224e-05, + "loss": 0.9525, + "step": 1980 + }, + { + "epoch": 1.1, + "learning_rate": 3.521865852201223e-05, + "loss": 0.9258, + "step": 1985 + }, + { + "epoch": 1.1, + "learning_rate": 3.5152539796306596e-05, + "loss": 0.9417, + "step": 1990 + }, + { + "epoch": 1.1, + "learning_rate": 3.508633592253472e-05, + "loss": 0.9465, + "step": 1995 + }, + { + "epoch": 1.11, + "learning_rate": 3.502004745594011e-05, + "loss": 0.9494, + "step": 2000 + }, + { + "epoch": 1.11, + "learning_rate": 3.4953674952475755e-05, + "loss": 0.9709, + "step": 2005 + }, + { + "epoch": 1.11, + "learning_rate": 3.488721896879943e-05, + "loss": 0.9581, + "step": 2010 + }, + { + "epoch": 1.11, + "learning_rate": 3.4820680062269074e-05, + "loss": 0.974, + "step": 2015 + }, + { + "epoch": 1.12, + "learning_rate": 3.4754058790938046e-05, + "loss": 0.9768, + "step": 2020 + }, + { + "epoch": 1.12, + "learning_rate": 3.468735571355055e-05, + "loss": 0.982, + "step": 2025 + }, + { + "epoch": 1.12, + "learning_rate": 3.4620571389536825e-05, + "loss": 0.984, + "step": 2030 + }, + { + "epoch": 1.13, + "learning_rate": 3.455370637900856e-05, + "loss": 0.9604, + "step": 2035 + }, + { + "epoch": 1.13, + "learning_rate": 3.448676124275414e-05, + "loss": 0.897, + "step": 2040 + }, + { + "epoch": 1.13, + "learning_rate": 3.4419736542233925e-05, + "loss": 0.9968, + "step": 2045 + }, + { + "epoch": 1.13, + "learning_rate": 3.4352632839575616e-05, + "loss": 0.9479, + "step": 2050 + }, + { + "epoch": 1.14, + "learning_rate": 3.428545069756946e-05, + "loss": 0.9724, + "step": 2055 + }, + { + "epoch": 1.14, + "learning_rate": 3.42181906796636e-05, + "loss": 0.9493, + "step": 2060 + }, + { + "epoch": 1.14, + "learning_rate": 3.415085334995927e-05, + "loss": 0.9348, + "step": 2065 + }, + { + "epoch": 1.14, + "learning_rate": 3.408343927320613e-05, + "loss": 0.9702, + "step": 2070 + }, + { + "epoch": 1.15, + "learning_rate": 3.401594901479753e-05, + "loss": 0.9089, + "step": 2075 + }, + { + "epoch": 1.15, + "learning_rate": 3.394838314076572e-05, + "loss": 0.9606, + "step": 2080 + }, + { + "epoch": 1.15, + "learning_rate": 3.3880742217777115e-05, + "loss": 0.9743, + "step": 2085 + }, + { + "epoch": 1.16, + "learning_rate": 3.381302681312759e-05, + "loss": 0.9469, + "step": 2090 + }, + { + "epoch": 1.16, + "learning_rate": 3.374523749473767e-05, + "loss": 0.949, + "step": 2095 + }, + { + "epoch": 1.16, + "learning_rate": 3.367737483114779e-05, + "loss": 0.9567, + "step": 2100 + }, + { + "epoch": 1.16, + "learning_rate": 3.360943939151351e-05, + "loss": 0.9718, + "step": 2105 + }, + { + "epoch": 1.17, + "learning_rate": 3.354143174560078e-05, + "loss": 0.9626, + "step": 2110 + }, + { + "epoch": 1.17, + "learning_rate": 3.3473352463781105e-05, + "loss": 0.9346, + "step": 2115 + }, + { + "epoch": 1.17, + "learning_rate": 3.340520211702681e-05, + "loss": 0.9208, + "step": 2120 + }, + { + "epoch": 1.18, + "learning_rate": 3.333698127690623e-05, + "loss": 0.9856, + "step": 2125 + }, + { + "epoch": 1.18, + "learning_rate": 3.326869051557891e-05, + "loss": 0.9049, + "step": 2130 + }, + { + "epoch": 1.18, + "learning_rate": 3.320033040579082e-05, + "loss": 0.9222, + "step": 2135 + }, + { + "epoch": 1.18, + "learning_rate": 3.3131901520869565e-05, + "loss": 0.9648, + "step": 2140 + }, + { + "epoch": 1.19, + "learning_rate": 3.306340443471951e-05, + "loss": 0.9538, + "step": 2145 + }, + { + "epoch": 1.19, + "learning_rate": 3.299483972181708e-05, + "loss": 0.9314, + "step": 2150 + }, + { + "epoch": 1.19, + "learning_rate": 3.292620795720583e-05, + "loss": 0.9576, + "step": 2155 + }, + { + "epoch": 1.19, + "learning_rate": 3.285750971649167e-05, + "loss": 0.9499, + "step": 2160 + }, + { + "epoch": 1.2, + "learning_rate": 3.278874557583807e-05, + "loss": 0.9568, + "step": 2165 + }, + { + "epoch": 1.2, + "learning_rate": 3.271991611196117e-05, + "loss": 0.9642, + "step": 2170 + }, + { + "epoch": 1.2, + "learning_rate": 3.265102190212497e-05, + "loss": 0.9526, + "step": 2175 + }, + { + "epoch": 1.21, + "learning_rate": 3.258206352413648e-05, + "loss": 0.933, + "step": 2180 + }, + { + "epoch": 1.21, + "learning_rate": 3.2513041556340887e-05, + "loss": 0.9683, + "step": 2185 + }, + { + "epoch": 1.21, + "learning_rate": 3.244395657761671e-05, + "loss": 0.9155, + "step": 2190 + }, + { + "epoch": 1.21, + "learning_rate": 3.2374809167370924e-05, + "loss": 0.9262, + "step": 2195 + }, + { + "epoch": 1.22, + "learning_rate": 3.230559990553409e-05, + "loss": 0.9778, + "step": 2200 + }, + { + "epoch": 1.22, + "learning_rate": 3.2236329372555544e-05, + "loss": 0.9577, + "step": 2205 + }, + { + "epoch": 1.22, + "learning_rate": 3.2166998149398465e-05, + "loss": 0.9286, + "step": 2210 + }, + { + "epoch": 1.22, + "learning_rate": 3.209760681753505e-05, + "loss": 0.9634, + "step": 2215 + }, + { + "epoch": 1.23, + "learning_rate": 3.2028155958941615e-05, + "loss": 0.9467, + "step": 2220 + }, + { + "epoch": 1.23, + "learning_rate": 3.195864615609373e-05, + "loss": 0.9543, + "step": 2225 + }, + { + "epoch": 1.23, + "learning_rate": 3.1889077991961304e-05, + "loss": 0.9914, + "step": 2230 + }, + { + "epoch": 1.24, + "learning_rate": 3.181945205000373e-05, + "loss": 0.9309, + "step": 2235 + }, + { + "epoch": 1.24, + "learning_rate": 3.1749768914164955e-05, + "loss": 0.9299, + "step": 2240 + }, + { + "epoch": 1.24, + "learning_rate": 3.168002916886864e-05, + "loss": 0.9462, + "step": 2245 + }, + { + "epoch": 1.24, + "learning_rate": 3.1610233399013194e-05, + "loss": 0.948, + "step": 2250 + }, + { + "epoch": 1.25, + "learning_rate": 3.15403821899669e-05, + "loss": 0.9335, + "step": 2255 + }, + { + "epoch": 1.25, + "learning_rate": 3.147047612756302e-05, + "loss": 0.952, + "step": 2260 + }, + { + "epoch": 1.25, + "learning_rate": 3.140051579809484e-05, + "loss": 0.9532, + "step": 2265 + }, + { + "epoch": 1.26, + "learning_rate": 3.133050178831079e-05, + "loss": 0.9853, + "step": 2270 + }, + { + "epoch": 1.26, + "learning_rate": 3.12604346854095e-05, + "loss": 0.8925, + "step": 2275 + }, + { + "epoch": 1.26, + "learning_rate": 3.119031507703491e-05, + "loss": 0.942, + "step": 2280 + }, + { + "epoch": 1.26, + "learning_rate": 3.112014355127129e-05, + "loss": 0.9132, + "step": 2285 + }, + { + "epoch": 1.27, + "learning_rate": 3.104992069663835e-05, + "loss": 0.9335, + "step": 2290 + }, + { + "epoch": 1.27, + "learning_rate": 3.0979647102086273e-05, + "loss": 0.9403, + "step": 2295 + }, + { + "epoch": 1.27, + "learning_rate": 3.090932335699081e-05, + "loss": 0.9246, + "step": 2300 + }, + { + "epoch": 1.27, + "learning_rate": 3.083895005114831e-05, + "loss": 0.912, + "step": 2305 + }, + { + "epoch": 1.28, + "learning_rate": 3.076852777477079e-05, + "loss": 0.9334, + "step": 2310 + }, + { + "epoch": 1.28, + "learning_rate": 3.069805711848096e-05, + "loss": 0.933, + "step": 2315 + }, + { + "epoch": 1.28, + "learning_rate": 3.062753867330729e-05, + "loss": 0.9348, + "step": 2320 + }, + { + "epoch": 1.29, + "learning_rate": 3.055697303067905e-05, + "loss": 0.9997, + "step": 2325 + }, + { + "epoch": 1.29, + "learning_rate": 3.048636078242137e-05, + "loss": 0.9196, + "step": 2330 + }, + { + "epoch": 1.29, + "learning_rate": 3.0415702520750235e-05, + "loss": 0.9735, + "step": 2335 + }, + { + "epoch": 1.29, + "learning_rate": 3.0344998838267525e-05, + "loss": 0.9395, + "step": 2340 + }, + { + "epoch": 1.3, + "learning_rate": 3.0274250327956093e-05, + "loss": 0.9455, + "step": 2345 + }, + { + "epoch": 1.3, + "learning_rate": 3.020345758317474e-05, + "loss": 0.968, + "step": 2350 + }, + { + "epoch": 1.3, + "learning_rate": 3.0132621197653245e-05, + "loss": 0.9403, + "step": 2355 + }, + { + "epoch": 1.3, + "learning_rate": 3.0061741765487418e-05, + "loss": 0.9267, + "step": 2360 + }, + { + "epoch": 1.31, + "learning_rate": 2.9990819881134073e-05, + "loss": 0.9734, + "step": 2365 + }, + { + "epoch": 1.31, + "learning_rate": 2.9919856139406093e-05, + "loss": 0.949, + "step": 2370 + }, + { + "epoch": 1.31, + "learning_rate": 2.9848851135467386e-05, + "loss": 0.9464, + "step": 2375 + }, + { + "epoch": 1.32, + "learning_rate": 2.977780546482794e-05, + "loss": 0.9709, + "step": 2380 + }, + { + "epoch": 1.32, + "learning_rate": 2.9706719723338795e-05, + "loss": 0.9202, + "step": 2385 + }, + { + "epoch": 1.32, + "learning_rate": 2.9635594507187074e-05, + "loss": 0.9625, + "step": 2390 + }, + { + "epoch": 1.32, + "learning_rate": 2.956443041289096e-05, + "loss": 0.9307, + "step": 2395 + }, + { + "epoch": 1.33, + "learning_rate": 2.9493228037294702e-05, + "loss": 0.907, + "step": 2400 + }, + { + "epoch": 1.33, + "learning_rate": 2.9421987977563613e-05, + "loss": 0.9487, + "step": 2405 + }, + { + "epoch": 1.33, + "learning_rate": 2.935071083117907e-05, + "loss": 0.9259, + "step": 2410 + }, + { + "epoch": 1.34, + "learning_rate": 2.9279397195933457e-05, + "loss": 0.9357, + "step": 2415 + }, + { + "epoch": 1.34, + "learning_rate": 2.920804766992521e-05, + "loss": 0.9149, + "step": 2420 + }, + { + "epoch": 1.34, + "learning_rate": 2.9136662851553787e-05, + "loss": 0.9664, + "step": 2425 + }, + { + "epoch": 1.34, + "learning_rate": 2.906524333951461e-05, + "loss": 0.973, + "step": 2430 + }, + { + "epoch": 1.35, + "learning_rate": 2.899378973279409e-05, + "loss": 0.9797, + "step": 2435 + }, + { + "epoch": 1.35, + "learning_rate": 2.892230263066459e-05, + "loss": 0.9604, + "step": 2440 + }, + { + "epoch": 1.35, + "learning_rate": 2.885078263267938e-05, + "loss": 0.9147, + "step": 2445 + }, + { + "epoch": 1.35, + "learning_rate": 2.8779230338667634e-05, + "loss": 0.9827, + "step": 2450 + }, + { + "epoch": 1.36, + "learning_rate": 2.870764634872939e-05, + "loss": 0.9362, + "step": 2455 + }, + { + "epoch": 1.36, + "learning_rate": 2.86360312632305e-05, + "loss": 0.9373, + "step": 2460 + }, + { + "epoch": 1.36, + "learning_rate": 2.8564385682797622e-05, + "loss": 0.955, + "step": 2465 + }, + { + "epoch": 1.37, + "learning_rate": 2.8492710208313177e-05, + "loss": 0.9108, + "step": 2470 + }, + { + "epoch": 1.37, + "learning_rate": 2.8421005440910303e-05, + "loss": 0.9259, + "step": 2475 + }, + { + "epoch": 1.37, + "learning_rate": 2.8349271981967797e-05, + "loss": 0.9333, + "step": 2480 + }, + { + "epoch": 1.37, + "learning_rate": 2.8277510433105102e-05, + "loss": 0.9437, + "step": 2485 + }, + { + "epoch": 1.38, + "learning_rate": 2.820572139617725e-05, + "loss": 0.9189, + "step": 2490 + }, + { + "epoch": 1.38, + "learning_rate": 2.8133905473269802e-05, + "loss": 0.9522, + "step": 2495 + }, + { + "epoch": 1.38, + "learning_rate": 2.8062063266693818e-05, + "loss": 0.9352, + "step": 2500 + }, + { + "epoch": 1.39, + "learning_rate": 2.7990195378980784e-05, + "loss": 0.9232, + "step": 2505 + }, + { + "epoch": 1.39, + "learning_rate": 2.7918302412877583e-05, + "loss": 0.8807, + "step": 2510 + }, + { + "epoch": 1.39, + "learning_rate": 2.7846384971341427e-05, + "loss": 0.9231, + "step": 2515 + }, + { + "epoch": 1.39, + "learning_rate": 2.7774443657534788e-05, + "loss": 0.9253, + "step": 2520 + }, + { + "epoch": 1.4, + "learning_rate": 2.770247907482036e-05, + "loss": 0.9646, + "step": 2525 + }, + { + "epoch": 1.4, + "learning_rate": 2.763049182675599e-05, + "loss": 0.9575, + "step": 2530 + }, + { + "epoch": 1.4, + "learning_rate": 2.7558482517089617e-05, + "loss": 0.9234, + "step": 2535 + }, + { + "epoch": 1.4, + "learning_rate": 2.748645174975421e-05, + "loss": 0.9215, + "step": 2540 + }, + { + "epoch": 1.41, + "learning_rate": 2.74144001288627e-05, + "loss": 0.959, + "step": 2545 + }, + { + "epoch": 1.41, + "learning_rate": 2.7342328258702894e-05, + "loss": 0.9509, + "step": 2550 + }, + { + "epoch": 1.41, + "learning_rate": 2.727023674373246e-05, + "loss": 0.8989, + "step": 2555 + }, + { + "epoch": 1.42, + "learning_rate": 2.7198126188573807e-05, + "loss": 0.9653, + "step": 2560 + }, + { + "epoch": 1.42, + "learning_rate": 2.7125997198009028e-05, + "loss": 0.9046, + "step": 2565 + }, + { + "epoch": 1.42, + "learning_rate": 2.7053850376974848e-05, + "loss": 0.9318, + "step": 2570 + }, + { + "epoch": 1.42, + "learning_rate": 2.6981686330557516e-05, + "loss": 0.9292, + "step": 2575 + }, + { + "epoch": 1.43, + "learning_rate": 2.6909505663987756e-05, + "loss": 0.99, + "step": 2580 + }, + { + "epoch": 1.43, + "learning_rate": 2.6837308982635678e-05, + "loss": 0.9737, + "step": 2585 + }, + { + "epoch": 1.43, + "learning_rate": 2.6765096892005726e-05, + "loss": 0.9649, + "step": 2590 + }, + { + "epoch": 1.43, + "learning_rate": 2.6692869997731545e-05, + "loss": 0.9687, + "step": 2595 + }, + { + "epoch": 1.44, + "learning_rate": 2.6620628905570964e-05, + "loss": 0.9708, + "step": 2600 + }, + { + "epoch": 1.44, + "learning_rate": 2.6548374221400884e-05, + "loss": 0.9498, + "step": 2605 + }, + { + "epoch": 1.44, + "learning_rate": 2.6476106551212188e-05, + "loss": 0.9512, + "step": 2610 + }, + { + "epoch": 1.45, + "learning_rate": 2.6403826501104682e-05, + "loss": 0.964, + "step": 2615 + }, + { + "epoch": 1.45, + "learning_rate": 2.6331534677281998e-05, + "loss": 0.9321, + "step": 2620 + }, + { + "epoch": 1.45, + "learning_rate": 2.6259231686046508e-05, + "loss": 0.9032, + "step": 2625 + }, + { + "epoch": 1.45, + "learning_rate": 2.6186918133794252e-05, + "loss": 0.9543, + "step": 2630 + }, + { + "epoch": 1.46, + "learning_rate": 2.6114594627009847e-05, + "loss": 0.9355, + "step": 2635 + }, + { + "epoch": 1.46, + "learning_rate": 2.604226177226137e-05, + "loss": 0.9111, + "step": 2640 + }, + { + "epoch": 1.46, + "learning_rate": 2.596992017619534e-05, + "loss": 0.9896, + "step": 2645 + }, + { + "epoch": 1.47, + "learning_rate": 2.589757044553155e-05, + "loss": 0.9715, + "step": 2650 + }, + { + "epoch": 1.47, + "learning_rate": 2.5825213187058045e-05, + "loss": 0.8911, + "step": 2655 + }, + { + "epoch": 1.47, + "learning_rate": 2.5752849007625986e-05, + "loss": 0.9446, + "step": 2660 + }, + { + "epoch": 1.47, + "learning_rate": 2.568047851414459e-05, + "loss": 0.9453, + "step": 2665 + }, + { + "epoch": 1.48, + "learning_rate": 2.5608102313576027e-05, + "loss": 0.9244, + "step": 2670 + }, + { + "epoch": 1.48, + "learning_rate": 2.553572101293033e-05, + "loss": 0.9478, + "step": 2675 + }, + { + "epoch": 1.48, + "learning_rate": 2.546333521926031e-05, + "loss": 0.9708, + "step": 2680 + }, + { + "epoch": 1.48, + "learning_rate": 2.5390945539656445e-05, + "loss": 0.9266, + "step": 2685 + }, + { + "epoch": 1.49, + "learning_rate": 2.5318552581241822e-05, + "loss": 0.9199, + "step": 2690 + }, + { + "epoch": 1.49, + "learning_rate": 2.524615695116702e-05, + "loss": 0.9666, + "step": 2695 + }, + { + "epoch": 1.49, + "learning_rate": 2.5173759256605027e-05, + "loss": 0.9247, + "step": 2700 + }, + { + "epoch": 1.5, + "learning_rate": 2.510136010474614e-05, + "loss": 0.9534, + "step": 2705 + }, + { + "epoch": 1.5, + "learning_rate": 2.5028960102792887e-05, + "loss": 0.9502, + "step": 2710 + }, + { + "epoch": 1.5, + "learning_rate": 2.4971039897207112e-05, + "loss": 0.9331, + "step": 2715 + }, + { + "epoch": 1.5, + "learning_rate": 2.4898639895253865e-05, + "loss": 0.9806, + "step": 2720 + }, + { + "epoch": 1.51, + "learning_rate": 2.4826240743394982e-05, + "loss": 0.9525, + "step": 2725 + }, + { + "epoch": 1.51, + "learning_rate": 2.4753843048832985e-05, + "loss": 0.937, + "step": 2730 + }, + { + "epoch": 1.51, + "learning_rate": 2.4681447418758187e-05, + "loss": 0.935, + "step": 2735 + }, + { + "epoch": 1.52, + "learning_rate": 2.460905446034356e-05, + "loss": 0.9662, + "step": 2740 + }, + { + "epoch": 1.52, + "learning_rate": 2.45366647807397e-05, + "loss": 0.9249, + "step": 2745 + }, + { + "epoch": 1.52, + "learning_rate": 2.446427898706967e-05, + "loss": 0.9673, + "step": 2750 + }, + { + "epoch": 1.52, + "learning_rate": 2.439189768642398e-05, + "loss": 0.9316, + "step": 2755 + }, + { + "epoch": 1.53, + "learning_rate": 2.431952148585541e-05, + "loss": 0.9297, + "step": 2760 + }, + { + "epoch": 1.53, + "learning_rate": 2.424715099237402e-05, + "loss": 0.9278, + "step": 2765 + }, + { + "epoch": 1.53, + "learning_rate": 2.4174786812941968e-05, + "loss": 0.8954, + "step": 2770 + }, + { + "epoch": 1.53, + "learning_rate": 2.4102429554468456e-05, + "loss": 0.9586, + "step": 2775 + }, + { + "epoch": 1.54, + "learning_rate": 2.4030079823804673e-05, + "loss": 0.9119, + "step": 2780 + }, + { + "epoch": 1.54, + "learning_rate": 2.395773822773863e-05, + "loss": 0.8949, + "step": 2785 + }, + { + "epoch": 1.54, + "learning_rate": 2.3885405372990166e-05, + "loss": 0.9506, + "step": 2790 + }, + { + "epoch": 1.55, + "learning_rate": 2.3813081866205754e-05, + "loss": 0.9087, + "step": 2795 + }, + { + "epoch": 1.55, + "learning_rate": 2.3740768313953494e-05, + "loss": 0.923, + "step": 2800 + }, + { + "epoch": 1.55, + "learning_rate": 2.3668465322718004e-05, + "loss": 1.0212, + "step": 2805 + }, + { + "epoch": 1.55, + "learning_rate": 2.359617349889532e-05, + "loss": 0.9182, + "step": 2810 + }, + { + "epoch": 1.56, + "learning_rate": 2.3523893448787818e-05, + "loss": 0.9316, + "step": 2815 + }, + { + "epoch": 1.56, + "learning_rate": 2.3451625778599122e-05, + "loss": 0.94, + "step": 2820 + }, + { + "epoch": 1.56, + "learning_rate": 2.3379371094429038e-05, + "loss": 0.9181, + "step": 2825 + }, + { + "epoch": 1.56, + "learning_rate": 2.3307130002268457e-05, + "loss": 0.937, + "step": 2830 + }, + { + "epoch": 1.57, + "learning_rate": 2.3234903107994287e-05, + "loss": 0.9026, + "step": 2835 + }, + { + "epoch": 1.57, + "learning_rate": 2.3162691017364317e-05, + "loss": 0.954, + "step": 2840 + }, + { + "epoch": 1.57, + "learning_rate": 2.3090494336012253e-05, + "loss": 0.9661, + "step": 2845 + }, + { + "epoch": 1.58, + "learning_rate": 2.3018313669442483e-05, + "loss": 0.9127, + "step": 2850 + }, + { + "epoch": 1.58, + "learning_rate": 2.2946149623025158e-05, + "loss": 0.9317, + "step": 2855 + }, + { + "epoch": 1.58, + "learning_rate": 2.2874002801990978e-05, + "loss": 0.9856, + "step": 2860 + }, + { + "epoch": 1.58, + "learning_rate": 2.28018738114262e-05, + "loss": 1.0021, + "step": 2865 + }, + { + "epoch": 1.59, + "learning_rate": 2.272976325626755e-05, + "loss": 0.9655, + "step": 2870 + }, + { + "epoch": 1.59, + "learning_rate": 2.265767174129711e-05, + "loss": 0.9619, + "step": 2875 + }, + { + "epoch": 1.59, + "learning_rate": 2.2585599871137313e-05, + "loss": 0.9383, + "step": 2880 + }, + { + "epoch": 1.6, + "learning_rate": 2.251354825024579e-05, + "loss": 0.9332, + "step": 2885 + }, + { + "epoch": 1.6, + "learning_rate": 2.244151748291039e-05, + "loss": 0.9544, + "step": 2890 + }, + { + "epoch": 1.6, + "learning_rate": 2.236950817324401e-05, + "loss": 0.9343, + "step": 2895 + }, + { + "epoch": 1.6, + "learning_rate": 2.2297520925179647e-05, + "loss": 0.9189, + "step": 2900 + }, + { + "epoch": 1.61, + "learning_rate": 2.222555634246521e-05, + "loss": 0.9341, + "step": 2905 + }, + { + "epoch": 1.61, + "learning_rate": 2.215361502865858e-05, + "loss": 0.9567, + "step": 2910 + }, + { + "epoch": 1.61, + "learning_rate": 2.2081697587122423e-05, + "loss": 0.9047, + "step": 2915 + }, + { + "epoch": 1.61, + "learning_rate": 2.200980462101922e-05, + "loss": 0.9126, + "step": 2920 + }, + { + "epoch": 1.62, + "learning_rate": 2.1937936733306195e-05, + "loss": 0.9523, + "step": 2925 + }, + { + "epoch": 1.62, + "learning_rate": 2.18660945267302e-05, + "loss": 0.8802, + "step": 2930 + }, + { + "epoch": 1.62, + "learning_rate": 2.179427860382276e-05, + "loss": 0.9197, + "step": 2935 + }, + { + "epoch": 1.63, + "learning_rate": 2.1722489566894903e-05, + "loss": 0.9255, + "step": 2940 + }, + { + "epoch": 1.63, + "learning_rate": 2.1650728018032206e-05, + "loss": 0.8921, + "step": 2945 + }, + { + "epoch": 1.63, + "learning_rate": 2.15789945590897e-05, + "loss": 0.9607, + "step": 2950 + }, + { + "epoch": 1.63, + "learning_rate": 2.150728979168683e-05, + "loss": 0.9755, + "step": 2955 + }, + { + "epoch": 1.64, + "learning_rate": 2.1435614317202384e-05, + "loss": 0.9943, + "step": 2960 + }, + { + "epoch": 1.64, + "learning_rate": 2.1363968736769508e-05, + "loss": 0.9463, + "step": 2965 + }, + { + "epoch": 1.64, + "learning_rate": 2.1292353651270617e-05, + "loss": 0.9107, + "step": 2970 + }, + { + "epoch": 1.65, + "learning_rate": 2.1220769661332365e-05, + "loss": 0.9311, + "step": 2975 + }, + { + "epoch": 1.65, + "learning_rate": 2.1149217367320622e-05, + "loss": 0.9459, + "step": 2980 + }, + { + "epoch": 1.65, + "learning_rate": 2.107769736933541e-05, + "loss": 0.9439, + "step": 2985 + }, + { + "epoch": 1.65, + "learning_rate": 2.100621026720591e-05, + "loss": 0.9719, + "step": 2990 + }, + { + "epoch": 1.66, + "learning_rate": 2.093475666048539e-05, + "loss": 0.9569, + "step": 2995 + }, + { + "epoch": 1.66, + "learning_rate": 2.0863337148446222e-05, + "loss": 0.9308, + "step": 3000 + }, + { + "epoch": 1.66, + "learning_rate": 2.07919523300748e-05, + "loss": 0.9269, + "step": 3005 + }, + { + "epoch": 1.66, + "learning_rate": 2.0720602804066552e-05, + "loss": 0.9358, + "step": 3010 + }, + { + "epoch": 1.67, + "learning_rate": 2.0649289168820943e-05, + "loss": 0.9291, + "step": 3015 + }, + { + "epoch": 1.67, + "learning_rate": 2.0578012022436386e-05, + "loss": 0.969, + "step": 3020 + }, + { + "epoch": 1.67, + "learning_rate": 2.0506771962705304e-05, + "loss": 0.9319, + "step": 3025 + }, + { + "epoch": 1.68, + "learning_rate": 2.0435569587109042e-05, + "loss": 0.9574, + "step": 3030 + }, + { + "epoch": 1.68, + "learning_rate": 2.036440549281293e-05, + "loss": 0.9654, + "step": 3035 + }, + { + "epoch": 1.68, + "learning_rate": 2.0293280276661204e-05, + "loss": 0.9742, + "step": 3040 + }, + { + "epoch": 1.68, + "learning_rate": 2.0222194535172067e-05, + "loss": 0.9014, + "step": 3045 + }, + { + "epoch": 1.69, + "learning_rate": 2.0151148864532623e-05, + "loss": 0.9246, + "step": 3050 + }, + { + "epoch": 1.69, + "learning_rate": 2.0080143860593913e-05, + "loss": 0.9522, + "step": 3055 + }, + { + "epoch": 1.69, + "learning_rate": 2.0009180118865933e-05, + "loss": 0.9567, + "step": 3060 + }, + { + "epoch": 1.69, + "learning_rate": 1.9938258234512588e-05, + "loss": 0.983, + "step": 3065 + }, + { + "epoch": 1.7, + "learning_rate": 1.9867378802346764e-05, + "loss": 0.8722, + "step": 3070 + }, + { + "epoch": 1.7, + "learning_rate": 1.979654241682527e-05, + "loss": 0.9122, + "step": 3075 + }, + { + "epoch": 1.7, + "learning_rate": 1.972574967204391e-05, + "loss": 0.9362, + "step": 3080 + }, + { + "epoch": 1.71, + "learning_rate": 1.9655001161732478e-05, + "loss": 0.8944, + "step": 3085 + }, + { + "epoch": 1.71, + "learning_rate": 1.9584297479249774e-05, + "loss": 0.9329, + "step": 3090 + }, + { + "epoch": 1.71, + "learning_rate": 1.9513639217578636e-05, + "loss": 0.9707, + "step": 3095 + }, + { + "epoch": 1.71, + "learning_rate": 1.9443026969320955e-05, + "loss": 0.9367, + "step": 3100 + }, + { + "epoch": 1.72, + "learning_rate": 1.937246132669272e-05, + "loss": 0.896, + "step": 3105 + }, + { + "epoch": 1.72, + "learning_rate": 1.9301942881519047e-05, + "loss": 0.97, + "step": 3110 + }, + { + "epoch": 1.72, + "learning_rate": 1.9231472225229216e-05, + "loss": 0.9638, + "step": 3115 + }, + { + "epoch": 1.73, + "learning_rate": 1.9161049948851684e-05, + "loss": 0.9561, + "step": 3120 + }, + { + "epoch": 1.73, + "learning_rate": 1.9090676643009193e-05, + "loss": 0.9734, + "step": 3125 + }, + { + "epoch": 1.73, + "learning_rate": 1.902035289791373e-05, + "loss": 0.9651, + "step": 3130 + }, + { + "epoch": 1.73, + "learning_rate": 1.8950079303361658e-05, + "loss": 0.9489, + "step": 3135 + }, + { + "epoch": 1.74, + "learning_rate": 1.8879856448728723e-05, + "loss": 0.9893, + "step": 3140 + }, + { + "epoch": 1.74, + "learning_rate": 1.8809684922965097e-05, + "loss": 0.9549, + "step": 3145 + }, + { + "epoch": 1.74, + "learning_rate": 1.8739565314590507e-05, + "loss": 0.9196, + "step": 3150 + }, + { + "epoch": 1.74, + "learning_rate": 1.8669498211689216e-05, + "loss": 0.9568, + "step": 3155 + }, + { + "epoch": 1.75, + "learning_rate": 1.859948420190517e-05, + "loss": 0.904, + "step": 3160 + }, + { + "epoch": 1.75, + "learning_rate": 1.852952387243698e-05, + "loss": 0.9375, + "step": 3165 + }, + { + "epoch": 1.75, + "learning_rate": 1.8459617810033096e-05, + "loss": 0.9048, + "step": 3170 + }, + { + "epoch": 1.76, + "learning_rate": 1.83897666009868e-05, + "loss": 0.9514, + "step": 3175 + }, + { + "epoch": 1.76, + "learning_rate": 1.8319970831131363e-05, + "loss": 0.8855, + "step": 3180 + }, + { + "epoch": 1.76, + "learning_rate": 1.825023108583505e-05, + "loss": 0.9119, + "step": 3185 + }, + { + "epoch": 1.76, + "learning_rate": 1.818054794999628e-05, + "loss": 0.9733, + "step": 3190 + }, + { + "epoch": 1.77, + "learning_rate": 1.8110922008038705e-05, + "loss": 0.9597, + "step": 3195 + }, + { + "epoch": 1.77, + "learning_rate": 1.8041353843906275e-05, + "loss": 0.8952, + "step": 3200 + }, + { + "epoch": 1.77, + "learning_rate": 1.797184404105839e-05, + "loss": 0.8934, + "step": 3205 + }, + { + "epoch": 1.77, + "learning_rate": 1.7902393182464955e-05, + "loss": 0.9299, + "step": 3210 + }, + { + "epoch": 1.78, + "learning_rate": 1.7833001850601544e-05, + "loss": 0.9247, + "step": 3215 + }, + { + "epoch": 1.78, + "learning_rate": 1.7763670627444465e-05, + "loss": 0.9672, + "step": 3220 + }, + { + "epoch": 1.78, + "learning_rate": 1.7694400094465913e-05, + "loss": 0.9451, + "step": 3225 + }, + { + "epoch": 1.79, + "learning_rate": 1.7625190832629085e-05, + "loss": 0.9294, + "step": 3230 + }, + { + "epoch": 1.79, + "learning_rate": 1.7556043422383293e-05, + "loss": 0.9145, + "step": 3235 + }, + { + "epoch": 1.79, + "learning_rate": 1.7486958443659112e-05, + "loss": 0.9508, + "step": 3240 + }, + { + "epoch": 1.79, + "learning_rate": 1.7417936475863526e-05, + "loss": 0.8725, + "step": 3245 + }, + { + "epoch": 1.8, + "learning_rate": 1.7348978097875036e-05, + "loss": 0.9195, + "step": 3250 + }, + { + "epoch": 1.8, + "learning_rate": 1.728008388803883e-05, + "loss": 0.933, + "step": 3255 + }, + { + "epoch": 1.8, + "learning_rate": 1.7211254424161933e-05, + "loss": 0.9747, + "step": 3260 + }, + { + "epoch": 1.81, + "learning_rate": 1.7142490283508324e-05, + "loss": 0.9168, + "step": 3265 + }, + { + "epoch": 1.81, + "learning_rate": 1.707379204279418e-05, + "loss": 0.9844, + "step": 3270 + }, + { + "epoch": 1.81, + "learning_rate": 1.700516027818293e-05, + "loss": 0.9071, + "step": 3275 + }, + { + "epoch": 1.81, + "learning_rate": 1.6936595565280488e-05, + "loss": 0.9311, + "step": 3280 + }, + { + "epoch": 1.82, + "learning_rate": 1.686809847913045e-05, + "loss": 0.9419, + "step": 3285 + }, + { + "epoch": 1.82, + "learning_rate": 1.679966959420918e-05, + "loss": 0.9679, + "step": 3290 + }, + { + "epoch": 1.82, + "learning_rate": 1.67313094844211e-05, + "loss": 0.9601, + "step": 3295 + }, + { + "epoch": 1.82, + "learning_rate": 1.6663018723093774e-05, + "loss": 0.9022, + "step": 3300 + }, + { + "epoch": 1.83, + "learning_rate": 1.6594797882973196e-05, + "loss": 0.8925, + "step": 3305 + }, + { + "epoch": 1.83, + "learning_rate": 1.6526647536218894e-05, + "loss": 0.9238, + "step": 3310 + }, + { + "epoch": 1.83, + "learning_rate": 1.6458568254399225e-05, + "loss": 0.9632, + "step": 3315 + }, + { + "epoch": 1.84, + "learning_rate": 1.6390560608486496e-05, + "loss": 0.9164, + "step": 3320 + }, + { + "epoch": 1.84, + "learning_rate": 1.6322625168852217e-05, + "loss": 0.9505, + "step": 3325 + }, + { + "epoch": 1.84, + "learning_rate": 1.6254762505262338e-05, + "loss": 0.9622, + "step": 3330 + }, + { + "epoch": 1.84, + "learning_rate": 1.618697318687241e-05, + "loss": 0.9204, + "step": 3335 + }, + { + "epoch": 1.85, + "learning_rate": 1.6119257782222895e-05, + "loss": 0.9504, + "step": 3340 + }, + { + "epoch": 1.85, + "learning_rate": 1.6051616859234285e-05, + "loss": 0.9384, + "step": 3345 + }, + { + "epoch": 1.85, + "learning_rate": 1.5984050985202474e-05, + "loss": 0.9374, + "step": 3350 + }, + { + "epoch": 1.86, + "learning_rate": 1.591656072679387e-05, + "loss": 0.9937, + "step": 3355 + }, + { + "epoch": 1.86, + "learning_rate": 1.5849146650040737e-05, + "loss": 0.9587, + "step": 3360 + }, + { + "epoch": 1.86, + "learning_rate": 1.5781809320336412e-05, + "loss": 0.9312, + "step": 3365 + }, + { + "epoch": 1.86, + "learning_rate": 1.5714549302430536e-05, + "loss": 0.9343, + "step": 3370 + }, + { + "epoch": 1.87, + "learning_rate": 1.5647367160424393e-05, + "loss": 0.9419, + "step": 3375 + }, + { + "epoch": 1.87, + "learning_rate": 1.558026345776608e-05, + "loss": 0.9371, + "step": 3380 + }, + { + "epoch": 1.87, + "learning_rate": 1.551323875724587e-05, + "loss": 0.913, + "step": 3385 + }, + { + "epoch": 1.87, + "learning_rate": 1.5446293620991437e-05, + "loss": 0.9195, + "step": 3390 + }, + { + "epoch": 1.88, + "learning_rate": 1.5379428610463174e-05, + "loss": 0.9164, + "step": 3395 + }, + { + "epoch": 1.88, + "learning_rate": 1.531264428644945e-05, + "loss": 0.9256, + "step": 3400 + }, + { + "epoch": 1.88, + "learning_rate": 1.5245941209061953e-05, + "loss": 0.9658, + "step": 3405 + }, + { + "epoch": 1.89, + "learning_rate": 1.517931993773094e-05, + "loss": 0.9397, + "step": 3410 + }, + { + "epoch": 1.89, + "learning_rate": 1.5112781031200569e-05, + "loss": 0.915, + "step": 3415 + }, + { + "epoch": 1.89, + "learning_rate": 1.5046325047524251e-05, + "loss": 0.9116, + "step": 3420 + }, + { + "epoch": 1.89, + "learning_rate": 1.4979952544059888e-05, + "loss": 0.9192, + "step": 3425 + }, + { + "epoch": 1.9, + "learning_rate": 1.4913664077465289e-05, + "loss": 0.924, + "step": 3430 + }, + { + "epoch": 1.9, + "learning_rate": 1.4847460203693408e-05, + "loss": 0.9579, + "step": 3435 + }, + { + "epoch": 1.9, + "learning_rate": 1.4781341477987776e-05, + "loss": 0.9816, + "step": 3440 + }, + { + "epoch": 1.9, + "learning_rate": 1.4715308454877758e-05, + "loss": 0.9412, + "step": 3445 + }, + { + "epoch": 1.91, + "learning_rate": 1.4649361688173979e-05, + "loss": 0.9413, + "step": 3450 + }, + { + "epoch": 1.91, + "learning_rate": 1.458350173096361e-05, + "loss": 0.9425, + "step": 3455 + }, + { + "epoch": 1.91, + "learning_rate": 1.4517729135605795e-05, + "loss": 0.9132, + "step": 3460 + }, + { + "epoch": 1.92, + "learning_rate": 1.4452044453726942e-05, + "loss": 0.9455, + "step": 3465 + }, + { + "epoch": 1.92, + "learning_rate": 1.4386448236216174e-05, + "loss": 0.9543, + "step": 3470 + }, + { + "epoch": 1.92, + "learning_rate": 1.4320941033220667e-05, + "loss": 0.9118, + "step": 3475 + }, + { + "epoch": 1.92, + "learning_rate": 1.4255523394141041e-05, + "loss": 0.9425, + "step": 3480 + }, + { + "epoch": 1.93, + "learning_rate": 1.4190195867626749e-05, + "loss": 0.8958, + "step": 3485 + }, + { + "epoch": 1.93, + "learning_rate": 1.4124959001571497e-05, + "loss": 0.944, + "step": 3490 + }, + { + "epoch": 1.93, + "learning_rate": 1.4059813343108616e-05, + "loss": 0.9611, + "step": 3495 + }, + { + "epoch": 1.94, + "learning_rate": 1.3994759438606501e-05, + "loss": 0.9448, + "step": 3500 + }, + { + "epoch": 1.94, + "learning_rate": 1.3929797833664013e-05, + "loss": 0.9659, + "step": 3505 + }, + { + "epoch": 1.94, + "learning_rate": 1.3864929073105922e-05, + "loss": 0.9178, + "step": 3510 + }, + { + "epoch": 1.94, + "learning_rate": 1.3800153700978282e-05, + "loss": 0.8965, + "step": 3515 + }, + { + "epoch": 1.95, + "learning_rate": 1.373547226054398e-05, + "loss": 0.9198, + "step": 3520 + }, + { + "epoch": 1.95, + "learning_rate": 1.367088529427803e-05, + "loss": 0.9398, + "step": 3525 + }, + { + "epoch": 1.95, + "learning_rate": 1.3606393343863182e-05, + "loss": 0.9423, + "step": 3530 + }, + { + "epoch": 1.95, + "learning_rate": 1.3541996950185227e-05, + "loss": 0.9592, + "step": 3535 + }, + { + "epoch": 1.96, + "learning_rate": 1.3477696653328598e-05, + "loss": 0.9489, + "step": 3540 + }, + { + "epoch": 1.96, + "learning_rate": 1.3413492992571713e-05, + "loss": 0.963, + "step": 3545 + }, + { + "epoch": 1.96, + "learning_rate": 1.3349386506382586e-05, + "loss": 0.9449, + "step": 3550 + }, + { + "epoch": 1.97, + "learning_rate": 1.3285377732414172e-05, + "loss": 0.9043, + "step": 3555 + }, + { + "epoch": 1.97, + "learning_rate": 1.3221467207499972e-05, + "loss": 0.9362, + "step": 3560 + }, + { + "epoch": 1.97, + "learning_rate": 1.3157655467649463e-05, + "loss": 0.945, + "step": 3565 + }, + { + "epoch": 1.97, + "learning_rate": 1.3093943048043634e-05, + "loss": 0.8693, + "step": 3570 + }, + { + "epoch": 1.98, + "learning_rate": 1.3030330483030479e-05, + "loss": 0.9659, + "step": 3575 + }, + { + "epoch": 1.98, + "learning_rate": 1.2966818306120535e-05, + "loss": 0.9036, + "step": 3580 + }, + { + "epoch": 1.98, + "learning_rate": 1.2903407049982386e-05, + "loss": 0.9282, + "step": 3585 + }, + { + "epoch": 1.99, + "learning_rate": 1.2840097246438215e-05, + "loss": 0.8747, + "step": 3590 + }, + { + "epoch": 1.99, + "learning_rate": 1.277688942645934e-05, + "loss": 0.936, + "step": 3595 + }, + { + "epoch": 1.99, + "learning_rate": 1.2713784120161725e-05, + "loss": 0.8945, + "step": 3600 + }, + { + "epoch": 1.99, + "learning_rate": 1.2650781856801598e-05, + "loss": 0.9273, + "step": 3605 + }, + { + "epoch": 2.0, + "learning_rate": 1.258788316477097e-05, + "loss": 0.9632, + "step": 3610 + }, + { + "epoch": 2.0, + "learning_rate": 1.2525088571593202e-05, + "loss": 0.8641, + "step": 3615 + }, + { + "epoch": 2.0, + "learning_rate": 1.2462398603918607e-05, + "loss": 0.9314, + "step": 3620 + }, + { + "epoch": 2.0, + "learning_rate": 1.2399813787520006e-05, + "loss": 0.9169, + "step": 3625 + }, + { + "epoch": 2.01, + "learning_rate": 1.2337334647288334e-05, + "loss": 0.9089, + "step": 3630 + }, + { + "epoch": 2.01, + "learning_rate": 1.2274961707228228e-05, + "loss": 0.9039, + "step": 3635 + }, + { + "epoch": 2.01, + "learning_rate": 1.2212695490453646e-05, + "loss": 0.9378, + "step": 3640 + }, + { + "epoch": 2.02, + "learning_rate": 1.2150536519183475e-05, + "loss": 0.9297, + "step": 3645 + }, + { + "epoch": 2.02, + "learning_rate": 1.2088485314737108e-05, + "loss": 0.9488, + "step": 3650 + }, + { + "epoch": 2.02, + "learning_rate": 1.2026542397530186e-05, + "loss": 0.9625, + "step": 3655 + }, + { + "epoch": 2.02, + "learning_rate": 1.1964708287070073e-05, + "loss": 0.8874, + "step": 3660 + }, + { + "epoch": 2.03, + "learning_rate": 1.1902983501951666e-05, + "loss": 0.9224, + "step": 3665 + }, + { + "epoch": 2.03, + "learning_rate": 1.1841368559852892e-05, + "loss": 0.9442, + "step": 3670 + }, + { + "epoch": 2.03, + "learning_rate": 1.17798639775305e-05, + "loss": 0.8688, + "step": 3675 + }, + { + "epoch": 2.03, + "learning_rate": 1.1718470270815608e-05, + "loss": 0.8912, + "step": 3680 + }, + { + "epoch": 2.04, + "learning_rate": 1.1657187954609496e-05, + "loss": 0.9131, + "step": 3685 + }, + { + "epoch": 2.04, + "learning_rate": 1.1596017542879168e-05, + "loss": 0.9827, + "step": 3690 + }, + { + "epoch": 2.04, + "learning_rate": 1.1534959548653132e-05, + "loss": 0.8588, + "step": 3695 + }, + { + "epoch": 2.05, + "learning_rate": 1.147401448401706e-05, + "loss": 0.9359, + "step": 3700 + }, + { + "epoch": 2.05, + "learning_rate": 1.1413182860109491e-05, + "loss": 0.9375, + "step": 3705 + }, + { + "epoch": 2.05, + "learning_rate": 1.1352465187117562e-05, + "loss": 0.9242, + "step": 3710 + }, + { + "epoch": 2.05, + "learning_rate": 1.1291861974272703e-05, + "loss": 0.9132, + "step": 3715 + }, + { + "epoch": 2.06, + "learning_rate": 1.1231373729846393e-05, + "loss": 0.9142, + "step": 3720 + }, + { + "epoch": 2.06, + "learning_rate": 1.1171000961145883e-05, + "loss": 0.9132, + "step": 3725 + }, + { + "epoch": 2.06, + "learning_rate": 1.1110744174509952e-05, + "loss": 0.9603, + "step": 3730 + }, + { + "epoch": 2.07, + "learning_rate": 1.1050603875304622e-05, + "loss": 0.9442, + "step": 3735 + }, + { + "epoch": 2.07, + "learning_rate": 1.0990580567918979e-05, + "loss": 0.9392, + "step": 3740 + }, + { + "epoch": 2.07, + "learning_rate": 1.0930674755760908e-05, + "loss": 0.9203, + "step": 3745 + }, + { + "epoch": 2.07, + "learning_rate": 1.0870886941252872e-05, + "loss": 0.9468, + "step": 3750 + }, + { + "epoch": 2.08, + "learning_rate": 1.0811217625827705e-05, + "loss": 0.9605, + "step": 3755 + }, + { + "epoch": 2.08, + "learning_rate": 1.0751667309924399e-05, + "loss": 0.9166, + "step": 3760 + }, + { + "epoch": 2.08, + "learning_rate": 1.0692236492983918e-05, + "loss": 0.939, + "step": 3765 + }, + { + "epoch": 2.08, + "learning_rate": 1.0632925673445001e-05, + "loss": 0.9409, + "step": 3770 + }, + { + "epoch": 2.09, + "learning_rate": 1.0573735348739987e-05, + "loss": 0.9523, + "step": 3775 + }, + { + "epoch": 2.09, + "learning_rate": 1.0514666015290645e-05, + "loss": 0.8867, + "step": 3780 + }, + { + "epoch": 2.09, + "learning_rate": 1.0455718168503967e-05, + "loss": 0.9816, + "step": 3785 + }, + { + "epoch": 2.1, + "learning_rate": 1.0396892302768127e-05, + "loss": 0.9465, + "step": 3790 + }, + { + "epoch": 2.1, + "learning_rate": 1.033818891144817e-05, + "loss": 0.9352, + "step": 3795 + }, + { + "epoch": 2.1, + "learning_rate": 1.0279608486882054e-05, + "loss": 0.8877, + "step": 3800 + }, + { + "epoch": 2.1, + "learning_rate": 1.0221151520376343e-05, + "loss": 0.9098, + "step": 3805 + }, + { + "epoch": 2.11, + "learning_rate": 1.0162818502202251e-05, + "loss": 0.8687, + "step": 3810 + }, + { + "epoch": 2.11, + "learning_rate": 1.0104609921591387e-05, + "loss": 0.9256, + "step": 3815 + }, + { + "epoch": 2.11, + "learning_rate": 1.0046526266731782e-05, + "loss": 0.92, + "step": 3820 + }, + { + "epoch": 2.12, + "learning_rate": 9.988568024763673e-06, + "loss": 0.8813, + "step": 3825 + }, + { + "epoch": 2.12, + "learning_rate": 9.930735681775505e-06, + "loss": 0.891, + "step": 3830 + }, + { + "epoch": 2.12, + "learning_rate": 9.87302972279982e-06, + "loss": 0.9479, + "step": 3835 + }, + { + "epoch": 2.12, + "learning_rate": 9.815450631809191e-06, + "loss": 0.8963, + "step": 3840 + }, + { + "epoch": 2.13, + "learning_rate": 9.757998891712172e-06, + "loss": 0.9178, + "step": 3845 + }, + { + "epoch": 2.13, + "learning_rate": 9.700674984349228e-06, + "loss": 0.9072, + "step": 3850 + }, + { + "epoch": 2.13, + "learning_rate": 9.643479390488717e-06, + "loss": 0.8879, + "step": 3855 + }, + { + "epoch": 2.13, + "learning_rate": 9.586412589822846e-06, + "loss": 0.9484, + "step": 3860 + }, + { + "epoch": 2.14, + "learning_rate": 9.529475060963649e-06, + "loss": 0.9173, + "step": 3865 + }, + { + "epoch": 2.14, + "learning_rate": 9.472667281438982e-06, + "loss": 0.9318, + "step": 3870 + }, + { + "epoch": 2.14, + "learning_rate": 9.415989727688484e-06, + "loss": 0.9242, + "step": 3875 + }, + { + "epoch": 2.15, + "learning_rate": 9.359442875059631e-06, + "loss": 0.9314, + "step": 3880 + }, + { + "epoch": 2.15, + "learning_rate": 9.303027197803726e-06, + "loss": 0.9227, + "step": 3885 + }, + { + "epoch": 2.15, + "learning_rate": 9.246743169071906e-06, + "loss": 0.9253, + "step": 3890 + }, + { + "epoch": 2.15, + "learning_rate": 9.190591260911201e-06, + "loss": 0.9457, + "step": 3895 + }, + { + "epoch": 2.16, + "learning_rate": 9.134571944260554e-06, + "loss": 0.9214, + "step": 3900 + } + ], + "max_steps": 5424, + "num_train_epochs": 3, + "total_flos": 1.2673435949019955e+18, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-3900/training_args.bin b/checkpoint-3900/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..77433459daca6603950afc463ff989e817f81ac3 --- /dev/null +++ b/checkpoint-3900/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0f6a8853599a52b1f15984d75ce4b6434c335fdc67d4509d0b2ca0d8c613590a +size 3362 diff --git a/checkpoint-400/README.md b/checkpoint-400/README.md new file mode 100644 index 0000000000000000000000000000000000000000..53c0fe6b436905ad7a2c060e98fe1ebdeac1a2fe --- /dev/null +++ b/checkpoint-400/README.md @@ -0,0 +1,15 @@ +--- +library_name: peft +--- +## Training procedure + + +The following `bitsandbytes` quantization config was used during training: +- load_in_8bit: True +- llm_int8_threshold: 6.0 +- llm_int8_skip_modules: None +- llm_int8_enable_fp32_cpu_offload: False +### Framework versions + + +- PEFT 0.5.0 diff --git a/checkpoint-400/adapter_config.json b/checkpoint-400/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..514ae99ac7471cc26016f7575e44dd8304f29b07 --- /dev/null +++ b/checkpoint-400/adapter_config.json @@ -0,0 +1,21 @@ +{ + "auto_mapping": null, + "base_model_name_or_path": "meta-llama/Llama-2-7b-chat-hf", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "q_proj", + "v_proj" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-400/adapter_model.bin b/checkpoint-400/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..a6d60737fc97737060e70b6d9a80ef90cf14451b --- /dev/null +++ b/checkpoint-400/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:74cbf647a1fa4d118ed1218edb640917d74faf1bfc4336e112d54e30bd44cc5e +size 16821197 diff --git a/checkpoint-400/finetuning_args.json b/checkpoint-400/finetuning_args.json new file mode 100644 index 0000000000000000000000000000000000000000..d01efc206b59c6f88548e8f3940579f2ed2af33b --- /dev/null +++ b/checkpoint-400/finetuning_args.json @@ -0,0 +1,16 @@ +{ + "dpo_beta": 0.1, + "finetuning_type": "lora", + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "lora_rank": 8, + "lora_target": [ + "q_proj", + "v_proj" + ], + "name_module_trainable": "mlp", + "num_hidden_layers": 32, + "num_layer_trainable": 3, + "ppo_score_norm": false, + "resume_lora_training": true +} diff --git a/checkpoint-400/optimizer.pt b/checkpoint-400/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..5a6439035e4f3f8b0ba176b194439a391b2f0e0d --- /dev/null +++ b/checkpoint-400/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d3a5a58a19136dff788a80f4700c93f26ba361de0aa8b7cecf9cbfc21421f6d3 +size 33629893 diff --git a/checkpoint-400/rng_state.pth b/checkpoint-400/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..e1d319dc51b4a33c7b1cf78583c8a260c6f5ae50 --- /dev/null +++ b/checkpoint-400/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:469e2fd5132805627960cd56cac8ddf213da7047b608d71846c80084f1a04d96 +size 14575 diff --git a/checkpoint-400/scaler.pt b/checkpoint-400/scaler.pt new file mode 100644 index 0000000000000000000000000000000000000000..3ee84e1b463b64410ae6f3d5c680aef0d7e2b14f --- /dev/null +++ b/checkpoint-400/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cc40a4be6a52cee4d7658df4041e660ffa02b0d8b5bd143bb8bb397f7b71b1a5 +size 557 diff --git a/checkpoint-400/scheduler.pt b/checkpoint-400/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..b8301f6b6a33bb81a381c08dbb30c104db85c8ab --- /dev/null +++ b/checkpoint-400/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ee80c1a25aca41d12531c6f3d7abac218cc1bf790a3dbf9a63582d19fba877c2 +size 627 diff --git a/checkpoint-400/trainer_state.json b/checkpoint-400/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..b70e04fab4ca6e8be02a48fab08065bc28e654ef --- /dev/null +++ b/checkpoint-400/trainer_state.json @@ -0,0 +1,496 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.22117777163395078, + "global_step": 400, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.9999895164082156e-05, + "loss": 1.6349, + "step": 5 + }, + { + "epoch": 0.01, + "learning_rate": 4.999958065720787e-05, + "loss": 1.6199, + "step": 10 + }, + { + "epoch": 0.01, + "learning_rate": 4.999905648201487e-05, + "loss": 1.4834, + "step": 15 + }, + { + "epoch": 0.01, + "learning_rate": 4.999832264289934e-05, + "loss": 1.3882, + "step": 20 + }, + { + "epoch": 0.01, + "learning_rate": 4.999737914601591e-05, + "loss": 1.3679, + "step": 25 + }, + { + "epoch": 0.02, + "learning_rate": 4.999622599927756e-05, + "loss": 1.2396, + "step": 30 + }, + { + "epoch": 0.02, + "learning_rate": 4.999486321235559e-05, + "loss": 1.321, + "step": 35 + }, + { + "epoch": 0.02, + "learning_rate": 4.9993290796679516e-05, + "loss": 1.2874, + "step": 40 + }, + { + "epoch": 0.02, + "learning_rate": 4.999150876543699e-05, + "loss": 1.2607, + "step": 45 + }, + { + "epoch": 0.03, + "learning_rate": 4.9989517133573694e-05, + "loss": 1.2454, + "step": 50 + }, + { + "epoch": 0.03, + "learning_rate": 4.9987315917793174e-05, + "loss": 1.2799, + "step": 55 + }, + { + "epoch": 0.03, + "learning_rate": 4.998490513655676e-05, + "loss": 1.2575, + "step": 60 + }, + { + "epoch": 0.04, + "learning_rate": 4.998228481008337e-05, + "loss": 1.2404, + "step": 65 + }, + { + "epoch": 0.04, + "learning_rate": 4.997945496034934e-05, + "loss": 1.2219, + "step": 70 + }, + { + "epoch": 0.04, + "learning_rate": 4.9976415611088267e-05, + "loss": 1.2241, + "step": 75 + }, + { + "epoch": 0.04, + "learning_rate": 4.997316678779079e-05, + "loss": 1.1716, + "step": 80 + }, + { + "epoch": 0.05, + "learning_rate": 4.996970851770438e-05, + "loss": 1.1883, + "step": 85 + }, + { + "epoch": 0.05, + "learning_rate": 4.9966040829833115e-05, + "loss": 1.205, + "step": 90 + }, + { + "epoch": 0.05, + "learning_rate": 4.9962163754937426e-05, + "loss": 1.1246, + "step": 95 + }, + { + "epoch": 0.06, + "learning_rate": 4.995807732553384e-05, + "loss": 1.1636, + "step": 100 + }, + { + "epoch": 0.06, + "learning_rate": 4.9953781575894723e-05, + "loss": 1.158, + "step": 105 + }, + { + "epoch": 0.06, + "learning_rate": 4.9949276542048e-05, + "loss": 1.1477, + "step": 110 + }, + { + "epoch": 0.06, + "learning_rate": 4.9944562261776805e-05, + "loss": 1.1678, + "step": 115 + }, + { + "epoch": 0.07, + "learning_rate": 4.9939638774619216e-05, + "loss": 1.1501, + "step": 120 + }, + { + "epoch": 0.07, + "learning_rate": 4.99345061218679e-05, + "loss": 1.1955, + "step": 125 + }, + { + "epoch": 0.07, + "learning_rate": 4.9929164346569756e-05, + "loss": 1.1724, + "step": 130 + }, + { + "epoch": 0.07, + "learning_rate": 4.9923613493525576e-05, + "loss": 1.177, + "step": 135 + }, + { + "epoch": 0.08, + "learning_rate": 4.991785360928968e-05, + "loss": 1.1418, + "step": 140 + }, + { + "epoch": 0.08, + "learning_rate": 4.991188474216947e-05, + "loss": 1.1898, + "step": 145 + }, + { + "epoch": 0.08, + "learning_rate": 4.9905706942225094e-05, + "loss": 1.1479, + "step": 150 + }, + { + "epoch": 0.09, + "learning_rate": 4.9899320261268966e-05, + "loss": 1.1356, + "step": 155 + }, + { + "epoch": 0.09, + "learning_rate": 4.989272475286537e-05, + "loss": 1.1397, + "step": 160 + }, + { + "epoch": 0.09, + "learning_rate": 4.9885920472330004e-05, + "loss": 1.1215, + "step": 165 + }, + { + "epoch": 0.09, + "learning_rate": 4.9878907476729516e-05, + "loss": 1.167, + "step": 170 + }, + { + "epoch": 0.1, + "learning_rate": 4.9871685824881e-05, + "loss": 1.1219, + "step": 175 + }, + { + "epoch": 0.1, + "learning_rate": 4.9864255577351534e-05, + "loss": 1.0835, + "step": 180 + }, + { + "epoch": 0.1, + "learning_rate": 4.985661679645769e-05, + "loss": 1.0721, + "step": 185 + }, + { + "epoch": 0.11, + "learning_rate": 4.9848769546264915e-05, + "loss": 1.0692, + "step": 190 + }, + { + "epoch": 0.11, + "learning_rate": 4.9840713892587146e-05, + "loss": 1.0488, + "step": 195 + }, + { + "epoch": 0.11, + "learning_rate": 4.983244990298609e-05, + "loss": 1.1285, + "step": 200 + }, + { + "epoch": 0.11, + "learning_rate": 4.982397764677081e-05, + "loss": 1.0832, + "step": 205 + }, + { + "epoch": 0.12, + "learning_rate": 4.981529719499704e-05, + "loss": 1.0652, + "step": 210 + }, + { + "epoch": 0.12, + "learning_rate": 4.980640862046663e-05, + "loss": 1.1043, + "step": 215 + }, + { + "epoch": 0.12, + "learning_rate": 4.979731199772693e-05, + "loss": 1.112, + "step": 220 + }, + { + "epoch": 0.12, + "learning_rate": 4.9788007403070146e-05, + "loss": 1.1029, + "step": 225 + }, + { + "epoch": 0.13, + "learning_rate": 4.977849491453277e-05, + "loss": 1.0869, + "step": 230 + }, + { + "epoch": 0.13, + "learning_rate": 4.976877461189481e-05, + "loss": 1.0843, + "step": 235 + }, + { + "epoch": 0.13, + "learning_rate": 4.975884657667922e-05, + "loss": 1.0789, + "step": 240 + }, + { + "epoch": 0.14, + "learning_rate": 4.974871089215118e-05, + "loss": 1.0449, + "step": 245 + }, + { + "epoch": 0.14, + "learning_rate": 4.9738367643317405e-05, + "loss": 1.1053, + "step": 250 + }, + { + "epoch": 0.14, + "learning_rate": 4.9727816916925395e-05, + "loss": 1.0651, + "step": 255 + }, + { + "epoch": 0.14, + "learning_rate": 4.971705880146276e-05, + "loss": 1.0828, + "step": 260 + }, + { + "epoch": 0.15, + "learning_rate": 4.970609338715646e-05, + "loss": 1.0932, + "step": 265 + }, + { + "epoch": 0.15, + "learning_rate": 4.969492076597203e-05, + "loss": 1.0648, + "step": 270 + }, + { + "epoch": 0.15, + "learning_rate": 4.968354103161283e-05, + "loss": 1.0948, + "step": 275 + }, + { + "epoch": 0.15, + "learning_rate": 4.967195427951926e-05, + "loss": 1.0721, + "step": 280 + }, + { + "epoch": 0.16, + "learning_rate": 4.9660160606867936e-05, + "loss": 1.124, + "step": 285 + }, + { + "epoch": 0.16, + "learning_rate": 4.9648160112570896e-05, + "loss": 1.0963, + "step": 290 + }, + { + "epoch": 0.16, + "learning_rate": 4.9635952897274773e-05, + "loss": 1.1078, + "step": 295 + }, + { + "epoch": 0.17, + "learning_rate": 4.9623539063359925e-05, + "loss": 1.1059, + "step": 300 + }, + { + "epoch": 0.17, + "learning_rate": 4.961091871493962e-05, + "loss": 1.1032, + "step": 305 + }, + { + "epoch": 0.17, + "learning_rate": 4.959809195785912e-05, + "loss": 1.0595, + "step": 310 + }, + { + "epoch": 0.17, + "learning_rate": 4.958505889969481e-05, + "loss": 1.1096, + "step": 315 + }, + { + "epoch": 0.18, + "learning_rate": 4.957181964975329e-05, + "loss": 1.0589, + "step": 320 + }, + { + "epoch": 0.18, + "learning_rate": 4.955837431907049e-05, + "loss": 1.0608, + "step": 325 + }, + { + "epoch": 0.18, + "learning_rate": 4.954472302041069e-05, + "loss": 1.0819, + "step": 330 + }, + { + "epoch": 0.19, + "learning_rate": 4.9530865868265605e-05, + "loss": 1.0759, + "step": 335 + }, + { + "epoch": 0.19, + "learning_rate": 4.951680297885342e-05, + "loss": 1.0515, + "step": 340 + }, + { + "epoch": 0.19, + "learning_rate": 4.950253447011779e-05, + "loss": 1.0371, + "step": 345 + }, + { + "epoch": 0.19, + "learning_rate": 4.948806046172691e-05, + "loss": 1.0619, + "step": 350 + }, + { + "epoch": 0.2, + "learning_rate": 4.947338107507245e-05, + "loss": 1.0757, + "step": 355 + }, + { + "epoch": 0.2, + "learning_rate": 4.945849643326857e-05, + "loss": 1.0686, + "step": 360 + }, + { + "epoch": 0.2, + "learning_rate": 4.9443406661150874e-05, + "loss": 1.0809, + "step": 365 + }, + { + "epoch": 0.2, + "learning_rate": 4.942811188527537e-05, + "loss": 1.0704, + "step": 370 + }, + { + "epoch": 0.21, + "learning_rate": 4.941261223391742e-05, + "loss": 1.0655, + "step": 375 + }, + { + "epoch": 0.21, + "learning_rate": 4.939690783707063e-05, + "loss": 1.1182, + "step": 380 + }, + { + "epoch": 0.21, + "learning_rate": 4.938099882644578e-05, + "loss": 1.081, + "step": 385 + }, + { + "epoch": 0.22, + "learning_rate": 4.9364885335469734e-05, + "loss": 1.0792, + "step": 390 + }, + { + "epoch": 0.22, + "learning_rate": 4.93485674992843e-05, + "loss": 1.0244, + "step": 395 + }, + { + "epoch": 0.22, + "learning_rate": 4.933204545474511e-05, + "loss": 1.0531, + "step": 400 + } + ], + "max_steps": 5424, + "num_train_epochs": 3, + "total_flos": 1.29988124737536e+17, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-400/training_args.bin b/checkpoint-400/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..77433459daca6603950afc463ff989e817f81ac3 --- /dev/null +++ b/checkpoint-400/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0f6a8853599a52b1f15984d75ce4b6434c335fdc67d4509d0b2ca0d8c613590a +size 3362 diff --git a/checkpoint-4000/README.md b/checkpoint-4000/README.md new file mode 100644 index 0000000000000000000000000000000000000000..53c0fe6b436905ad7a2c060e98fe1ebdeac1a2fe --- /dev/null +++ b/checkpoint-4000/README.md @@ -0,0 +1,15 @@ +--- +library_name: peft +--- +## Training procedure + + +The following `bitsandbytes` quantization config was used during training: +- load_in_8bit: True +- llm_int8_threshold: 6.0 +- llm_int8_skip_modules: None +- llm_int8_enable_fp32_cpu_offload: False +### Framework versions + + +- PEFT 0.5.0 diff --git a/checkpoint-4000/adapter_config.json b/checkpoint-4000/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..514ae99ac7471cc26016f7575e44dd8304f29b07 --- /dev/null +++ b/checkpoint-4000/adapter_config.json @@ -0,0 +1,21 @@ +{ + "auto_mapping": null, + "base_model_name_or_path": "meta-llama/Llama-2-7b-chat-hf", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "q_proj", + "v_proj" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-4000/adapter_model.bin b/checkpoint-4000/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..76dadb045a2c696bf35a38fbefe978a3cc0d2acb --- /dev/null +++ b/checkpoint-4000/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e98dd7449fd6804268b72aa3c1dfb3e5b2a85a32ecae37c53ff97ec0331b65f6 +size 16821197 diff --git a/checkpoint-4000/finetuning_args.json b/checkpoint-4000/finetuning_args.json new file mode 100644 index 0000000000000000000000000000000000000000..d01efc206b59c6f88548e8f3940579f2ed2af33b --- /dev/null +++ b/checkpoint-4000/finetuning_args.json @@ -0,0 +1,16 @@ +{ + "dpo_beta": 0.1, + "finetuning_type": "lora", + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "lora_rank": 8, + "lora_target": [ + "q_proj", + "v_proj" + ], + "name_module_trainable": "mlp", + "num_hidden_layers": 32, + "num_layer_trainable": 3, + "ppo_score_norm": false, + "resume_lora_training": true +} diff --git a/checkpoint-4000/optimizer.pt b/checkpoint-4000/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..3b115870a48d5116341a366bb80daf470134e4e4 --- /dev/null +++ b/checkpoint-4000/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b4000371fc770162dd837c6a7428fa4be38c20d48071804daf6b97899adecb35 +size 33629893 diff --git a/checkpoint-4000/rng_state.pth b/checkpoint-4000/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..c6aece908c490b45e2e0c2f48bfa38082d299131 --- /dev/null +++ b/checkpoint-4000/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8deca2c616f5daf554035843b758e2faab6fc035379ccc3f462d6acb0de96b4b +size 14575 diff --git a/checkpoint-4000/scaler.pt b/checkpoint-4000/scaler.pt new file mode 100644 index 0000000000000000000000000000000000000000..eeb0223b712964a9e5fbe2cbc3e12a4edfe15155 --- /dev/null +++ b/checkpoint-4000/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ac0323cd1185cbf029dcec523b50bf80027dde44172532bf1721e73d374defdd +size 557 diff --git a/checkpoint-4000/scheduler.pt b/checkpoint-4000/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..1cf58fac776852a37c132bb489999fe0b7f6504a --- /dev/null +++ b/checkpoint-4000/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b90fda0d788e2a9c10797788d995c13f0b12f78fcf9f856d328164cd39c33264 +size 627 diff --git a/checkpoint-4000/trainer_state.json b/checkpoint-4000/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..d494032c0258ea452c3b59402f16bd0418054e74 --- /dev/null +++ b/checkpoint-4000/trainer_state.json @@ -0,0 +1,4816 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 2.211777716339508, + "global_step": 4000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.9999895164082156e-05, + "loss": 1.6349, + "step": 5 + }, + { + "epoch": 0.01, + "learning_rate": 4.999958065720787e-05, + "loss": 1.6199, + "step": 10 + }, + { + "epoch": 0.01, + "learning_rate": 4.999905648201487e-05, + "loss": 1.4834, + "step": 15 + }, + { + "epoch": 0.01, + "learning_rate": 4.999832264289934e-05, + "loss": 1.3882, + "step": 20 + }, + { + "epoch": 0.01, + "learning_rate": 4.999737914601591e-05, + "loss": 1.3679, + "step": 25 + }, + { + "epoch": 0.02, + "learning_rate": 4.999622599927756e-05, + "loss": 1.2396, + "step": 30 + }, + { + "epoch": 0.02, + "learning_rate": 4.999486321235559e-05, + "loss": 1.321, + "step": 35 + }, + { + "epoch": 0.02, + "learning_rate": 4.9993290796679516e-05, + "loss": 1.2874, + "step": 40 + }, + { + "epoch": 0.02, + "learning_rate": 4.999150876543699e-05, + "loss": 1.2607, + "step": 45 + }, + { + "epoch": 0.03, + "learning_rate": 4.9989517133573694e-05, + "loss": 1.2454, + "step": 50 + }, + { + "epoch": 0.03, + "learning_rate": 4.9987315917793174e-05, + "loss": 1.2799, + "step": 55 + }, + { + "epoch": 0.03, + "learning_rate": 4.998490513655676e-05, + "loss": 1.2575, + "step": 60 + }, + { + "epoch": 0.04, + "learning_rate": 4.998228481008337e-05, + "loss": 1.2404, + "step": 65 + }, + { + "epoch": 0.04, + "learning_rate": 4.997945496034934e-05, + "loss": 1.2219, + "step": 70 + }, + { + "epoch": 0.04, + "learning_rate": 4.9976415611088267e-05, + "loss": 1.2241, + "step": 75 + }, + { + "epoch": 0.04, + "learning_rate": 4.997316678779079e-05, + "loss": 1.1716, + "step": 80 + }, + { + "epoch": 0.05, + "learning_rate": 4.996970851770438e-05, + "loss": 1.1883, + "step": 85 + }, + { + "epoch": 0.05, + "learning_rate": 4.9966040829833115e-05, + "loss": 1.205, + "step": 90 + }, + { + "epoch": 0.05, + "learning_rate": 4.9962163754937426e-05, + "loss": 1.1246, + "step": 95 + }, + { + "epoch": 0.06, + "learning_rate": 4.995807732553384e-05, + "loss": 1.1636, + "step": 100 + }, + { + "epoch": 0.06, + "learning_rate": 4.9953781575894723e-05, + "loss": 1.158, + "step": 105 + }, + { + "epoch": 0.06, + "learning_rate": 4.9949276542048e-05, + "loss": 1.1477, + "step": 110 + }, + { + "epoch": 0.06, + "learning_rate": 4.9944562261776805e-05, + "loss": 1.1678, + "step": 115 + }, + { + "epoch": 0.07, + "learning_rate": 4.9939638774619216e-05, + "loss": 1.1501, + "step": 120 + }, + { + "epoch": 0.07, + "learning_rate": 4.99345061218679e-05, + "loss": 1.1955, + "step": 125 + }, + { + "epoch": 0.07, + "learning_rate": 4.9929164346569756e-05, + "loss": 1.1724, + "step": 130 + }, + { + "epoch": 0.07, + "learning_rate": 4.9923613493525576e-05, + "loss": 1.177, + "step": 135 + }, + { + "epoch": 0.08, + "learning_rate": 4.991785360928968e-05, + "loss": 1.1418, + "step": 140 + }, + { + "epoch": 0.08, + "learning_rate": 4.991188474216947e-05, + "loss": 1.1898, + "step": 145 + }, + { + "epoch": 0.08, + "learning_rate": 4.9905706942225094e-05, + "loss": 1.1479, + "step": 150 + }, + { + "epoch": 0.09, + "learning_rate": 4.9899320261268966e-05, + "loss": 1.1356, + "step": 155 + }, + { + "epoch": 0.09, + "learning_rate": 4.989272475286537e-05, + "loss": 1.1397, + "step": 160 + }, + { + "epoch": 0.09, + "learning_rate": 4.9885920472330004e-05, + "loss": 1.1215, + "step": 165 + }, + { + "epoch": 0.09, + "learning_rate": 4.9878907476729516e-05, + "loss": 1.167, + "step": 170 + }, + { + "epoch": 0.1, + "learning_rate": 4.9871685824881e-05, + "loss": 1.1219, + "step": 175 + }, + { + "epoch": 0.1, + "learning_rate": 4.9864255577351534e-05, + "loss": 1.0835, + "step": 180 + }, + { + "epoch": 0.1, + "learning_rate": 4.985661679645769e-05, + "loss": 1.0721, + "step": 185 + }, + { + "epoch": 0.11, + "learning_rate": 4.9848769546264915e-05, + "loss": 1.0692, + "step": 190 + }, + { + "epoch": 0.11, + "learning_rate": 4.9840713892587146e-05, + "loss": 1.0488, + "step": 195 + }, + { + "epoch": 0.11, + "learning_rate": 4.983244990298609e-05, + "loss": 1.1285, + "step": 200 + }, + { + "epoch": 0.11, + "learning_rate": 4.982397764677081e-05, + "loss": 1.0832, + "step": 205 + }, + { + "epoch": 0.12, + "learning_rate": 4.981529719499704e-05, + "loss": 1.0652, + "step": 210 + }, + { + "epoch": 0.12, + "learning_rate": 4.980640862046663e-05, + "loss": 1.1043, + "step": 215 + }, + { + "epoch": 0.12, + "learning_rate": 4.979731199772693e-05, + "loss": 1.112, + "step": 220 + }, + { + "epoch": 0.12, + "learning_rate": 4.9788007403070146e-05, + "loss": 1.1029, + "step": 225 + }, + { + "epoch": 0.13, + "learning_rate": 4.977849491453277e-05, + "loss": 1.0869, + "step": 230 + }, + { + "epoch": 0.13, + "learning_rate": 4.976877461189481e-05, + "loss": 1.0843, + "step": 235 + }, + { + "epoch": 0.13, + "learning_rate": 4.975884657667922e-05, + "loss": 1.0789, + "step": 240 + }, + { + "epoch": 0.14, + "learning_rate": 4.974871089215118e-05, + "loss": 1.0449, + "step": 245 + }, + { + "epoch": 0.14, + "learning_rate": 4.9738367643317405e-05, + "loss": 1.1053, + "step": 250 + }, + { + "epoch": 0.14, + "learning_rate": 4.9727816916925395e-05, + "loss": 1.0651, + "step": 255 + }, + { + "epoch": 0.14, + "learning_rate": 4.971705880146276e-05, + "loss": 1.0828, + "step": 260 + }, + { + "epoch": 0.15, + "learning_rate": 4.970609338715646e-05, + "loss": 1.0932, + "step": 265 + }, + { + "epoch": 0.15, + "learning_rate": 4.969492076597203e-05, + "loss": 1.0648, + "step": 270 + }, + { + "epoch": 0.15, + "learning_rate": 4.968354103161283e-05, + "loss": 1.0948, + "step": 275 + }, + { + "epoch": 0.15, + "learning_rate": 4.967195427951926e-05, + "loss": 1.0721, + "step": 280 + }, + { + "epoch": 0.16, + "learning_rate": 4.9660160606867936e-05, + "loss": 1.124, + "step": 285 + }, + { + "epoch": 0.16, + "learning_rate": 4.9648160112570896e-05, + "loss": 1.0963, + "step": 290 + }, + { + "epoch": 0.16, + "learning_rate": 4.9635952897274773e-05, + "loss": 1.1078, + "step": 295 + }, + { + "epoch": 0.17, + "learning_rate": 4.9623539063359925e-05, + "loss": 1.1059, + "step": 300 + }, + { + "epoch": 0.17, + "learning_rate": 4.961091871493962e-05, + "loss": 1.1032, + "step": 305 + }, + { + "epoch": 0.17, + "learning_rate": 4.959809195785912e-05, + "loss": 1.0595, + "step": 310 + }, + { + "epoch": 0.17, + "learning_rate": 4.958505889969481e-05, + "loss": 1.1096, + "step": 315 + }, + { + "epoch": 0.18, + "learning_rate": 4.957181964975329e-05, + "loss": 1.0589, + "step": 320 + }, + { + "epoch": 0.18, + "learning_rate": 4.955837431907049e-05, + "loss": 1.0608, + "step": 325 + }, + { + "epoch": 0.18, + "learning_rate": 4.954472302041069e-05, + "loss": 1.0819, + "step": 330 + }, + { + "epoch": 0.19, + "learning_rate": 4.9530865868265605e-05, + "loss": 1.0759, + "step": 335 + }, + { + "epoch": 0.19, + "learning_rate": 4.951680297885342e-05, + "loss": 1.0515, + "step": 340 + }, + { + "epoch": 0.19, + "learning_rate": 4.950253447011779e-05, + "loss": 1.0371, + "step": 345 + }, + { + "epoch": 0.19, + "learning_rate": 4.948806046172691e-05, + "loss": 1.0619, + "step": 350 + }, + { + "epoch": 0.2, + "learning_rate": 4.947338107507245e-05, + "loss": 1.0757, + "step": 355 + }, + { + "epoch": 0.2, + "learning_rate": 4.945849643326857e-05, + "loss": 1.0686, + "step": 360 + }, + { + "epoch": 0.2, + "learning_rate": 4.9443406661150874e-05, + "loss": 1.0809, + "step": 365 + }, + { + "epoch": 0.2, + "learning_rate": 4.942811188527537e-05, + "loss": 1.0704, + "step": 370 + }, + { + "epoch": 0.21, + "learning_rate": 4.941261223391742e-05, + "loss": 1.0655, + "step": 375 + }, + { + "epoch": 0.21, + "learning_rate": 4.939690783707063e-05, + "loss": 1.1182, + "step": 380 + }, + { + "epoch": 0.21, + "learning_rate": 4.938099882644578e-05, + "loss": 1.081, + "step": 385 + }, + { + "epoch": 0.22, + "learning_rate": 4.9364885335469734e-05, + "loss": 1.0792, + "step": 390 + }, + { + "epoch": 0.22, + "learning_rate": 4.93485674992843e-05, + "loss": 1.0244, + "step": 395 + }, + { + "epoch": 0.22, + "learning_rate": 4.933204545474511e-05, + "loss": 1.0531, + "step": 400 + }, + { + "epoch": 0.22, + "learning_rate": 4.9315319340420465e-05, + "loss": 1.048, + "step": 405 + }, + { + "epoch": 0.23, + "learning_rate": 4.929838929659015e-05, + "loss": 1.04, + "step": 410 + }, + { + "epoch": 0.23, + "learning_rate": 4.9281255465244314e-05, + "loss": 1.0686, + "step": 415 + }, + { + "epoch": 0.23, + "learning_rate": 4.926391799008223e-05, + "loss": 1.0933, + "step": 420 + }, + { + "epoch": 0.24, + "learning_rate": 4.924637701651111e-05, + "loss": 1.0313, + "step": 425 + }, + { + "epoch": 0.24, + "learning_rate": 4.9228632691644874e-05, + "loss": 1.0811, + "step": 430 + }, + { + "epoch": 0.24, + "learning_rate": 4.921068516430293e-05, + "loss": 1.0617, + "step": 435 + }, + { + "epoch": 0.24, + "learning_rate": 4.919253458500892e-05, + "loss": 1.0482, + "step": 440 + }, + { + "epoch": 0.25, + "learning_rate": 4.9174181105989445e-05, + "loss": 1.0681, + "step": 445 + }, + { + "epoch": 0.25, + "learning_rate": 4.9155624881172834e-05, + "loss": 1.0476, + "step": 450 + }, + { + "epoch": 0.25, + "learning_rate": 4.913686606618777e-05, + "loss": 1.0463, + "step": 455 + }, + { + "epoch": 0.25, + "learning_rate": 4.911790481836208e-05, + "loss": 1.0213, + "step": 460 + }, + { + "epoch": 0.26, + "learning_rate": 4.909874129672133e-05, + "loss": 0.9855, + "step": 465 + }, + { + "epoch": 0.26, + "learning_rate": 4.907937566198757e-05, + "loss": 1.045, + "step": 470 + }, + { + "epoch": 0.26, + "learning_rate": 4.9059808076577914e-05, + "loss": 1.0442, + "step": 475 + }, + { + "epoch": 0.27, + "learning_rate": 4.904003870460323e-05, + "loss": 1.0354, + "step": 480 + }, + { + "epoch": 0.27, + "learning_rate": 4.9020067711866735e-05, + "loss": 1.0331, + "step": 485 + }, + { + "epoch": 0.27, + "learning_rate": 4.899989526586261e-05, + "loss": 1.0585, + "step": 490 + }, + { + "epoch": 0.27, + "learning_rate": 4.8979521535774636e-05, + "loss": 1.0223, + "step": 495 + }, + { + "epoch": 0.28, + "learning_rate": 4.895894669247468e-05, + "loss": 1.0118, + "step": 500 + }, + { + "epoch": 0.28, + "learning_rate": 4.8938170908521356e-05, + "loss": 1.0508, + "step": 505 + }, + { + "epoch": 0.28, + "learning_rate": 4.8917194358158534e-05, + "loss": 1.0656, + "step": 510 + }, + { + "epoch": 0.28, + "learning_rate": 4.8896017217313886e-05, + "loss": 1.0655, + "step": 515 + }, + { + "epoch": 0.29, + "learning_rate": 4.887463966359741e-05, + "loss": 1.0833, + "step": 520 + }, + { + "epoch": 0.29, + "learning_rate": 4.8853061876299956e-05, + "loss": 1.068, + "step": 525 + }, + { + "epoch": 0.29, + "learning_rate": 4.8831284036391684e-05, + "loss": 1.0487, + "step": 530 + }, + { + "epoch": 0.3, + "learning_rate": 4.880930632652058e-05, + "loss": 1.059, + "step": 535 + }, + { + "epoch": 0.3, + "learning_rate": 4.878712893101092e-05, + "loss": 1.0408, + "step": 540 + }, + { + "epoch": 0.3, + "learning_rate": 4.876475203586171e-05, + "loss": 0.9976, + "step": 545 + }, + { + "epoch": 0.3, + "learning_rate": 4.874217582874514e-05, + "loss": 1.0358, + "step": 550 + }, + { + "epoch": 0.31, + "learning_rate": 4.8719400499005e-05, + "loss": 1.0073, + "step": 555 + }, + { + "epoch": 0.31, + "learning_rate": 4.869642623765509e-05, + "loss": 1.0742, + "step": 560 + }, + { + "epoch": 0.31, + "learning_rate": 4.867325323737765e-05, + "loss": 1.0463, + "step": 565 + }, + { + "epoch": 0.32, + "learning_rate": 4.864988169252168e-05, + "loss": 1.0242, + "step": 570 + }, + { + "epoch": 0.32, + "learning_rate": 4.8626311799101375e-05, + "loss": 1.0773, + "step": 575 + }, + { + "epoch": 0.32, + "learning_rate": 4.860254375479446e-05, + "loss": 1.0371, + "step": 580 + }, + { + "epoch": 0.32, + "learning_rate": 4.8578577758940504e-05, + "loss": 1.0399, + "step": 585 + }, + { + "epoch": 0.33, + "learning_rate": 4.855441401253928e-05, + "loss": 1.0411, + "step": 590 + }, + { + "epoch": 0.33, + "learning_rate": 4.8530052718249076e-05, + "loss": 1.0476, + "step": 595 + }, + { + "epoch": 0.33, + "learning_rate": 4.850549408038498e-05, + "loss": 1.0075, + "step": 600 + }, + { + "epoch": 0.33, + "learning_rate": 4.848073830491717e-05, + "loss": 1.0045, + "step": 605 + }, + { + "epoch": 0.34, + "learning_rate": 4.845578559946923e-05, + "loss": 1.0368, + "step": 610 + }, + { + "epoch": 0.34, + "learning_rate": 4.8430636173316306e-05, + "loss": 1.0156, + "step": 615 + }, + { + "epoch": 0.34, + "learning_rate": 4.840529023738348e-05, + "loss": 1.0334, + "step": 620 + }, + { + "epoch": 0.35, + "learning_rate": 4.837974800424389e-05, + "loss": 0.9994, + "step": 625 + }, + { + "epoch": 0.35, + "learning_rate": 4.8354009688117026e-05, + "loss": 1.0467, + "step": 630 + }, + { + "epoch": 0.35, + "learning_rate": 4.8328075504866874e-05, + "loss": 0.9968, + "step": 635 + }, + { + "epoch": 0.35, + "learning_rate": 4.8301945672000164e-05, + "loss": 1.041, + "step": 640 + }, + { + "epoch": 0.36, + "learning_rate": 4.8275620408664487e-05, + "loss": 0.9811, + "step": 645 + }, + { + "epoch": 0.36, + "learning_rate": 4.8249099935646494e-05, + "loss": 1.0235, + "step": 650 + }, + { + "epoch": 0.36, + "learning_rate": 4.822238447537003e-05, + "loss": 1.0152, + "step": 655 + }, + { + "epoch": 0.36, + "learning_rate": 4.819547425189429e-05, + "loss": 1.038, + "step": 660 + }, + { + "epoch": 0.37, + "learning_rate": 4.81683694909119e-05, + "loss": 1.0387, + "step": 665 + }, + { + "epoch": 0.37, + "learning_rate": 4.814107041974707e-05, + "loss": 1.019, + "step": 670 + }, + { + "epoch": 0.37, + "learning_rate": 4.811357726735366e-05, + "loss": 1.0403, + "step": 675 + }, + { + "epoch": 0.38, + "learning_rate": 4.808589026431324e-05, + "loss": 1.0828, + "step": 680 + }, + { + "epoch": 0.38, + "learning_rate": 4.805800964283322e-05, + "loss": 1.0128, + "step": 685 + }, + { + "epoch": 0.38, + "learning_rate": 4.802993563674483e-05, + "loss": 1.0502, + "step": 690 + }, + { + "epoch": 0.38, + "learning_rate": 4.80016684815012e-05, + "loss": 1.0169, + "step": 695 + }, + { + "epoch": 0.39, + "learning_rate": 4.7973208414175406e-05, + "loss": 1.0316, + "step": 700 + }, + { + "epoch": 0.39, + "learning_rate": 4.794455567345842e-05, + "loss": 1.0263, + "step": 705 + }, + { + "epoch": 0.39, + "learning_rate": 4.791571049965714e-05, + "loss": 1.0801, + "step": 710 + }, + { + "epoch": 0.4, + "learning_rate": 4.7886673134692404e-05, + "loss": 1.0575, + "step": 715 + }, + { + "epoch": 0.4, + "learning_rate": 4.7857443822096905e-05, + "loss": 0.9767, + "step": 720 + }, + { + "epoch": 0.4, + "learning_rate": 4.782802280701319e-05, + "loss": 0.9666, + "step": 725 + }, + { + "epoch": 0.4, + "learning_rate": 4.77984103361916e-05, + "loss": 0.9983, + "step": 730 + }, + { + "epoch": 0.41, + "learning_rate": 4.776860665798816e-05, + "loss": 1.0219, + "step": 735 + }, + { + "epoch": 0.41, + "learning_rate": 4.773861202236257e-05, + "loss": 0.9963, + "step": 740 + }, + { + "epoch": 0.41, + "learning_rate": 4.770842668087602e-05, + "loss": 1.0219, + "step": 745 + }, + { + "epoch": 0.41, + "learning_rate": 4.767805088668916e-05, + "loss": 1.0029, + "step": 750 + }, + { + "epoch": 0.42, + "learning_rate": 4.7647484894559936e-05, + "loss": 1.0077, + "step": 755 + }, + { + "epoch": 0.42, + "learning_rate": 4.7616728960841444e-05, + "loss": 0.9908, + "step": 760 + }, + { + "epoch": 0.42, + "learning_rate": 4.758578334347981e-05, + "loss": 1.0092, + "step": 765 + }, + { + "epoch": 0.43, + "learning_rate": 4.7554648302012015e-05, + "loss": 0.9874, + "step": 770 + }, + { + "epoch": 0.43, + "learning_rate": 4.7523324097563706e-05, + "loss": 1.0185, + "step": 775 + }, + { + "epoch": 0.43, + "learning_rate": 4.749181099284703e-05, + "loss": 1.0533, + "step": 780 + }, + { + "epoch": 0.43, + "learning_rate": 4.746010925215839e-05, + "loss": 1.0083, + "step": 785 + }, + { + "epoch": 0.44, + "learning_rate": 4.74282191413763e-05, + "loss": 1.0191, + "step": 790 + }, + { + "epoch": 0.44, + "learning_rate": 4.7396140927959045e-05, + "loss": 0.9786, + "step": 795 + }, + { + "epoch": 0.44, + "learning_rate": 4.7363874880942574e-05, + "loss": 1.0197, + "step": 800 + }, + { + "epoch": 0.45, + "learning_rate": 4.733142127093813e-05, + "loss": 1.0471, + "step": 805 + }, + { + "epoch": 0.45, + "learning_rate": 4.7298780370130014e-05, + "loss": 1.0134, + "step": 810 + }, + { + "epoch": 0.45, + "learning_rate": 4.726595245227336e-05, + "loss": 0.9801, + "step": 815 + }, + { + "epoch": 0.45, + "learning_rate": 4.723293779269173e-05, + "loss": 0.9944, + "step": 820 + }, + { + "epoch": 0.46, + "learning_rate": 4.7199736668274924e-05, + "loss": 1.0358, + "step": 825 + }, + { + "epoch": 0.46, + "learning_rate": 4.716634935747655e-05, + "loss": 0.9907, + "step": 830 + }, + { + "epoch": 0.46, + "learning_rate": 4.713277614031177e-05, + "loss": 1.0698, + "step": 835 + }, + { + "epoch": 0.46, + "learning_rate": 4.70990172983549e-05, + "loss": 1.0466, + "step": 840 + }, + { + "epoch": 0.47, + "learning_rate": 4.706507311473707e-05, + "loss": 0.9489, + "step": 845 + }, + { + "epoch": 0.47, + "learning_rate": 4.703094387414385e-05, + "loss": 0.9936, + "step": 850 + }, + { + "epoch": 0.47, + "learning_rate": 4.699662986281288e-05, + "loss": 0.9672, + "step": 855 + }, + { + "epoch": 0.48, + "learning_rate": 4.696213136853141e-05, + "loss": 1.0164, + "step": 860 + }, + { + "epoch": 0.48, + "learning_rate": 4.6927448680633954e-05, + "loss": 0.9817, + "step": 865 + }, + { + "epoch": 0.48, + "learning_rate": 4.689258208999983e-05, + "loss": 1.028, + "step": 870 + }, + { + "epoch": 0.48, + "learning_rate": 4.6857531889050716e-05, + "loss": 1.0239, + "step": 875 + }, + { + "epoch": 0.49, + "learning_rate": 4.682229837174821e-05, + "loss": 1.0378, + "step": 880 + }, + { + "epoch": 0.49, + "learning_rate": 4.678688183359135e-05, + "loss": 0.9908, + "step": 885 + }, + { + "epoch": 0.49, + "learning_rate": 4.675128257161418e-05, + "loss": 0.9384, + "step": 890 + }, + { + "epoch": 0.49, + "learning_rate": 4.671550088438319e-05, + "loss": 0.9481, + "step": 895 + }, + { + "epoch": 0.5, + "learning_rate": 4.6679537071994874e-05, + "loss": 0.9541, + "step": 900 + }, + { + "epoch": 0.5, + "learning_rate": 4.6643391436073165e-05, + "loss": 0.9787, + "step": 905 + }, + { + "epoch": 0.5, + "learning_rate": 4.660706427976693e-05, + "loss": 0.9897, + "step": 910 + }, + { + "epoch": 0.51, + "learning_rate": 4.657055590774745e-05, + "loss": 0.9947, + "step": 915 + }, + { + "epoch": 0.51, + "learning_rate": 4.6533866626205805e-05, + "loss": 0.9614, + "step": 920 + }, + { + "epoch": 0.51, + "learning_rate": 4.649699674285036e-05, + "loss": 1.0323, + "step": 925 + }, + { + "epoch": 0.51, + "learning_rate": 4.645994656690417e-05, + "loss": 1.0345, + "step": 930 + }, + { + "epoch": 0.52, + "learning_rate": 4.642271640910235e-05, + "loss": 1.0432, + "step": 935 + }, + { + "epoch": 0.52, + "learning_rate": 4.638530658168954e-05, + "loss": 1.0102, + "step": 940 + }, + { + "epoch": 0.52, + "learning_rate": 4.6347717398417203e-05, + "loss": 1.0361, + "step": 945 + }, + { + "epoch": 0.53, + "learning_rate": 4.6309949174541096e-05, + "loss": 0.9818, + "step": 950 + }, + { + "epoch": 0.53, + "learning_rate": 4.627200222681851e-05, + "loss": 1.0293, + "step": 955 + }, + { + "epoch": 0.53, + "learning_rate": 4.6233876873505694e-05, + "loss": 1.0125, + "step": 960 + }, + { + "epoch": 0.53, + "learning_rate": 4.619557343435516e-05, + "loss": 1.0039, + "step": 965 + }, + { + "epoch": 0.54, + "learning_rate": 4.615709223061302e-05, + "loss": 1.0146, + "step": 970 + }, + { + "epoch": 0.54, + "learning_rate": 4.611843358501624e-05, + "loss": 1.0352, + "step": 975 + }, + { + "epoch": 0.54, + "learning_rate": 4.6079597821789993e-05, + "loss": 1.0405, + "step": 980 + }, + { + "epoch": 0.54, + "learning_rate": 4.604058526664491e-05, + "loss": 0.9834, + "step": 985 + }, + { + "epoch": 0.55, + "learning_rate": 4.600139624677436e-05, + "loss": 1.0252, + "step": 990 + }, + { + "epoch": 0.55, + "learning_rate": 4.596203109085168e-05, + "loss": 1.0516, + "step": 995 + }, + { + "epoch": 0.55, + "learning_rate": 4.5922490129027464e-05, + "loss": 0.9762, + "step": 1000 + }, + { + "epoch": 0.56, + "learning_rate": 4.588277369292674e-05, + "loss": 0.9801, + "step": 1005 + }, + { + "epoch": 0.56, + "learning_rate": 4.5842882115646234e-05, + "loss": 1.0127, + "step": 1010 + }, + { + "epoch": 0.56, + "learning_rate": 4.580281573175157e-05, + "loss": 1.0415, + "step": 1015 + }, + { + "epoch": 0.56, + "learning_rate": 4.576257487727442e-05, + "loss": 0.9974, + "step": 1020 + }, + { + "epoch": 0.57, + "learning_rate": 4.572215988970974e-05, + "loss": 1.0097, + "step": 1025 + }, + { + "epoch": 0.57, + "learning_rate": 4.568157110801293e-05, + "loss": 1.0004, + "step": 1030 + }, + { + "epoch": 0.57, + "learning_rate": 4.5640808872596944e-05, + "loss": 0.9932, + "step": 1035 + }, + { + "epoch": 0.58, + "learning_rate": 4.5599873525329505e-05, + "loss": 0.9437, + "step": 1040 + }, + { + "epoch": 0.58, + "learning_rate": 4.555876540953019e-05, + "loss": 1.0004, + "step": 1045 + }, + { + "epoch": 0.58, + "learning_rate": 4.551748486996755e-05, + "loss": 0.9743, + "step": 1050 + }, + { + "epoch": 0.58, + "learning_rate": 4.547603225285626e-05, + "loss": 1.0303, + "step": 1055 + }, + { + "epoch": 0.59, + "learning_rate": 4.543440790585417e-05, + "loss": 1.0225, + "step": 1060 + }, + { + "epoch": 0.59, + "learning_rate": 4.539261217805939e-05, + "loss": 1.0102, + "step": 1065 + }, + { + "epoch": 0.59, + "learning_rate": 4.535064542000743e-05, + "loss": 1.0087, + "step": 1070 + }, + { + "epoch": 0.59, + "learning_rate": 4.5308507983668165e-05, + "loss": 0.952, + "step": 1075 + }, + { + "epoch": 0.6, + "learning_rate": 4.526620022244293e-05, + "loss": 0.9767, + "step": 1080 + }, + { + "epoch": 0.6, + "learning_rate": 4.522372249116158e-05, + "loss": 1.0049, + "step": 1085 + }, + { + "epoch": 0.6, + "learning_rate": 4.5181075146079456e-05, + "loss": 1.0397, + "step": 1090 + }, + { + "epoch": 0.61, + "learning_rate": 4.5138258544874455e-05, + "loss": 1.0167, + "step": 1095 + }, + { + "epoch": 0.61, + "learning_rate": 4.5095273046643985e-05, + "loss": 0.9966, + "step": 1100 + }, + { + "epoch": 0.61, + "learning_rate": 4.5052119011901986e-05, + "loss": 0.983, + "step": 1105 + }, + { + "epoch": 0.61, + "learning_rate": 4.500879680257587e-05, + "loss": 1.0301, + "step": 1110 + }, + { + "epoch": 0.62, + "learning_rate": 4.4965306782003535e-05, + "loss": 1.0371, + "step": 1115 + }, + { + "epoch": 0.62, + "learning_rate": 4.492164931493028e-05, + "loss": 1.0083, + "step": 1120 + }, + { + "epoch": 0.62, + "learning_rate": 4.487782476750575e-05, + "loss": 0.9728, + "step": 1125 + }, + { + "epoch": 0.62, + "learning_rate": 4.4833833507280884e-05, + "loss": 0.9892, + "step": 1130 + }, + { + "epoch": 0.63, + "learning_rate": 4.4789675903204805e-05, + "loss": 0.9847, + "step": 1135 + }, + { + "epoch": 0.63, + "learning_rate": 4.474535232562176e-05, + "loss": 1.0545, + "step": 1140 + }, + { + "epoch": 0.63, + "learning_rate": 4.470086314626797e-05, + "loss": 1.0204, + "step": 1145 + }, + { + "epoch": 0.64, + "learning_rate": 4.465620873826856e-05, + "loss": 0.9648, + "step": 1150 + }, + { + "epoch": 0.64, + "learning_rate": 4.46113894761344e-05, + "loss": 0.9545, + "step": 1155 + }, + { + "epoch": 0.64, + "learning_rate": 4.456640573575896e-05, + "loss": 1.0256, + "step": 1160 + }, + { + "epoch": 0.64, + "learning_rate": 4.4521257894415183e-05, + "loss": 0.99, + "step": 1165 + }, + { + "epoch": 0.65, + "learning_rate": 4.44759463307523e-05, + "loss": 0.9906, + "step": 1170 + }, + { + "epoch": 0.65, + "learning_rate": 4.443047142479266e-05, + "loss": 0.9876, + "step": 1175 + }, + { + "epoch": 0.65, + "learning_rate": 4.4384833557928553e-05, + "loss": 1.0495, + "step": 1180 + }, + { + "epoch": 0.66, + "learning_rate": 4.4339033112918966e-05, + "loss": 0.9869, + "step": 1185 + }, + { + "epoch": 0.66, + "learning_rate": 4.4293070473886456e-05, + "loss": 1.0299, + "step": 1190 + }, + { + "epoch": 0.66, + "learning_rate": 4.424694602631385e-05, + "loss": 1.0073, + "step": 1195 + }, + { + "epoch": 0.66, + "learning_rate": 4.420066015704105e-05, + "loss": 1.0023, + "step": 1200 + }, + { + "epoch": 0.67, + "learning_rate": 4.41542132542618e-05, + "loss": 0.9462, + "step": 1205 + }, + { + "epoch": 0.67, + "learning_rate": 4.410760570752037e-05, + "loss": 1.0116, + "step": 1210 + }, + { + "epoch": 0.67, + "learning_rate": 4.4060837907708375e-05, + "loss": 0.9652, + "step": 1215 + }, + { + "epoch": 0.67, + "learning_rate": 4.401391024706142e-05, + "loss": 1.0411, + "step": 1220 + }, + { + "epoch": 0.68, + "learning_rate": 4.396682311915586e-05, + "loss": 0.9691, + "step": 1225 + }, + { + "epoch": 0.68, + "learning_rate": 4.3919576918905495e-05, + "loss": 0.961, + "step": 1230 + }, + { + "epoch": 0.68, + "learning_rate": 4.387217204255819e-05, + "loss": 0.9602, + "step": 1235 + }, + { + "epoch": 0.69, + "learning_rate": 4.3824608887692666e-05, + "loss": 1.0197, + "step": 1240 + }, + { + "epoch": 0.69, + "learning_rate": 4.377688785321507e-05, + "loss": 0.9982, + "step": 1245 + }, + { + "epoch": 0.69, + "learning_rate": 4.372900933935569e-05, + "loss": 0.986, + "step": 1250 + }, + { + "epoch": 0.69, + "learning_rate": 4.368097374766556e-05, + "loss": 0.9744, + "step": 1255 + }, + { + "epoch": 0.7, + "learning_rate": 4.3632781481013105e-05, + "loss": 1.0078, + "step": 1260 + }, + { + "epoch": 0.7, + "learning_rate": 4.358443294358077e-05, + "loss": 0.9865, + "step": 1265 + }, + { + "epoch": 0.7, + "learning_rate": 4.35359285408616e-05, + "loss": 0.976, + "step": 1270 + }, + { + "epoch": 0.71, + "learning_rate": 4.348726867965591e-05, + "loss": 1.0141, + "step": 1275 + }, + { + "epoch": 0.71, + "learning_rate": 4.343845376806777e-05, + "loss": 0.952, + "step": 1280 + }, + { + "epoch": 0.71, + "learning_rate": 4.338948421550169e-05, + "loss": 1.0239, + "step": 1285 + }, + { + "epoch": 0.71, + "learning_rate": 4.334036043265909e-05, + "loss": 0.994, + "step": 1290 + }, + { + "epoch": 0.72, + "learning_rate": 4.329108283153492e-05, + "loss": 1.0505, + "step": 1295 + }, + { + "epoch": 0.72, + "learning_rate": 4.3241651825414195e-05, + "loss": 0.9919, + "step": 1300 + }, + { + "epoch": 0.72, + "learning_rate": 4.31920678288685e-05, + "loss": 1.0169, + "step": 1305 + }, + { + "epoch": 0.72, + "learning_rate": 4.3142331257752546e-05, + "loss": 0.9938, + "step": 1310 + }, + { + "epoch": 0.73, + "learning_rate": 4.309244252920064e-05, + "loss": 0.92, + "step": 1315 + }, + { + "epoch": 0.73, + "learning_rate": 4.304240206162326e-05, + "loss": 0.9537, + "step": 1320 + }, + { + "epoch": 0.73, + "learning_rate": 4.299221027470345e-05, + "loss": 0.996, + "step": 1325 + }, + { + "epoch": 0.74, + "learning_rate": 4.294186758939339e-05, + "loss": 0.9652, + "step": 1330 + }, + { + "epoch": 0.74, + "learning_rate": 4.2891374427910795e-05, + "loss": 1.0138, + "step": 1335 + }, + { + "epoch": 0.74, + "learning_rate": 4.284073121373544e-05, + "loss": 0.9267, + "step": 1340 + }, + { + "epoch": 0.74, + "learning_rate": 4.278993837160553e-05, + "loss": 0.9899, + "step": 1345 + }, + { + "epoch": 0.75, + "learning_rate": 4.273899632751422e-05, + "loss": 0.963, + "step": 1350 + }, + { + "epoch": 0.75, + "learning_rate": 4.2687905508705974e-05, + "loss": 1.0066, + "step": 1355 + }, + { + "epoch": 0.75, + "learning_rate": 4.263666634367303e-05, + "loss": 0.9939, + "step": 1360 + }, + { + "epoch": 0.75, + "learning_rate": 4.258527926215178e-05, + "loss": 0.9667, + "step": 1365 + }, + { + "epoch": 0.76, + "learning_rate": 4.253374469511917e-05, + "loss": 0.9999, + "step": 1370 + }, + { + "epoch": 0.76, + "learning_rate": 4.248206307478909e-05, + "loss": 0.9738, + "step": 1375 + }, + { + "epoch": 0.76, + "learning_rate": 4.243023483460875e-05, + "loss": 0.9829, + "step": 1380 + }, + { + "epoch": 0.77, + "learning_rate": 4.237826040925503e-05, + "loss": 1.0107, + "step": 1385 + }, + { + "epoch": 0.77, + "learning_rate": 4.232614023463088e-05, + "loss": 0.9992, + "step": 1390 + }, + { + "epoch": 0.77, + "learning_rate": 4.227387474786159e-05, + "loss": 0.9566, + "step": 1395 + }, + { + "epoch": 0.77, + "learning_rate": 4.222146438729119e-05, + "loss": 0.9722, + "step": 1400 + }, + { + "epoch": 0.78, + "learning_rate": 4.216890959247873e-05, + "loss": 0.988, + "step": 1405 + }, + { + "epoch": 0.78, + "learning_rate": 4.211621080419463e-05, + "loss": 1.0103, + "step": 1410 + }, + { + "epoch": 0.78, + "learning_rate": 4.206336846441695e-05, + "loss": 0.9805, + "step": 1415 + }, + { + "epoch": 0.79, + "learning_rate": 4.201038301632772e-05, + "loss": 1.0177, + "step": 1420 + }, + { + "epoch": 0.79, + "learning_rate": 4.195725490430917e-05, + "loss": 0.9938, + "step": 1425 + }, + { + "epoch": 0.79, + "learning_rate": 4.190398457394007e-05, + "loss": 1.0276, + "step": 1430 + }, + { + "epoch": 0.79, + "learning_rate": 4.1850572471991924e-05, + "loss": 0.9909, + "step": 1435 + }, + { + "epoch": 0.8, + "learning_rate": 4.1797019046425264e-05, + "loss": 0.9794, + "step": 1440 + }, + { + "epoch": 0.8, + "learning_rate": 4.1743324746385914e-05, + "loss": 1.0073, + "step": 1445 + }, + { + "epoch": 0.8, + "learning_rate": 4.1689490022201154e-05, + "loss": 1.0106, + "step": 1450 + }, + { + "epoch": 0.8, + "learning_rate": 4.163551532537601e-05, + "loss": 1.0241, + "step": 1455 + }, + { + "epoch": 0.81, + "learning_rate": 4.1581401108589425e-05, + "loss": 1.0178, + "step": 1460 + }, + { + "epoch": 0.81, + "learning_rate": 4.1527147825690495e-05, + "loss": 0.9426, + "step": 1465 + }, + { + "epoch": 0.81, + "learning_rate": 4.1472755931694626e-05, + "loss": 0.9642, + "step": 1470 + }, + { + "epoch": 0.82, + "learning_rate": 4.141822588277976e-05, + "loss": 0.9678, + "step": 1475 + }, + { + "epoch": 0.82, + "learning_rate": 4.136355813628251e-05, + "loss": 0.9654, + "step": 1480 + }, + { + "epoch": 0.82, + "learning_rate": 4.130875315069435e-05, + "loss": 0.9748, + "step": 1485 + }, + { + "epoch": 0.82, + "learning_rate": 4.125381138565775e-05, + "loss": 0.9321, + "step": 1490 + }, + { + "epoch": 0.83, + "learning_rate": 4.1198733301962346e-05, + "loss": 0.9885, + "step": 1495 + }, + { + "epoch": 0.83, + "learning_rate": 4.114351936154105e-05, + "loss": 0.9385, + "step": 1500 + }, + { + "epoch": 0.83, + "learning_rate": 4.108817002746619e-05, + "loss": 0.9362, + "step": 1505 + }, + { + "epoch": 0.83, + "learning_rate": 4.1032685763945625e-05, + "loss": 0.9764, + "step": 1510 + }, + { + "epoch": 0.84, + "learning_rate": 4.097706703631886e-05, + "loss": 0.961, + "step": 1515 + }, + { + "epoch": 0.84, + "learning_rate": 4.092131431105312e-05, + "loss": 0.9818, + "step": 1520 + }, + { + "epoch": 0.84, + "learning_rate": 4.086542805573945e-05, + "loss": 0.9855, + "step": 1525 + }, + { + "epoch": 0.85, + "learning_rate": 4.080940873908881e-05, + "loss": 0.9618, + "step": 1530 + }, + { + "epoch": 0.85, + "learning_rate": 4.07532568309281e-05, + "loss": 1.0102, + "step": 1535 + }, + { + "epoch": 0.85, + "learning_rate": 4.069697280219628e-05, + "loss": 0.9773, + "step": 1540 + }, + { + "epoch": 0.85, + "learning_rate": 4.0640557124940376e-05, + "loss": 0.9687, + "step": 1545 + }, + { + "epoch": 0.86, + "learning_rate": 4.058401027231152e-05, + "loss": 0.9818, + "step": 1550 + }, + { + "epoch": 0.86, + "learning_rate": 4.052733271856103e-05, + "loss": 0.9911, + "step": 1555 + }, + { + "epoch": 0.86, + "learning_rate": 4.0470524939036355e-05, + "loss": 0.9694, + "step": 1560 + }, + { + "epoch": 0.87, + "learning_rate": 4.0413587410177155e-05, + "loss": 0.9374, + "step": 1565 + }, + { + "epoch": 0.87, + "learning_rate": 4.035652060951128e-05, + "loss": 0.956, + "step": 1570 + }, + { + "epoch": 0.87, + "learning_rate": 4.0299325015650774e-05, + "loss": 0.9491, + "step": 1575 + }, + { + "epoch": 0.87, + "learning_rate": 4.024200110828783e-05, + "loss": 0.9569, + "step": 1580 + }, + { + "epoch": 0.88, + "learning_rate": 4.018454936819082e-05, + "loss": 0.9627, + "step": 1585 + }, + { + "epoch": 0.88, + "learning_rate": 4.012697027720018e-05, + "loss": 0.9922, + "step": 1590 + }, + { + "epoch": 0.88, + "learning_rate": 4.0069264318224506e-05, + "loss": 0.9703, + "step": 1595 + }, + { + "epoch": 0.88, + "learning_rate": 4.0011431975236337e-05, + "loss": 0.9436, + "step": 1600 + }, + { + "epoch": 0.89, + "learning_rate": 3.995347373326822e-05, + "loss": 0.961, + "step": 1605 + }, + { + "epoch": 0.89, + "learning_rate": 3.989539007840861e-05, + "loss": 0.9247, + "step": 1610 + }, + { + "epoch": 0.89, + "learning_rate": 3.983718149779775e-05, + "loss": 0.9537, + "step": 1615 + }, + { + "epoch": 0.9, + "learning_rate": 3.9778848479623656e-05, + "loss": 0.9415, + "step": 1620 + }, + { + "epoch": 0.9, + "learning_rate": 3.972039151311795e-05, + "loss": 0.9954, + "step": 1625 + }, + { + "epoch": 0.9, + "learning_rate": 3.966181108855183e-05, + "loss": 0.9451, + "step": 1630 + }, + { + "epoch": 0.9, + "learning_rate": 3.960310769723189e-05, + "loss": 0.9977, + "step": 1635 + }, + { + "epoch": 0.91, + "learning_rate": 3.9544281831496034e-05, + "loss": 0.9806, + "step": 1640 + }, + { + "epoch": 0.91, + "learning_rate": 3.9485333984709374e-05, + "loss": 0.9851, + "step": 1645 + }, + { + "epoch": 0.91, + "learning_rate": 3.942626465126001e-05, + "loss": 1.0089, + "step": 1650 + }, + { + "epoch": 0.92, + "learning_rate": 3.9367074326555e-05, + "loss": 0.9562, + "step": 1655 + }, + { + "epoch": 0.92, + "learning_rate": 3.930776350701609e-05, + "loss": 0.9892, + "step": 1660 + }, + { + "epoch": 0.92, + "learning_rate": 3.92483326900756e-05, + "loss": 0.9875, + "step": 1665 + }, + { + "epoch": 0.92, + "learning_rate": 3.91887823741723e-05, + "loss": 1.0012, + "step": 1670 + }, + { + "epoch": 0.93, + "learning_rate": 3.9129113058747136e-05, + "loss": 0.97, + "step": 1675 + }, + { + "epoch": 0.93, + "learning_rate": 3.9069325244239095e-05, + "loss": 0.9793, + "step": 1680 + }, + { + "epoch": 0.93, + "learning_rate": 3.900941943208103e-05, + "loss": 0.9864, + "step": 1685 + }, + { + "epoch": 0.93, + "learning_rate": 3.894939612469539e-05, + "loss": 0.9596, + "step": 1690 + }, + { + "epoch": 0.94, + "learning_rate": 3.888925582549006e-05, + "loss": 0.9679, + "step": 1695 + }, + { + "epoch": 0.94, + "learning_rate": 3.882899903885412e-05, + "loss": 0.9395, + "step": 1700 + }, + { + "epoch": 0.94, + "learning_rate": 3.876862627015361e-05, + "loss": 0.9774, + "step": 1705 + }, + { + "epoch": 0.95, + "learning_rate": 3.87081380257273e-05, + "loss": 0.9686, + "step": 1710 + }, + { + "epoch": 0.95, + "learning_rate": 3.864753481288244e-05, + "loss": 0.953, + "step": 1715 + }, + { + "epoch": 0.95, + "learning_rate": 3.8586817139890515e-05, + "loss": 0.9719, + "step": 1720 + }, + { + "epoch": 0.95, + "learning_rate": 3.852598551598294e-05, + "loss": 0.9748, + "step": 1725 + }, + { + "epoch": 0.96, + "learning_rate": 3.8465040451346874e-05, + "loss": 0.9741, + "step": 1730 + }, + { + "epoch": 0.96, + "learning_rate": 3.8403982457120836e-05, + "loss": 0.9747, + "step": 1735 + }, + { + "epoch": 0.96, + "learning_rate": 3.834281204539051e-05, + "loss": 0.9791, + "step": 1740 + }, + { + "epoch": 0.96, + "learning_rate": 3.828152972918438e-05, + "loss": 0.9704, + "step": 1745 + }, + { + "epoch": 0.97, + "learning_rate": 3.82201360224695e-05, + "loss": 0.9556, + "step": 1750 + }, + { + "epoch": 0.97, + "learning_rate": 3.815863144014711e-05, + "loss": 0.9552, + "step": 1755 + }, + { + "epoch": 0.97, + "learning_rate": 3.809701649804834e-05, + "loss": 0.9594, + "step": 1760 + }, + { + "epoch": 0.98, + "learning_rate": 3.8035291712929926e-05, + "loss": 0.959, + "step": 1765 + }, + { + "epoch": 0.98, + "learning_rate": 3.797345760246982e-05, + "loss": 1.0254, + "step": 1770 + }, + { + "epoch": 0.98, + "learning_rate": 3.791151468526289e-05, + "loss": 0.9925, + "step": 1775 + }, + { + "epoch": 0.98, + "learning_rate": 3.784946348081654e-05, + "loss": 0.9516, + "step": 1780 + }, + { + "epoch": 0.99, + "learning_rate": 3.7787304509546365e-05, + "loss": 0.954, + "step": 1785 + }, + { + "epoch": 0.99, + "learning_rate": 3.7725038292771774e-05, + "loss": 0.9465, + "step": 1790 + }, + { + "epoch": 0.99, + "learning_rate": 3.766266535271167e-05, + "loss": 0.9792, + "step": 1795 + }, + { + "epoch": 1.0, + "learning_rate": 3.760018621248e-05, + "loss": 0.964, + "step": 1800 + }, + { + "epoch": 1.0, + "learning_rate": 3.75376013960814e-05, + "loss": 0.9419, + "step": 1805 + }, + { + "epoch": 1.0, + "learning_rate": 3.747491142840681e-05, + "loss": 0.9818, + "step": 1810 + }, + { + "epoch": 1.0, + "learning_rate": 3.741211683522904e-05, + "loss": 0.9153, + "step": 1815 + }, + { + "epoch": 1.01, + "learning_rate": 3.734921814319841e-05, + "loss": 0.955, + "step": 1820 + }, + { + "epoch": 1.01, + "learning_rate": 3.728621587983828e-05, + "loss": 0.9467, + "step": 1825 + }, + { + "epoch": 1.01, + "learning_rate": 3.722311057354067e-05, + "loss": 0.9816, + "step": 1830 + }, + { + "epoch": 1.01, + "learning_rate": 3.715990275356178e-05, + "loss": 0.9727, + "step": 1835 + }, + { + "epoch": 1.02, + "learning_rate": 3.7096592950017617e-05, + "loss": 0.9822, + "step": 1840 + }, + { + "epoch": 1.02, + "learning_rate": 3.703318169387947e-05, + "loss": 0.942, + "step": 1845 + }, + { + "epoch": 1.02, + "learning_rate": 3.696966951696952e-05, + "loss": 0.9306, + "step": 1850 + }, + { + "epoch": 1.03, + "learning_rate": 3.690605695195637e-05, + "loss": 0.9654, + "step": 1855 + }, + { + "epoch": 1.03, + "learning_rate": 3.684234453235054e-05, + "loss": 0.9592, + "step": 1860 + }, + { + "epoch": 1.03, + "learning_rate": 3.677853279250003e-05, + "loss": 0.988, + "step": 1865 + }, + { + "epoch": 1.03, + "learning_rate": 3.671462226758583e-05, + "loss": 0.9462, + "step": 1870 + }, + { + "epoch": 1.04, + "learning_rate": 3.665061349361742e-05, + "loss": 0.9685, + "step": 1875 + }, + { + "epoch": 1.04, + "learning_rate": 3.658650700742828e-05, + "loss": 0.9772, + "step": 1880 + }, + { + "epoch": 1.04, + "learning_rate": 3.6522303346671404e-05, + "loss": 0.9482, + "step": 1885 + }, + { + "epoch": 1.05, + "learning_rate": 3.645800304981477e-05, + "loss": 1.0069, + "step": 1890 + }, + { + "epoch": 1.05, + "learning_rate": 3.639360665613683e-05, + "loss": 0.9003, + "step": 1895 + }, + { + "epoch": 1.05, + "learning_rate": 3.632911470572197e-05, + "loss": 0.9279, + "step": 1900 + }, + { + "epoch": 1.05, + "learning_rate": 3.626452773945603e-05, + "loss": 0.9237, + "step": 1905 + }, + { + "epoch": 1.06, + "learning_rate": 3.619984629902172e-05, + "loss": 1.0086, + "step": 1910 + }, + { + "epoch": 1.06, + "learning_rate": 3.613507092689409e-05, + "loss": 0.9625, + "step": 1915 + }, + { + "epoch": 1.06, + "learning_rate": 3.607020216633599e-05, + "loss": 0.9297, + "step": 1920 + }, + { + "epoch": 1.06, + "learning_rate": 3.60052405613935e-05, + "loss": 0.9718, + "step": 1925 + }, + { + "epoch": 1.07, + "learning_rate": 3.594018665689139e-05, + "loss": 0.9512, + "step": 1930 + }, + { + "epoch": 1.07, + "learning_rate": 3.5875040998428513e-05, + "loss": 0.9923, + "step": 1935 + }, + { + "epoch": 1.07, + "learning_rate": 3.5809804132373253e-05, + "loss": 0.9518, + "step": 1940 + }, + { + "epoch": 1.08, + "learning_rate": 3.574447660585897e-05, + "loss": 0.9324, + "step": 1945 + }, + { + "epoch": 1.08, + "learning_rate": 3.5679058966779344e-05, + "loss": 0.9724, + "step": 1950 + }, + { + "epoch": 1.08, + "learning_rate": 3.561355176378384e-05, + "loss": 0.978, + "step": 1955 + }, + { + "epoch": 1.08, + "learning_rate": 3.554795554627307e-05, + "loss": 0.9893, + "step": 1960 + }, + { + "epoch": 1.09, + "learning_rate": 3.548227086439422e-05, + "loss": 0.967, + "step": 1965 + }, + { + "epoch": 1.09, + "learning_rate": 3.541649826903639e-05, + "loss": 0.9529, + "step": 1970 + }, + { + "epoch": 1.09, + "learning_rate": 3.535063831182602e-05, + "loss": 0.9506, + "step": 1975 + }, + { + "epoch": 1.09, + "learning_rate": 3.528469154512224e-05, + "loss": 0.9525, + "step": 1980 + }, + { + "epoch": 1.1, + "learning_rate": 3.521865852201223e-05, + "loss": 0.9258, + "step": 1985 + }, + { + "epoch": 1.1, + "learning_rate": 3.5152539796306596e-05, + "loss": 0.9417, + "step": 1990 + }, + { + "epoch": 1.1, + "learning_rate": 3.508633592253472e-05, + "loss": 0.9465, + "step": 1995 + }, + { + "epoch": 1.11, + "learning_rate": 3.502004745594011e-05, + "loss": 0.9494, + "step": 2000 + }, + { + "epoch": 1.11, + "learning_rate": 3.4953674952475755e-05, + "loss": 0.9709, + "step": 2005 + }, + { + "epoch": 1.11, + "learning_rate": 3.488721896879943e-05, + "loss": 0.9581, + "step": 2010 + }, + { + "epoch": 1.11, + "learning_rate": 3.4820680062269074e-05, + "loss": 0.974, + "step": 2015 + }, + { + "epoch": 1.12, + "learning_rate": 3.4754058790938046e-05, + "loss": 0.9768, + "step": 2020 + }, + { + "epoch": 1.12, + "learning_rate": 3.468735571355055e-05, + "loss": 0.982, + "step": 2025 + }, + { + "epoch": 1.12, + "learning_rate": 3.4620571389536825e-05, + "loss": 0.984, + "step": 2030 + }, + { + "epoch": 1.13, + "learning_rate": 3.455370637900856e-05, + "loss": 0.9604, + "step": 2035 + }, + { + "epoch": 1.13, + "learning_rate": 3.448676124275414e-05, + "loss": 0.897, + "step": 2040 + }, + { + "epoch": 1.13, + "learning_rate": 3.4419736542233925e-05, + "loss": 0.9968, + "step": 2045 + }, + { + "epoch": 1.13, + "learning_rate": 3.4352632839575616e-05, + "loss": 0.9479, + "step": 2050 + }, + { + "epoch": 1.14, + "learning_rate": 3.428545069756946e-05, + "loss": 0.9724, + "step": 2055 + }, + { + "epoch": 1.14, + "learning_rate": 3.42181906796636e-05, + "loss": 0.9493, + "step": 2060 + }, + { + "epoch": 1.14, + "learning_rate": 3.415085334995927e-05, + "loss": 0.9348, + "step": 2065 + }, + { + "epoch": 1.14, + "learning_rate": 3.408343927320613e-05, + "loss": 0.9702, + "step": 2070 + }, + { + "epoch": 1.15, + "learning_rate": 3.401594901479753e-05, + "loss": 0.9089, + "step": 2075 + }, + { + "epoch": 1.15, + "learning_rate": 3.394838314076572e-05, + "loss": 0.9606, + "step": 2080 + }, + { + "epoch": 1.15, + "learning_rate": 3.3880742217777115e-05, + "loss": 0.9743, + "step": 2085 + }, + { + "epoch": 1.16, + "learning_rate": 3.381302681312759e-05, + "loss": 0.9469, + "step": 2090 + }, + { + "epoch": 1.16, + "learning_rate": 3.374523749473767e-05, + "loss": 0.949, + "step": 2095 + }, + { + "epoch": 1.16, + "learning_rate": 3.367737483114779e-05, + "loss": 0.9567, + "step": 2100 + }, + { + "epoch": 1.16, + "learning_rate": 3.360943939151351e-05, + "loss": 0.9718, + "step": 2105 + }, + { + "epoch": 1.17, + "learning_rate": 3.354143174560078e-05, + "loss": 0.9626, + "step": 2110 + }, + { + "epoch": 1.17, + "learning_rate": 3.3473352463781105e-05, + "loss": 0.9346, + "step": 2115 + }, + { + "epoch": 1.17, + "learning_rate": 3.340520211702681e-05, + "loss": 0.9208, + "step": 2120 + }, + { + "epoch": 1.18, + "learning_rate": 3.333698127690623e-05, + "loss": 0.9856, + "step": 2125 + }, + { + "epoch": 1.18, + "learning_rate": 3.326869051557891e-05, + "loss": 0.9049, + "step": 2130 + }, + { + "epoch": 1.18, + "learning_rate": 3.320033040579082e-05, + "loss": 0.9222, + "step": 2135 + }, + { + "epoch": 1.18, + "learning_rate": 3.3131901520869565e-05, + "loss": 0.9648, + "step": 2140 + }, + { + "epoch": 1.19, + "learning_rate": 3.306340443471951e-05, + "loss": 0.9538, + "step": 2145 + }, + { + "epoch": 1.19, + "learning_rate": 3.299483972181708e-05, + "loss": 0.9314, + "step": 2150 + }, + { + "epoch": 1.19, + "learning_rate": 3.292620795720583e-05, + "loss": 0.9576, + "step": 2155 + }, + { + "epoch": 1.19, + "learning_rate": 3.285750971649167e-05, + "loss": 0.9499, + "step": 2160 + }, + { + "epoch": 1.2, + "learning_rate": 3.278874557583807e-05, + "loss": 0.9568, + "step": 2165 + }, + { + "epoch": 1.2, + "learning_rate": 3.271991611196117e-05, + "loss": 0.9642, + "step": 2170 + }, + { + "epoch": 1.2, + "learning_rate": 3.265102190212497e-05, + "loss": 0.9526, + "step": 2175 + }, + { + "epoch": 1.21, + "learning_rate": 3.258206352413648e-05, + "loss": 0.933, + "step": 2180 + }, + { + "epoch": 1.21, + "learning_rate": 3.2513041556340887e-05, + "loss": 0.9683, + "step": 2185 + }, + { + "epoch": 1.21, + "learning_rate": 3.244395657761671e-05, + "loss": 0.9155, + "step": 2190 + }, + { + "epoch": 1.21, + "learning_rate": 3.2374809167370924e-05, + "loss": 0.9262, + "step": 2195 + }, + { + "epoch": 1.22, + "learning_rate": 3.230559990553409e-05, + "loss": 0.9778, + "step": 2200 + }, + { + "epoch": 1.22, + "learning_rate": 3.2236329372555544e-05, + "loss": 0.9577, + "step": 2205 + }, + { + "epoch": 1.22, + "learning_rate": 3.2166998149398465e-05, + "loss": 0.9286, + "step": 2210 + }, + { + "epoch": 1.22, + "learning_rate": 3.209760681753505e-05, + "loss": 0.9634, + "step": 2215 + }, + { + "epoch": 1.23, + "learning_rate": 3.2028155958941615e-05, + "loss": 0.9467, + "step": 2220 + }, + { + "epoch": 1.23, + "learning_rate": 3.195864615609373e-05, + "loss": 0.9543, + "step": 2225 + }, + { + "epoch": 1.23, + "learning_rate": 3.1889077991961304e-05, + "loss": 0.9914, + "step": 2230 + }, + { + "epoch": 1.24, + "learning_rate": 3.181945205000373e-05, + "loss": 0.9309, + "step": 2235 + }, + { + "epoch": 1.24, + "learning_rate": 3.1749768914164955e-05, + "loss": 0.9299, + "step": 2240 + }, + { + "epoch": 1.24, + "learning_rate": 3.168002916886864e-05, + "loss": 0.9462, + "step": 2245 + }, + { + "epoch": 1.24, + "learning_rate": 3.1610233399013194e-05, + "loss": 0.948, + "step": 2250 + }, + { + "epoch": 1.25, + "learning_rate": 3.15403821899669e-05, + "loss": 0.9335, + "step": 2255 + }, + { + "epoch": 1.25, + "learning_rate": 3.147047612756302e-05, + "loss": 0.952, + "step": 2260 + }, + { + "epoch": 1.25, + "learning_rate": 3.140051579809484e-05, + "loss": 0.9532, + "step": 2265 + }, + { + "epoch": 1.26, + "learning_rate": 3.133050178831079e-05, + "loss": 0.9853, + "step": 2270 + }, + { + "epoch": 1.26, + "learning_rate": 3.12604346854095e-05, + "loss": 0.8925, + "step": 2275 + }, + { + "epoch": 1.26, + "learning_rate": 3.119031507703491e-05, + "loss": 0.942, + "step": 2280 + }, + { + "epoch": 1.26, + "learning_rate": 3.112014355127129e-05, + "loss": 0.9132, + "step": 2285 + }, + { + "epoch": 1.27, + "learning_rate": 3.104992069663835e-05, + "loss": 0.9335, + "step": 2290 + }, + { + "epoch": 1.27, + "learning_rate": 3.0979647102086273e-05, + "loss": 0.9403, + "step": 2295 + }, + { + "epoch": 1.27, + "learning_rate": 3.090932335699081e-05, + "loss": 0.9246, + "step": 2300 + }, + { + "epoch": 1.27, + "learning_rate": 3.083895005114831e-05, + "loss": 0.912, + "step": 2305 + }, + { + "epoch": 1.28, + "learning_rate": 3.076852777477079e-05, + "loss": 0.9334, + "step": 2310 + }, + { + "epoch": 1.28, + "learning_rate": 3.069805711848096e-05, + "loss": 0.933, + "step": 2315 + }, + { + "epoch": 1.28, + "learning_rate": 3.062753867330729e-05, + "loss": 0.9348, + "step": 2320 + }, + { + "epoch": 1.29, + "learning_rate": 3.055697303067905e-05, + "loss": 0.9997, + "step": 2325 + }, + { + "epoch": 1.29, + "learning_rate": 3.048636078242137e-05, + "loss": 0.9196, + "step": 2330 + }, + { + "epoch": 1.29, + "learning_rate": 3.0415702520750235e-05, + "loss": 0.9735, + "step": 2335 + }, + { + "epoch": 1.29, + "learning_rate": 3.0344998838267525e-05, + "loss": 0.9395, + "step": 2340 + }, + { + "epoch": 1.3, + "learning_rate": 3.0274250327956093e-05, + "loss": 0.9455, + "step": 2345 + }, + { + "epoch": 1.3, + "learning_rate": 3.020345758317474e-05, + "loss": 0.968, + "step": 2350 + }, + { + "epoch": 1.3, + "learning_rate": 3.0132621197653245e-05, + "loss": 0.9403, + "step": 2355 + }, + { + "epoch": 1.3, + "learning_rate": 3.0061741765487418e-05, + "loss": 0.9267, + "step": 2360 + }, + { + "epoch": 1.31, + "learning_rate": 2.9990819881134073e-05, + "loss": 0.9734, + "step": 2365 + }, + { + "epoch": 1.31, + "learning_rate": 2.9919856139406093e-05, + "loss": 0.949, + "step": 2370 + }, + { + "epoch": 1.31, + "learning_rate": 2.9848851135467386e-05, + "loss": 0.9464, + "step": 2375 + }, + { + "epoch": 1.32, + "learning_rate": 2.977780546482794e-05, + "loss": 0.9709, + "step": 2380 + }, + { + "epoch": 1.32, + "learning_rate": 2.9706719723338795e-05, + "loss": 0.9202, + "step": 2385 + }, + { + "epoch": 1.32, + "learning_rate": 2.9635594507187074e-05, + "loss": 0.9625, + "step": 2390 + }, + { + "epoch": 1.32, + "learning_rate": 2.956443041289096e-05, + "loss": 0.9307, + "step": 2395 + }, + { + "epoch": 1.33, + "learning_rate": 2.9493228037294702e-05, + "loss": 0.907, + "step": 2400 + }, + { + "epoch": 1.33, + "learning_rate": 2.9421987977563613e-05, + "loss": 0.9487, + "step": 2405 + }, + { + "epoch": 1.33, + "learning_rate": 2.935071083117907e-05, + "loss": 0.9259, + "step": 2410 + }, + { + "epoch": 1.34, + "learning_rate": 2.9279397195933457e-05, + "loss": 0.9357, + "step": 2415 + }, + { + "epoch": 1.34, + "learning_rate": 2.920804766992521e-05, + "loss": 0.9149, + "step": 2420 + }, + { + "epoch": 1.34, + "learning_rate": 2.9136662851553787e-05, + "loss": 0.9664, + "step": 2425 + }, + { + "epoch": 1.34, + "learning_rate": 2.906524333951461e-05, + "loss": 0.973, + "step": 2430 + }, + { + "epoch": 1.35, + "learning_rate": 2.899378973279409e-05, + "loss": 0.9797, + "step": 2435 + }, + { + "epoch": 1.35, + "learning_rate": 2.892230263066459e-05, + "loss": 0.9604, + "step": 2440 + }, + { + "epoch": 1.35, + "learning_rate": 2.885078263267938e-05, + "loss": 0.9147, + "step": 2445 + }, + { + "epoch": 1.35, + "learning_rate": 2.8779230338667634e-05, + "loss": 0.9827, + "step": 2450 + }, + { + "epoch": 1.36, + "learning_rate": 2.870764634872939e-05, + "loss": 0.9362, + "step": 2455 + }, + { + "epoch": 1.36, + "learning_rate": 2.86360312632305e-05, + "loss": 0.9373, + "step": 2460 + }, + { + "epoch": 1.36, + "learning_rate": 2.8564385682797622e-05, + "loss": 0.955, + "step": 2465 + }, + { + "epoch": 1.37, + "learning_rate": 2.8492710208313177e-05, + "loss": 0.9108, + "step": 2470 + }, + { + "epoch": 1.37, + "learning_rate": 2.8421005440910303e-05, + "loss": 0.9259, + "step": 2475 + }, + { + "epoch": 1.37, + "learning_rate": 2.8349271981967797e-05, + "loss": 0.9333, + "step": 2480 + }, + { + "epoch": 1.37, + "learning_rate": 2.8277510433105102e-05, + "loss": 0.9437, + "step": 2485 + }, + { + "epoch": 1.38, + "learning_rate": 2.820572139617725e-05, + "loss": 0.9189, + "step": 2490 + }, + { + "epoch": 1.38, + "learning_rate": 2.8133905473269802e-05, + "loss": 0.9522, + "step": 2495 + }, + { + "epoch": 1.38, + "learning_rate": 2.8062063266693818e-05, + "loss": 0.9352, + "step": 2500 + }, + { + "epoch": 1.39, + "learning_rate": 2.7990195378980784e-05, + "loss": 0.9232, + "step": 2505 + }, + { + "epoch": 1.39, + "learning_rate": 2.7918302412877583e-05, + "loss": 0.8807, + "step": 2510 + }, + { + "epoch": 1.39, + "learning_rate": 2.7846384971341427e-05, + "loss": 0.9231, + "step": 2515 + }, + { + "epoch": 1.39, + "learning_rate": 2.7774443657534788e-05, + "loss": 0.9253, + "step": 2520 + }, + { + "epoch": 1.4, + "learning_rate": 2.770247907482036e-05, + "loss": 0.9646, + "step": 2525 + }, + { + "epoch": 1.4, + "learning_rate": 2.763049182675599e-05, + "loss": 0.9575, + "step": 2530 + }, + { + "epoch": 1.4, + "learning_rate": 2.7558482517089617e-05, + "loss": 0.9234, + "step": 2535 + }, + { + "epoch": 1.4, + "learning_rate": 2.748645174975421e-05, + "loss": 0.9215, + "step": 2540 + }, + { + "epoch": 1.41, + "learning_rate": 2.74144001288627e-05, + "loss": 0.959, + "step": 2545 + }, + { + "epoch": 1.41, + "learning_rate": 2.7342328258702894e-05, + "loss": 0.9509, + "step": 2550 + }, + { + "epoch": 1.41, + "learning_rate": 2.727023674373246e-05, + "loss": 0.8989, + "step": 2555 + }, + { + "epoch": 1.42, + "learning_rate": 2.7198126188573807e-05, + "loss": 0.9653, + "step": 2560 + }, + { + "epoch": 1.42, + "learning_rate": 2.7125997198009028e-05, + "loss": 0.9046, + "step": 2565 + }, + { + "epoch": 1.42, + "learning_rate": 2.7053850376974848e-05, + "loss": 0.9318, + "step": 2570 + }, + { + "epoch": 1.42, + "learning_rate": 2.6981686330557516e-05, + "loss": 0.9292, + "step": 2575 + }, + { + "epoch": 1.43, + "learning_rate": 2.6909505663987756e-05, + "loss": 0.99, + "step": 2580 + }, + { + "epoch": 1.43, + "learning_rate": 2.6837308982635678e-05, + "loss": 0.9737, + "step": 2585 + }, + { + "epoch": 1.43, + "learning_rate": 2.6765096892005726e-05, + "loss": 0.9649, + "step": 2590 + }, + { + "epoch": 1.43, + "learning_rate": 2.6692869997731545e-05, + "loss": 0.9687, + "step": 2595 + }, + { + "epoch": 1.44, + "learning_rate": 2.6620628905570964e-05, + "loss": 0.9708, + "step": 2600 + }, + { + "epoch": 1.44, + "learning_rate": 2.6548374221400884e-05, + "loss": 0.9498, + "step": 2605 + }, + { + "epoch": 1.44, + "learning_rate": 2.6476106551212188e-05, + "loss": 0.9512, + "step": 2610 + }, + { + "epoch": 1.45, + "learning_rate": 2.6403826501104682e-05, + "loss": 0.964, + "step": 2615 + }, + { + "epoch": 1.45, + "learning_rate": 2.6331534677281998e-05, + "loss": 0.9321, + "step": 2620 + }, + { + "epoch": 1.45, + "learning_rate": 2.6259231686046508e-05, + "loss": 0.9032, + "step": 2625 + }, + { + "epoch": 1.45, + "learning_rate": 2.6186918133794252e-05, + "loss": 0.9543, + "step": 2630 + }, + { + "epoch": 1.46, + "learning_rate": 2.6114594627009847e-05, + "loss": 0.9355, + "step": 2635 + }, + { + "epoch": 1.46, + "learning_rate": 2.604226177226137e-05, + "loss": 0.9111, + "step": 2640 + }, + { + "epoch": 1.46, + "learning_rate": 2.596992017619534e-05, + "loss": 0.9896, + "step": 2645 + }, + { + "epoch": 1.47, + "learning_rate": 2.589757044553155e-05, + "loss": 0.9715, + "step": 2650 + }, + { + "epoch": 1.47, + "learning_rate": 2.5825213187058045e-05, + "loss": 0.8911, + "step": 2655 + }, + { + "epoch": 1.47, + "learning_rate": 2.5752849007625986e-05, + "loss": 0.9446, + "step": 2660 + }, + { + "epoch": 1.47, + "learning_rate": 2.568047851414459e-05, + "loss": 0.9453, + "step": 2665 + }, + { + "epoch": 1.48, + "learning_rate": 2.5608102313576027e-05, + "loss": 0.9244, + "step": 2670 + }, + { + "epoch": 1.48, + "learning_rate": 2.553572101293033e-05, + "loss": 0.9478, + "step": 2675 + }, + { + "epoch": 1.48, + "learning_rate": 2.546333521926031e-05, + "loss": 0.9708, + "step": 2680 + }, + { + "epoch": 1.48, + "learning_rate": 2.5390945539656445e-05, + "loss": 0.9266, + "step": 2685 + }, + { + "epoch": 1.49, + "learning_rate": 2.5318552581241822e-05, + "loss": 0.9199, + "step": 2690 + }, + { + "epoch": 1.49, + "learning_rate": 2.524615695116702e-05, + "loss": 0.9666, + "step": 2695 + }, + { + "epoch": 1.49, + "learning_rate": 2.5173759256605027e-05, + "loss": 0.9247, + "step": 2700 + }, + { + "epoch": 1.5, + "learning_rate": 2.510136010474614e-05, + "loss": 0.9534, + "step": 2705 + }, + { + "epoch": 1.5, + "learning_rate": 2.5028960102792887e-05, + "loss": 0.9502, + "step": 2710 + }, + { + "epoch": 1.5, + "learning_rate": 2.4971039897207112e-05, + "loss": 0.9331, + "step": 2715 + }, + { + "epoch": 1.5, + "learning_rate": 2.4898639895253865e-05, + "loss": 0.9806, + "step": 2720 + }, + { + "epoch": 1.51, + "learning_rate": 2.4826240743394982e-05, + "loss": 0.9525, + "step": 2725 + }, + { + "epoch": 1.51, + "learning_rate": 2.4753843048832985e-05, + "loss": 0.937, + "step": 2730 + }, + { + "epoch": 1.51, + "learning_rate": 2.4681447418758187e-05, + "loss": 0.935, + "step": 2735 + }, + { + "epoch": 1.52, + "learning_rate": 2.460905446034356e-05, + "loss": 0.9662, + "step": 2740 + }, + { + "epoch": 1.52, + "learning_rate": 2.45366647807397e-05, + "loss": 0.9249, + "step": 2745 + }, + { + "epoch": 1.52, + "learning_rate": 2.446427898706967e-05, + "loss": 0.9673, + "step": 2750 + }, + { + "epoch": 1.52, + "learning_rate": 2.439189768642398e-05, + "loss": 0.9316, + "step": 2755 + }, + { + "epoch": 1.53, + "learning_rate": 2.431952148585541e-05, + "loss": 0.9297, + "step": 2760 + }, + { + "epoch": 1.53, + "learning_rate": 2.424715099237402e-05, + "loss": 0.9278, + "step": 2765 + }, + { + "epoch": 1.53, + "learning_rate": 2.4174786812941968e-05, + "loss": 0.8954, + "step": 2770 + }, + { + "epoch": 1.53, + "learning_rate": 2.4102429554468456e-05, + "loss": 0.9586, + "step": 2775 + }, + { + "epoch": 1.54, + "learning_rate": 2.4030079823804673e-05, + "loss": 0.9119, + "step": 2780 + }, + { + "epoch": 1.54, + "learning_rate": 2.395773822773863e-05, + "loss": 0.8949, + "step": 2785 + }, + { + "epoch": 1.54, + "learning_rate": 2.3885405372990166e-05, + "loss": 0.9506, + "step": 2790 + }, + { + "epoch": 1.55, + "learning_rate": 2.3813081866205754e-05, + "loss": 0.9087, + "step": 2795 + }, + { + "epoch": 1.55, + "learning_rate": 2.3740768313953494e-05, + "loss": 0.923, + "step": 2800 + }, + { + "epoch": 1.55, + "learning_rate": 2.3668465322718004e-05, + "loss": 1.0212, + "step": 2805 + }, + { + "epoch": 1.55, + "learning_rate": 2.359617349889532e-05, + "loss": 0.9182, + "step": 2810 + }, + { + "epoch": 1.56, + "learning_rate": 2.3523893448787818e-05, + "loss": 0.9316, + "step": 2815 + }, + { + "epoch": 1.56, + "learning_rate": 2.3451625778599122e-05, + "loss": 0.94, + "step": 2820 + }, + { + "epoch": 1.56, + "learning_rate": 2.3379371094429038e-05, + "loss": 0.9181, + "step": 2825 + }, + { + "epoch": 1.56, + "learning_rate": 2.3307130002268457e-05, + "loss": 0.937, + "step": 2830 + }, + { + "epoch": 1.57, + "learning_rate": 2.3234903107994287e-05, + "loss": 0.9026, + "step": 2835 + }, + { + "epoch": 1.57, + "learning_rate": 2.3162691017364317e-05, + "loss": 0.954, + "step": 2840 + }, + { + "epoch": 1.57, + "learning_rate": 2.3090494336012253e-05, + "loss": 0.9661, + "step": 2845 + }, + { + "epoch": 1.58, + "learning_rate": 2.3018313669442483e-05, + "loss": 0.9127, + "step": 2850 + }, + { + "epoch": 1.58, + "learning_rate": 2.2946149623025158e-05, + "loss": 0.9317, + "step": 2855 + }, + { + "epoch": 1.58, + "learning_rate": 2.2874002801990978e-05, + "loss": 0.9856, + "step": 2860 + }, + { + "epoch": 1.58, + "learning_rate": 2.28018738114262e-05, + "loss": 1.0021, + "step": 2865 + }, + { + "epoch": 1.59, + "learning_rate": 2.272976325626755e-05, + "loss": 0.9655, + "step": 2870 + }, + { + "epoch": 1.59, + "learning_rate": 2.265767174129711e-05, + "loss": 0.9619, + "step": 2875 + }, + { + "epoch": 1.59, + "learning_rate": 2.2585599871137313e-05, + "loss": 0.9383, + "step": 2880 + }, + { + "epoch": 1.6, + "learning_rate": 2.251354825024579e-05, + "loss": 0.9332, + "step": 2885 + }, + { + "epoch": 1.6, + "learning_rate": 2.244151748291039e-05, + "loss": 0.9544, + "step": 2890 + }, + { + "epoch": 1.6, + "learning_rate": 2.236950817324401e-05, + "loss": 0.9343, + "step": 2895 + }, + { + "epoch": 1.6, + "learning_rate": 2.2297520925179647e-05, + "loss": 0.9189, + "step": 2900 + }, + { + "epoch": 1.61, + "learning_rate": 2.222555634246521e-05, + "loss": 0.9341, + "step": 2905 + }, + { + "epoch": 1.61, + "learning_rate": 2.215361502865858e-05, + "loss": 0.9567, + "step": 2910 + }, + { + "epoch": 1.61, + "learning_rate": 2.2081697587122423e-05, + "loss": 0.9047, + "step": 2915 + }, + { + "epoch": 1.61, + "learning_rate": 2.200980462101922e-05, + "loss": 0.9126, + "step": 2920 + }, + { + "epoch": 1.62, + "learning_rate": 2.1937936733306195e-05, + "loss": 0.9523, + "step": 2925 + }, + { + "epoch": 1.62, + "learning_rate": 2.18660945267302e-05, + "loss": 0.8802, + "step": 2930 + }, + { + "epoch": 1.62, + "learning_rate": 2.179427860382276e-05, + "loss": 0.9197, + "step": 2935 + }, + { + "epoch": 1.63, + "learning_rate": 2.1722489566894903e-05, + "loss": 0.9255, + "step": 2940 + }, + { + "epoch": 1.63, + "learning_rate": 2.1650728018032206e-05, + "loss": 0.8921, + "step": 2945 + }, + { + "epoch": 1.63, + "learning_rate": 2.15789945590897e-05, + "loss": 0.9607, + "step": 2950 + }, + { + "epoch": 1.63, + "learning_rate": 2.150728979168683e-05, + "loss": 0.9755, + "step": 2955 + }, + { + "epoch": 1.64, + "learning_rate": 2.1435614317202384e-05, + "loss": 0.9943, + "step": 2960 + }, + { + "epoch": 1.64, + "learning_rate": 2.1363968736769508e-05, + "loss": 0.9463, + "step": 2965 + }, + { + "epoch": 1.64, + "learning_rate": 2.1292353651270617e-05, + "loss": 0.9107, + "step": 2970 + }, + { + "epoch": 1.65, + "learning_rate": 2.1220769661332365e-05, + "loss": 0.9311, + "step": 2975 + }, + { + "epoch": 1.65, + "learning_rate": 2.1149217367320622e-05, + "loss": 0.9459, + "step": 2980 + }, + { + "epoch": 1.65, + "learning_rate": 2.107769736933541e-05, + "loss": 0.9439, + "step": 2985 + }, + { + "epoch": 1.65, + "learning_rate": 2.100621026720591e-05, + "loss": 0.9719, + "step": 2990 + }, + { + "epoch": 1.66, + "learning_rate": 2.093475666048539e-05, + "loss": 0.9569, + "step": 2995 + }, + { + "epoch": 1.66, + "learning_rate": 2.0863337148446222e-05, + "loss": 0.9308, + "step": 3000 + }, + { + "epoch": 1.66, + "learning_rate": 2.07919523300748e-05, + "loss": 0.9269, + "step": 3005 + }, + { + "epoch": 1.66, + "learning_rate": 2.0720602804066552e-05, + "loss": 0.9358, + "step": 3010 + }, + { + "epoch": 1.67, + "learning_rate": 2.0649289168820943e-05, + "loss": 0.9291, + "step": 3015 + }, + { + "epoch": 1.67, + "learning_rate": 2.0578012022436386e-05, + "loss": 0.969, + "step": 3020 + }, + { + "epoch": 1.67, + "learning_rate": 2.0506771962705304e-05, + "loss": 0.9319, + "step": 3025 + }, + { + "epoch": 1.68, + "learning_rate": 2.0435569587109042e-05, + "loss": 0.9574, + "step": 3030 + }, + { + "epoch": 1.68, + "learning_rate": 2.036440549281293e-05, + "loss": 0.9654, + "step": 3035 + }, + { + "epoch": 1.68, + "learning_rate": 2.0293280276661204e-05, + "loss": 0.9742, + "step": 3040 + }, + { + "epoch": 1.68, + "learning_rate": 2.0222194535172067e-05, + "loss": 0.9014, + "step": 3045 + }, + { + "epoch": 1.69, + "learning_rate": 2.0151148864532623e-05, + "loss": 0.9246, + "step": 3050 + }, + { + "epoch": 1.69, + "learning_rate": 2.0080143860593913e-05, + "loss": 0.9522, + "step": 3055 + }, + { + "epoch": 1.69, + "learning_rate": 2.0009180118865933e-05, + "loss": 0.9567, + "step": 3060 + }, + { + "epoch": 1.69, + "learning_rate": 1.9938258234512588e-05, + "loss": 0.983, + "step": 3065 + }, + { + "epoch": 1.7, + "learning_rate": 1.9867378802346764e-05, + "loss": 0.8722, + "step": 3070 + }, + { + "epoch": 1.7, + "learning_rate": 1.979654241682527e-05, + "loss": 0.9122, + "step": 3075 + }, + { + "epoch": 1.7, + "learning_rate": 1.972574967204391e-05, + "loss": 0.9362, + "step": 3080 + }, + { + "epoch": 1.71, + "learning_rate": 1.9655001161732478e-05, + "loss": 0.8944, + "step": 3085 + }, + { + "epoch": 1.71, + "learning_rate": 1.9584297479249774e-05, + "loss": 0.9329, + "step": 3090 + }, + { + "epoch": 1.71, + "learning_rate": 1.9513639217578636e-05, + "loss": 0.9707, + "step": 3095 + }, + { + "epoch": 1.71, + "learning_rate": 1.9443026969320955e-05, + "loss": 0.9367, + "step": 3100 + }, + { + "epoch": 1.72, + "learning_rate": 1.937246132669272e-05, + "loss": 0.896, + "step": 3105 + }, + { + "epoch": 1.72, + "learning_rate": 1.9301942881519047e-05, + "loss": 0.97, + "step": 3110 + }, + { + "epoch": 1.72, + "learning_rate": 1.9231472225229216e-05, + "loss": 0.9638, + "step": 3115 + }, + { + "epoch": 1.73, + "learning_rate": 1.9161049948851684e-05, + "loss": 0.9561, + "step": 3120 + }, + { + "epoch": 1.73, + "learning_rate": 1.9090676643009193e-05, + "loss": 0.9734, + "step": 3125 + }, + { + "epoch": 1.73, + "learning_rate": 1.902035289791373e-05, + "loss": 0.9651, + "step": 3130 + }, + { + "epoch": 1.73, + "learning_rate": 1.8950079303361658e-05, + "loss": 0.9489, + "step": 3135 + }, + { + "epoch": 1.74, + "learning_rate": 1.8879856448728723e-05, + "loss": 0.9893, + "step": 3140 + }, + { + "epoch": 1.74, + "learning_rate": 1.8809684922965097e-05, + "loss": 0.9549, + "step": 3145 + }, + { + "epoch": 1.74, + "learning_rate": 1.8739565314590507e-05, + "loss": 0.9196, + "step": 3150 + }, + { + "epoch": 1.74, + "learning_rate": 1.8669498211689216e-05, + "loss": 0.9568, + "step": 3155 + }, + { + "epoch": 1.75, + "learning_rate": 1.859948420190517e-05, + "loss": 0.904, + "step": 3160 + }, + { + "epoch": 1.75, + "learning_rate": 1.852952387243698e-05, + "loss": 0.9375, + "step": 3165 + }, + { + "epoch": 1.75, + "learning_rate": 1.8459617810033096e-05, + "loss": 0.9048, + "step": 3170 + }, + { + "epoch": 1.76, + "learning_rate": 1.83897666009868e-05, + "loss": 0.9514, + "step": 3175 + }, + { + "epoch": 1.76, + "learning_rate": 1.8319970831131363e-05, + "loss": 0.8855, + "step": 3180 + }, + { + "epoch": 1.76, + "learning_rate": 1.825023108583505e-05, + "loss": 0.9119, + "step": 3185 + }, + { + "epoch": 1.76, + "learning_rate": 1.818054794999628e-05, + "loss": 0.9733, + "step": 3190 + }, + { + "epoch": 1.77, + "learning_rate": 1.8110922008038705e-05, + "loss": 0.9597, + "step": 3195 + }, + { + "epoch": 1.77, + "learning_rate": 1.8041353843906275e-05, + "loss": 0.8952, + "step": 3200 + }, + { + "epoch": 1.77, + "learning_rate": 1.797184404105839e-05, + "loss": 0.8934, + "step": 3205 + }, + { + "epoch": 1.77, + "learning_rate": 1.7902393182464955e-05, + "loss": 0.9299, + "step": 3210 + }, + { + "epoch": 1.78, + "learning_rate": 1.7833001850601544e-05, + "loss": 0.9247, + "step": 3215 + }, + { + "epoch": 1.78, + "learning_rate": 1.7763670627444465e-05, + "loss": 0.9672, + "step": 3220 + }, + { + "epoch": 1.78, + "learning_rate": 1.7694400094465913e-05, + "loss": 0.9451, + "step": 3225 + }, + { + "epoch": 1.79, + "learning_rate": 1.7625190832629085e-05, + "loss": 0.9294, + "step": 3230 + }, + { + "epoch": 1.79, + "learning_rate": 1.7556043422383293e-05, + "loss": 0.9145, + "step": 3235 + }, + { + "epoch": 1.79, + "learning_rate": 1.7486958443659112e-05, + "loss": 0.9508, + "step": 3240 + }, + { + "epoch": 1.79, + "learning_rate": 1.7417936475863526e-05, + "loss": 0.8725, + "step": 3245 + }, + { + "epoch": 1.8, + "learning_rate": 1.7348978097875036e-05, + "loss": 0.9195, + "step": 3250 + }, + { + "epoch": 1.8, + "learning_rate": 1.728008388803883e-05, + "loss": 0.933, + "step": 3255 + }, + { + "epoch": 1.8, + "learning_rate": 1.7211254424161933e-05, + "loss": 0.9747, + "step": 3260 + }, + { + "epoch": 1.81, + "learning_rate": 1.7142490283508324e-05, + "loss": 0.9168, + "step": 3265 + }, + { + "epoch": 1.81, + "learning_rate": 1.707379204279418e-05, + "loss": 0.9844, + "step": 3270 + }, + { + "epoch": 1.81, + "learning_rate": 1.700516027818293e-05, + "loss": 0.9071, + "step": 3275 + }, + { + "epoch": 1.81, + "learning_rate": 1.6936595565280488e-05, + "loss": 0.9311, + "step": 3280 + }, + { + "epoch": 1.82, + "learning_rate": 1.686809847913045e-05, + "loss": 0.9419, + "step": 3285 + }, + { + "epoch": 1.82, + "learning_rate": 1.679966959420918e-05, + "loss": 0.9679, + "step": 3290 + }, + { + "epoch": 1.82, + "learning_rate": 1.67313094844211e-05, + "loss": 0.9601, + "step": 3295 + }, + { + "epoch": 1.82, + "learning_rate": 1.6663018723093774e-05, + "loss": 0.9022, + "step": 3300 + }, + { + "epoch": 1.83, + "learning_rate": 1.6594797882973196e-05, + "loss": 0.8925, + "step": 3305 + }, + { + "epoch": 1.83, + "learning_rate": 1.6526647536218894e-05, + "loss": 0.9238, + "step": 3310 + }, + { + "epoch": 1.83, + "learning_rate": 1.6458568254399225e-05, + "loss": 0.9632, + "step": 3315 + }, + { + "epoch": 1.84, + "learning_rate": 1.6390560608486496e-05, + "loss": 0.9164, + "step": 3320 + }, + { + "epoch": 1.84, + "learning_rate": 1.6322625168852217e-05, + "loss": 0.9505, + "step": 3325 + }, + { + "epoch": 1.84, + "learning_rate": 1.6254762505262338e-05, + "loss": 0.9622, + "step": 3330 + }, + { + "epoch": 1.84, + "learning_rate": 1.618697318687241e-05, + "loss": 0.9204, + "step": 3335 + }, + { + "epoch": 1.85, + "learning_rate": 1.6119257782222895e-05, + "loss": 0.9504, + "step": 3340 + }, + { + "epoch": 1.85, + "learning_rate": 1.6051616859234285e-05, + "loss": 0.9384, + "step": 3345 + }, + { + "epoch": 1.85, + "learning_rate": 1.5984050985202474e-05, + "loss": 0.9374, + "step": 3350 + }, + { + "epoch": 1.86, + "learning_rate": 1.591656072679387e-05, + "loss": 0.9937, + "step": 3355 + }, + { + "epoch": 1.86, + "learning_rate": 1.5849146650040737e-05, + "loss": 0.9587, + "step": 3360 + }, + { + "epoch": 1.86, + "learning_rate": 1.5781809320336412e-05, + "loss": 0.9312, + "step": 3365 + }, + { + "epoch": 1.86, + "learning_rate": 1.5714549302430536e-05, + "loss": 0.9343, + "step": 3370 + }, + { + "epoch": 1.87, + "learning_rate": 1.5647367160424393e-05, + "loss": 0.9419, + "step": 3375 + }, + { + "epoch": 1.87, + "learning_rate": 1.558026345776608e-05, + "loss": 0.9371, + "step": 3380 + }, + { + "epoch": 1.87, + "learning_rate": 1.551323875724587e-05, + "loss": 0.913, + "step": 3385 + }, + { + "epoch": 1.87, + "learning_rate": 1.5446293620991437e-05, + "loss": 0.9195, + "step": 3390 + }, + { + "epoch": 1.88, + "learning_rate": 1.5379428610463174e-05, + "loss": 0.9164, + "step": 3395 + }, + { + "epoch": 1.88, + "learning_rate": 1.531264428644945e-05, + "loss": 0.9256, + "step": 3400 + }, + { + "epoch": 1.88, + "learning_rate": 1.5245941209061953e-05, + "loss": 0.9658, + "step": 3405 + }, + { + "epoch": 1.89, + "learning_rate": 1.517931993773094e-05, + "loss": 0.9397, + "step": 3410 + }, + { + "epoch": 1.89, + "learning_rate": 1.5112781031200569e-05, + "loss": 0.915, + "step": 3415 + }, + { + "epoch": 1.89, + "learning_rate": 1.5046325047524251e-05, + "loss": 0.9116, + "step": 3420 + }, + { + "epoch": 1.89, + "learning_rate": 1.4979952544059888e-05, + "loss": 0.9192, + "step": 3425 + }, + { + "epoch": 1.9, + "learning_rate": 1.4913664077465289e-05, + "loss": 0.924, + "step": 3430 + }, + { + "epoch": 1.9, + "learning_rate": 1.4847460203693408e-05, + "loss": 0.9579, + "step": 3435 + }, + { + "epoch": 1.9, + "learning_rate": 1.4781341477987776e-05, + "loss": 0.9816, + "step": 3440 + }, + { + "epoch": 1.9, + "learning_rate": 1.4715308454877758e-05, + "loss": 0.9412, + "step": 3445 + }, + { + "epoch": 1.91, + "learning_rate": 1.4649361688173979e-05, + "loss": 0.9413, + "step": 3450 + }, + { + "epoch": 1.91, + "learning_rate": 1.458350173096361e-05, + "loss": 0.9425, + "step": 3455 + }, + { + "epoch": 1.91, + "learning_rate": 1.4517729135605795e-05, + "loss": 0.9132, + "step": 3460 + }, + { + "epoch": 1.92, + "learning_rate": 1.4452044453726942e-05, + "loss": 0.9455, + "step": 3465 + }, + { + "epoch": 1.92, + "learning_rate": 1.4386448236216174e-05, + "loss": 0.9543, + "step": 3470 + }, + { + "epoch": 1.92, + "learning_rate": 1.4320941033220667e-05, + "loss": 0.9118, + "step": 3475 + }, + { + "epoch": 1.92, + "learning_rate": 1.4255523394141041e-05, + "loss": 0.9425, + "step": 3480 + }, + { + "epoch": 1.93, + "learning_rate": 1.4190195867626749e-05, + "loss": 0.8958, + "step": 3485 + }, + { + "epoch": 1.93, + "learning_rate": 1.4124959001571497e-05, + "loss": 0.944, + "step": 3490 + }, + { + "epoch": 1.93, + "learning_rate": 1.4059813343108616e-05, + "loss": 0.9611, + "step": 3495 + }, + { + "epoch": 1.94, + "learning_rate": 1.3994759438606501e-05, + "loss": 0.9448, + "step": 3500 + }, + { + "epoch": 1.94, + "learning_rate": 1.3929797833664013e-05, + "loss": 0.9659, + "step": 3505 + }, + { + "epoch": 1.94, + "learning_rate": 1.3864929073105922e-05, + "loss": 0.9178, + "step": 3510 + }, + { + "epoch": 1.94, + "learning_rate": 1.3800153700978282e-05, + "loss": 0.8965, + "step": 3515 + }, + { + "epoch": 1.95, + "learning_rate": 1.373547226054398e-05, + "loss": 0.9198, + "step": 3520 + }, + { + "epoch": 1.95, + "learning_rate": 1.367088529427803e-05, + "loss": 0.9398, + "step": 3525 + }, + { + "epoch": 1.95, + "learning_rate": 1.3606393343863182e-05, + "loss": 0.9423, + "step": 3530 + }, + { + "epoch": 1.95, + "learning_rate": 1.3541996950185227e-05, + "loss": 0.9592, + "step": 3535 + }, + { + "epoch": 1.96, + "learning_rate": 1.3477696653328598e-05, + "loss": 0.9489, + "step": 3540 + }, + { + "epoch": 1.96, + "learning_rate": 1.3413492992571713e-05, + "loss": 0.963, + "step": 3545 + }, + { + "epoch": 1.96, + "learning_rate": 1.3349386506382586e-05, + "loss": 0.9449, + "step": 3550 + }, + { + "epoch": 1.97, + "learning_rate": 1.3285377732414172e-05, + "loss": 0.9043, + "step": 3555 + }, + { + "epoch": 1.97, + "learning_rate": 1.3221467207499972e-05, + "loss": 0.9362, + "step": 3560 + }, + { + "epoch": 1.97, + "learning_rate": 1.3157655467649463e-05, + "loss": 0.945, + "step": 3565 + }, + { + "epoch": 1.97, + "learning_rate": 1.3093943048043634e-05, + "loss": 0.8693, + "step": 3570 + }, + { + "epoch": 1.98, + "learning_rate": 1.3030330483030479e-05, + "loss": 0.9659, + "step": 3575 + }, + { + "epoch": 1.98, + "learning_rate": 1.2966818306120535e-05, + "loss": 0.9036, + "step": 3580 + }, + { + "epoch": 1.98, + "learning_rate": 1.2903407049982386e-05, + "loss": 0.9282, + "step": 3585 + }, + { + "epoch": 1.99, + "learning_rate": 1.2840097246438215e-05, + "loss": 0.8747, + "step": 3590 + }, + { + "epoch": 1.99, + "learning_rate": 1.277688942645934e-05, + "loss": 0.936, + "step": 3595 + }, + { + "epoch": 1.99, + "learning_rate": 1.2713784120161725e-05, + "loss": 0.8945, + "step": 3600 + }, + { + "epoch": 1.99, + "learning_rate": 1.2650781856801598e-05, + "loss": 0.9273, + "step": 3605 + }, + { + "epoch": 2.0, + "learning_rate": 1.258788316477097e-05, + "loss": 0.9632, + "step": 3610 + }, + { + "epoch": 2.0, + "learning_rate": 1.2525088571593202e-05, + "loss": 0.8641, + "step": 3615 + }, + { + "epoch": 2.0, + "learning_rate": 1.2462398603918607e-05, + "loss": 0.9314, + "step": 3620 + }, + { + "epoch": 2.0, + "learning_rate": 1.2399813787520006e-05, + "loss": 0.9169, + "step": 3625 + }, + { + "epoch": 2.01, + "learning_rate": 1.2337334647288334e-05, + "loss": 0.9089, + "step": 3630 + }, + { + "epoch": 2.01, + "learning_rate": 1.2274961707228228e-05, + "loss": 0.9039, + "step": 3635 + }, + { + "epoch": 2.01, + "learning_rate": 1.2212695490453646e-05, + "loss": 0.9378, + "step": 3640 + }, + { + "epoch": 2.02, + "learning_rate": 1.2150536519183475e-05, + "loss": 0.9297, + "step": 3645 + }, + { + "epoch": 2.02, + "learning_rate": 1.2088485314737108e-05, + "loss": 0.9488, + "step": 3650 + }, + { + "epoch": 2.02, + "learning_rate": 1.2026542397530186e-05, + "loss": 0.9625, + "step": 3655 + }, + { + "epoch": 2.02, + "learning_rate": 1.1964708287070073e-05, + "loss": 0.8874, + "step": 3660 + }, + { + "epoch": 2.03, + "learning_rate": 1.1902983501951666e-05, + "loss": 0.9224, + "step": 3665 + }, + { + "epoch": 2.03, + "learning_rate": 1.1841368559852892e-05, + "loss": 0.9442, + "step": 3670 + }, + { + "epoch": 2.03, + "learning_rate": 1.17798639775305e-05, + "loss": 0.8688, + "step": 3675 + }, + { + "epoch": 2.03, + "learning_rate": 1.1718470270815608e-05, + "loss": 0.8912, + "step": 3680 + }, + { + "epoch": 2.04, + "learning_rate": 1.1657187954609496e-05, + "loss": 0.9131, + "step": 3685 + }, + { + "epoch": 2.04, + "learning_rate": 1.1596017542879168e-05, + "loss": 0.9827, + "step": 3690 + }, + { + "epoch": 2.04, + "learning_rate": 1.1534959548653132e-05, + "loss": 0.8588, + "step": 3695 + }, + { + "epoch": 2.05, + "learning_rate": 1.147401448401706e-05, + "loss": 0.9359, + "step": 3700 + }, + { + "epoch": 2.05, + "learning_rate": 1.1413182860109491e-05, + "loss": 0.9375, + "step": 3705 + }, + { + "epoch": 2.05, + "learning_rate": 1.1352465187117562e-05, + "loss": 0.9242, + "step": 3710 + }, + { + "epoch": 2.05, + "learning_rate": 1.1291861974272703e-05, + "loss": 0.9132, + "step": 3715 + }, + { + "epoch": 2.06, + "learning_rate": 1.1231373729846393e-05, + "loss": 0.9142, + "step": 3720 + }, + { + "epoch": 2.06, + "learning_rate": 1.1171000961145883e-05, + "loss": 0.9132, + "step": 3725 + }, + { + "epoch": 2.06, + "learning_rate": 1.1110744174509952e-05, + "loss": 0.9603, + "step": 3730 + }, + { + "epoch": 2.07, + "learning_rate": 1.1050603875304622e-05, + "loss": 0.9442, + "step": 3735 + }, + { + "epoch": 2.07, + "learning_rate": 1.0990580567918979e-05, + "loss": 0.9392, + "step": 3740 + }, + { + "epoch": 2.07, + "learning_rate": 1.0930674755760908e-05, + "loss": 0.9203, + "step": 3745 + }, + { + "epoch": 2.07, + "learning_rate": 1.0870886941252872e-05, + "loss": 0.9468, + "step": 3750 + }, + { + "epoch": 2.08, + "learning_rate": 1.0811217625827705e-05, + "loss": 0.9605, + "step": 3755 + }, + { + "epoch": 2.08, + "learning_rate": 1.0751667309924399e-05, + "loss": 0.9166, + "step": 3760 + }, + { + "epoch": 2.08, + "learning_rate": 1.0692236492983918e-05, + "loss": 0.939, + "step": 3765 + }, + { + "epoch": 2.08, + "learning_rate": 1.0632925673445001e-05, + "loss": 0.9409, + "step": 3770 + }, + { + "epoch": 2.09, + "learning_rate": 1.0573735348739987e-05, + "loss": 0.9523, + "step": 3775 + }, + { + "epoch": 2.09, + "learning_rate": 1.0514666015290645e-05, + "loss": 0.8867, + "step": 3780 + }, + { + "epoch": 2.09, + "learning_rate": 1.0455718168503967e-05, + "loss": 0.9816, + "step": 3785 + }, + { + "epoch": 2.1, + "learning_rate": 1.0396892302768127e-05, + "loss": 0.9465, + "step": 3790 + }, + { + "epoch": 2.1, + "learning_rate": 1.033818891144817e-05, + "loss": 0.9352, + "step": 3795 + }, + { + "epoch": 2.1, + "learning_rate": 1.0279608486882054e-05, + "loss": 0.8877, + "step": 3800 + }, + { + "epoch": 2.1, + "learning_rate": 1.0221151520376343e-05, + "loss": 0.9098, + "step": 3805 + }, + { + "epoch": 2.11, + "learning_rate": 1.0162818502202251e-05, + "loss": 0.8687, + "step": 3810 + }, + { + "epoch": 2.11, + "learning_rate": 1.0104609921591387e-05, + "loss": 0.9256, + "step": 3815 + }, + { + "epoch": 2.11, + "learning_rate": 1.0046526266731782e-05, + "loss": 0.92, + "step": 3820 + }, + { + "epoch": 2.12, + "learning_rate": 9.988568024763673e-06, + "loss": 0.8813, + "step": 3825 + }, + { + "epoch": 2.12, + "learning_rate": 9.930735681775505e-06, + "loss": 0.891, + "step": 3830 + }, + { + "epoch": 2.12, + "learning_rate": 9.87302972279982e-06, + "loss": 0.9479, + "step": 3835 + }, + { + "epoch": 2.12, + "learning_rate": 9.815450631809191e-06, + "loss": 0.8963, + "step": 3840 + }, + { + "epoch": 2.13, + "learning_rate": 9.757998891712172e-06, + "loss": 0.9178, + "step": 3845 + }, + { + "epoch": 2.13, + "learning_rate": 9.700674984349228e-06, + "loss": 0.9072, + "step": 3850 + }, + { + "epoch": 2.13, + "learning_rate": 9.643479390488717e-06, + "loss": 0.8879, + "step": 3855 + }, + { + "epoch": 2.13, + "learning_rate": 9.586412589822846e-06, + "loss": 0.9484, + "step": 3860 + }, + { + "epoch": 2.14, + "learning_rate": 9.529475060963649e-06, + "loss": 0.9173, + "step": 3865 + }, + { + "epoch": 2.14, + "learning_rate": 9.472667281438982e-06, + "loss": 0.9318, + "step": 3870 + }, + { + "epoch": 2.14, + "learning_rate": 9.415989727688484e-06, + "loss": 0.9242, + "step": 3875 + }, + { + "epoch": 2.15, + "learning_rate": 9.359442875059631e-06, + "loss": 0.9314, + "step": 3880 + }, + { + "epoch": 2.15, + "learning_rate": 9.303027197803726e-06, + "loss": 0.9227, + "step": 3885 + }, + { + "epoch": 2.15, + "learning_rate": 9.246743169071906e-06, + "loss": 0.9253, + "step": 3890 + }, + { + "epoch": 2.15, + "learning_rate": 9.190591260911201e-06, + "loss": 0.9457, + "step": 3895 + }, + { + "epoch": 2.16, + "learning_rate": 9.134571944260554e-06, + "loss": 0.9214, + "step": 3900 + }, + { + "epoch": 2.16, + "learning_rate": 9.078685688946884e-06, + "loss": 0.9534, + "step": 3905 + }, + { + "epoch": 2.16, + "learning_rate": 9.022932963681141e-06, + "loss": 0.9236, + "step": 3910 + }, + { + "epoch": 2.16, + "learning_rate": 8.967314236054384e-06, + "loss": 0.8905, + "step": 3915 + }, + { + "epoch": 2.17, + "learning_rate": 8.911829972533817e-06, + "loss": 0.9319, + "step": 3920 + }, + { + "epoch": 2.17, + "learning_rate": 8.856480638458966e-06, + "loss": 0.8989, + "step": 3925 + }, + { + "epoch": 2.17, + "learning_rate": 8.80126669803766e-06, + "loss": 0.9234, + "step": 3930 + }, + { + "epoch": 2.18, + "learning_rate": 8.746188614342263e-06, + "loss": 0.9619, + "step": 3935 + }, + { + "epoch": 2.18, + "learning_rate": 8.691246849305653e-06, + "loss": 0.9537, + "step": 3940 + }, + { + "epoch": 2.18, + "learning_rate": 8.636441863717499e-06, + "loss": 0.9809, + "step": 3945 + }, + { + "epoch": 2.18, + "learning_rate": 8.581774117220238e-06, + "loss": 0.9219, + "step": 3950 + }, + { + "epoch": 2.19, + "learning_rate": 8.52724406830538e-06, + "loss": 0.9502, + "step": 3955 + }, + { + "epoch": 2.19, + "learning_rate": 8.472852174309514e-06, + "loss": 0.9277, + "step": 3960 + }, + { + "epoch": 2.19, + "learning_rate": 8.418598891410581e-06, + "loss": 0.8978, + "step": 3965 + }, + { + "epoch": 2.2, + "learning_rate": 8.364484674624e-06, + "loss": 0.9026, + "step": 3970 + }, + { + "epoch": 2.2, + "learning_rate": 8.31050997779885e-06, + "loss": 0.9658, + "step": 3975 + }, + { + "epoch": 2.2, + "learning_rate": 8.25667525361409e-06, + "loss": 0.9288, + "step": 3980 + }, + { + "epoch": 2.2, + "learning_rate": 8.202980953574735e-06, + "loss": 0.9009, + "step": 3985 + }, + { + "epoch": 2.21, + "learning_rate": 8.14942752800808e-06, + "loss": 0.934, + "step": 3990 + }, + { + "epoch": 2.21, + "learning_rate": 8.09601542605993e-06, + "loss": 0.9261, + "step": 3995 + }, + { + "epoch": 2.21, + "learning_rate": 8.042745095690826e-06, + "loss": 0.9086, + "step": 4000 + } + ], + "max_steps": 5424, + "num_train_epochs": 3, + "total_flos": 1.2998406260863795e+18, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-4000/training_args.bin b/checkpoint-4000/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..77433459daca6603950afc463ff989e817f81ac3 --- /dev/null +++ b/checkpoint-4000/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0f6a8853599a52b1f15984d75ce4b6434c335fdc67d4509d0b2ca0d8c613590a +size 3362 diff --git a/checkpoint-4100/README.md b/checkpoint-4100/README.md new file mode 100644 index 0000000000000000000000000000000000000000..53c0fe6b436905ad7a2c060e98fe1ebdeac1a2fe --- /dev/null +++ b/checkpoint-4100/README.md @@ -0,0 +1,15 @@ +--- +library_name: peft +--- +## Training procedure + + +The following `bitsandbytes` quantization config was used during training: +- load_in_8bit: True +- llm_int8_threshold: 6.0 +- llm_int8_skip_modules: None +- llm_int8_enable_fp32_cpu_offload: False +### Framework versions + + +- PEFT 0.5.0 diff --git a/checkpoint-4100/adapter_config.json b/checkpoint-4100/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..514ae99ac7471cc26016f7575e44dd8304f29b07 --- /dev/null +++ b/checkpoint-4100/adapter_config.json @@ -0,0 +1,21 @@ +{ + "auto_mapping": null, + "base_model_name_or_path": "meta-llama/Llama-2-7b-chat-hf", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "q_proj", + "v_proj" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-4100/adapter_model.bin b/checkpoint-4100/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..1aae0b696d51184efe0a54c7830999857a56343a --- /dev/null +++ b/checkpoint-4100/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:33757f3143395951b4a168f6021ec07fa717ba63bcfd9bee417cf20715dfccb7 +size 16821197 diff --git a/checkpoint-4100/finetuning_args.json b/checkpoint-4100/finetuning_args.json new file mode 100644 index 0000000000000000000000000000000000000000..d01efc206b59c6f88548e8f3940579f2ed2af33b --- /dev/null +++ b/checkpoint-4100/finetuning_args.json @@ -0,0 +1,16 @@ +{ + "dpo_beta": 0.1, + "finetuning_type": "lora", + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "lora_rank": 8, + "lora_target": [ + "q_proj", + "v_proj" + ], + "name_module_trainable": "mlp", + "num_hidden_layers": 32, + "num_layer_trainable": 3, + "ppo_score_norm": false, + "resume_lora_training": true +} diff --git a/checkpoint-4100/optimizer.pt b/checkpoint-4100/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..9a8c2f6150adba7c7b04b1c469026d4c8c174b75 --- /dev/null +++ b/checkpoint-4100/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be4825fab26a90ff49bcf1eba72584e9e1c0e3f4ff93141048af04628adbc60c +size 33629893 diff --git a/checkpoint-4100/rng_state.pth b/checkpoint-4100/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..e8d58bbeabb690edb01ccfb4da3ff317bac47f09 --- /dev/null +++ b/checkpoint-4100/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ebe621811d43929d6ec65a1d388b467985dbbc23ca26dcd2e8af739ebd96ed38 +size 14575 diff --git a/checkpoint-4100/scaler.pt b/checkpoint-4100/scaler.pt new file mode 100644 index 0000000000000000000000000000000000000000..51ce75a91b6fae90013a6b1a06b67aba2cba5ed4 --- /dev/null +++ b/checkpoint-4100/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8b64e0782f4a599c1925201bfe938f8bc884cb767ac31445b12b941e10144457 +size 557 diff --git a/checkpoint-4100/scheduler.pt b/checkpoint-4100/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..16a3acc67b26272e6091ce2aacddb7024bb3fcf8 --- /dev/null +++ b/checkpoint-4100/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:60fd02047dedca4042af01c0ea33516daff59f4876e89948309ba64b6c9ca771 +size 627 diff --git a/checkpoint-4100/trainer_state.json b/checkpoint-4100/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..a158a4148cf533cf8d8de389c9ac80ff74b9f761 --- /dev/null +++ b/checkpoint-4100/trainer_state.json @@ -0,0 +1,4936 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 2.2670721592479954, + "global_step": 4100, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.9999895164082156e-05, + "loss": 1.6349, + "step": 5 + }, + { + "epoch": 0.01, + "learning_rate": 4.999958065720787e-05, + "loss": 1.6199, + "step": 10 + }, + { + "epoch": 0.01, + "learning_rate": 4.999905648201487e-05, + "loss": 1.4834, + "step": 15 + }, + { + "epoch": 0.01, + "learning_rate": 4.999832264289934e-05, + "loss": 1.3882, + "step": 20 + }, + { + "epoch": 0.01, + "learning_rate": 4.999737914601591e-05, + "loss": 1.3679, + "step": 25 + }, + { + "epoch": 0.02, + "learning_rate": 4.999622599927756e-05, + "loss": 1.2396, + "step": 30 + }, + { + "epoch": 0.02, + "learning_rate": 4.999486321235559e-05, + "loss": 1.321, + "step": 35 + }, + { + "epoch": 0.02, + "learning_rate": 4.9993290796679516e-05, + "loss": 1.2874, + "step": 40 + }, + { + "epoch": 0.02, + "learning_rate": 4.999150876543699e-05, + "loss": 1.2607, + "step": 45 + }, + { + "epoch": 0.03, + "learning_rate": 4.9989517133573694e-05, + "loss": 1.2454, + "step": 50 + }, + { + "epoch": 0.03, + "learning_rate": 4.9987315917793174e-05, + "loss": 1.2799, + "step": 55 + }, + { + "epoch": 0.03, + "learning_rate": 4.998490513655676e-05, + "loss": 1.2575, + "step": 60 + }, + { + "epoch": 0.04, + "learning_rate": 4.998228481008337e-05, + "loss": 1.2404, + "step": 65 + }, + { + "epoch": 0.04, + "learning_rate": 4.997945496034934e-05, + "loss": 1.2219, + "step": 70 + }, + { + "epoch": 0.04, + "learning_rate": 4.9976415611088267e-05, + "loss": 1.2241, + "step": 75 + }, + { + "epoch": 0.04, + "learning_rate": 4.997316678779079e-05, + "loss": 1.1716, + "step": 80 + }, + { + "epoch": 0.05, + "learning_rate": 4.996970851770438e-05, + "loss": 1.1883, + "step": 85 + }, + { + "epoch": 0.05, + "learning_rate": 4.9966040829833115e-05, + "loss": 1.205, + "step": 90 + }, + { + "epoch": 0.05, + "learning_rate": 4.9962163754937426e-05, + "loss": 1.1246, + "step": 95 + }, + { + "epoch": 0.06, + "learning_rate": 4.995807732553384e-05, + "loss": 1.1636, + "step": 100 + }, + { + "epoch": 0.06, + "learning_rate": 4.9953781575894723e-05, + "loss": 1.158, + "step": 105 + }, + { + "epoch": 0.06, + "learning_rate": 4.9949276542048e-05, + "loss": 1.1477, + "step": 110 + }, + { + "epoch": 0.06, + "learning_rate": 4.9944562261776805e-05, + "loss": 1.1678, + "step": 115 + }, + { + "epoch": 0.07, + "learning_rate": 4.9939638774619216e-05, + "loss": 1.1501, + "step": 120 + }, + { + "epoch": 0.07, + "learning_rate": 4.99345061218679e-05, + "loss": 1.1955, + "step": 125 + }, + { + "epoch": 0.07, + "learning_rate": 4.9929164346569756e-05, + "loss": 1.1724, + "step": 130 + }, + { + "epoch": 0.07, + "learning_rate": 4.9923613493525576e-05, + "loss": 1.177, + "step": 135 + }, + { + "epoch": 0.08, + "learning_rate": 4.991785360928968e-05, + "loss": 1.1418, + "step": 140 + }, + { + "epoch": 0.08, + "learning_rate": 4.991188474216947e-05, + "loss": 1.1898, + "step": 145 + }, + { + "epoch": 0.08, + "learning_rate": 4.9905706942225094e-05, + "loss": 1.1479, + "step": 150 + }, + { + "epoch": 0.09, + "learning_rate": 4.9899320261268966e-05, + "loss": 1.1356, + "step": 155 + }, + { + "epoch": 0.09, + "learning_rate": 4.989272475286537e-05, + "loss": 1.1397, + "step": 160 + }, + { + "epoch": 0.09, + "learning_rate": 4.9885920472330004e-05, + "loss": 1.1215, + "step": 165 + }, + { + "epoch": 0.09, + "learning_rate": 4.9878907476729516e-05, + "loss": 1.167, + "step": 170 + }, + { + "epoch": 0.1, + "learning_rate": 4.9871685824881e-05, + "loss": 1.1219, + "step": 175 + }, + { + "epoch": 0.1, + "learning_rate": 4.9864255577351534e-05, + "loss": 1.0835, + "step": 180 + }, + { + "epoch": 0.1, + "learning_rate": 4.985661679645769e-05, + "loss": 1.0721, + "step": 185 + }, + { + "epoch": 0.11, + "learning_rate": 4.9848769546264915e-05, + "loss": 1.0692, + "step": 190 + }, + { + "epoch": 0.11, + "learning_rate": 4.9840713892587146e-05, + "loss": 1.0488, + "step": 195 + }, + { + "epoch": 0.11, + "learning_rate": 4.983244990298609e-05, + "loss": 1.1285, + "step": 200 + }, + { + "epoch": 0.11, + "learning_rate": 4.982397764677081e-05, + "loss": 1.0832, + "step": 205 + }, + { + "epoch": 0.12, + "learning_rate": 4.981529719499704e-05, + "loss": 1.0652, + "step": 210 + }, + { + "epoch": 0.12, + "learning_rate": 4.980640862046663e-05, + "loss": 1.1043, + "step": 215 + }, + { + "epoch": 0.12, + "learning_rate": 4.979731199772693e-05, + "loss": 1.112, + "step": 220 + }, + { + "epoch": 0.12, + "learning_rate": 4.9788007403070146e-05, + "loss": 1.1029, + "step": 225 + }, + { + "epoch": 0.13, + "learning_rate": 4.977849491453277e-05, + "loss": 1.0869, + "step": 230 + }, + { + "epoch": 0.13, + "learning_rate": 4.976877461189481e-05, + "loss": 1.0843, + "step": 235 + }, + { + "epoch": 0.13, + "learning_rate": 4.975884657667922e-05, + "loss": 1.0789, + "step": 240 + }, + { + "epoch": 0.14, + "learning_rate": 4.974871089215118e-05, + "loss": 1.0449, + "step": 245 + }, + { + "epoch": 0.14, + "learning_rate": 4.9738367643317405e-05, + "loss": 1.1053, + "step": 250 + }, + { + "epoch": 0.14, + "learning_rate": 4.9727816916925395e-05, + "loss": 1.0651, + "step": 255 + }, + { + "epoch": 0.14, + "learning_rate": 4.971705880146276e-05, + "loss": 1.0828, + "step": 260 + }, + { + "epoch": 0.15, + "learning_rate": 4.970609338715646e-05, + "loss": 1.0932, + "step": 265 + }, + { + "epoch": 0.15, + "learning_rate": 4.969492076597203e-05, + "loss": 1.0648, + "step": 270 + }, + { + "epoch": 0.15, + "learning_rate": 4.968354103161283e-05, + "loss": 1.0948, + "step": 275 + }, + { + "epoch": 0.15, + "learning_rate": 4.967195427951926e-05, + "loss": 1.0721, + "step": 280 + }, + { + "epoch": 0.16, + "learning_rate": 4.9660160606867936e-05, + "loss": 1.124, + "step": 285 + }, + { + "epoch": 0.16, + "learning_rate": 4.9648160112570896e-05, + "loss": 1.0963, + "step": 290 + }, + { + "epoch": 0.16, + "learning_rate": 4.9635952897274773e-05, + "loss": 1.1078, + "step": 295 + }, + { + "epoch": 0.17, + "learning_rate": 4.9623539063359925e-05, + "loss": 1.1059, + "step": 300 + }, + { + "epoch": 0.17, + "learning_rate": 4.961091871493962e-05, + "loss": 1.1032, + "step": 305 + }, + { + "epoch": 0.17, + "learning_rate": 4.959809195785912e-05, + "loss": 1.0595, + "step": 310 + }, + { + "epoch": 0.17, + "learning_rate": 4.958505889969481e-05, + "loss": 1.1096, + "step": 315 + }, + { + "epoch": 0.18, + "learning_rate": 4.957181964975329e-05, + "loss": 1.0589, + "step": 320 + }, + { + "epoch": 0.18, + "learning_rate": 4.955837431907049e-05, + "loss": 1.0608, + "step": 325 + }, + { + "epoch": 0.18, + "learning_rate": 4.954472302041069e-05, + "loss": 1.0819, + "step": 330 + }, + { + "epoch": 0.19, + "learning_rate": 4.9530865868265605e-05, + "loss": 1.0759, + "step": 335 + }, + { + "epoch": 0.19, + "learning_rate": 4.951680297885342e-05, + "loss": 1.0515, + "step": 340 + }, + { + "epoch": 0.19, + "learning_rate": 4.950253447011779e-05, + "loss": 1.0371, + "step": 345 + }, + { + "epoch": 0.19, + "learning_rate": 4.948806046172691e-05, + "loss": 1.0619, + "step": 350 + }, + { + "epoch": 0.2, + "learning_rate": 4.947338107507245e-05, + "loss": 1.0757, + "step": 355 + }, + { + "epoch": 0.2, + "learning_rate": 4.945849643326857e-05, + "loss": 1.0686, + "step": 360 + }, + { + "epoch": 0.2, + "learning_rate": 4.9443406661150874e-05, + "loss": 1.0809, + "step": 365 + }, + { + "epoch": 0.2, + "learning_rate": 4.942811188527537e-05, + "loss": 1.0704, + "step": 370 + }, + { + "epoch": 0.21, + "learning_rate": 4.941261223391742e-05, + "loss": 1.0655, + "step": 375 + }, + { + "epoch": 0.21, + "learning_rate": 4.939690783707063e-05, + "loss": 1.1182, + "step": 380 + }, + { + "epoch": 0.21, + "learning_rate": 4.938099882644578e-05, + "loss": 1.081, + "step": 385 + }, + { + "epoch": 0.22, + "learning_rate": 4.9364885335469734e-05, + "loss": 1.0792, + "step": 390 + }, + { + "epoch": 0.22, + "learning_rate": 4.93485674992843e-05, + "loss": 1.0244, + "step": 395 + }, + { + "epoch": 0.22, + "learning_rate": 4.933204545474511e-05, + "loss": 1.0531, + "step": 400 + }, + { + "epoch": 0.22, + "learning_rate": 4.9315319340420465e-05, + "loss": 1.048, + "step": 405 + }, + { + "epoch": 0.23, + "learning_rate": 4.929838929659015e-05, + "loss": 1.04, + "step": 410 + }, + { + "epoch": 0.23, + "learning_rate": 4.9281255465244314e-05, + "loss": 1.0686, + "step": 415 + }, + { + "epoch": 0.23, + "learning_rate": 4.926391799008223e-05, + "loss": 1.0933, + "step": 420 + }, + { + "epoch": 0.24, + "learning_rate": 4.924637701651111e-05, + "loss": 1.0313, + "step": 425 + }, + { + "epoch": 0.24, + "learning_rate": 4.9228632691644874e-05, + "loss": 1.0811, + "step": 430 + }, + { + "epoch": 0.24, + "learning_rate": 4.921068516430293e-05, + "loss": 1.0617, + "step": 435 + }, + { + "epoch": 0.24, + "learning_rate": 4.919253458500892e-05, + "loss": 1.0482, + "step": 440 + }, + { + "epoch": 0.25, + "learning_rate": 4.9174181105989445e-05, + "loss": 1.0681, + "step": 445 + }, + { + "epoch": 0.25, + "learning_rate": 4.9155624881172834e-05, + "loss": 1.0476, + "step": 450 + }, + { + "epoch": 0.25, + "learning_rate": 4.913686606618777e-05, + "loss": 1.0463, + "step": 455 + }, + { + "epoch": 0.25, + "learning_rate": 4.911790481836208e-05, + "loss": 1.0213, + "step": 460 + }, + { + "epoch": 0.26, + "learning_rate": 4.909874129672133e-05, + "loss": 0.9855, + "step": 465 + }, + { + "epoch": 0.26, + "learning_rate": 4.907937566198757e-05, + "loss": 1.045, + "step": 470 + }, + { + "epoch": 0.26, + "learning_rate": 4.9059808076577914e-05, + "loss": 1.0442, + "step": 475 + }, + { + "epoch": 0.27, + "learning_rate": 4.904003870460323e-05, + "loss": 1.0354, + "step": 480 + }, + { + "epoch": 0.27, + "learning_rate": 4.9020067711866735e-05, + "loss": 1.0331, + "step": 485 + }, + { + "epoch": 0.27, + "learning_rate": 4.899989526586261e-05, + "loss": 1.0585, + "step": 490 + }, + { + "epoch": 0.27, + "learning_rate": 4.8979521535774636e-05, + "loss": 1.0223, + "step": 495 + }, + { + "epoch": 0.28, + "learning_rate": 4.895894669247468e-05, + "loss": 1.0118, + "step": 500 + }, + { + "epoch": 0.28, + "learning_rate": 4.8938170908521356e-05, + "loss": 1.0508, + "step": 505 + }, + { + "epoch": 0.28, + "learning_rate": 4.8917194358158534e-05, + "loss": 1.0656, + "step": 510 + }, + { + "epoch": 0.28, + "learning_rate": 4.8896017217313886e-05, + "loss": 1.0655, + "step": 515 + }, + { + "epoch": 0.29, + "learning_rate": 4.887463966359741e-05, + "loss": 1.0833, + "step": 520 + }, + { + "epoch": 0.29, + "learning_rate": 4.8853061876299956e-05, + "loss": 1.068, + "step": 525 + }, + { + "epoch": 0.29, + "learning_rate": 4.8831284036391684e-05, + "loss": 1.0487, + "step": 530 + }, + { + "epoch": 0.3, + "learning_rate": 4.880930632652058e-05, + "loss": 1.059, + "step": 535 + }, + { + "epoch": 0.3, + "learning_rate": 4.878712893101092e-05, + "loss": 1.0408, + "step": 540 + }, + { + "epoch": 0.3, + "learning_rate": 4.876475203586171e-05, + "loss": 0.9976, + "step": 545 + }, + { + "epoch": 0.3, + "learning_rate": 4.874217582874514e-05, + "loss": 1.0358, + "step": 550 + }, + { + "epoch": 0.31, + "learning_rate": 4.8719400499005e-05, + "loss": 1.0073, + "step": 555 + }, + { + "epoch": 0.31, + "learning_rate": 4.869642623765509e-05, + "loss": 1.0742, + "step": 560 + }, + { + "epoch": 0.31, + "learning_rate": 4.867325323737765e-05, + "loss": 1.0463, + "step": 565 + }, + { + "epoch": 0.32, + "learning_rate": 4.864988169252168e-05, + "loss": 1.0242, + "step": 570 + }, + { + "epoch": 0.32, + "learning_rate": 4.8626311799101375e-05, + "loss": 1.0773, + "step": 575 + }, + { + "epoch": 0.32, + "learning_rate": 4.860254375479446e-05, + "loss": 1.0371, + "step": 580 + }, + { + "epoch": 0.32, + "learning_rate": 4.8578577758940504e-05, + "loss": 1.0399, + "step": 585 + }, + { + "epoch": 0.33, + "learning_rate": 4.855441401253928e-05, + "loss": 1.0411, + "step": 590 + }, + { + "epoch": 0.33, + "learning_rate": 4.8530052718249076e-05, + "loss": 1.0476, + "step": 595 + }, + { + "epoch": 0.33, + "learning_rate": 4.850549408038498e-05, + "loss": 1.0075, + "step": 600 + }, + { + "epoch": 0.33, + "learning_rate": 4.848073830491717e-05, + "loss": 1.0045, + "step": 605 + }, + { + "epoch": 0.34, + "learning_rate": 4.845578559946923e-05, + "loss": 1.0368, + "step": 610 + }, + { + "epoch": 0.34, + "learning_rate": 4.8430636173316306e-05, + "loss": 1.0156, + "step": 615 + }, + { + "epoch": 0.34, + "learning_rate": 4.840529023738348e-05, + "loss": 1.0334, + "step": 620 + }, + { + "epoch": 0.35, + "learning_rate": 4.837974800424389e-05, + "loss": 0.9994, + "step": 625 + }, + { + "epoch": 0.35, + "learning_rate": 4.8354009688117026e-05, + "loss": 1.0467, + "step": 630 + }, + { + "epoch": 0.35, + "learning_rate": 4.8328075504866874e-05, + "loss": 0.9968, + "step": 635 + }, + { + "epoch": 0.35, + "learning_rate": 4.8301945672000164e-05, + "loss": 1.041, + "step": 640 + }, + { + "epoch": 0.36, + "learning_rate": 4.8275620408664487e-05, + "loss": 0.9811, + "step": 645 + }, + { + "epoch": 0.36, + "learning_rate": 4.8249099935646494e-05, + "loss": 1.0235, + "step": 650 + }, + { + "epoch": 0.36, + "learning_rate": 4.822238447537003e-05, + "loss": 1.0152, + "step": 655 + }, + { + "epoch": 0.36, + "learning_rate": 4.819547425189429e-05, + "loss": 1.038, + "step": 660 + }, + { + "epoch": 0.37, + "learning_rate": 4.81683694909119e-05, + "loss": 1.0387, + "step": 665 + }, + { + "epoch": 0.37, + "learning_rate": 4.814107041974707e-05, + "loss": 1.019, + "step": 670 + }, + { + "epoch": 0.37, + "learning_rate": 4.811357726735366e-05, + "loss": 1.0403, + "step": 675 + }, + { + "epoch": 0.38, + "learning_rate": 4.808589026431324e-05, + "loss": 1.0828, + "step": 680 + }, + { + "epoch": 0.38, + "learning_rate": 4.805800964283322e-05, + "loss": 1.0128, + "step": 685 + }, + { + "epoch": 0.38, + "learning_rate": 4.802993563674483e-05, + "loss": 1.0502, + "step": 690 + }, + { + "epoch": 0.38, + "learning_rate": 4.80016684815012e-05, + "loss": 1.0169, + "step": 695 + }, + { + "epoch": 0.39, + "learning_rate": 4.7973208414175406e-05, + "loss": 1.0316, + "step": 700 + }, + { + "epoch": 0.39, + "learning_rate": 4.794455567345842e-05, + "loss": 1.0263, + "step": 705 + }, + { + "epoch": 0.39, + "learning_rate": 4.791571049965714e-05, + "loss": 1.0801, + "step": 710 + }, + { + "epoch": 0.4, + "learning_rate": 4.7886673134692404e-05, + "loss": 1.0575, + "step": 715 + }, + { + "epoch": 0.4, + "learning_rate": 4.7857443822096905e-05, + "loss": 0.9767, + "step": 720 + }, + { + "epoch": 0.4, + "learning_rate": 4.782802280701319e-05, + "loss": 0.9666, + "step": 725 + }, + { + "epoch": 0.4, + "learning_rate": 4.77984103361916e-05, + "loss": 0.9983, + "step": 730 + }, + { + "epoch": 0.41, + "learning_rate": 4.776860665798816e-05, + "loss": 1.0219, + "step": 735 + }, + { + "epoch": 0.41, + "learning_rate": 4.773861202236257e-05, + "loss": 0.9963, + "step": 740 + }, + { + "epoch": 0.41, + "learning_rate": 4.770842668087602e-05, + "loss": 1.0219, + "step": 745 + }, + { + "epoch": 0.41, + "learning_rate": 4.767805088668916e-05, + "loss": 1.0029, + "step": 750 + }, + { + "epoch": 0.42, + "learning_rate": 4.7647484894559936e-05, + "loss": 1.0077, + "step": 755 + }, + { + "epoch": 0.42, + "learning_rate": 4.7616728960841444e-05, + "loss": 0.9908, + "step": 760 + }, + { + "epoch": 0.42, + "learning_rate": 4.758578334347981e-05, + "loss": 1.0092, + "step": 765 + }, + { + "epoch": 0.43, + "learning_rate": 4.7554648302012015e-05, + "loss": 0.9874, + "step": 770 + }, + { + "epoch": 0.43, + "learning_rate": 4.7523324097563706e-05, + "loss": 1.0185, + "step": 775 + }, + { + "epoch": 0.43, + "learning_rate": 4.749181099284703e-05, + "loss": 1.0533, + "step": 780 + }, + { + "epoch": 0.43, + "learning_rate": 4.746010925215839e-05, + "loss": 1.0083, + "step": 785 + }, + { + "epoch": 0.44, + "learning_rate": 4.74282191413763e-05, + "loss": 1.0191, + "step": 790 + }, + { + "epoch": 0.44, + "learning_rate": 4.7396140927959045e-05, + "loss": 0.9786, + "step": 795 + }, + { + "epoch": 0.44, + "learning_rate": 4.7363874880942574e-05, + "loss": 1.0197, + "step": 800 + }, + { + "epoch": 0.45, + "learning_rate": 4.733142127093813e-05, + "loss": 1.0471, + "step": 805 + }, + { + "epoch": 0.45, + "learning_rate": 4.7298780370130014e-05, + "loss": 1.0134, + "step": 810 + }, + { + "epoch": 0.45, + "learning_rate": 4.726595245227336e-05, + "loss": 0.9801, + "step": 815 + }, + { + "epoch": 0.45, + "learning_rate": 4.723293779269173e-05, + "loss": 0.9944, + "step": 820 + }, + { + "epoch": 0.46, + "learning_rate": 4.7199736668274924e-05, + "loss": 1.0358, + "step": 825 + }, + { + "epoch": 0.46, + "learning_rate": 4.716634935747655e-05, + "loss": 0.9907, + "step": 830 + }, + { + "epoch": 0.46, + "learning_rate": 4.713277614031177e-05, + "loss": 1.0698, + "step": 835 + }, + { + "epoch": 0.46, + "learning_rate": 4.70990172983549e-05, + "loss": 1.0466, + "step": 840 + }, + { + "epoch": 0.47, + "learning_rate": 4.706507311473707e-05, + "loss": 0.9489, + "step": 845 + }, + { + "epoch": 0.47, + "learning_rate": 4.703094387414385e-05, + "loss": 0.9936, + "step": 850 + }, + { + "epoch": 0.47, + "learning_rate": 4.699662986281288e-05, + "loss": 0.9672, + "step": 855 + }, + { + "epoch": 0.48, + "learning_rate": 4.696213136853141e-05, + "loss": 1.0164, + "step": 860 + }, + { + "epoch": 0.48, + "learning_rate": 4.6927448680633954e-05, + "loss": 0.9817, + "step": 865 + }, + { + "epoch": 0.48, + "learning_rate": 4.689258208999983e-05, + "loss": 1.028, + "step": 870 + }, + { + "epoch": 0.48, + "learning_rate": 4.6857531889050716e-05, + "loss": 1.0239, + "step": 875 + }, + { + "epoch": 0.49, + "learning_rate": 4.682229837174821e-05, + "loss": 1.0378, + "step": 880 + }, + { + "epoch": 0.49, + "learning_rate": 4.678688183359135e-05, + "loss": 0.9908, + "step": 885 + }, + { + "epoch": 0.49, + "learning_rate": 4.675128257161418e-05, + "loss": 0.9384, + "step": 890 + }, + { + "epoch": 0.49, + "learning_rate": 4.671550088438319e-05, + "loss": 0.9481, + "step": 895 + }, + { + "epoch": 0.5, + "learning_rate": 4.6679537071994874e-05, + "loss": 0.9541, + "step": 900 + }, + { + "epoch": 0.5, + "learning_rate": 4.6643391436073165e-05, + "loss": 0.9787, + "step": 905 + }, + { + "epoch": 0.5, + "learning_rate": 4.660706427976693e-05, + "loss": 0.9897, + "step": 910 + }, + { + "epoch": 0.51, + "learning_rate": 4.657055590774745e-05, + "loss": 0.9947, + "step": 915 + }, + { + "epoch": 0.51, + "learning_rate": 4.6533866626205805e-05, + "loss": 0.9614, + "step": 920 + }, + { + "epoch": 0.51, + "learning_rate": 4.649699674285036e-05, + "loss": 1.0323, + "step": 925 + }, + { + "epoch": 0.51, + "learning_rate": 4.645994656690417e-05, + "loss": 1.0345, + "step": 930 + }, + { + "epoch": 0.52, + "learning_rate": 4.642271640910235e-05, + "loss": 1.0432, + "step": 935 + }, + { + "epoch": 0.52, + "learning_rate": 4.638530658168954e-05, + "loss": 1.0102, + "step": 940 + }, + { + "epoch": 0.52, + "learning_rate": 4.6347717398417203e-05, + "loss": 1.0361, + "step": 945 + }, + { + "epoch": 0.53, + "learning_rate": 4.6309949174541096e-05, + "loss": 0.9818, + "step": 950 + }, + { + "epoch": 0.53, + "learning_rate": 4.627200222681851e-05, + "loss": 1.0293, + "step": 955 + }, + { + "epoch": 0.53, + "learning_rate": 4.6233876873505694e-05, + "loss": 1.0125, + "step": 960 + }, + { + "epoch": 0.53, + "learning_rate": 4.619557343435516e-05, + "loss": 1.0039, + "step": 965 + }, + { + "epoch": 0.54, + "learning_rate": 4.615709223061302e-05, + "loss": 1.0146, + "step": 970 + }, + { + "epoch": 0.54, + "learning_rate": 4.611843358501624e-05, + "loss": 1.0352, + "step": 975 + }, + { + "epoch": 0.54, + "learning_rate": 4.6079597821789993e-05, + "loss": 1.0405, + "step": 980 + }, + { + "epoch": 0.54, + "learning_rate": 4.604058526664491e-05, + "loss": 0.9834, + "step": 985 + }, + { + "epoch": 0.55, + "learning_rate": 4.600139624677436e-05, + "loss": 1.0252, + "step": 990 + }, + { + "epoch": 0.55, + "learning_rate": 4.596203109085168e-05, + "loss": 1.0516, + "step": 995 + }, + { + "epoch": 0.55, + "learning_rate": 4.5922490129027464e-05, + "loss": 0.9762, + "step": 1000 + }, + { + "epoch": 0.56, + "learning_rate": 4.588277369292674e-05, + "loss": 0.9801, + "step": 1005 + }, + { + "epoch": 0.56, + "learning_rate": 4.5842882115646234e-05, + "loss": 1.0127, + "step": 1010 + }, + { + "epoch": 0.56, + "learning_rate": 4.580281573175157e-05, + "loss": 1.0415, + "step": 1015 + }, + { + "epoch": 0.56, + "learning_rate": 4.576257487727442e-05, + "loss": 0.9974, + "step": 1020 + }, + { + "epoch": 0.57, + "learning_rate": 4.572215988970974e-05, + "loss": 1.0097, + "step": 1025 + }, + { + "epoch": 0.57, + "learning_rate": 4.568157110801293e-05, + "loss": 1.0004, + "step": 1030 + }, + { + "epoch": 0.57, + "learning_rate": 4.5640808872596944e-05, + "loss": 0.9932, + "step": 1035 + }, + { + "epoch": 0.58, + "learning_rate": 4.5599873525329505e-05, + "loss": 0.9437, + "step": 1040 + }, + { + "epoch": 0.58, + "learning_rate": 4.555876540953019e-05, + "loss": 1.0004, + "step": 1045 + }, + { + "epoch": 0.58, + "learning_rate": 4.551748486996755e-05, + "loss": 0.9743, + "step": 1050 + }, + { + "epoch": 0.58, + "learning_rate": 4.547603225285626e-05, + "loss": 1.0303, + "step": 1055 + }, + { + "epoch": 0.59, + "learning_rate": 4.543440790585417e-05, + "loss": 1.0225, + "step": 1060 + }, + { + "epoch": 0.59, + "learning_rate": 4.539261217805939e-05, + "loss": 1.0102, + "step": 1065 + }, + { + "epoch": 0.59, + "learning_rate": 4.535064542000743e-05, + "loss": 1.0087, + "step": 1070 + }, + { + "epoch": 0.59, + "learning_rate": 4.5308507983668165e-05, + "loss": 0.952, + "step": 1075 + }, + { + "epoch": 0.6, + "learning_rate": 4.526620022244293e-05, + "loss": 0.9767, + "step": 1080 + }, + { + "epoch": 0.6, + "learning_rate": 4.522372249116158e-05, + "loss": 1.0049, + "step": 1085 + }, + { + "epoch": 0.6, + "learning_rate": 4.5181075146079456e-05, + "loss": 1.0397, + "step": 1090 + }, + { + "epoch": 0.61, + "learning_rate": 4.5138258544874455e-05, + "loss": 1.0167, + "step": 1095 + }, + { + "epoch": 0.61, + "learning_rate": 4.5095273046643985e-05, + "loss": 0.9966, + "step": 1100 + }, + { + "epoch": 0.61, + "learning_rate": 4.5052119011901986e-05, + "loss": 0.983, + "step": 1105 + }, + { + "epoch": 0.61, + "learning_rate": 4.500879680257587e-05, + "loss": 1.0301, + "step": 1110 + }, + { + "epoch": 0.62, + "learning_rate": 4.4965306782003535e-05, + "loss": 1.0371, + "step": 1115 + }, + { + "epoch": 0.62, + "learning_rate": 4.492164931493028e-05, + "loss": 1.0083, + "step": 1120 + }, + { + "epoch": 0.62, + "learning_rate": 4.487782476750575e-05, + "loss": 0.9728, + "step": 1125 + }, + { + "epoch": 0.62, + "learning_rate": 4.4833833507280884e-05, + "loss": 0.9892, + "step": 1130 + }, + { + "epoch": 0.63, + "learning_rate": 4.4789675903204805e-05, + "loss": 0.9847, + "step": 1135 + }, + { + "epoch": 0.63, + "learning_rate": 4.474535232562176e-05, + "loss": 1.0545, + "step": 1140 + }, + { + "epoch": 0.63, + "learning_rate": 4.470086314626797e-05, + "loss": 1.0204, + "step": 1145 + }, + { + "epoch": 0.64, + "learning_rate": 4.465620873826856e-05, + "loss": 0.9648, + "step": 1150 + }, + { + "epoch": 0.64, + "learning_rate": 4.46113894761344e-05, + "loss": 0.9545, + "step": 1155 + }, + { + "epoch": 0.64, + "learning_rate": 4.456640573575896e-05, + "loss": 1.0256, + "step": 1160 + }, + { + "epoch": 0.64, + "learning_rate": 4.4521257894415183e-05, + "loss": 0.99, + "step": 1165 + }, + { + "epoch": 0.65, + "learning_rate": 4.44759463307523e-05, + "loss": 0.9906, + "step": 1170 + }, + { + "epoch": 0.65, + "learning_rate": 4.443047142479266e-05, + "loss": 0.9876, + "step": 1175 + }, + { + "epoch": 0.65, + "learning_rate": 4.4384833557928553e-05, + "loss": 1.0495, + "step": 1180 + }, + { + "epoch": 0.66, + "learning_rate": 4.4339033112918966e-05, + "loss": 0.9869, + "step": 1185 + }, + { + "epoch": 0.66, + "learning_rate": 4.4293070473886456e-05, + "loss": 1.0299, + "step": 1190 + }, + { + "epoch": 0.66, + "learning_rate": 4.424694602631385e-05, + "loss": 1.0073, + "step": 1195 + }, + { + "epoch": 0.66, + "learning_rate": 4.420066015704105e-05, + "loss": 1.0023, + "step": 1200 + }, + { + "epoch": 0.67, + "learning_rate": 4.41542132542618e-05, + "loss": 0.9462, + "step": 1205 + }, + { + "epoch": 0.67, + "learning_rate": 4.410760570752037e-05, + "loss": 1.0116, + "step": 1210 + }, + { + "epoch": 0.67, + "learning_rate": 4.4060837907708375e-05, + "loss": 0.9652, + "step": 1215 + }, + { + "epoch": 0.67, + "learning_rate": 4.401391024706142e-05, + "loss": 1.0411, + "step": 1220 + }, + { + "epoch": 0.68, + "learning_rate": 4.396682311915586e-05, + "loss": 0.9691, + "step": 1225 + }, + { + "epoch": 0.68, + "learning_rate": 4.3919576918905495e-05, + "loss": 0.961, + "step": 1230 + }, + { + "epoch": 0.68, + "learning_rate": 4.387217204255819e-05, + "loss": 0.9602, + "step": 1235 + }, + { + "epoch": 0.69, + "learning_rate": 4.3824608887692666e-05, + "loss": 1.0197, + "step": 1240 + }, + { + "epoch": 0.69, + "learning_rate": 4.377688785321507e-05, + "loss": 0.9982, + "step": 1245 + }, + { + "epoch": 0.69, + "learning_rate": 4.372900933935569e-05, + "loss": 0.986, + "step": 1250 + }, + { + "epoch": 0.69, + "learning_rate": 4.368097374766556e-05, + "loss": 0.9744, + "step": 1255 + }, + { + "epoch": 0.7, + "learning_rate": 4.3632781481013105e-05, + "loss": 1.0078, + "step": 1260 + }, + { + "epoch": 0.7, + "learning_rate": 4.358443294358077e-05, + "loss": 0.9865, + "step": 1265 + }, + { + "epoch": 0.7, + "learning_rate": 4.35359285408616e-05, + "loss": 0.976, + "step": 1270 + }, + { + "epoch": 0.71, + "learning_rate": 4.348726867965591e-05, + "loss": 1.0141, + "step": 1275 + }, + { + "epoch": 0.71, + "learning_rate": 4.343845376806777e-05, + "loss": 0.952, + "step": 1280 + }, + { + "epoch": 0.71, + "learning_rate": 4.338948421550169e-05, + "loss": 1.0239, + "step": 1285 + }, + { + "epoch": 0.71, + "learning_rate": 4.334036043265909e-05, + "loss": 0.994, + "step": 1290 + }, + { + "epoch": 0.72, + "learning_rate": 4.329108283153492e-05, + "loss": 1.0505, + "step": 1295 + }, + { + "epoch": 0.72, + "learning_rate": 4.3241651825414195e-05, + "loss": 0.9919, + "step": 1300 + }, + { + "epoch": 0.72, + "learning_rate": 4.31920678288685e-05, + "loss": 1.0169, + "step": 1305 + }, + { + "epoch": 0.72, + "learning_rate": 4.3142331257752546e-05, + "loss": 0.9938, + "step": 1310 + }, + { + "epoch": 0.73, + "learning_rate": 4.309244252920064e-05, + "loss": 0.92, + "step": 1315 + }, + { + "epoch": 0.73, + "learning_rate": 4.304240206162326e-05, + "loss": 0.9537, + "step": 1320 + }, + { + "epoch": 0.73, + "learning_rate": 4.299221027470345e-05, + "loss": 0.996, + "step": 1325 + }, + { + "epoch": 0.74, + "learning_rate": 4.294186758939339e-05, + "loss": 0.9652, + "step": 1330 + }, + { + "epoch": 0.74, + "learning_rate": 4.2891374427910795e-05, + "loss": 1.0138, + "step": 1335 + }, + { + "epoch": 0.74, + "learning_rate": 4.284073121373544e-05, + "loss": 0.9267, + "step": 1340 + }, + { + "epoch": 0.74, + "learning_rate": 4.278993837160553e-05, + "loss": 0.9899, + "step": 1345 + }, + { + "epoch": 0.75, + "learning_rate": 4.273899632751422e-05, + "loss": 0.963, + "step": 1350 + }, + { + "epoch": 0.75, + "learning_rate": 4.2687905508705974e-05, + "loss": 1.0066, + "step": 1355 + }, + { + "epoch": 0.75, + "learning_rate": 4.263666634367303e-05, + "loss": 0.9939, + "step": 1360 + }, + { + "epoch": 0.75, + "learning_rate": 4.258527926215178e-05, + "loss": 0.9667, + "step": 1365 + }, + { + "epoch": 0.76, + "learning_rate": 4.253374469511917e-05, + "loss": 0.9999, + "step": 1370 + }, + { + "epoch": 0.76, + "learning_rate": 4.248206307478909e-05, + "loss": 0.9738, + "step": 1375 + }, + { + "epoch": 0.76, + "learning_rate": 4.243023483460875e-05, + "loss": 0.9829, + "step": 1380 + }, + { + "epoch": 0.77, + "learning_rate": 4.237826040925503e-05, + "loss": 1.0107, + "step": 1385 + }, + { + "epoch": 0.77, + "learning_rate": 4.232614023463088e-05, + "loss": 0.9992, + "step": 1390 + }, + { + "epoch": 0.77, + "learning_rate": 4.227387474786159e-05, + "loss": 0.9566, + "step": 1395 + }, + { + "epoch": 0.77, + "learning_rate": 4.222146438729119e-05, + "loss": 0.9722, + "step": 1400 + }, + { + "epoch": 0.78, + "learning_rate": 4.216890959247873e-05, + "loss": 0.988, + "step": 1405 + }, + { + "epoch": 0.78, + "learning_rate": 4.211621080419463e-05, + "loss": 1.0103, + "step": 1410 + }, + { + "epoch": 0.78, + "learning_rate": 4.206336846441695e-05, + "loss": 0.9805, + "step": 1415 + }, + { + "epoch": 0.79, + "learning_rate": 4.201038301632772e-05, + "loss": 1.0177, + "step": 1420 + }, + { + "epoch": 0.79, + "learning_rate": 4.195725490430917e-05, + "loss": 0.9938, + "step": 1425 + }, + { + "epoch": 0.79, + "learning_rate": 4.190398457394007e-05, + "loss": 1.0276, + "step": 1430 + }, + { + "epoch": 0.79, + "learning_rate": 4.1850572471991924e-05, + "loss": 0.9909, + "step": 1435 + }, + { + "epoch": 0.8, + "learning_rate": 4.1797019046425264e-05, + "loss": 0.9794, + "step": 1440 + }, + { + "epoch": 0.8, + "learning_rate": 4.1743324746385914e-05, + "loss": 1.0073, + "step": 1445 + }, + { + "epoch": 0.8, + "learning_rate": 4.1689490022201154e-05, + "loss": 1.0106, + "step": 1450 + }, + { + "epoch": 0.8, + "learning_rate": 4.163551532537601e-05, + "loss": 1.0241, + "step": 1455 + }, + { + "epoch": 0.81, + "learning_rate": 4.1581401108589425e-05, + "loss": 1.0178, + "step": 1460 + }, + { + "epoch": 0.81, + "learning_rate": 4.1527147825690495e-05, + "loss": 0.9426, + "step": 1465 + }, + { + "epoch": 0.81, + "learning_rate": 4.1472755931694626e-05, + "loss": 0.9642, + "step": 1470 + }, + { + "epoch": 0.82, + "learning_rate": 4.141822588277976e-05, + "loss": 0.9678, + "step": 1475 + }, + { + "epoch": 0.82, + "learning_rate": 4.136355813628251e-05, + "loss": 0.9654, + "step": 1480 + }, + { + "epoch": 0.82, + "learning_rate": 4.130875315069435e-05, + "loss": 0.9748, + "step": 1485 + }, + { + "epoch": 0.82, + "learning_rate": 4.125381138565775e-05, + "loss": 0.9321, + "step": 1490 + }, + { + "epoch": 0.83, + "learning_rate": 4.1198733301962346e-05, + "loss": 0.9885, + "step": 1495 + }, + { + "epoch": 0.83, + "learning_rate": 4.114351936154105e-05, + "loss": 0.9385, + "step": 1500 + }, + { + "epoch": 0.83, + "learning_rate": 4.108817002746619e-05, + "loss": 0.9362, + "step": 1505 + }, + { + "epoch": 0.83, + "learning_rate": 4.1032685763945625e-05, + "loss": 0.9764, + "step": 1510 + }, + { + "epoch": 0.84, + "learning_rate": 4.097706703631886e-05, + "loss": 0.961, + "step": 1515 + }, + { + "epoch": 0.84, + "learning_rate": 4.092131431105312e-05, + "loss": 0.9818, + "step": 1520 + }, + { + "epoch": 0.84, + "learning_rate": 4.086542805573945e-05, + "loss": 0.9855, + "step": 1525 + }, + { + "epoch": 0.85, + "learning_rate": 4.080940873908881e-05, + "loss": 0.9618, + "step": 1530 + }, + { + "epoch": 0.85, + "learning_rate": 4.07532568309281e-05, + "loss": 1.0102, + "step": 1535 + }, + { + "epoch": 0.85, + "learning_rate": 4.069697280219628e-05, + "loss": 0.9773, + "step": 1540 + }, + { + "epoch": 0.85, + "learning_rate": 4.0640557124940376e-05, + "loss": 0.9687, + "step": 1545 + }, + { + "epoch": 0.86, + "learning_rate": 4.058401027231152e-05, + "loss": 0.9818, + "step": 1550 + }, + { + "epoch": 0.86, + "learning_rate": 4.052733271856103e-05, + "loss": 0.9911, + "step": 1555 + }, + { + "epoch": 0.86, + "learning_rate": 4.0470524939036355e-05, + "loss": 0.9694, + "step": 1560 + }, + { + "epoch": 0.87, + "learning_rate": 4.0413587410177155e-05, + "loss": 0.9374, + "step": 1565 + }, + { + "epoch": 0.87, + "learning_rate": 4.035652060951128e-05, + "loss": 0.956, + "step": 1570 + }, + { + "epoch": 0.87, + "learning_rate": 4.0299325015650774e-05, + "loss": 0.9491, + "step": 1575 + }, + { + "epoch": 0.87, + "learning_rate": 4.024200110828783e-05, + "loss": 0.9569, + "step": 1580 + }, + { + "epoch": 0.88, + "learning_rate": 4.018454936819082e-05, + "loss": 0.9627, + "step": 1585 + }, + { + "epoch": 0.88, + "learning_rate": 4.012697027720018e-05, + "loss": 0.9922, + "step": 1590 + }, + { + "epoch": 0.88, + "learning_rate": 4.0069264318224506e-05, + "loss": 0.9703, + "step": 1595 + }, + { + "epoch": 0.88, + "learning_rate": 4.0011431975236337e-05, + "loss": 0.9436, + "step": 1600 + }, + { + "epoch": 0.89, + "learning_rate": 3.995347373326822e-05, + "loss": 0.961, + "step": 1605 + }, + { + "epoch": 0.89, + "learning_rate": 3.989539007840861e-05, + "loss": 0.9247, + "step": 1610 + }, + { + "epoch": 0.89, + "learning_rate": 3.983718149779775e-05, + "loss": 0.9537, + "step": 1615 + }, + { + "epoch": 0.9, + "learning_rate": 3.9778848479623656e-05, + "loss": 0.9415, + "step": 1620 + }, + { + "epoch": 0.9, + "learning_rate": 3.972039151311795e-05, + "loss": 0.9954, + "step": 1625 + }, + { + "epoch": 0.9, + "learning_rate": 3.966181108855183e-05, + "loss": 0.9451, + "step": 1630 + }, + { + "epoch": 0.9, + "learning_rate": 3.960310769723189e-05, + "loss": 0.9977, + "step": 1635 + }, + { + "epoch": 0.91, + "learning_rate": 3.9544281831496034e-05, + "loss": 0.9806, + "step": 1640 + }, + { + "epoch": 0.91, + "learning_rate": 3.9485333984709374e-05, + "loss": 0.9851, + "step": 1645 + }, + { + "epoch": 0.91, + "learning_rate": 3.942626465126001e-05, + "loss": 1.0089, + "step": 1650 + }, + { + "epoch": 0.92, + "learning_rate": 3.9367074326555e-05, + "loss": 0.9562, + "step": 1655 + }, + { + "epoch": 0.92, + "learning_rate": 3.930776350701609e-05, + "loss": 0.9892, + "step": 1660 + }, + { + "epoch": 0.92, + "learning_rate": 3.92483326900756e-05, + "loss": 0.9875, + "step": 1665 + }, + { + "epoch": 0.92, + "learning_rate": 3.91887823741723e-05, + "loss": 1.0012, + "step": 1670 + }, + { + "epoch": 0.93, + "learning_rate": 3.9129113058747136e-05, + "loss": 0.97, + "step": 1675 + }, + { + "epoch": 0.93, + "learning_rate": 3.9069325244239095e-05, + "loss": 0.9793, + "step": 1680 + }, + { + "epoch": 0.93, + "learning_rate": 3.900941943208103e-05, + "loss": 0.9864, + "step": 1685 + }, + { + "epoch": 0.93, + "learning_rate": 3.894939612469539e-05, + "loss": 0.9596, + "step": 1690 + }, + { + "epoch": 0.94, + "learning_rate": 3.888925582549006e-05, + "loss": 0.9679, + "step": 1695 + }, + { + "epoch": 0.94, + "learning_rate": 3.882899903885412e-05, + "loss": 0.9395, + "step": 1700 + }, + { + "epoch": 0.94, + "learning_rate": 3.876862627015361e-05, + "loss": 0.9774, + "step": 1705 + }, + { + "epoch": 0.95, + "learning_rate": 3.87081380257273e-05, + "loss": 0.9686, + "step": 1710 + }, + { + "epoch": 0.95, + "learning_rate": 3.864753481288244e-05, + "loss": 0.953, + "step": 1715 + }, + { + "epoch": 0.95, + "learning_rate": 3.8586817139890515e-05, + "loss": 0.9719, + "step": 1720 + }, + { + "epoch": 0.95, + "learning_rate": 3.852598551598294e-05, + "loss": 0.9748, + "step": 1725 + }, + { + "epoch": 0.96, + "learning_rate": 3.8465040451346874e-05, + "loss": 0.9741, + "step": 1730 + }, + { + "epoch": 0.96, + "learning_rate": 3.8403982457120836e-05, + "loss": 0.9747, + "step": 1735 + }, + { + "epoch": 0.96, + "learning_rate": 3.834281204539051e-05, + "loss": 0.9791, + "step": 1740 + }, + { + "epoch": 0.96, + "learning_rate": 3.828152972918438e-05, + "loss": 0.9704, + "step": 1745 + }, + { + "epoch": 0.97, + "learning_rate": 3.82201360224695e-05, + "loss": 0.9556, + "step": 1750 + }, + { + "epoch": 0.97, + "learning_rate": 3.815863144014711e-05, + "loss": 0.9552, + "step": 1755 + }, + { + "epoch": 0.97, + "learning_rate": 3.809701649804834e-05, + "loss": 0.9594, + "step": 1760 + }, + { + "epoch": 0.98, + "learning_rate": 3.8035291712929926e-05, + "loss": 0.959, + "step": 1765 + }, + { + "epoch": 0.98, + "learning_rate": 3.797345760246982e-05, + "loss": 1.0254, + "step": 1770 + }, + { + "epoch": 0.98, + "learning_rate": 3.791151468526289e-05, + "loss": 0.9925, + "step": 1775 + }, + { + "epoch": 0.98, + "learning_rate": 3.784946348081654e-05, + "loss": 0.9516, + "step": 1780 + }, + { + "epoch": 0.99, + "learning_rate": 3.7787304509546365e-05, + "loss": 0.954, + "step": 1785 + }, + { + "epoch": 0.99, + "learning_rate": 3.7725038292771774e-05, + "loss": 0.9465, + "step": 1790 + }, + { + "epoch": 0.99, + "learning_rate": 3.766266535271167e-05, + "loss": 0.9792, + "step": 1795 + }, + { + "epoch": 1.0, + "learning_rate": 3.760018621248e-05, + "loss": 0.964, + "step": 1800 + }, + { + "epoch": 1.0, + "learning_rate": 3.75376013960814e-05, + "loss": 0.9419, + "step": 1805 + }, + { + "epoch": 1.0, + "learning_rate": 3.747491142840681e-05, + "loss": 0.9818, + "step": 1810 + }, + { + "epoch": 1.0, + "learning_rate": 3.741211683522904e-05, + "loss": 0.9153, + "step": 1815 + }, + { + "epoch": 1.01, + "learning_rate": 3.734921814319841e-05, + "loss": 0.955, + "step": 1820 + }, + { + "epoch": 1.01, + "learning_rate": 3.728621587983828e-05, + "loss": 0.9467, + "step": 1825 + }, + { + "epoch": 1.01, + "learning_rate": 3.722311057354067e-05, + "loss": 0.9816, + "step": 1830 + }, + { + "epoch": 1.01, + "learning_rate": 3.715990275356178e-05, + "loss": 0.9727, + "step": 1835 + }, + { + "epoch": 1.02, + "learning_rate": 3.7096592950017617e-05, + "loss": 0.9822, + "step": 1840 + }, + { + "epoch": 1.02, + "learning_rate": 3.703318169387947e-05, + "loss": 0.942, + "step": 1845 + }, + { + "epoch": 1.02, + "learning_rate": 3.696966951696952e-05, + "loss": 0.9306, + "step": 1850 + }, + { + "epoch": 1.03, + "learning_rate": 3.690605695195637e-05, + "loss": 0.9654, + "step": 1855 + }, + { + "epoch": 1.03, + "learning_rate": 3.684234453235054e-05, + "loss": 0.9592, + "step": 1860 + }, + { + "epoch": 1.03, + "learning_rate": 3.677853279250003e-05, + "loss": 0.988, + "step": 1865 + }, + { + "epoch": 1.03, + "learning_rate": 3.671462226758583e-05, + "loss": 0.9462, + "step": 1870 + }, + { + "epoch": 1.04, + "learning_rate": 3.665061349361742e-05, + "loss": 0.9685, + "step": 1875 + }, + { + "epoch": 1.04, + "learning_rate": 3.658650700742828e-05, + "loss": 0.9772, + "step": 1880 + }, + { + "epoch": 1.04, + "learning_rate": 3.6522303346671404e-05, + "loss": 0.9482, + "step": 1885 + }, + { + "epoch": 1.05, + "learning_rate": 3.645800304981477e-05, + "loss": 1.0069, + "step": 1890 + }, + { + "epoch": 1.05, + "learning_rate": 3.639360665613683e-05, + "loss": 0.9003, + "step": 1895 + }, + { + "epoch": 1.05, + "learning_rate": 3.632911470572197e-05, + "loss": 0.9279, + "step": 1900 + }, + { + "epoch": 1.05, + "learning_rate": 3.626452773945603e-05, + "loss": 0.9237, + "step": 1905 + }, + { + "epoch": 1.06, + "learning_rate": 3.619984629902172e-05, + "loss": 1.0086, + "step": 1910 + }, + { + "epoch": 1.06, + "learning_rate": 3.613507092689409e-05, + "loss": 0.9625, + "step": 1915 + }, + { + "epoch": 1.06, + "learning_rate": 3.607020216633599e-05, + "loss": 0.9297, + "step": 1920 + }, + { + "epoch": 1.06, + "learning_rate": 3.60052405613935e-05, + "loss": 0.9718, + "step": 1925 + }, + { + "epoch": 1.07, + "learning_rate": 3.594018665689139e-05, + "loss": 0.9512, + "step": 1930 + }, + { + "epoch": 1.07, + "learning_rate": 3.5875040998428513e-05, + "loss": 0.9923, + "step": 1935 + }, + { + "epoch": 1.07, + "learning_rate": 3.5809804132373253e-05, + "loss": 0.9518, + "step": 1940 + }, + { + "epoch": 1.08, + "learning_rate": 3.574447660585897e-05, + "loss": 0.9324, + "step": 1945 + }, + { + "epoch": 1.08, + "learning_rate": 3.5679058966779344e-05, + "loss": 0.9724, + "step": 1950 + }, + { + "epoch": 1.08, + "learning_rate": 3.561355176378384e-05, + "loss": 0.978, + "step": 1955 + }, + { + "epoch": 1.08, + "learning_rate": 3.554795554627307e-05, + "loss": 0.9893, + "step": 1960 + }, + { + "epoch": 1.09, + "learning_rate": 3.548227086439422e-05, + "loss": 0.967, + "step": 1965 + }, + { + "epoch": 1.09, + "learning_rate": 3.541649826903639e-05, + "loss": 0.9529, + "step": 1970 + }, + { + "epoch": 1.09, + "learning_rate": 3.535063831182602e-05, + "loss": 0.9506, + "step": 1975 + }, + { + "epoch": 1.09, + "learning_rate": 3.528469154512224e-05, + "loss": 0.9525, + "step": 1980 + }, + { + "epoch": 1.1, + "learning_rate": 3.521865852201223e-05, + "loss": 0.9258, + "step": 1985 + }, + { + "epoch": 1.1, + "learning_rate": 3.5152539796306596e-05, + "loss": 0.9417, + "step": 1990 + }, + { + "epoch": 1.1, + "learning_rate": 3.508633592253472e-05, + "loss": 0.9465, + "step": 1995 + }, + { + "epoch": 1.11, + "learning_rate": 3.502004745594011e-05, + "loss": 0.9494, + "step": 2000 + }, + { + "epoch": 1.11, + "learning_rate": 3.4953674952475755e-05, + "loss": 0.9709, + "step": 2005 + }, + { + "epoch": 1.11, + "learning_rate": 3.488721896879943e-05, + "loss": 0.9581, + "step": 2010 + }, + { + "epoch": 1.11, + "learning_rate": 3.4820680062269074e-05, + "loss": 0.974, + "step": 2015 + }, + { + "epoch": 1.12, + "learning_rate": 3.4754058790938046e-05, + "loss": 0.9768, + "step": 2020 + }, + { + "epoch": 1.12, + "learning_rate": 3.468735571355055e-05, + "loss": 0.982, + "step": 2025 + }, + { + "epoch": 1.12, + "learning_rate": 3.4620571389536825e-05, + "loss": 0.984, + "step": 2030 + }, + { + "epoch": 1.13, + "learning_rate": 3.455370637900856e-05, + "loss": 0.9604, + "step": 2035 + }, + { + "epoch": 1.13, + "learning_rate": 3.448676124275414e-05, + "loss": 0.897, + "step": 2040 + }, + { + "epoch": 1.13, + "learning_rate": 3.4419736542233925e-05, + "loss": 0.9968, + "step": 2045 + }, + { + "epoch": 1.13, + "learning_rate": 3.4352632839575616e-05, + "loss": 0.9479, + "step": 2050 + }, + { + "epoch": 1.14, + "learning_rate": 3.428545069756946e-05, + "loss": 0.9724, + "step": 2055 + }, + { + "epoch": 1.14, + "learning_rate": 3.42181906796636e-05, + "loss": 0.9493, + "step": 2060 + }, + { + "epoch": 1.14, + "learning_rate": 3.415085334995927e-05, + "loss": 0.9348, + "step": 2065 + }, + { + "epoch": 1.14, + "learning_rate": 3.408343927320613e-05, + "loss": 0.9702, + "step": 2070 + }, + { + "epoch": 1.15, + "learning_rate": 3.401594901479753e-05, + "loss": 0.9089, + "step": 2075 + }, + { + "epoch": 1.15, + "learning_rate": 3.394838314076572e-05, + "loss": 0.9606, + "step": 2080 + }, + { + "epoch": 1.15, + "learning_rate": 3.3880742217777115e-05, + "loss": 0.9743, + "step": 2085 + }, + { + "epoch": 1.16, + "learning_rate": 3.381302681312759e-05, + "loss": 0.9469, + "step": 2090 + }, + { + "epoch": 1.16, + "learning_rate": 3.374523749473767e-05, + "loss": 0.949, + "step": 2095 + }, + { + "epoch": 1.16, + "learning_rate": 3.367737483114779e-05, + "loss": 0.9567, + "step": 2100 + }, + { + "epoch": 1.16, + "learning_rate": 3.360943939151351e-05, + "loss": 0.9718, + "step": 2105 + }, + { + "epoch": 1.17, + "learning_rate": 3.354143174560078e-05, + "loss": 0.9626, + "step": 2110 + }, + { + "epoch": 1.17, + "learning_rate": 3.3473352463781105e-05, + "loss": 0.9346, + "step": 2115 + }, + { + "epoch": 1.17, + "learning_rate": 3.340520211702681e-05, + "loss": 0.9208, + "step": 2120 + }, + { + "epoch": 1.18, + "learning_rate": 3.333698127690623e-05, + "loss": 0.9856, + "step": 2125 + }, + { + "epoch": 1.18, + "learning_rate": 3.326869051557891e-05, + "loss": 0.9049, + "step": 2130 + }, + { + "epoch": 1.18, + "learning_rate": 3.320033040579082e-05, + "loss": 0.9222, + "step": 2135 + }, + { + "epoch": 1.18, + "learning_rate": 3.3131901520869565e-05, + "loss": 0.9648, + "step": 2140 + }, + { + "epoch": 1.19, + "learning_rate": 3.306340443471951e-05, + "loss": 0.9538, + "step": 2145 + }, + { + "epoch": 1.19, + "learning_rate": 3.299483972181708e-05, + "loss": 0.9314, + "step": 2150 + }, + { + "epoch": 1.19, + "learning_rate": 3.292620795720583e-05, + "loss": 0.9576, + "step": 2155 + }, + { + "epoch": 1.19, + "learning_rate": 3.285750971649167e-05, + "loss": 0.9499, + "step": 2160 + }, + { + "epoch": 1.2, + "learning_rate": 3.278874557583807e-05, + "loss": 0.9568, + "step": 2165 + }, + { + "epoch": 1.2, + "learning_rate": 3.271991611196117e-05, + "loss": 0.9642, + "step": 2170 + }, + { + "epoch": 1.2, + "learning_rate": 3.265102190212497e-05, + "loss": 0.9526, + "step": 2175 + }, + { + "epoch": 1.21, + "learning_rate": 3.258206352413648e-05, + "loss": 0.933, + "step": 2180 + }, + { + "epoch": 1.21, + "learning_rate": 3.2513041556340887e-05, + "loss": 0.9683, + "step": 2185 + }, + { + "epoch": 1.21, + "learning_rate": 3.244395657761671e-05, + "loss": 0.9155, + "step": 2190 + }, + { + "epoch": 1.21, + "learning_rate": 3.2374809167370924e-05, + "loss": 0.9262, + "step": 2195 + }, + { + "epoch": 1.22, + "learning_rate": 3.230559990553409e-05, + "loss": 0.9778, + "step": 2200 + }, + { + "epoch": 1.22, + "learning_rate": 3.2236329372555544e-05, + "loss": 0.9577, + "step": 2205 + }, + { + "epoch": 1.22, + "learning_rate": 3.2166998149398465e-05, + "loss": 0.9286, + "step": 2210 + }, + { + "epoch": 1.22, + "learning_rate": 3.209760681753505e-05, + "loss": 0.9634, + "step": 2215 + }, + { + "epoch": 1.23, + "learning_rate": 3.2028155958941615e-05, + "loss": 0.9467, + "step": 2220 + }, + { + "epoch": 1.23, + "learning_rate": 3.195864615609373e-05, + "loss": 0.9543, + "step": 2225 + }, + { + "epoch": 1.23, + "learning_rate": 3.1889077991961304e-05, + "loss": 0.9914, + "step": 2230 + }, + { + "epoch": 1.24, + "learning_rate": 3.181945205000373e-05, + "loss": 0.9309, + "step": 2235 + }, + { + "epoch": 1.24, + "learning_rate": 3.1749768914164955e-05, + "loss": 0.9299, + "step": 2240 + }, + { + "epoch": 1.24, + "learning_rate": 3.168002916886864e-05, + "loss": 0.9462, + "step": 2245 + }, + { + "epoch": 1.24, + "learning_rate": 3.1610233399013194e-05, + "loss": 0.948, + "step": 2250 + }, + { + "epoch": 1.25, + "learning_rate": 3.15403821899669e-05, + "loss": 0.9335, + "step": 2255 + }, + { + "epoch": 1.25, + "learning_rate": 3.147047612756302e-05, + "loss": 0.952, + "step": 2260 + }, + { + "epoch": 1.25, + "learning_rate": 3.140051579809484e-05, + "loss": 0.9532, + "step": 2265 + }, + { + "epoch": 1.26, + "learning_rate": 3.133050178831079e-05, + "loss": 0.9853, + "step": 2270 + }, + { + "epoch": 1.26, + "learning_rate": 3.12604346854095e-05, + "loss": 0.8925, + "step": 2275 + }, + { + "epoch": 1.26, + "learning_rate": 3.119031507703491e-05, + "loss": 0.942, + "step": 2280 + }, + { + "epoch": 1.26, + "learning_rate": 3.112014355127129e-05, + "loss": 0.9132, + "step": 2285 + }, + { + "epoch": 1.27, + "learning_rate": 3.104992069663835e-05, + "loss": 0.9335, + "step": 2290 + }, + { + "epoch": 1.27, + "learning_rate": 3.0979647102086273e-05, + "loss": 0.9403, + "step": 2295 + }, + { + "epoch": 1.27, + "learning_rate": 3.090932335699081e-05, + "loss": 0.9246, + "step": 2300 + }, + { + "epoch": 1.27, + "learning_rate": 3.083895005114831e-05, + "loss": 0.912, + "step": 2305 + }, + { + "epoch": 1.28, + "learning_rate": 3.076852777477079e-05, + "loss": 0.9334, + "step": 2310 + }, + { + "epoch": 1.28, + "learning_rate": 3.069805711848096e-05, + "loss": 0.933, + "step": 2315 + }, + { + "epoch": 1.28, + "learning_rate": 3.062753867330729e-05, + "loss": 0.9348, + "step": 2320 + }, + { + "epoch": 1.29, + "learning_rate": 3.055697303067905e-05, + "loss": 0.9997, + "step": 2325 + }, + { + "epoch": 1.29, + "learning_rate": 3.048636078242137e-05, + "loss": 0.9196, + "step": 2330 + }, + { + "epoch": 1.29, + "learning_rate": 3.0415702520750235e-05, + "loss": 0.9735, + "step": 2335 + }, + { + "epoch": 1.29, + "learning_rate": 3.0344998838267525e-05, + "loss": 0.9395, + "step": 2340 + }, + { + "epoch": 1.3, + "learning_rate": 3.0274250327956093e-05, + "loss": 0.9455, + "step": 2345 + }, + { + "epoch": 1.3, + "learning_rate": 3.020345758317474e-05, + "loss": 0.968, + "step": 2350 + }, + { + "epoch": 1.3, + "learning_rate": 3.0132621197653245e-05, + "loss": 0.9403, + "step": 2355 + }, + { + "epoch": 1.3, + "learning_rate": 3.0061741765487418e-05, + "loss": 0.9267, + "step": 2360 + }, + { + "epoch": 1.31, + "learning_rate": 2.9990819881134073e-05, + "loss": 0.9734, + "step": 2365 + }, + { + "epoch": 1.31, + "learning_rate": 2.9919856139406093e-05, + "loss": 0.949, + "step": 2370 + }, + { + "epoch": 1.31, + "learning_rate": 2.9848851135467386e-05, + "loss": 0.9464, + "step": 2375 + }, + { + "epoch": 1.32, + "learning_rate": 2.977780546482794e-05, + "loss": 0.9709, + "step": 2380 + }, + { + "epoch": 1.32, + "learning_rate": 2.9706719723338795e-05, + "loss": 0.9202, + "step": 2385 + }, + { + "epoch": 1.32, + "learning_rate": 2.9635594507187074e-05, + "loss": 0.9625, + "step": 2390 + }, + { + "epoch": 1.32, + "learning_rate": 2.956443041289096e-05, + "loss": 0.9307, + "step": 2395 + }, + { + "epoch": 1.33, + "learning_rate": 2.9493228037294702e-05, + "loss": 0.907, + "step": 2400 + }, + { + "epoch": 1.33, + "learning_rate": 2.9421987977563613e-05, + "loss": 0.9487, + "step": 2405 + }, + { + "epoch": 1.33, + "learning_rate": 2.935071083117907e-05, + "loss": 0.9259, + "step": 2410 + }, + { + "epoch": 1.34, + "learning_rate": 2.9279397195933457e-05, + "loss": 0.9357, + "step": 2415 + }, + { + "epoch": 1.34, + "learning_rate": 2.920804766992521e-05, + "loss": 0.9149, + "step": 2420 + }, + { + "epoch": 1.34, + "learning_rate": 2.9136662851553787e-05, + "loss": 0.9664, + "step": 2425 + }, + { + "epoch": 1.34, + "learning_rate": 2.906524333951461e-05, + "loss": 0.973, + "step": 2430 + }, + { + "epoch": 1.35, + "learning_rate": 2.899378973279409e-05, + "loss": 0.9797, + "step": 2435 + }, + { + "epoch": 1.35, + "learning_rate": 2.892230263066459e-05, + "loss": 0.9604, + "step": 2440 + }, + { + "epoch": 1.35, + "learning_rate": 2.885078263267938e-05, + "loss": 0.9147, + "step": 2445 + }, + { + "epoch": 1.35, + "learning_rate": 2.8779230338667634e-05, + "loss": 0.9827, + "step": 2450 + }, + { + "epoch": 1.36, + "learning_rate": 2.870764634872939e-05, + "loss": 0.9362, + "step": 2455 + }, + { + "epoch": 1.36, + "learning_rate": 2.86360312632305e-05, + "loss": 0.9373, + "step": 2460 + }, + { + "epoch": 1.36, + "learning_rate": 2.8564385682797622e-05, + "loss": 0.955, + "step": 2465 + }, + { + "epoch": 1.37, + "learning_rate": 2.8492710208313177e-05, + "loss": 0.9108, + "step": 2470 + }, + { + "epoch": 1.37, + "learning_rate": 2.8421005440910303e-05, + "loss": 0.9259, + "step": 2475 + }, + { + "epoch": 1.37, + "learning_rate": 2.8349271981967797e-05, + "loss": 0.9333, + "step": 2480 + }, + { + "epoch": 1.37, + "learning_rate": 2.8277510433105102e-05, + "loss": 0.9437, + "step": 2485 + }, + { + "epoch": 1.38, + "learning_rate": 2.820572139617725e-05, + "loss": 0.9189, + "step": 2490 + }, + { + "epoch": 1.38, + "learning_rate": 2.8133905473269802e-05, + "loss": 0.9522, + "step": 2495 + }, + { + "epoch": 1.38, + "learning_rate": 2.8062063266693818e-05, + "loss": 0.9352, + "step": 2500 + }, + { + "epoch": 1.39, + "learning_rate": 2.7990195378980784e-05, + "loss": 0.9232, + "step": 2505 + }, + { + "epoch": 1.39, + "learning_rate": 2.7918302412877583e-05, + "loss": 0.8807, + "step": 2510 + }, + { + "epoch": 1.39, + "learning_rate": 2.7846384971341427e-05, + "loss": 0.9231, + "step": 2515 + }, + { + "epoch": 1.39, + "learning_rate": 2.7774443657534788e-05, + "loss": 0.9253, + "step": 2520 + }, + { + "epoch": 1.4, + "learning_rate": 2.770247907482036e-05, + "loss": 0.9646, + "step": 2525 + }, + { + "epoch": 1.4, + "learning_rate": 2.763049182675599e-05, + "loss": 0.9575, + "step": 2530 + }, + { + "epoch": 1.4, + "learning_rate": 2.7558482517089617e-05, + "loss": 0.9234, + "step": 2535 + }, + { + "epoch": 1.4, + "learning_rate": 2.748645174975421e-05, + "loss": 0.9215, + "step": 2540 + }, + { + "epoch": 1.41, + "learning_rate": 2.74144001288627e-05, + "loss": 0.959, + "step": 2545 + }, + { + "epoch": 1.41, + "learning_rate": 2.7342328258702894e-05, + "loss": 0.9509, + "step": 2550 + }, + { + "epoch": 1.41, + "learning_rate": 2.727023674373246e-05, + "loss": 0.8989, + "step": 2555 + }, + { + "epoch": 1.42, + "learning_rate": 2.7198126188573807e-05, + "loss": 0.9653, + "step": 2560 + }, + { + "epoch": 1.42, + "learning_rate": 2.7125997198009028e-05, + "loss": 0.9046, + "step": 2565 + }, + { + "epoch": 1.42, + "learning_rate": 2.7053850376974848e-05, + "loss": 0.9318, + "step": 2570 + }, + { + "epoch": 1.42, + "learning_rate": 2.6981686330557516e-05, + "loss": 0.9292, + "step": 2575 + }, + { + "epoch": 1.43, + "learning_rate": 2.6909505663987756e-05, + "loss": 0.99, + "step": 2580 + }, + { + "epoch": 1.43, + "learning_rate": 2.6837308982635678e-05, + "loss": 0.9737, + "step": 2585 + }, + { + "epoch": 1.43, + "learning_rate": 2.6765096892005726e-05, + "loss": 0.9649, + "step": 2590 + }, + { + "epoch": 1.43, + "learning_rate": 2.6692869997731545e-05, + "loss": 0.9687, + "step": 2595 + }, + { + "epoch": 1.44, + "learning_rate": 2.6620628905570964e-05, + "loss": 0.9708, + "step": 2600 + }, + { + "epoch": 1.44, + "learning_rate": 2.6548374221400884e-05, + "loss": 0.9498, + "step": 2605 + }, + { + "epoch": 1.44, + "learning_rate": 2.6476106551212188e-05, + "loss": 0.9512, + "step": 2610 + }, + { + "epoch": 1.45, + "learning_rate": 2.6403826501104682e-05, + "loss": 0.964, + "step": 2615 + }, + { + "epoch": 1.45, + "learning_rate": 2.6331534677281998e-05, + "loss": 0.9321, + "step": 2620 + }, + { + "epoch": 1.45, + "learning_rate": 2.6259231686046508e-05, + "loss": 0.9032, + "step": 2625 + }, + { + "epoch": 1.45, + "learning_rate": 2.6186918133794252e-05, + "loss": 0.9543, + "step": 2630 + }, + { + "epoch": 1.46, + "learning_rate": 2.6114594627009847e-05, + "loss": 0.9355, + "step": 2635 + }, + { + "epoch": 1.46, + "learning_rate": 2.604226177226137e-05, + "loss": 0.9111, + "step": 2640 + }, + { + "epoch": 1.46, + "learning_rate": 2.596992017619534e-05, + "loss": 0.9896, + "step": 2645 + }, + { + "epoch": 1.47, + "learning_rate": 2.589757044553155e-05, + "loss": 0.9715, + "step": 2650 + }, + { + "epoch": 1.47, + "learning_rate": 2.5825213187058045e-05, + "loss": 0.8911, + "step": 2655 + }, + { + "epoch": 1.47, + "learning_rate": 2.5752849007625986e-05, + "loss": 0.9446, + "step": 2660 + }, + { + "epoch": 1.47, + "learning_rate": 2.568047851414459e-05, + "loss": 0.9453, + "step": 2665 + }, + { + "epoch": 1.48, + "learning_rate": 2.5608102313576027e-05, + "loss": 0.9244, + "step": 2670 + }, + { + "epoch": 1.48, + "learning_rate": 2.553572101293033e-05, + "loss": 0.9478, + "step": 2675 + }, + { + "epoch": 1.48, + "learning_rate": 2.546333521926031e-05, + "loss": 0.9708, + "step": 2680 + }, + { + "epoch": 1.48, + "learning_rate": 2.5390945539656445e-05, + "loss": 0.9266, + "step": 2685 + }, + { + "epoch": 1.49, + "learning_rate": 2.5318552581241822e-05, + "loss": 0.9199, + "step": 2690 + }, + { + "epoch": 1.49, + "learning_rate": 2.524615695116702e-05, + "loss": 0.9666, + "step": 2695 + }, + { + "epoch": 1.49, + "learning_rate": 2.5173759256605027e-05, + "loss": 0.9247, + "step": 2700 + }, + { + "epoch": 1.5, + "learning_rate": 2.510136010474614e-05, + "loss": 0.9534, + "step": 2705 + }, + { + "epoch": 1.5, + "learning_rate": 2.5028960102792887e-05, + "loss": 0.9502, + "step": 2710 + }, + { + "epoch": 1.5, + "learning_rate": 2.4971039897207112e-05, + "loss": 0.9331, + "step": 2715 + }, + { + "epoch": 1.5, + "learning_rate": 2.4898639895253865e-05, + "loss": 0.9806, + "step": 2720 + }, + { + "epoch": 1.51, + "learning_rate": 2.4826240743394982e-05, + "loss": 0.9525, + "step": 2725 + }, + { + "epoch": 1.51, + "learning_rate": 2.4753843048832985e-05, + "loss": 0.937, + "step": 2730 + }, + { + "epoch": 1.51, + "learning_rate": 2.4681447418758187e-05, + "loss": 0.935, + "step": 2735 + }, + { + "epoch": 1.52, + "learning_rate": 2.460905446034356e-05, + "loss": 0.9662, + "step": 2740 + }, + { + "epoch": 1.52, + "learning_rate": 2.45366647807397e-05, + "loss": 0.9249, + "step": 2745 + }, + { + "epoch": 1.52, + "learning_rate": 2.446427898706967e-05, + "loss": 0.9673, + "step": 2750 + }, + { + "epoch": 1.52, + "learning_rate": 2.439189768642398e-05, + "loss": 0.9316, + "step": 2755 + }, + { + "epoch": 1.53, + "learning_rate": 2.431952148585541e-05, + "loss": 0.9297, + "step": 2760 + }, + { + "epoch": 1.53, + "learning_rate": 2.424715099237402e-05, + "loss": 0.9278, + "step": 2765 + }, + { + "epoch": 1.53, + "learning_rate": 2.4174786812941968e-05, + "loss": 0.8954, + "step": 2770 + }, + { + "epoch": 1.53, + "learning_rate": 2.4102429554468456e-05, + "loss": 0.9586, + "step": 2775 + }, + { + "epoch": 1.54, + "learning_rate": 2.4030079823804673e-05, + "loss": 0.9119, + "step": 2780 + }, + { + "epoch": 1.54, + "learning_rate": 2.395773822773863e-05, + "loss": 0.8949, + "step": 2785 + }, + { + "epoch": 1.54, + "learning_rate": 2.3885405372990166e-05, + "loss": 0.9506, + "step": 2790 + }, + { + "epoch": 1.55, + "learning_rate": 2.3813081866205754e-05, + "loss": 0.9087, + "step": 2795 + }, + { + "epoch": 1.55, + "learning_rate": 2.3740768313953494e-05, + "loss": 0.923, + "step": 2800 + }, + { + "epoch": 1.55, + "learning_rate": 2.3668465322718004e-05, + "loss": 1.0212, + "step": 2805 + }, + { + "epoch": 1.55, + "learning_rate": 2.359617349889532e-05, + "loss": 0.9182, + "step": 2810 + }, + { + "epoch": 1.56, + "learning_rate": 2.3523893448787818e-05, + "loss": 0.9316, + "step": 2815 + }, + { + "epoch": 1.56, + "learning_rate": 2.3451625778599122e-05, + "loss": 0.94, + "step": 2820 + }, + { + "epoch": 1.56, + "learning_rate": 2.3379371094429038e-05, + "loss": 0.9181, + "step": 2825 + }, + { + "epoch": 1.56, + "learning_rate": 2.3307130002268457e-05, + "loss": 0.937, + "step": 2830 + }, + { + "epoch": 1.57, + "learning_rate": 2.3234903107994287e-05, + "loss": 0.9026, + "step": 2835 + }, + { + "epoch": 1.57, + "learning_rate": 2.3162691017364317e-05, + "loss": 0.954, + "step": 2840 + }, + { + "epoch": 1.57, + "learning_rate": 2.3090494336012253e-05, + "loss": 0.9661, + "step": 2845 + }, + { + "epoch": 1.58, + "learning_rate": 2.3018313669442483e-05, + "loss": 0.9127, + "step": 2850 + }, + { + "epoch": 1.58, + "learning_rate": 2.2946149623025158e-05, + "loss": 0.9317, + "step": 2855 + }, + { + "epoch": 1.58, + "learning_rate": 2.2874002801990978e-05, + "loss": 0.9856, + "step": 2860 + }, + { + "epoch": 1.58, + "learning_rate": 2.28018738114262e-05, + "loss": 1.0021, + "step": 2865 + }, + { + "epoch": 1.59, + "learning_rate": 2.272976325626755e-05, + "loss": 0.9655, + "step": 2870 + }, + { + "epoch": 1.59, + "learning_rate": 2.265767174129711e-05, + "loss": 0.9619, + "step": 2875 + }, + { + "epoch": 1.59, + "learning_rate": 2.2585599871137313e-05, + "loss": 0.9383, + "step": 2880 + }, + { + "epoch": 1.6, + "learning_rate": 2.251354825024579e-05, + "loss": 0.9332, + "step": 2885 + }, + { + "epoch": 1.6, + "learning_rate": 2.244151748291039e-05, + "loss": 0.9544, + "step": 2890 + }, + { + "epoch": 1.6, + "learning_rate": 2.236950817324401e-05, + "loss": 0.9343, + "step": 2895 + }, + { + "epoch": 1.6, + "learning_rate": 2.2297520925179647e-05, + "loss": 0.9189, + "step": 2900 + }, + { + "epoch": 1.61, + "learning_rate": 2.222555634246521e-05, + "loss": 0.9341, + "step": 2905 + }, + { + "epoch": 1.61, + "learning_rate": 2.215361502865858e-05, + "loss": 0.9567, + "step": 2910 + }, + { + "epoch": 1.61, + "learning_rate": 2.2081697587122423e-05, + "loss": 0.9047, + "step": 2915 + }, + { + "epoch": 1.61, + "learning_rate": 2.200980462101922e-05, + "loss": 0.9126, + "step": 2920 + }, + { + "epoch": 1.62, + "learning_rate": 2.1937936733306195e-05, + "loss": 0.9523, + "step": 2925 + }, + { + "epoch": 1.62, + "learning_rate": 2.18660945267302e-05, + "loss": 0.8802, + "step": 2930 + }, + { + "epoch": 1.62, + "learning_rate": 2.179427860382276e-05, + "loss": 0.9197, + "step": 2935 + }, + { + "epoch": 1.63, + "learning_rate": 2.1722489566894903e-05, + "loss": 0.9255, + "step": 2940 + }, + { + "epoch": 1.63, + "learning_rate": 2.1650728018032206e-05, + "loss": 0.8921, + "step": 2945 + }, + { + "epoch": 1.63, + "learning_rate": 2.15789945590897e-05, + "loss": 0.9607, + "step": 2950 + }, + { + "epoch": 1.63, + "learning_rate": 2.150728979168683e-05, + "loss": 0.9755, + "step": 2955 + }, + { + "epoch": 1.64, + "learning_rate": 2.1435614317202384e-05, + "loss": 0.9943, + "step": 2960 + }, + { + "epoch": 1.64, + "learning_rate": 2.1363968736769508e-05, + "loss": 0.9463, + "step": 2965 + }, + { + "epoch": 1.64, + "learning_rate": 2.1292353651270617e-05, + "loss": 0.9107, + "step": 2970 + }, + { + "epoch": 1.65, + "learning_rate": 2.1220769661332365e-05, + "loss": 0.9311, + "step": 2975 + }, + { + "epoch": 1.65, + "learning_rate": 2.1149217367320622e-05, + "loss": 0.9459, + "step": 2980 + }, + { + "epoch": 1.65, + "learning_rate": 2.107769736933541e-05, + "loss": 0.9439, + "step": 2985 + }, + { + "epoch": 1.65, + "learning_rate": 2.100621026720591e-05, + "loss": 0.9719, + "step": 2990 + }, + { + "epoch": 1.66, + "learning_rate": 2.093475666048539e-05, + "loss": 0.9569, + "step": 2995 + }, + { + "epoch": 1.66, + "learning_rate": 2.0863337148446222e-05, + "loss": 0.9308, + "step": 3000 + }, + { + "epoch": 1.66, + "learning_rate": 2.07919523300748e-05, + "loss": 0.9269, + "step": 3005 + }, + { + "epoch": 1.66, + "learning_rate": 2.0720602804066552e-05, + "loss": 0.9358, + "step": 3010 + }, + { + "epoch": 1.67, + "learning_rate": 2.0649289168820943e-05, + "loss": 0.9291, + "step": 3015 + }, + { + "epoch": 1.67, + "learning_rate": 2.0578012022436386e-05, + "loss": 0.969, + "step": 3020 + }, + { + "epoch": 1.67, + "learning_rate": 2.0506771962705304e-05, + "loss": 0.9319, + "step": 3025 + }, + { + "epoch": 1.68, + "learning_rate": 2.0435569587109042e-05, + "loss": 0.9574, + "step": 3030 + }, + { + "epoch": 1.68, + "learning_rate": 2.036440549281293e-05, + "loss": 0.9654, + "step": 3035 + }, + { + "epoch": 1.68, + "learning_rate": 2.0293280276661204e-05, + "loss": 0.9742, + "step": 3040 + }, + { + "epoch": 1.68, + "learning_rate": 2.0222194535172067e-05, + "loss": 0.9014, + "step": 3045 + }, + { + "epoch": 1.69, + "learning_rate": 2.0151148864532623e-05, + "loss": 0.9246, + "step": 3050 + }, + { + "epoch": 1.69, + "learning_rate": 2.0080143860593913e-05, + "loss": 0.9522, + "step": 3055 + }, + { + "epoch": 1.69, + "learning_rate": 2.0009180118865933e-05, + "loss": 0.9567, + "step": 3060 + }, + { + "epoch": 1.69, + "learning_rate": 1.9938258234512588e-05, + "loss": 0.983, + "step": 3065 + }, + { + "epoch": 1.7, + "learning_rate": 1.9867378802346764e-05, + "loss": 0.8722, + "step": 3070 + }, + { + "epoch": 1.7, + "learning_rate": 1.979654241682527e-05, + "loss": 0.9122, + "step": 3075 + }, + { + "epoch": 1.7, + "learning_rate": 1.972574967204391e-05, + "loss": 0.9362, + "step": 3080 + }, + { + "epoch": 1.71, + "learning_rate": 1.9655001161732478e-05, + "loss": 0.8944, + "step": 3085 + }, + { + "epoch": 1.71, + "learning_rate": 1.9584297479249774e-05, + "loss": 0.9329, + "step": 3090 + }, + { + "epoch": 1.71, + "learning_rate": 1.9513639217578636e-05, + "loss": 0.9707, + "step": 3095 + }, + { + "epoch": 1.71, + "learning_rate": 1.9443026969320955e-05, + "loss": 0.9367, + "step": 3100 + }, + { + "epoch": 1.72, + "learning_rate": 1.937246132669272e-05, + "loss": 0.896, + "step": 3105 + }, + { + "epoch": 1.72, + "learning_rate": 1.9301942881519047e-05, + "loss": 0.97, + "step": 3110 + }, + { + "epoch": 1.72, + "learning_rate": 1.9231472225229216e-05, + "loss": 0.9638, + "step": 3115 + }, + { + "epoch": 1.73, + "learning_rate": 1.9161049948851684e-05, + "loss": 0.9561, + "step": 3120 + }, + { + "epoch": 1.73, + "learning_rate": 1.9090676643009193e-05, + "loss": 0.9734, + "step": 3125 + }, + { + "epoch": 1.73, + "learning_rate": 1.902035289791373e-05, + "loss": 0.9651, + "step": 3130 + }, + { + "epoch": 1.73, + "learning_rate": 1.8950079303361658e-05, + "loss": 0.9489, + "step": 3135 + }, + { + "epoch": 1.74, + "learning_rate": 1.8879856448728723e-05, + "loss": 0.9893, + "step": 3140 + }, + { + "epoch": 1.74, + "learning_rate": 1.8809684922965097e-05, + "loss": 0.9549, + "step": 3145 + }, + { + "epoch": 1.74, + "learning_rate": 1.8739565314590507e-05, + "loss": 0.9196, + "step": 3150 + }, + { + "epoch": 1.74, + "learning_rate": 1.8669498211689216e-05, + "loss": 0.9568, + "step": 3155 + }, + { + "epoch": 1.75, + "learning_rate": 1.859948420190517e-05, + "loss": 0.904, + "step": 3160 + }, + { + "epoch": 1.75, + "learning_rate": 1.852952387243698e-05, + "loss": 0.9375, + "step": 3165 + }, + { + "epoch": 1.75, + "learning_rate": 1.8459617810033096e-05, + "loss": 0.9048, + "step": 3170 + }, + { + "epoch": 1.76, + "learning_rate": 1.83897666009868e-05, + "loss": 0.9514, + "step": 3175 + }, + { + "epoch": 1.76, + "learning_rate": 1.8319970831131363e-05, + "loss": 0.8855, + "step": 3180 + }, + { + "epoch": 1.76, + "learning_rate": 1.825023108583505e-05, + "loss": 0.9119, + "step": 3185 + }, + { + "epoch": 1.76, + "learning_rate": 1.818054794999628e-05, + "loss": 0.9733, + "step": 3190 + }, + { + "epoch": 1.77, + "learning_rate": 1.8110922008038705e-05, + "loss": 0.9597, + "step": 3195 + }, + { + "epoch": 1.77, + "learning_rate": 1.8041353843906275e-05, + "loss": 0.8952, + "step": 3200 + }, + { + "epoch": 1.77, + "learning_rate": 1.797184404105839e-05, + "loss": 0.8934, + "step": 3205 + }, + { + "epoch": 1.77, + "learning_rate": 1.7902393182464955e-05, + "loss": 0.9299, + "step": 3210 + }, + { + "epoch": 1.78, + "learning_rate": 1.7833001850601544e-05, + "loss": 0.9247, + "step": 3215 + }, + { + "epoch": 1.78, + "learning_rate": 1.7763670627444465e-05, + "loss": 0.9672, + "step": 3220 + }, + { + "epoch": 1.78, + "learning_rate": 1.7694400094465913e-05, + "loss": 0.9451, + "step": 3225 + }, + { + "epoch": 1.79, + "learning_rate": 1.7625190832629085e-05, + "loss": 0.9294, + "step": 3230 + }, + { + "epoch": 1.79, + "learning_rate": 1.7556043422383293e-05, + "loss": 0.9145, + "step": 3235 + }, + { + "epoch": 1.79, + "learning_rate": 1.7486958443659112e-05, + "loss": 0.9508, + "step": 3240 + }, + { + "epoch": 1.79, + "learning_rate": 1.7417936475863526e-05, + "loss": 0.8725, + "step": 3245 + }, + { + "epoch": 1.8, + "learning_rate": 1.7348978097875036e-05, + "loss": 0.9195, + "step": 3250 + }, + { + "epoch": 1.8, + "learning_rate": 1.728008388803883e-05, + "loss": 0.933, + "step": 3255 + }, + { + "epoch": 1.8, + "learning_rate": 1.7211254424161933e-05, + "loss": 0.9747, + "step": 3260 + }, + { + "epoch": 1.81, + "learning_rate": 1.7142490283508324e-05, + "loss": 0.9168, + "step": 3265 + }, + { + "epoch": 1.81, + "learning_rate": 1.707379204279418e-05, + "loss": 0.9844, + "step": 3270 + }, + { + "epoch": 1.81, + "learning_rate": 1.700516027818293e-05, + "loss": 0.9071, + "step": 3275 + }, + { + "epoch": 1.81, + "learning_rate": 1.6936595565280488e-05, + "loss": 0.9311, + "step": 3280 + }, + { + "epoch": 1.82, + "learning_rate": 1.686809847913045e-05, + "loss": 0.9419, + "step": 3285 + }, + { + "epoch": 1.82, + "learning_rate": 1.679966959420918e-05, + "loss": 0.9679, + "step": 3290 + }, + { + "epoch": 1.82, + "learning_rate": 1.67313094844211e-05, + "loss": 0.9601, + "step": 3295 + }, + { + "epoch": 1.82, + "learning_rate": 1.6663018723093774e-05, + "loss": 0.9022, + "step": 3300 + }, + { + "epoch": 1.83, + "learning_rate": 1.6594797882973196e-05, + "loss": 0.8925, + "step": 3305 + }, + { + "epoch": 1.83, + "learning_rate": 1.6526647536218894e-05, + "loss": 0.9238, + "step": 3310 + }, + { + "epoch": 1.83, + "learning_rate": 1.6458568254399225e-05, + "loss": 0.9632, + "step": 3315 + }, + { + "epoch": 1.84, + "learning_rate": 1.6390560608486496e-05, + "loss": 0.9164, + "step": 3320 + }, + { + "epoch": 1.84, + "learning_rate": 1.6322625168852217e-05, + "loss": 0.9505, + "step": 3325 + }, + { + "epoch": 1.84, + "learning_rate": 1.6254762505262338e-05, + "loss": 0.9622, + "step": 3330 + }, + { + "epoch": 1.84, + "learning_rate": 1.618697318687241e-05, + "loss": 0.9204, + "step": 3335 + }, + { + "epoch": 1.85, + "learning_rate": 1.6119257782222895e-05, + "loss": 0.9504, + "step": 3340 + }, + { + "epoch": 1.85, + "learning_rate": 1.6051616859234285e-05, + "loss": 0.9384, + "step": 3345 + }, + { + "epoch": 1.85, + "learning_rate": 1.5984050985202474e-05, + "loss": 0.9374, + "step": 3350 + }, + { + "epoch": 1.86, + "learning_rate": 1.591656072679387e-05, + "loss": 0.9937, + "step": 3355 + }, + { + "epoch": 1.86, + "learning_rate": 1.5849146650040737e-05, + "loss": 0.9587, + "step": 3360 + }, + { + "epoch": 1.86, + "learning_rate": 1.5781809320336412e-05, + "loss": 0.9312, + "step": 3365 + }, + { + "epoch": 1.86, + "learning_rate": 1.5714549302430536e-05, + "loss": 0.9343, + "step": 3370 + }, + { + "epoch": 1.87, + "learning_rate": 1.5647367160424393e-05, + "loss": 0.9419, + "step": 3375 + }, + { + "epoch": 1.87, + "learning_rate": 1.558026345776608e-05, + "loss": 0.9371, + "step": 3380 + }, + { + "epoch": 1.87, + "learning_rate": 1.551323875724587e-05, + "loss": 0.913, + "step": 3385 + }, + { + "epoch": 1.87, + "learning_rate": 1.5446293620991437e-05, + "loss": 0.9195, + "step": 3390 + }, + { + "epoch": 1.88, + "learning_rate": 1.5379428610463174e-05, + "loss": 0.9164, + "step": 3395 + }, + { + "epoch": 1.88, + "learning_rate": 1.531264428644945e-05, + "loss": 0.9256, + "step": 3400 + }, + { + "epoch": 1.88, + "learning_rate": 1.5245941209061953e-05, + "loss": 0.9658, + "step": 3405 + }, + { + "epoch": 1.89, + "learning_rate": 1.517931993773094e-05, + "loss": 0.9397, + "step": 3410 + }, + { + "epoch": 1.89, + "learning_rate": 1.5112781031200569e-05, + "loss": 0.915, + "step": 3415 + }, + { + "epoch": 1.89, + "learning_rate": 1.5046325047524251e-05, + "loss": 0.9116, + "step": 3420 + }, + { + "epoch": 1.89, + "learning_rate": 1.4979952544059888e-05, + "loss": 0.9192, + "step": 3425 + }, + { + "epoch": 1.9, + "learning_rate": 1.4913664077465289e-05, + "loss": 0.924, + "step": 3430 + }, + { + "epoch": 1.9, + "learning_rate": 1.4847460203693408e-05, + "loss": 0.9579, + "step": 3435 + }, + { + "epoch": 1.9, + "learning_rate": 1.4781341477987776e-05, + "loss": 0.9816, + "step": 3440 + }, + { + "epoch": 1.9, + "learning_rate": 1.4715308454877758e-05, + "loss": 0.9412, + "step": 3445 + }, + { + "epoch": 1.91, + "learning_rate": 1.4649361688173979e-05, + "loss": 0.9413, + "step": 3450 + }, + { + "epoch": 1.91, + "learning_rate": 1.458350173096361e-05, + "loss": 0.9425, + "step": 3455 + }, + { + "epoch": 1.91, + "learning_rate": 1.4517729135605795e-05, + "loss": 0.9132, + "step": 3460 + }, + { + "epoch": 1.92, + "learning_rate": 1.4452044453726942e-05, + "loss": 0.9455, + "step": 3465 + }, + { + "epoch": 1.92, + "learning_rate": 1.4386448236216174e-05, + "loss": 0.9543, + "step": 3470 + }, + { + "epoch": 1.92, + "learning_rate": 1.4320941033220667e-05, + "loss": 0.9118, + "step": 3475 + }, + { + "epoch": 1.92, + "learning_rate": 1.4255523394141041e-05, + "loss": 0.9425, + "step": 3480 + }, + { + "epoch": 1.93, + "learning_rate": 1.4190195867626749e-05, + "loss": 0.8958, + "step": 3485 + }, + { + "epoch": 1.93, + "learning_rate": 1.4124959001571497e-05, + "loss": 0.944, + "step": 3490 + }, + { + "epoch": 1.93, + "learning_rate": 1.4059813343108616e-05, + "loss": 0.9611, + "step": 3495 + }, + { + "epoch": 1.94, + "learning_rate": 1.3994759438606501e-05, + "loss": 0.9448, + "step": 3500 + }, + { + "epoch": 1.94, + "learning_rate": 1.3929797833664013e-05, + "loss": 0.9659, + "step": 3505 + }, + { + "epoch": 1.94, + "learning_rate": 1.3864929073105922e-05, + "loss": 0.9178, + "step": 3510 + }, + { + "epoch": 1.94, + "learning_rate": 1.3800153700978282e-05, + "loss": 0.8965, + "step": 3515 + }, + { + "epoch": 1.95, + "learning_rate": 1.373547226054398e-05, + "loss": 0.9198, + "step": 3520 + }, + { + "epoch": 1.95, + "learning_rate": 1.367088529427803e-05, + "loss": 0.9398, + "step": 3525 + }, + { + "epoch": 1.95, + "learning_rate": 1.3606393343863182e-05, + "loss": 0.9423, + "step": 3530 + }, + { + "epoch": 1.95, + "learning_rate": 1.3541996950185227e-05, + "loss": 0.9592, + "step": 3535 + }, + { + "epoch": 1.96, + "learning_rate": 1.3477696653328598e-05, + "loss": 0.9489, + "step": 3540 + }, + { + "epoch": 1.96, + "learning_rate": 1.3413492992571713e-05, + "loss": 0.963, + "step": 3545 + }, + { + "epoch": 1.96, + "learning_rate": 1.3349386506382586e-05, + "loss": 0.9449, + "step": 3550 + }, + { + "epoch": 1.97, + "learning_rate": 1.3285377732414172e-05, + "loss": 0.9043, + "step": 3555 + }, + { + "epoch": 1.97, + "learning_rate": 1.3221467207499972e-05, + "loss": 0.9362, + "step": 3560 + }, + { + "epoch": 1.97, + "learning_rate": 1.3157655467649463e-05, + "loss": 0.945, + "step": 3565 + }, + { + "epoch": 1.97, + "learning_rate": 1.3093943048043634e-05, + "loss": 0.8693, + "step": 3570 + }, + { + "epoch": 1.98, + "learning_rate": 1.3030330483030479e-05, + "loss": 0.9659, + "step": 3575 + }, + { + "epoch": 1.98, + "learning_rate": 1.2966818306120535e-05, + "loss": 0.9036, + "step": 3580 + }, + { + "epoch": 1.98, + "learning_rate": 1.2903407049982386e-05, + "loss": 0.9282, + "step": 3585 + }, + { + "epoch": 1.99, + "learning_rate": 1.2840097246438215e-05, + "loss": 0.8747, + "step": 3590 + }, + { + "epoch": 1.99, + "learning_rate": 1.277688942645934e-05, + "loss": 0.936, + "step": 3595 + }, + { + "epoch": 1.99, + "learning_rate": 1.2713784120161725e-05, + "loss": 0.8945, + "step": 3600 + }, + { + "epoch": 1.99, + "learning_rate": 1.2650781856801598e-05, + "loss": 0.9273, + "step": 3605 + }, + { + "epoch": 2.0, + "learning_rate": 1.258788316477097e-05, + "loss": 0.9632, + "step": 3610 + }, + { + "epoch": 2.0, + "learning_rate": 1.2525088571593202e-05, + "loss": 0.8641, + "step": 3615 + }, + { + "epoch": 2.0, + "learning_rate": 1.2462398603918607e-05, + "loss": 0.9314, + "step": 3620 + }, + { + "epoch": 2.0, + "learning_rate": 1.2399813787520006e-05, + "loss": 0.9169, + "step": 3625 + }, + { + "epoch": 2.01, + "learning_rate": 1.2337334647288334e-05, + "loss": 0.9089, + "step": 3630 + }, + { + "epoch": 2.01, + "learning_rate": 1.2274961707228228e-05, + "loss": 0.9039, + "step": 3635 + }, + { + "epoch": 2.01, + "learning_rate": 1.2212695490453646e-05, + "loss": 0.9378, + "step": 3640 + }, + { + "epoch": 2.02, + "learning_rate": 1.2150536519183475e-05, + "loss": 0.9297, + "step": 3645 + }, + { + "epoch": 2.02, + "learning_rate": 1.2088485314737108e-05, + "loss": 0.9488, + "step": 3650 + }, + { + "epoch": 2.02, + "learning_rate": 1.2026542397530186e-05, + "loss": 0.9625, + "step": 3655 + }, + { + "epoch": 2.02, + "learning_rate": 1.1964708287070073e-05, + "loss": 0.8874, + "step": 3660 + }, + { + "epoch": 2.03, + "learning_rate": 1.1902983501951666e-05, + "loss": 0.9224, + "step": 3665 + }, + { + "epoch": 2.03, + "learning_rate": 1.1841368559852892e-05, + "loss": 0.9442, + "step": 3670 + }, + { + "epoch": 2.03, + "learning_rate": 1.17798639775305e-05, + "loss": 0.8688, + "step": 3675 + }, + { + "epoch": 2.03, + "learning_rate": 1.1718470270815608e-05, + "loss": 0.8912, + "step": 3680 + }, + { + "epoch": 2.04, + "learning_rate": 1.1657187954609496e-05, + "loss": 0.9131, + "step": 3685 + }, + { + "epoch": 2.04, + "learning_rate": 1.1596017542879168e-05, + "loss": 0.9827, + "step": 3690 + }, + { + "epoch": 2.04, + "learning_rate": 1.1534959548653132e-05, + "loss": 0.8588, + "step": 3695 + }, + { + "epoch": 2.05, + "learning_rate": 1.147401448401706e-05, + "loss": 0.9359, + "step": 3700 + }, + { + "epoch": 2.05, + "learning_rate": 1.1413182860109491e-05, + "loss": 0.9375, + "step": 3705 + }, + { + "epoch": 2.05, + "learning_rate": 1.1352465187117562e-05, + "loss": 0.9242, + "step": 3710 + }, + { + "epoch": 2.05, + "learning_rate": 1.1291861974272703e-05, + "loss": 0.9132, + "step": 3715 + }, + { + "epoch": 2.06, + "learning_rate": 1.1231373729846393e-05, + "loss": 0.9142, + "step": 3720 + }, + { + "epoch": 2.06, + "learning_rate": 1.1171000961145883e-05, + "loss": 0.9132, + "step": 3725 + }, + { + "epoch": 2.06, + "learning_rate": 1.1110744174509952e-05, + "loss": 0.9603, + "step": 3730 + }, + { + "epoch": 2.07, + "learning_rate": 1.1050603875304622e-05, + "loss": 0.9442, + "step": 3735 + }, + { + "epoch": 2.07, + "learning_rate": 1.0990580567918979e-05, + "loss": 0.9392, + "step": 3740 + }, + { + "epoch": 2.07, + "learning_rate": 1.0930674755760908e-05, + "loss": 0.9203, + "step": 3745 + }, + { + "epoch": 2.07, + "learning_rate": 1.0870886941252872e-05, + "loss": 0.9468, + "step": 3750 + }, + { + "epoch": 2.08, + "learning_rate": 1.0811217625827705e-05, + "loss": 0.9605, + "step": 3755 + }, + { + "epoch": 2.08, + "learning_rate": 1.0751667309924399e-05, + "loss": 0.9166, + "step": 3760 + }, + { + "epoch": 2.08, + "learning_rate": 1.0692236492983918e-05, + "loss": 0.939, + "step": 3765 + }, + { + "epoch": 2.08, + "learning_rate": 1.0632925673445001e-05, + "loss": 0.9409, + "step": 3770 + }, + { + "epoch": 2.09, + "learning_rate": 1.0573735348739987e-05, + "loss": 0.9523, + "step": 3775 + }, + { + "epoch": 2.09, + "learning_rate": 1.0514666015290645e-05, + "loss": 0.8867, + "step": 3780 + }, + { + "epoch": 2.09, + "learning_rate": 1.0455718168503967e-05, + "loss": 0.9816, + "step": 3785 + }, + { + "epoch": 2.1, + "learning_rate": 1.0396892302768127e-05, + "loss": 0.9465, + "step": 3790 + }, + { + "epoch": 2.1, + "learning_rate": 1.033818891144817e-05, + "loss": 0.9352, + "step": 3795 + }, + { + "epoch": 2.1, + "learning_rate": 1.0279608486882054e-05, + "loss": 0.8877, + "step": 3800 + }, + { + "epoch": 2.1, + "learning_rate": 1.0221151520376343e-05, + "loss": 0.9098, + "step": 3805 + }, + { + "epoch": 2.11, + "learning_rate": 1.0162818502202251e-05, + "loss": 0.8687, + "step": 3810 + }, + { + "epoch": 2.11, + "learning_rate": 1.0104609921591387e-05, + "loss": 0.9256, + "step": 3815 + }, + { + "epoch": 2.11, + "learning_rate": 1.0046526266731782e-05, + "loss": 0.92, + "step": 3820 + }, + { + "epoch": 2.12, + "learning_rate": 9.988568024763673e-06, + "loss": 0.8813, + "step": 3825 + }, + { + "epoch": 2.12, + "learning_rate": 9.930735681775505e-06, + "loss": 0.891, + "step": 3830 + }, + { + "epoch": 2.12, + "learning_rate": 9.87302972279982e-06, + "loss": 0.9479, + "step": 3835 + }, + { + "epoch": 2.12, + "learning_rate": 9.815450631809191e-06, + "loss": 0.8963, + "step": 3840 + }, + { + "epoch": 2.13, + "learning_rate": 9.757998891712172e-06, + "loss": 0.9178, + "step": 3845 + }, + { + "epoch": 2.13, + "learning_rate": 9.700674984349228e-06, + "loss": 0.9072, + "step": 3850 + }, + { + "epoch": 2.13, + "learning_rate": 9.643479390488717e-06, + "loss": 0.8879, + "step": 3855 + }, + { + "epoch": 2.13, + "learning_rate": 9.586412589822846e-06, + "loss": 0.9484, + "step": 3860 + }, + { + "epoch": 2.14, + "learning_rate": 9.529475060963649e-06, + "loss": 0.9173, + "step": 3865 + }, + { + "epoch": 2.14, + "learning_rate": 9.472667281438982e-06, + "loss": 0.9318, + "step": 3870 + }, + { + "epoch": 2.14, + "learning_rate": 9.415989727688484e-06, + "loss": 0.9242, + "step": 3875 + }, + { + "epoch": 2.15, + "learning_rate": 9.359442875059631e-06, + "loss": 0.9314, + "step": 3880 + }, + { + "epoch": 2.15, + "learning_rate": 9.303027197803726e-06, + "loss": 0.9227, + "step": 3885 + }, + { + "epoch": 2.15, + "learning_rate": 9.246743169071906e-06, + "loss": 0.9253, + "step": 3890 + }, + { + "epoch": 2.15, + "learning_rate": 9.190591260911201e-06, + "loss": 0.9457, + "step": 3895 + }, + { + "epoch": 2.16, + "learning_rate": 9.134571944260554e-06, + "loss": 0.9214, + "step": 3900 + }, + { + "epoch": 2.16, + "learning_rate": 9.078685688946884e-06, + "loss": 0.9534, + "step": 3905 + }, + { + "epoch": 2.16, + "learning_rate": 9.022932963681141e-06, + "loss": 0.9236, + "step": 3910 + }, + { + "epoch": 2.16, + "learning_rate": 8.967314236054384e-06, + "loss": 0.8905, + "step": 3915 + }, + { + "epoch": 2.17, + "learning_rate": 8.911829972533817e-06, + "loss": 0.9319, + "step": 3920 + }, + { + "epoch": 2.17, + "learning_rate": 8.856480638458966e-06, + "loss": 0.8989, + "step": 3925 + }, + { + "epoch": 2.17, + "learning_rate": 8.80126669803766e-06, + "loss": 0.9234, + "step": 3930 + }, + { + "epoch": 2.18, + "learning_rate": 8.746188614342263e-06, + "loss": 0.9619, + "step": 3935 + }, + { + "epoch": 2.18, + "learning_rate": 8.691246849305653e-06, + "loss": 0.9537, + "step": 3940 + }, + { + "epoch": 2.18, + "learning_rate": 8.636441863717499e-06, + "loss": 0.9809, + "step": 3945 + }, + { + "epoch": 2.18, + "learning_rate": 8.581774117220238e-06, + "loss": 0.9219, + "step": 3950 + }, + { + "epoch": 2.19, + "learning_rate": 8.52724406830538e-06, + "loss": 0.9502, + "step": 3955 + }, + { + "epoch": 2.19, + "learning_rate": 8.472852174309514e-06, + "loss": 0.9277, + "step": 3960 + }, + { + "epoch": 2.19, + "learning_rate": 8.418598891410581e-06, + "loss": 0.8978, + "step": 3965 + }, + { + "epoch": 2.2, + "learning_rate": 8.364484674624e-06, + "loss": 0.9026, + "step": 3970 + }, + { + "epoch": 2.2, + "learning_rate": 8.31050997779885e-06, + "loss": 0.9658, + "step": 3975 + }, + { + "epoch": 2.2, + "learning_rate": 8.25667525361409e-06, + "loss": 0.9288, + "step": 3980 + }, + { + "epoch": 2.2, + "learning_rate": 8.202980953574735e-06, + "loss": 0.9009, + "step": 3985 + }, + { + "epoch": 2.21, + "learning_rate": 8.14942752800808e-06, + "loss": 0.934, + "step": 3990 + }, + { + "epoch": 2.21, + "learning_rate": 8.09601542605993e-06, + "loss": 0.9261, + "step": 3995 + }, + { + "epoch": 2.21, + "learning_rate": 8.042745095690826e-06, + "loss": 0.9086, + "step": 4000 + }, + { + "epoch": 2.21, + "learning_rate": 7.989616983672288e-06, + "loss": 0.9397, + "step": 4005 + }, + { + "epoch": 2.22, + "learning_rate": 7.936631535583055e-06, + "loss": 0.8661, + "step": 4010 + }, + { + "epoch": 2.22, + "learning_rate": 7.88378919580538e-06, + "loss": 0.9433, + "step": 4015 + }, + { + "epoch": 2.22, + "learning_rate": 7.83109040752128e-06, + "loss": 0.9473, + "step": 4020 + }, + { + "epoch": 2.23, + "learning_rate": 7.778535612708824e-06, + "loss": 0.9177, + "step": 4025 + }, + { + "epoch": 2.23, + "learning_rate": 7.726125252138417e-06, + "loss": 0.8932, + "step": 4030 + }, + { + "epoch": 2.23, + "learning_rate": 7.673859765369126e-06, + "loss": 0.9187, + "step": 4035 + }, + { + "epoch": 2.23, + "learning_rate": 7.62173959074497e-06, + "loss": 0.9009, + "step": 4040 + }, + { + "epoch": 2.24, + "learning_rate": 7.569765165391257e-06, + "loss": 0.9348, + "step": 4045 + }, + { + "epoch": 2.24, + "learning_rate": 7.517936925210917e-06, + "loss": 0.8827, + "step": 4050 + }, + { + "epoch": 2.24, + "learning_rate": 7.466255304880834e-06, + "loss": 0.9668, + "step": 4055 + }, + { + "epoch": 2.24, + "learning_rate": 7.4147207378482295e-06, + "loss": 0.934, + "step": 4060 + }, + { + "epoch": 2.25, + "learning_rate": 7.36333365632697e-06, + "loss": 0.9251, + "step": 4065 + }, + { + "epoch": 2.25, + "learning_rate": 7.312094491294033e-06, + "loss": 0.9225, + "step": 4070 + }, + { + "epoch": 2.25, + "learning_rate": 7.261003672485783e-06, + "loss": 0.9198, + "step": 4075 + }, + { + "epoch": 2.26, + "learning_rate": 7.210061628394477e-06, + "loss": 0.9023, + "step": 4080 + }, + { + "epoch": 2.26, + "learning_rate": 7.159268786264564e-06, + "loss": 0.9378, + "step": 4085 + }, + { + "epoch": 2.26, + "learning_rate": 7.108625572089209e-06, + "loss": 0.9148, + "step": 4090 + }, + { + "epoch": 2.26, + "learning_rate": 7.05813241060661e-06, + "loss": 0.9308, + "step": 4095 + }, + { + "epoch": 2.27, + "learning_rate": 7.007789725296557e-06, + "loss": 0.8979, + "step": 4100 + } + ], + "max_steps": 5424, + "num_train_epochs": 3, + "total_flos": 1.3323376572707635e+18, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-4100/training_args.bin b/checkpoint-4100/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..77433459daca6603950afc463ff989e817f81ac3 --- /dev/null +++ b/checkpoint-4100/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0f6a8853599a52b1f15984d75ce4b6434c335fdc67d4509d0b2ca0d8c613590a +size 3362 diff --git a/checkpoint-4200/README.md b/checkpoint-4200/README.md new file mode 100644 index 0000000000000000000000000000000000000000..53c0fe6b436905ad7a2c060e98fe1ebdeac1a2fe --- /dev/null +++ b/checkpoint-4200/README.md @@ -0,0 +1,15 @@ +--- +library_name: peft +--- +## Training procedure + + +The following `bitsandbytes` quantization config was used during training: +- load_in_8bit: True +- llm_int8_threshold: 6.0 +- llm_int8_skip_modules: None +- llm_int8_enable_fp32_cpu_offload: False +### Framework versions + + +- PEFT 0.5.0 diff --git a/checkpoint-4200/adapter_config.json b/checkpoint-4200/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..514ae99ac7471cc26016f7575e44dd8304f29b07 --- /dev/null +++ b/checkpoint-4200/adapter_config.json @@ -0,0 +1,21 @@ +{ + "auto_mapping": null, + "base_model_name_or_path": "meta-llama/Llama-2-7b-chat-hf", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "q_proj", + "v_proj" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-4200/adapter_model.bin b/checkpoint-4200/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..6528e21e551b9d89d132def214e105ceb5fead96 --- /dev/null +++ b/checkpoint-4200/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1a00767126760d656d163c94962e0dd5715fc19438640a4fccdef3600df6087b +size 16821197 diff --git a/checkpoint-4200/finetuning_args.json b/checkpoint-4200/finetuning_args.json new file mode 100644 index 0000000000000000000000000000000000000000..d01efc206b59c6f88548e8f3940579f2ed2af33b --- /dev/null +++ b/checkpoint-4200/finetuning_args.json @@ -0,0 +1,16 @@ +{ + "dpo_beta": 0.1, + "finetuning_type": "lora", + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "lora_rank": 8, + "lora_target": [ + "q_proj", + "v_proj" + ], + "name_module_trainable": "mlp", + "num_hidden_layers": 32, + "num_layer_trainable": 3, + "ppo_score_norm": false, + "resume_lora_training": true +} diff --git a/checkpoint-4200/optimizer.pt b/checkpoint-4200/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..6bff1a2ece2ee45a8e8d4eddb2e3c4408996a21e --- /dev/null +++ b/checkpoint-4200/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1b3413ef04f0c360d114af21eb73dd5403bef94073be19666f97232fb0e39bb7 +size 33629893 diff --git a/checkpoint-4200/rng_state.pth b/checkpoint-4200/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..13b6f8fbd52a189c8f0e2563894ef7dbdd652154 --- /dev/null +++ b/checkpoint-4200/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:71c2fd069dca971c6cad7681c507ca2fffa64926c82020bcdbea922ea4bdead6 +size 14575 diff --git a/checkpoint-4200/scaler.pt b/checkpoint-4200/scaler.pt new file mode 100644 index 0000000000000000000000000000000000000000..ca87970d5a1acc078a73c1230abc4bba43e53c62 --- /dev/null +++ b/checkpoint-4200/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0e6739ad4d4ea0fc93096ea46dbf6a78da95cc9a5399abd824a3f8480efbdec6 +size 557 diff --git a/checkpoint-4200/scheduler.pt b/checkpoint-4200/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..e220c3b696867a7c17bb01643ae7bd9cbeeb9bd8 --- /dev/null +++ b/checkpoint-4200/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3609f5a9e5c66e76bb1fad3c0086bff1754b24bb56cc8b44299273b1491874e1 +size 627 diff --git a/checkpoint-4200/trainer_state.json b/checkpoint-4200/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..87cf1783890c5b9fe4757ac9f1b11b5cf70f7402 --- /dev/null +++ b/checkpoint-4200/trainer_state.json @@ -0,0 +1,5056 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 2.3223666021564835, + "global_step": 4200, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.9999895164082156e-05, + "loss": 1.6349, + "step": 5 + }, + { + "epoch": 0.01, + "learning_rate": 4.999958065720787e-05, + "loss": 1.6199, + "step": 10 + }, + { + "epoch": 0.01, + "learning_rate": 4.999905648201487e-05, + "loss": 1.4834, + "step": 15 + }, + { + "epoch": 0.01, + "learning_rate": 4.999832264289934e-05, + "loss": 1.3882, + "step": 20 + }, + { + "epoch": 0.01, + "learning_rate": 4.999737914601591e-05, + "loss": 1.3679, + "step": 25 + }, + { + "epoch": 0.02, + "learning_rate": 4.999622599927756e-05, + "loss": 1.2396, + "step": 30 + }, + { + "epoch": 0.02, + "learning_rate": 4.999486321235559e-05, + "loss": 1.321, + "step": 35 + }, + { + "epoch": 0.02, + "learning_rate": 4.9993290796679516e-05, + "loss": 1.2874, + "step": 40 + }, + { + "epoch": 0.02, + "learning_rate": 4.999150876543699e-05, + "loss": 1.2607, + "step": 45 + }, + { + "epoch": 0.03, + "learning_rate": 4.9989517133573694e-05, + "loss": 1.2454, + "step": 50 + }, + { + "epoch": 0.03, + "learning_rate": 4.9987315917793174e-05, + "loss": 1.2799, + "step": 55 + }, + { + "epoch": 0.03, + "learning_rate": 4.998490513655676e-05, + "loss": 1.2575, + "step": 60 + }, + { + "epoch": 0.04, + "learning_rate": 4.998228481008337e-05, + "loss": 1.2404, + "step": 65 + }, + { + "epoch": 0.04, + "learning_rate": 4.997945496034934e-05, + "loss": 1.2219, + "step": 70 + }, + { + "epoch": 0.04, + "learning_rate": 4.9976415611088267e-05, + "loss": 1.2241, + "step": 75 + }, + { + "epoch": 0.04, + "learning_rate": 4.997316678779079e-05, + "loss": 1.1716, + "step": 80 + }, + { + "epoch": 0.05, + "learning_rate": 4.996970851770438e-05, + "loss": 1.1883, + "step": 85 + }, + { + "epoch": 0.05, + "learning_rate": 4.9966040829833115e-05, + "loss": 1.205, + "step": 90 + }, + { + "epoch": 0.05, + "learning_rate": 4.9962163754937426e-05, + "loss": 1.1246, + "step": 95 + }, + { + "epoch": 0.06, + "learning_rate": 4.995807732553384e-05, + "loss": 1.1636, + "step": 100 + }, + { + "epoch": 0.06, + "learning_rate": 4.9953781575894723e-05, + "loss": 1.158, + "step": 105 + }, + { + "epoch": 0.06, + "learning_rate": 4.9949276542048e-05, + "loss": 1.1477, + "step": 110 + }, + { + "epoch": 0.06, + "learning_rate": 4.9944562261776805e-05, + "loss": 1.1678, + "step": 115 + }, + { + "epoch": 0.07, + "learning_rate": 4.9939638774619216e-05, + "loss": 1.1501, + "step": 120 + }, + { + "epoch": 0.07, + "learning_rate": 4.99345061218679e-05, + "loss": 1.1955, + "step": 125 + }, + { + "epoch": 0.07, + "learning_rate": 4.9929164346569756e-05, + "loss": 1.1724, + "step": 130 + }, + { + "epoch": 0.07, + "learning_rate": 4.9923613493525576e-05, + "loss": 1.177, + "step": 135 + }, + { + "epoch": 0.08, + "learning_rate": 4.991785360928968e-05, + "loss": 1.1418, + "step": 140 + }, + { + "epoch": 0.08, + "learning_rate": 4.991188474216947e-05, + "loss": 1.1898, + "step": 145 + }, + { + "epoch": 0.08, + "learning_rate": 4.9905706942225094e-05, + "loss": 1.1479, + "step": 150 + }, + { + "epoch": 0.09, + "learning_rate": 4.9899320261268966e-05, + "loss": 1.1356, + "step": 155 + }, + { + "epoch": 0.09, + "learning_rate": 4.989272475286537e-05, + "loss": 1.1397, + "step": 160 + }, + { + "epoch": 0.09, + "learning_rate": 4.9885920472330004e-05, + "loss": 1.1215, + "step": 165 + }, + { + "epoch": 0.09, + "learning_rate": 4.9878907476729516e-05, + "loss": 1.167, + "step": 170 + }, + { + "epoch": 0.1, + "learning_rate": 4.9871685824881e-05, + "loss": 1.1219, + "step": 175 + }, + { + "epoch": 0.1, + "learning_rate": 4.9864255577351534e-05, + "loss": 1.0835, + "step": 180 + }, + { + "epoch": 0.1, + "learning_rate": 4.985661679645769e-05, + "loss": 1.0721, + "step": 185 + }, + { + "epoch": 0.11, + "learning_rate": 4.9848769546264915e-05, + "loss": 1.0692, + "step": 190 + }, + { + "epoch": 0.11, + "learning_rate": 4.9840713892587146e-05, + "loss": 1.0488, + "step": 195 + }, + { + "epoch": 0.11, + "learning_rate": 4.983244990298609e-05, + "loss": 1.1285, + "step": 200 + }, + { + "epoch": 0.11, + "learning_rate": 4.982397764677081e-05, + "loss": 1.0832, + "step": 205 + }, + { + "epoch": 0.12, + "learning_rate": 4.981529719499704e-05, + "loss": 1.0652, + "step": 210 + }, + { + "epoch": 0.12, + "learning_rate": 4.980640862046663e-05, + "loss": 1.1043, + "step": 215 + }, + { + "epoch": 0.12, + "learning_rate": 4.979731199772693e-05, + "loss": 1.112, + "step": 220 + }, + { + "epoch": 0.12, + "learning_rate": 4.9788007403070146e-05, + "loss": 1.1029, + "step": 225 + }, + { + "epoch": 0.13, + "learning_rate": 4.977849491453277e-05, + "loss": 1.0869, + "step": 230 + }, + { + "epoch": 0.13, + "learning_rate": 4.976877461189481e-05, + "loss": 1.0843, + "step": 235 + }, + { + "epoch": 0.13, + "learning_rate": 4.975884657667922e-05, + "loss": 1.0789, + "step": 240 + }, + { + "epoch": 0.14, + "learning_rate": 4.974871089215118e-05, + "loss": 1.0449, + "step": 245 + }, + { + "epoch": 0.14, + "learning_rate": 4.9738367643317405e-05, + "loss": 1.1053, + "step": 250 + }, + { + "epoch": 0.14, + "learning_rate": 4.9727816916925395e-05, + "loss": 1.0651, + "step": 255 + }, + { + "epoch": 0.14, + "learning_rate": 4.971705880146276e-05, + "loss": 1.0828, + "step": 260 + }, + { + "epoch": 0.15, + "learning_rate": 4.970609338715646e-05, + "loss": 1.0932, + "step": 265 + }, + { + "epoch": 0.15, + "learning_rate": 4.969492076597203e-05, + "loss": 1.0648, + "step": 270 + }, + { + "epoch": 0.15, + "learning_rate": 4.968354103161283e-05, + "loss": 1.0948, + "step": 275 + }, + { + "epoch": 0.15, + "learning_rate": 4.967195427951926e-05, + "loss": 1.0721, + "step": 280 + }, + { + "epoch": 0.16, + "learning_rate": 4.9660160606867936e-05, + "loss": 1.124, + "step": 285 + }, + { + "epoch": 0.16, + "learning_rate": 4.9648160112570896e-05, + "loss": 1.0963, + "step": 290 + }, + { + "epoch": 0.16, + "learning_rate": 4.9635952897274773e-05, + "loss": 1.1078, + "step": 295 + }, + { + "epoch": 0.17, + "learning_rate": 4.9623539063359925e-05, + "loss": 1.1059, + "step": 300 + }, + { + "epoch": 0.17, + "learning_rate": 4.961091871493962e-05, + "loss": 1.1032, + "step": 305 + }, + { + "epoch": 0.17, + "learning_rate": 4.959809195785912e-05, + "loss": 1.0595, + "step": 310 + }, + { + "epoch": 0.17, + "learning_rate": 4.958505889969481e-05, + "loss": 1.1096, + "step": 315 + }, + { + "epoch": 0.18, + "learning_rate": 4.957181964975329e-05, + "loss": 1.0589, + "step": 320 + }, + { + "epoch": 0.18, + "learning_rate": 4.955837431907049e-05, + "loss": 1.0608, + "step": 325 + }, + { + "epoch": 0.18, + "learning_rate": 4.954472302041069e-05, + "loss": 1.0819, + "step": 330 + }, + { + "epoch": 0.19, + "learning_rate": 4.9530865868265605e-05, + "loss": 1.0759, + "step": 335 + }, + { + "epoch": 0.19, + "learning_rate": 4.951680297885342e-05, + "loss": 1.0515, + "step": 340 + }, + { + "epoch": 0.19, + "learning_rate": 4.950253447011779e-05, + "loss": 1.0371, + "step": 345 + }, + { + "epoch": 0.19, + "learning_rate": 4.948806046172691e-05, + "loss": 1.0619, + "step": 350 + }, + { + "epoch": 0.2, + "learning_rate": 4.947338107507245e-05, + "loss": 1.0757, + "step": 355 + }, + { + "epoch": 0.2, + "learning_rate": 4.945849643326857e-05, + "loss": 1.0686, + "step": 360 + }, + { + "epoch": 0.2, + "learning_rate": 4.9443406661150874e-05, + "loss": 1.0809, + "step": 365 + }, + { + "epoch": 0.2, + "learning_rate": 4.942811188527537e-05, + "loss": 1.0704, + "step": 370 + }, + { + "epoch": 0.21, + "learning_rate": 4.941261223391742e-05, + "loss": 1.0655, + "step": 375 + }, + { + "epoch": 0.21, + "learning_rate": 4.939690783707063e-05, + "loss": 1.1182, + "step": 380 + }, + { + "epoch": 0.21, + "learning_rate": 4.938099882644578e-05, + "loss": 1.081, + "step": 385 + }, + { + "epoch": 0.22, + "learning_rate": 4.9364885335469734e-05, + "loss": 1.0792, + "step": 390 + }, + { + "epoch": 0.22, + "learning_rate": 4.93485674992843e-05, + "loss": 1.0244, + "step": 395 + }, + { + "epoch": 0.22, + "learning_rate": 4.933204545474511e-05, + "loss": 1.0531, + "step": 400 + }, + { + "epoch": 0.22, + "learning_rate": 4.9315319340420465e-05, + "loss": 1.048, + "step": 405 + }, + { + "epoch": 0.23, + "learning_rate": 4.929838929659015e-05, + "loss": 1.04, + "step": 410 + }, + { + "epoch": 0.23, + "learning_rate": 4.9281255465244314e-05, + "loss": 1.0686, + "step": 415 + }, + { + "epoch": 0.23, + "learning_rate": 4.926391799008223e-05, + "loss": 1.0933, + "step": 420 + }, + { + "epoch": 0.24, + "learning_rate": 4.924637701651111e-05, + "loss": 1.0313, + "step": 425 + }, + { + "epoch": 0.24, + "learning_rate": 4.9228632691644874e-05, + "loss": 1.0811, + "step": 430 + }, + { + "epoch": 0.24, + "learning_rate": 4.921068516430293e-05, + "loss": 1.0617, + "step": 435 + }, + { + "epoch": 0.24, + "learning_rate": 4.919253458500892e-05, + "loss": 1.0482, + "step": 440 + }, + { + "epoch": 0.25, + "learning_rate": 4.9174181105989445e-05, + "loss": 1.0681, + "step": 445 + }, + { + "epoch": 0.25, + "learning_rate": 4.9155624881172834e-05, + "loss": 1.0476, + "step": 450 + }, + { + "epoch": 0.25, + "learning_rate": 4.913686606618777e-05, + "loss": 1.0463, + "step": 455 + }, + { + "epoch": 0.25, + "learning_rate": 4.911790481836208e-05, + "loss": 1.0213, + "step": 460 + }, + { + "epoch": 0.26, + "learning_rate": 4.909874129672133e-05, + "loss": 0.9855, + "step": 465 + }, + { + "epoch": 0.26, + "learning_rate": 4.907937566198757e-05, + "loss": 1.045, + "step": 470 + }, + { + "epoch": 0.26, + "learning_rate": 4.9059808076577914e-05, + "loss": 1.0442, + "step": 475 + }, + { + "epoch": 0.27, + "learning_rate": 4.904003870460323e-05, + "loss": 1.0354, + "step": 480 + }, + { + "epoch": 0.27, + "learning_rate": 4.9020067711866735e-05, + "loss": 1.0331, + "step": 485 + }, + { + "epoch": 0.27, + "learning_rate": 4.899989526586261e-05, + "loss": 1.0585, + "step": 490 + }, + { + "epoch": 0.27, + "learning_rate": 4.8979521535774636e-05, + "loss": 1.0223, + "step": 495 + }, + { + "epoch": 0.28, + "learning_rate": 4.895894669247468e-05, + "loss": 1.0118, + "step": 500 + }, + { + "epoch": 0.28, + "learning_rate": 4.8938170908521356e-05, + "loss": 1.0508, + "step": 505 + }, + { + "epoch": 0.28, + "learning_rate": 4.8917194358158534e-05, + "loss": 1.0656, + "step": 510 + }, + { + "epoch": 0.28, + "learning_rate": 4.8896017217313886e-05, + "loss": 1.0655, + "step": 515 + }, + { + "epoch": 0.29, + "learning_rate": 4.887463966359741e-05, + "loss": 1.0833, + "step": 520 + }, + { + "epoch": 0.29, + "learning_rate": 4.8853061876299956e-05, + "loss": 1.068, + "step": 525 + }, + { + "epoch": 0.29, + "learning_rate": 4.8831284036391684e-05, + "loss": 1.0487, + "step": 530 + }, + { + "epoch": 0.3, + "learning_rate": 4.880930632652058e-05, + "loss": 1.059, + "step": 535 + }, + { + "epoch": 0.3, + "learning_rate": 4.878712893101092e-05, + "loss": 1.0408, + "step": 540 + }, + { + "epoch": 0.3, + "learning_rate": 4.876475203586171e-05, + "loss": 0.9976, + "step": 545 + }, + { + "epoch": 0.3, + "learning_rate": 4.874217582874514e-05, + "loss": 1.0358, + "step": 550 + }, + { + "epoch": 0.31, + "learning_rate": 4.8719400499005e-05, + "loss": 1.0073, + "step": 555 + }, + { + "epoch": 0.31, + "learning_rate": 4.869642623765509e-05, + "loss": 1.0742, + "step": 560 + }, + { + "epoch": 0.31, + "learning_rate": 4.867325323737765e-05, + "loss": 1.0463, + "step": 565 + }, + { + "epoch": 0.32, + "learning_rate": 4.864988169252168e-05, + "loss": 1.0242, + "step": 570 + }, + { + "epoch": 0.32, + "learning_rate": 4.8626311799101375e-05, + "loss": 1.0773, + "step": 575 + }, + { + "epoch": 0.32, + "learning_rate": 4.860254375479446e-05, + "loss": 1.0371, + "step": 580 + }, + { + "epoch": 0.32, + "learning_rate": 4.8578577758940504e-05, + "loss": 1.0399, + "step": 585 + }, + { + "epoch": 0.33, + "learning_rate": 4.855441401253928e-05, + "loss": 1.0411, + "step": 590 + }, + { + "epoch": 0.33, + "learning_rate": 4.8530052718249076e-05, + "loss": 1.0476, + "step": 595 + }, + { + "epoch": 0.33, + "learning_rate": 4.850549408038498e-05, + "loss": 1.0075, + "step": 600 + }, + { + "epoch": 0.33, + "learning_rate": 4.848073830491717e-05, + "loss": 1.0045, + "step": 605 + }, + { + "epoch": 0.34, + "learning_rate": 4.845578559946923e-05, + "loss": 1.0368, + "step": 610 + }, + { + "epoch": 0.34, + "learning_rate": 4.8430636173316306e-05, + "loss": 1.0156, + "step": 615 + }, + { + "epoch": 0.34, + "learning_rate": 4.840529023738348e-05, + "loss": 1.0334, + "step": 620 + }, + { + "epoch": 0.35, + "learning_rate": 4.837974800424389e-05, + "loss": 0.9994, + "step": 625 + }, + { + "epoch": 0.35, + "learning_rate": 4.8354009688117026e-05, + "loss": 1.0467, + "step": 630 + }, + { + "epoch": 0.35, + "learning_rate": 4.8328075504866874e-05, + "loss": 0.9968, + "step": 635 + }, + { + "epoch": 0.35, + "learning_rate": 4.8301945672000164e-05, + "loss": 1.041, + "step": 640 + }, + { + "epoch": 0.36, + "learning_rate": 4.8275620408664487e-05, + "loss": 0.9811, + "step": 645 + }, + { + "epoch": 0.36, + "learning_rate": 4.8249099935646494e-05, + "loss": 1.0235, + "step": 650 + }, + { + "epoch": 0.36, + "learning_rate": 4.822238447537003e-05, + "loss": 1.0152, + "step": 655 + }, + { + "epoch": 0.36, + "learning_rate": 4.819547425189429e-05, + "loss": 1.038, + "step": 660 + }, + { + "epoch": 0.37, + "learning_rate": 4.81683694909119e-05, + "loss": 1.0387, + "step": 665 + }, + { + "epoch": 0.37, + "learning_rate": 4.814107041974707e-05, + "loss": 1.019, + "step": 670 + }, + { + "epoch": 0.37, + "learning_rate": 4.811357726735366e-05, + "loss": 1.0403, + "step": 675 + }, + { + "epoch": 0.38, + "learning_rate": 4.808589026431324e-05, + "loss": 1.0828, + "step": 680 + }, + { + "epoch": 0.38, + "learning_rate": 4.805800964283322e-05, + "loss": 1.0128, + "step": 685 + }, + { + "epoch": 0.38, + "learning_rate": 4.802993563674483e-05, + "loss": 1.0502, + "step": 690 + }, + { + "epoch": 0.38, + "learning_rate": 4.80016684815012e-05, + "loss": 1.0169, + "step": 695 + }, + { + "epoch": 0.39, + "learning_rate": 4.7973208414175406e-05, + "loss": 1.0316, + "step": 700 + }, + { + "epoch": 0.39, + "learning_rate": 4.794455567345842e-05, + "loss": 1.0263, + "step": 705 + }, + { + "epoch": 0.39, + "learning_rate": 4.791571049965714e-05, + "loss": 1.0801, + "step": 710 + }, + { + "epoch": 0.4, + "learning_rate": 4.7886673134692404e-05, + "loss": 1.0575, + "step": 715 + }, + { + "epoch": 0.4, + "learning_rate": 4.7857443822096905e-05, + "loss": 0.9767, + "step": 720 + }, + { + "epoch": 0.4, + "learning_rate": 4.782802280701319e-05, + "loss": 0.9666, + "step": 725 + }, + { + "epoch": 0.4, + "learning_rate": 4.77984103361916e-05, + "loss": 0.9983, + "step": 730 + }, + { + "epoch": 0.41, + "learning_rate": 4.776860665798816e-05, + "loss": 1.0219, + "step": 735 + }, + { + "epoch": 0.41, + "learning_rate": 4.773861202236257e-05, + "loss": 0.9963, + "step": 740 + }, + { + "epoch": 0.41, + "learning_rate": 4.770842668087602e-05, + "loss": 1.0219, + "step": 745 + }, + { + "epoch": 0.41, + "learning_rate": 4.767805088668916e-05, + "loss": 1.0029, + "step": 750 + }, + { + "epoch": 0.42, + "learning_rate": 4.7647484894559936e-05, + "loss": 1.0077, + "step": 755 + }, + { + "epoch": 0.42, + "learning_rate": 4.7616728960841444e-05, + "loss": 0.9908, + "step": 760 + }, + { + "epoch": 0.42, + "learning_rate": 4.758578334347981e-05, + "loss": 1.0092, + "step": 765 + }, + { + "epoch": 0.43, + "learning_rate": 4.7554648302012015e-05, + "loss": 0.9874, + "step": 770 + }, + { + "epoch": 0.43, + "learning_rate": 4.7523324097563706e-05, + "loss": 1.0185, + "step": 775 + }, + { + "epoch": 0.43, + "learning_rate": 4.749181099284703e-05, + "loss": 1.0533, + "step": 780 + }, + { + "epoch": 0.43, + "learning_rate": 4.746010925215839e-05, + "loss": 1.0083, + "step": 785 + }, + { + "epoch": 0.44, + "learning_rate": 4.74282191413763e-05, + "loss": 1.0191, + "step": 790 + }, + { + "epoch": 0.44, + "learning_rate": 4.7396140927959045e-05, + "loss": 0.9786, + "step": 795 + }, + { + "epoch": 0.44, + "learning_rate": 4.7363874880942574e-05, + "loss": 1.0197, + "step": 800 + }, + { + "epoch": 0.45, + "learning_rate": 4.733142127093813e-05, + "loss": 1.0471, + "step": 805 + }, + { + "epoch": 0.45, + "learning_rate": 4.7298780370130014e-05, + "loss": 1.0134, + "step": 810 + }, + { + "epoch": 0.45, + "learning_rate": 4.726595245227336e-05, + "loss": 0.9801, + "step": 815 + }, + { + "epoch": 0.45, + "learning_rate": 4.723293779269173e-05, + "loss": 0.9944, + "step": 820 + }, + { + "epoch": 0.46, + "learning_rate": 4.7199736668274924e-05, + "loss": 1.0358, + "step": 825 + }, + { + "epoch": 0.46, + "learning_rate": 4.716634935747655e-05, + "loss": 0.9907, + "step": 830 + }, + { + "epoch": 0.46, + "learning_rate": 4.713277614031177e-05, + "loss": 1.0698, + "step": 835 + }, + { + "epoch": 0.46, + "learning_rate": 4.70990172983549e-05, + "loss": 1.0466, + "step": 840 + }, + { + "epoch": 0.47, + "learning_rate": 4.706507311473707e-05, + "loss": 0.9489, + "step": 845 + }, + { + "epoch": 0.47, + "learning_rate": 4.703094387414385e-05, + "loss": 0.9936, + "step": 850 + }, + { + "epoch": 0.47, + "learning_rate": 4.699662986281288e-05, + "loss": 0.9672, + "step": 855 + }, + { + "epoch": 0.48, + "learning_rate": 4.696213136853141e-05, + "loss": 1.0164, + "step": 860 + }, + { + "epoch": 0.48, + "learning_rate": 4.6927448680633954e-05, + "loss": 0.9817, + "step": 865 + }, + { + "epoch": 0.48, + "learning_rate": 4.689258208999983e-05, + "loss": 1.028, + "step": 870 + }, + { + "epoch": 0.48, + "learning_rate": 4.6857531889050716e-05, + "loss": 1.0239, + "step": 875 + }, + { + "epoch": 0.49, + "learning_rate": 4.682229837174821e-05, + "loss": 1.0378, + "step": 880 + }, + { + "epoch": 0.49, + "learning_rate": 4.678688183359135e-05, + "loss": 0.9908, + "step": 885 + }, + { + "epoch": 0.49, + "learning_rate": 4.675128257161418e-05, + "loss": 0.9384, + "step": 890 + }, + { + "epoch": 0.49, + "learning_rate": 4.671550088438319e-05, + "loss": 0.9481, + "step": 895 + }, + { + "epoch": 0.5, + "learning_rate": 4.6679537071994874e-05, + "loss": 0.9541, + "step": 900 + }, + { + "epoch": 0.5, + "learning_rate": 4.6643391436073165e-05, + "loss": 0.9787, + "step": 905 + }, + { + "epoch": 0.5, + "learning_rate": 4.660706427976693e-05, + "loss": 0.9897, + "step": 910 + }, + { + "epoch": 0.51, + "learning_rate": 4.657055590774745e-05, + "loss": 0.9947, + "step": 915 + }, + { + "epoch": 0.51, + "learning_rate": 4.6533866626205805e-05, + "loss": 0.9614, + "step": 920 + }, + { + "epoch": 0.51, + "learning_rate": 4.649699674285036e-05, + "loss": 1.0323, + "step": 925 + }, + { + "epoch": 0.51, + "learning_rate": 4.645994656690417e-05, + "loss": 1.0345, + "step": 930 + }, + { + "epoch": 0.52, + "learning_rate": 4.642271640910235e-05, + "loss": 1.0432, + "step": 935 + }, + { + "epoch": 0.52, + "learning_rate": 4.638530658168954e-05, + "loss": 1.0102, + "step": 940 + }, + { + "epoch": 0.52, + "learning_rate": 4.6347717398417203e-05, + "loss": 1.0361, + "step": 945 + }, + { + "epoch": 0.53, + "learning_rate": 4.6309949174541096e-05, + "loss": 0.9818, + "step": 950 + }, + { + "epoch": 0.53, + "learning_rate": 4.627200222681851e-05, + "loss": 1.0293, + "step": 955 + }, + { + "epoch": 0.53, + "learning_rate": 4.6233876873505694e-05, + "loss": 1.0125, + "step": 960 + }, + { + "epoch": 0.53, + "learning_rate": 4.619557343435516e-05, + "loss": 1.0039, + "step": 965 + }, + { + "epoch": 0.54, + "learning_rate": 4.615709223061302e-05, + "loss": 1.0146, + "step": 970 + }, + { + "epoch": 0.54, + "learning_rate": 4.611843358501624e-05, + "loss": 1.0352, + "step": 975 + }, + { + "epoch": 0.54, + "learning_rate": 4.6079597821789993e-05, + "loss": 1.0405, + "step": 980 + }, + { + "epoch": 0.54, + "learning_rate": 4.604058526664491e-05, + "loss": 0.9834, + "step": 985 + }, + { + "epoch": 0.55, + "learning_rate": 4.600139624677436e-05, + "loss": 1.0252, + "step": 990 + }, + { + "epoch": 0.55, + "learning_rate": 4.596203109085168e-05, + "loss": 1.0516, + "step": 995 + }, + { + "epoch": 0.55, + "learning_rate": 4.5922490129027464e-05, + "loss": 0.9762, + "step": 1000 + }, + { + "epoch": 0.56, + "learning_rate": 4.588277369292674e-05, + "loss": 0.9801, + "step": 1005 + }, + { + "epoch": 0.56, + "learning_rate": 4.5842882115646234e-05, + "loss": 1.0127, + "step": 1010 + }, + { + "epoch": 0.56, + "learning_rate": 4.580281573175157e-05, + "loss": 1.0415, + "step": 1015 + }, + { + "epoch": 0.56, + "learning_rate": 4.576257487727442e-05, + "loss": 0.9974, + "step": 1020 + }, + { + "epoch": 0.57, + "learning_rate": 4.572215988970974e-05, + "loss": 1.0097, + "step": 1025 + }, + { + "epoch": 0.57, + "learning_rate": 4.568157110801293e-05, + "loss": 1.0004, + "step": 1030 + }, + { + "epoch": 0.57, + "learning_rate": 4.5640808872596944e-05, + "loss": 0.9932, + "step": 1035 + }, + { + "epoch": 0.58, + "learning_rate": 4.5599873525329505e-05, + "loss": 0.9437, + "step": 1040 + }, + { + "epoch": 0.58, + "learning_rate": 4.555876540953019e-05, + "loss": 1.0004, + "step": 1045 + }, + { + "epoch": 0.58, + "learning_rate": 4.551748486996755e-05, + "loss": 0.9743, + "step": 1050 + }, + { + "epoch": 0.58, + "learning_rate": 4.547603225285626e-05, + "loss": 1.0303, + "step": 1055 + }, + { + "epoch": 0.59, + "learning_rate": 4.543440790585417e-05, + "loss": 1.0225, + "step": 1060 + }, + { + "epoch": 0.59, + "learning_rate": 4.539261217805939e-05, + "loss": 1.0102, + "step": 1065 + }, + { + "epoch": 0.59, + "learning_rate": 4.535064542000743e-05, + "loss": 1.0087, + "step": 1070 + }, + { + "epoch": 0.59, + "learning_rate": 4.5308507983668165e-05, + "loss": 0.952, + "step": 1075 + }, + { + "epoch": 0.6, + "learning_rate": 4.526620022244293e-05, + "loss": 0.9767, + "step": 1080 + }, + { + "epoch": 0.6, + "learning_rate": 4.522372249116158e-05, + "loss": 1.0049, + "step": 1085 + }, + { + "epoch": 0.6, + "learning_rate": 4.5181075146079456e-05, + "loss": 1.0397, + "step": 1090 + }, + { + "epoch": 0.61, + "learning_rate": 4.5138258544874455e-05, + "loss": 1.0167, + "step": 1095 + }, + { + "epoch": 0.61, + "learning_rate": 4.5095273046643985e-05, + "loss": 0.9966, + "step": 1100 + }, + { + "epoch": 0.61, + "learning_rate": 4.5052119011901986e-05, + "loss": 0.983, + "step": 1105 + }, + { + "epoch": 0.61, + "learning_rate": 4.500879680257587e-05, + "loss": 1.0301, + "step": 1110 + }, + { + "epoch": 0.62, + "learning_rate": 4.4965306782003535e-05, + "loss": 1.0371, + "step": 1115 + }, + { + "epoch": 0.62, + "learning_rate": 4.492164931493028e-05, + "loss": 1.0083, + "step": 1120 + }, + { + "epoch": 0.62, + "learning_rate": 4.487782476750575e-05, + "loss": 0.9728, + "step": 1125 + }, + { + "epoch": 0.62, + "learning_rate": 4.4833833507280884e-05, + "loss": 0.9892, + "step": 1130 + }, + { + "epoch": 0.63, + "learning_rate": 4.4789675903204805e-05, + "loss": 0.9847, + "step": 1135 + }, + { + "epoch": 0.63, + "learning_rate": 4.474535232562176e-05, + "loss": 1.0545, + "step": 1140 + }, + { + "epoch": 0.63, + "learning_rate": 4.470086314626797e-05, + "loss": 1.0204, + "step": 1145 + }, + { + "epoch": 0.64, + "learning_rate": 4.465620873826856e-05, + "loss": 0.9648, + "step": 1150 + }, + { + "epoch": 0.64, + "learning_rate": 4.46113894761344e-05, + "loss": 0.9545, + "step": 1155 + }, + { + "epoch": 0.64, + "learning_rate": 4.456640573575896e-05, + "loss": 1.0256, + "step": 1160 + }, + { + "epoch": 0.64, + "learning_rate": 4.4521257894415183e-05, + "loss": 0.99, + "step": 1165 + }, + { + "epoch": 0.65, + "learning_rate": 4.44759463307523e-05, + "loss": 0.9906, + "step": 1170 + }, + { + "epoch": 0.65, + "learning_rate": 4.443047142479266e-05, + "loss": 0.9876, + "step": 1175 + }, + { + "epoch": 0.65, + "learning_rate": 4.4384833557928553e-05, + "loss": 1.0495, + "step": 1180 + }, + { + "epoch": 0.66, + "learning_rate": 4.4339033112918966e-05, + "loss": 0.9869, + "step": 1185 + }, + { + "epoch": 0.66, + "learning_rate": 4.4293070473886456e-05, + "loss": 1.0299, + "step": 1190 + }, + { + "epoch": 0.66, + "learning_rate": 4.424694602631385e-05, + "loss": 1.0073, + "step": 1195 + }, + { + "epoch": 0.66, + "learning_rate": 4.420066015704105e-05, + "loss": 1.0023, + "step": 1200 + }, + { + "epoch": 0.67, + "learning_rate": 4.41542132542618e-05, + "loss": 0.9462, + "step": 1205 + }, + { + "epoch": 0.67, + "learning_rate": 4.410760570752037e-05, + "loss": 1.0116, + "step": 1210 + }, + { + "epoch": 0.67, + "learning_rate": 4.4060837907708375e-05, + "loss": 0.9652, + "step": 1215 + }, + { + "epoch": 0.67, + "learning_rate": 4.401391024706142e-05, + "loss": 1.0411, + "step": 1220 + }, + { + "epoch": 0.68, + "learning_rate": 4.396682311915586e-05, + "loss": 0.9691, + "step": 1225 + }, + { + "epoch": 0.68, + "learning_rate": 4.3919576918905495e-05, + "loss": 0.961, + "step": 1230 + }, + { + "epoch": 0.68, + "learning_rate": 4.387217204255819e-05, + "loss": 0.9602, + "step": 1235 + }, + { + "epoch": 0.69, + "learning_rate": 4.3824608887692666e-05, + "loss": 1.0197, + "step": 1240 + }, + { + "epoch": 0.69, + "learning_rate": 4.377688785321507e-05, + "loss": 0.9982, + "step": 1245 + }, + { + "epoch": 0.69, + "learning_rate": 4.372900933935569e-05, + "loss": 0.986, + "step": 1250 + }, + { + "epoch": 0.69, + "learning_rate": 4.368097374766556e-05, + "loss": 0.9744, + "step": 1255 + }, + { + "epoch": 0.7, + "learning_rate": 4.3632781481013105e-05, + "loss": 1.0078, + "step": 1260 + }, + { + "epoch": 0.7, + "learning_rate": 4.358443294358077e-05, + "loss": 0.9865, + "step": 1265 + }, + { + "epoch": 0.7, + "learning_rate": 4.35359285408616e-05, + "loss": 0.976, + "step": 1270 + }, + { + "epoch": 0.71, + "learning_rate": 4.348726867965591e-05, + "loss": 1.0141, + "step": 1275 + }, + { + "epoch": 0.71, + "learning_rate": 4.343845376806777e-05, + "loss": 0.952, + "step": 1280 + }, + { + "epoch": 0.71, + "learning_rate": 4.338948421550169e-05, + "loss": 1.0239, + "step": 1285 + }, + { + "epoch": 0.71, + "learning_rate": 4.334036043265909e-05, + "loss": 0.994, + "step": 1290 + }, + { + "epoch": 0.72, + "learning_rate": 4.329108283153492e-05, + "loss": 1.0505, + "step": 1295 + }, + { + "epoch": 0.72, + "learning_rate": 4.3241651825414195e-05, + "loss": 0.9919, + "step": 1300 + }, + { + "epoch": 0.72, + "learning_rate": 4.31920678288685e-05, + "loss": 1.0169, + "step": 1305 + }, + { + "epoch": 0.72, + "learning_rate": 4.3142331257752546e-05, + "loss": 0.9938, + "step": 1310 + }, + { + "epoch": 0.73, + "learning_rate": 4.309244252920064e-05, + "loss": 0.92, + "step": 1315 + }, + { + "epoch": 0.73, + "learning_rate": 4.304240206162326e-05, + "loss": 0.9537, + "step": 1320 + }, + { + "epoch": 0.73, + "learning_rate": 4.299221027470345e-05, + "loss": 0.996, + "step": 1325 + }, + { + "epoch": 0.74, + "learning_rate": 4.294186758939339e-05, + "loss": 0.9652, + "step": 1330 + }, + { + "epoch": 0.74, + "learning_rate": 4.2891374427910795e-05, + "loss": 1.0138, + "step": 1335 + }, + { + "epoch": 0.74, + "learning_rate": 4.284073121373544e-05, + "loss": 0.9267, + "step": 1340 + }, + { + "epoch": 0.74, + "learning_rate": 4.278993837160553e-05, + "loss": 0.9899, + "step": 1345 + }, + { + "epoch": 0.75, + "learning_rate": 4.273899632751422e-05, + "loss": 0.963, + "step": 1350 + }, + { + "epoch": 0.75, + "learning_rate": 4.2687905508705974e-05, + "loss": 1.0066, + "step": 1355 + }, + { + "epoch": 0.75, + "learning_rate": 4.263666634367303e-05, + "loss": 0.9939, + "step": 1360 + }, + { + "epoch": 0.75, + "learning_rate": 4.258527926215178e-05, + "loss": 0.9667, + "step": 1365 + }, + { + "epoch": 0.76, + "learning_rate": 4.253374469511917e-05, + "loss": 0.9999, + "step": 1370 + }, + { + "epoch": 0.76, + "learning_rate": 4.248206307478909e-05, + "loss": 0.9738, + "step": 1375 + }, + { + "epoch": 0.76, + "learning_rate": 4.243023483460875e-05, + "loss": 0.9829, + "step": 1380 + }, + { + "epoch": 0.77, + "learning_rate": 4.237826040925503e-05, + "loss": 1.0107, + "step": 1385 + }, + { + "epoch": 0.77, + "learning_rate": 4.232614023463088e-05, + "loss": 0.9992, + "step": 1390 + }, + { + "epoch": 0.77, + "learning_rate": 4.227387474786159e-05, + "loss": 0.9566, + "step": 1395 + }, + { + "epoch": 0.77, + "learning_rate": 4.222146438729119e-05, + "loss": 0.9722, + "step": 1400 + }, + { + "epoch": 0.78, + "learning_rate": 4.216890959247873e-05, + "loss": 0.988, + "step": 1405 + }, + { + "epoch": 0.78, + "learning_rate": 4.211621080419463e-05, + "loss": 1.0103, + "step": 1410 + }, + { + "epoch": 0.78, + "learning_rate": 4.206336846441695e-05, + "loss": 0.9805, + "step": 1415 + }, + { + "epoch": 0.79, + "learning_rate": 4.201038301632772e-05, + "loss": 1.0177, + "step": 1420 + }, + { + "epoch": 0.79, + "learning_rate": 4.195725490430917e-05, + "loss": 0.9938, + "step": 1425 + }, + { + "epoch": 0.79, + "learning_rate": 4.190398457394007e-05, + "loss": 1.0276, + "step": 1430 + }, + { + "epoch": 0.79, + "learning_rate": 4.1850572471991924e-05, + "loss": 0.9909, + "step": 1435 + }, + { + "epoch": 0.8, + "learning_rate": 4.1797019046425264e-05, + "loss": 0.9794, + "step": 1440 + }, + { + "epoch": 0.8, + "learning_rate": 4.1743324746385914e-05, + "loss": 1.0073, + "step": 1445 + }, + { + "epoch": 0.8, + "learning_rate": 4.1689490022201154e-05, + "loss": 1.0106, + "step": 1450 + }, + { + "epoch": 0.8, + "learning_rate": 4.163551532537601e-05, + "loss": 1.0241, + "step": 1455 + }, + { + "epoch": 0.81, + "learning_rate": 4.1581401108589425e-05, + "loss": 1.0178, + "step": 1460 + }, + { + "epoch": 0.81, + "learning_rate": 4.1527147825690495e-05, + "loss": 0.9426, + "step": 1465 + }, + { + "epoch": 0.81, + "learning_rate": 4.1472755931694626e-05, + "loss": 0.9642, + "step": 1470 + }, + { + "epoch": 0.82, + "learning_rate": 4.141822588277976e-05, + "loss": 0.9678, + "step": 1475 + }, + { + "epoch": 0.82, + "learning_rate": 4.136355813628251e-05, + "loss": 0.9654, + "step": 1480 + }, + { + "epoch": 0.82, + "learning_rate": 4.130875315069435e-05, + "loss": 0.9748, + "step": 1485 + }, + { + "epoch": 0.82, + "learning_rate": 4.125381138565775e-05, + "loss": 0.9321, + "step": 1490 + }, + { + "epoch": 0.83, + "learning_rate": 4.1198733301962346e-05, + "loss": 0.9885, + "step": 1495 + }, + { + "epoch": 0.83, + "learning_rate": 4.114351936154105e-05, + "loss": 0.9385, + "step": 1500 + }, + { + "epoch": 0.83, + "learning_rate": 4.108817002746619e-05, + "loss": 0.9362, + "step": 1505 + }, + { + "epoch": 0.83, + "learning_rate": 4.1032685763945625e-05, + "loss": 0.9764, + "step": 1510 + }, + { + "epoch": 0.84, + "learning_rate": 4.097706703631886e-05, + "loss": 0.961, + "step": 1515 + }, + { + "epoch": 0.84, + "learning_rate": 4.092131431105312e-05, + "loss": 0.9818, + "step": 1520 + }, + { + "epoch": 0.84, + "learning_rate": 4.086542805573945e-05, + "loss": 0.9855, + "step": 1525 + }, + { + "epoch": 0.85, + "learning_rate": 4.080940873908881e-05, + "loss": 0.9618, + "step": 1530 + }, + { + "epoch": 0.85, + "learning_rate": 4.07532568309281e-05, + "loss": 1.0102, + "step": 1535 + }, + { + "epoch": 0.85, + "learning_rate": 4.069697280219628e-05, + "loss": 0.9773, + "step": 1540 + }, + { + "epoch": 0.85, + "learning_rate": 4.0640557124940376e-05, + "loss": 0.9687, + "step": 1545 + }, + { + "epoch": 0.86, + "learning_rate": 4.058401027231152e-05, + "loss": 0.9818, + "step": 1550 + }, + { + "epoch": 0.86, + "learning_rate": 4.052733271856103e-05, + "loss": 0.9911, + "step": 1555 + }, + { + "epoch": 0.86, + "learning_rate": 4.0470524939036355e-05, + "loss": 0.9694, + "step": 1560 + }, + { + "epoch": 0.87, + "learning_rate": 4.0413587410177155e-05, + "loss": 0.9374, + "step": 1565 + }, + { + "epoch": 0.87, + "learning_rate": 4.035652060951128e-05, + "loss": 0.956, + "step": 1570 + }, + { + "epoch": 0.87, + "learning_rate": 4.0299325015650774e-05, + "loss": 0.9491, + "step": 1575 + }, + { + "epoch": 0.87, + "learning_rate": 4.024200110828783e-05, + "loss": 0.9569, + "step": 1580 + }, + { + "epoch": 0.88, + "learning_rate": 4.018454936819082e-05, + "loss": 0.9627, + "step": 1585 + }, + { + "epoch": 0.88, + "learning_rate": 4.012697027720018e-05, + "loss": 0.9922, + "step": 1590 + }, + { + "epoch": 0.88, + "learning_rate": 4.0069264318224506e-05, + "loss": 0.9703, + "step": 1595 + }, + { + "epoch": 0.88, + "learning_rate": 4.0011431975236337e-05, + "loss": 0.9436, + "step": 1600 + }, + { + "epoch": 0.89, + "learning_rate": 3.995347373326822e-05, + "loss": 0.961, + "step": 1605 + }, + { + "epoch": 0.89, + "learning_rate": 3.989539007840861e-05, + "loss": 0.9247, + "step": 1610 + }, + { + "epoch": 0.89, + "learning_rate": 3.983718149779775e-05, + "loss": 0.9537, + "step": 1615 + }, + { + "epoch": 0.9, + "learning_rate": 3.9778848479623656e-05, + "loss": 0.9415, + "step": 1620 + }, + { + "epoch": 0.9, + "learning_rate": 3.972039151311795e-05, + "loss": 0.9954, + "step": 1625 + }, + { + "epoch": 0.9, + "learning_rate": 3.966181108855183e-05, + "loss": 0.9451, + "step": 1630 + }, + { + "epoch": 0.9, + "learning_rate": 3.960310769723189e-05, + "loss": 0.9977, + "step": 1635 + }, + { + "epoch": 0.91, + "learning_rate": 3.9544281831496034e-05, + "loss": 0.9806, + "step": 1640 + }, + { + "epoch": 0.91, + "learning_rate": 3.9485333984709374e-05, + "loss": 0.9851, + "step": 1645 + }, + { + "epoch": 0.91, + "learning_rate": 3.942626465126001e-05, + "loss": 1.0089, + "step": 1650 + }, + { + "epoch": 0.92, + "learning_rate": 3.9367074326555e-05, + "loss": 0.9562, + "step": 1655 + }, + { + "epoch": 0.92, + "learning_rate": 3.930776350701609e-05, + "loss": 0.9892, + "step": 1660 + }, + { + "epoch": 0.92, + "learning_rate": 3.92483326900756e-05, + "loss": 0.9875, + "step": 1665 + }, + { + "epoch": 0.92, + "learning_rate": 3.91887823741723e-05, + "loss": 1.0012, + "step": 1670 + }, + { + "epoch": 0.93, + "learning_rate": 3.9129113058747136e-05, + "loss": 0.97, + "step": 1675 + }, + { + "epoch": 0.93, + "learning_rate": 3.9069325244239095e-05, + "loss": 0.9793, + "step": 1680 + }, + { + "epoch": 0.93, + "learning_rate": 3.900941943208103e-05, + "loss": 0.9864, + "step": 1685 + }, + { + "epoch": 0.93, + "learning_rate": 3.894939612469539e-05, + "loss": 0.9596, + "step": 1690 + }, + { + "epoch": 0.94, + "learning_rate": 3.888925582549006e-05, + "loss": 0.9679, + "step": 1695 + }, + { + "epoch": 0.94, + "learning_rate": 3.882899903885412e-05, + "loss": 0.9395, + "step": 1700 + }, + { + "epoch": 0.94, + "learning_rate": 3.876862627015361e-05, + "loss": 0.9774, + "step": 1705 + }, + { + "epoch": 0.95, + "learning_rate": 3.87081380257273e-05, + "loss": 0.9686, + "step": 1710 + }, + { + "epoch": 0.95, + "learning_rate": 3.864753481288244e-05, + "loss": 0.953, + "step": 1715 + }, + { + "epoch": 0.95, + "learning_rate": 3.8586817139890515e-05, + "loss": 0.9719, + "step": 1720 + }, + { + "epoch": 0.95, + "learning_rate": 3.852598551598294e-05, + "loss": 0.9748, + "step": 1725 + }, + { + "epoch": 0.96, + "learning_rate": 3.8465040451346874e-05, + "loss": 0.9741, + "step": 1730 + }, + { + "epoch": 0.96, + "learning_rate": 3.8403982457120836e-05, + "loss": 0.9747, + "step": 1735 + }, + { + "epoch": 0.96, + "learning_rate": 3.834281204539051e-05, + "loss": 0.9791, + "step": 1740 + }, + { + "epoch": 0.96, + "learning_rate": 3.828152972918438e-05, + "loss": 0.9704, + "step": 1745 + }, + { + "epoch": 0.97, + "learning_rate": 3.82201360224695e-05, + "loss": 0.9556, + "step": 1750 + }, + { + "epoch": 0.97, + "learning_rate": 3.815863144014711e-05, + "loss": 0.9552, + "step": 1755 + }, + { + "epoch": 0.97, + "learning_rate": 3.809701649804834e-05, + "loss": 0.9594, + "step": 1760 + }, + { + "epoch": 0.98, + "learning_rate": 3.8035291712929926e-05, + "loss": 0.959, + "step": 1765 + }, + { + "epoch": 0.98, + "learning_rate": 3.797345760246982e-05, + "loss": 1.0254, + "step": 1770 + }, + { + "epoch": 0.98, + "learning_rate": 3.791151468526289e-05, + "loss": 0.9925, + "step": 1775 + }, + { + "epoch": 0.98, + "learning_rate": 3.784946348081654e-05, + "loss": 0.9516, + "step": 1780 + }, + { + "epoch": 0.99, + "learning_rate": 3.7787304509546365e-05, + "loss": 0.954, + "step": 1785 + }, + { + "epoch": 0.99, + "learning_rate": 3.7725038292771774e-05, + "loss": 0.9465, + "step": 1790 + }, + { + "epoch": 0.99, + "learning_rate": 3.766266535271167e-05, + "loss": 0.9792, + "step": 1795 + }, + { + "epoch": 1.0, + "learning_rate": 3.760018621248e-05, + "loss": 0.964, + "step": 1800 + }, + { + "epoch": 1.0, + "learning_rate": 3.75376013960814e-05, + "loss": 0.9419, + "step": 1805 + }, + { + "epoch": 1.0, + "learning_rate": 3.747491142840681e-05, + "loss": 0.9818, + "step": 1810 + }, + { + "epoch": 1.0, + "learning_rate": 3.741211683522904e-05, + "loss": 0.9153, + "step": 1815 + }, + { + "epoch": 1.01, + "learning_rate": 3.734921814319841e-05, + "loss": 0.955, + "step": 1820 + }, + { + "epoch": 1.01, + "learning_rate": 3.728621587983828e-05, + "loss": 0.9467, + "step": 1825 + }, + { + "epoch": 1.01, + "learning_rate": 3.722311057354067e-05, + "loss": 0.9816, + "step": 1830 + }, + { + "epoch": 1.01, + "learning_rate": 3.715990275356178e-05, + "loss": 0.9727, + "step": 1835 + }, + { + "epoch": 1.02, + "learning_rate": 3.7096592950017617e-05, + "loss": 0.9822, + "step": 1840 + }, + { + "epoch": 1.02, + "learning_rate": 3.703318169387947e-05, + "loss": 0.942, + "step": 1845 + }, + { + "epoch": 1.02, + "learning_rate": 3.696966951696952e-05, + "loss": 0.9306, + "step": 1850 + }, + { + "epoch": 1.03, + "learning_rate": 3.690605695195637e-05, + "loss": 0.9654, + "step": 1855 + }, + { + "epoch": 1.03, + "learning_rate": 3.684234453235054e-05, + "loss": 0.9592, + "step": 1860 + }, + { + "epoch": 1.03, + "learning_rate": 3.677853279250003e-05, + "loss": 0.988, + "step": 1865 + }, + { + "epoch": 1.03, + "learning_rate": 3.671462226758583e-05, + "loss": 0.9462, + "step": 1870 + }, + { + "epoch": 1.04, + "learning_rate": 3.665061349361742e-05, + "loss": 0.9685, + "step": 1875 + }, + { + "epoch": 1.04, + "learning_rate": 3.658650700742828e-05, + "loss": 0.9772, + "step": 1880 + }, + { + "epoch": 1.04, + "learning_rate": 3.6522303346671404e-05, + "loss": 0.9482, + "step": 1885 + }, + { + "epoch": 1.05, + "learning_rate": 3.645800304981477e-05, + "loss": 1.0069, + "step": 1890 + }, + { + "epoch": 1.05, + "learning_rate": 3.639360665613683e-05, + "loss": 0.9003, + "step": 1895 + }, + { + "epoch": 1.05, + "learning_rate": 3.632911470572197e-05, + "loss": 0.9279, + "step": 1900 + }, + { + "epoch": 1.05, + "learning_rate": 3.626452773945603e-05, + "loss": 0.9237, + "step": 1905 + }, + { + "epoch": 1.06, + "learning_rate": 3.619984629902172e-05, + "loss": 1.0086, + "step": 1910 + }, + { + "epoch": 1.06, + "learning_rate": 3.613507092689409e-05, + "loss": 0.9625, + "step": 1915 + }, + { + "epoch": 1.06, + "learning_rate": 3.607020216633599e-05, + "loss": 0.9297, + "step": 1920 + }, + { + "epoch": 1.06, + "learning_rate": 3.60052405613935e-05, + "loss": 0.9718, + "step": 1925 + }, + { + "epoch": 1.07, + "learning_rate": 3.594018665689139e-05, + "loss": 0.9512, + "step": 1930 + }, + { + "epoch": 1.07, + "learning_rate": 3.5875040998428513e-05, + "loss": 0.9923, + "step": 1935 + }, + { + "epoch": 1.07, + "learning_rate": 3.5809804132373253e-05, + "loss": 0.9518, + "step": 1940 + }, + { + "epoch": 1.08, + "learning_rate": 3.574447660585897e-05, + "loss": 0.9324, + "step": 1945 + }, + { + "epoch": 1.08, + "learning_rate": 3.5679058966779344e-05, + "loss": 0.9724, + "step": 1950 + }, + { + "epoch": 1.08, + "learning_rate": 3.561355176378384e-05, + "loss": 0.978, + "step": 1955 + }, + { + "epoch": 1.08, + "learning_rate": 3.554795554627307e-05, + "loss": 0.9893, + "step": 1960 + }, + { + "epoch": 1.09, + "learning_rate": 3.548227086439422e-05, + "loss": 0.967, + "step": 1965 + }, + { + "epoch": 1.09, + "learning_rate": 3.541649826903639e-05, + "loss": 0.9529, + "step": 1970 + }, + { + "epoch": 1.09, + "learning_rate": 3.535063831182602e-05, + "loss": 0.9506, + "step": 1975 + }, + { + "epoch": 1.09, + "learning_rate": 3.528469154512224e-05, + "loss": 0.9525, + "step": 1980 + }, + { + "epoch": 1.1, + "learning_rate": 3.521865852201223e-05, + "loss": 0.9258, + "step": 1985 + }, + { + "epoch": 1.1, + "learning_rate": 3.5152539796306596e-05, + "loss": 0.9417, + "step": 1990 + }, + { + "epoch": 1.1, + "learning_rate": 3.508633592253472e-05, + "loss": 0.9465, + "step": 1995 + }, + { + "epoch": 1.11, + "learning_rate": 3.502004745594011e-05, + "loss": 0.9494, + "step": 2000 + }, + { + "epoch": 1.11, + "learning_rate": 3.4953674952475755e-05, + "loss": 0.9709, + "step": 2005 + }, + { + "epoch": 1.11, + "learning_rate": 3.488721896879943e-05, + "loss": 0.9581, + "step": 2010 + }, + { + "epoch": 1.11, + "learning_rate": 3.4820680062269074e-05, + "loss": 0.974, + "step": 2015 + }, + { + "epoch": 1.12, + "learning_rate": 3.4754058790938046e-05, + "loss": 0.9768, + "step": 2020 + }, + { + "epoch": 1.12, + "learning_rate": 3.468735571355055e-05, + "loss": 0.982, + "step": 2025 + }, + { + "epoch": 1.12, + "learning_rate": 3.4620571389536825e-05, + "loss": 0.984, + "step": 2030 + }, + { + "epoch": 1.13, + "learning_rate": 3.455370637900856e-05, + "loss": 0.9604, + "step": 2035 + }, + { + "epoch": 1.13, + "learning_rate": 3.448676124275414e-05, + "loss": 0.897, + "step": 2040 + }, + { + "epoch": 1.13, + "learning_rate": 3.4419736542233925e-05, + "loss": 0.9968, + "step": 2045 + }, + { + "epoch": 1.13, + "learning_rate": 3.4352632839575616e-05, + "loss": 0.9479, + "step": 2050 + }, + { + "epoch": 1.14, + "learning_rate": 3.428545069756946e-05, + "loss": 0.9724, + "step": 2055 + }, + { + "epoch": 1.14, + "learning_rate": 3.42181906796636e-05, + "loss": 0.9493, + "step": 2060 + }, + { + "epoch": 1.14, + "learning_rate": 3.415085334995927e-05, + "loss": 0.9348, + "step": 2065 + }, + { + "epoch": 1.14, + "learning_rate": 3.408343927320613e-05, + "loss": 0.9702, + "step": 2070 + }, + { + "epoch": 1.15, + "learning_rate": 3.401594901479753e-05, + "loss": 0.9089, + "step": 2075 + }, + { + "epoch": 1.15, + "learning_rate": 3.394838314076572e-05, + "loss": 0.9606, + "step": 2080 + }, + { + "epoch": 1.15, + "learning_rate": 3.3880742217777115e-05, + "loss": 0.9743, + "step": 2085 + }, + { + "epoch": 1.16, + "learning_rate": 3.381302681312759e-05, + "loss": 0.9469, + "step": 2090 + }, + { + "epoch": 1.16, + "learning_rate": 3.374523749473767e-05, + "loss": 0.949, + "step": 2095 + }, + { + "epoch": 1.16, + "learning_rate": 3.367737483114779e-05, + "loss": 0.9567, + "step": 2100 + }, + { + "epoch": 1.16, + "learning_rate": 3.360943939151351e-05, + "loss": 0.9718, + "step": 2105 + }, + { + "epoch": 1.17, + "learning_rate": 3.354143174560078e-05, + "loss": 0.9626, + "step": 2110 + }, + { + "epoch": 1.17, + "learning_rate": 3.3473352463781105e-05, + "loss": 0.9346, + "step": 2115 + }, + { + "epoch": 1.17, + "learning_rate": 3.340520211702681e-05, + "loss": 0.9208, + "step": 2120 + }, + { + "epoch": 1.18, + "learning_rate": 3.333698127690623e-05, + "loss": 0.9856, + "step": 2125 + }, + { + "epoch": 1.18, + "learning_rate": 3.326869051557891e-05, + "loss": 0.9049, + "step": 2130 + }, + { + "epoch": 1.18, + "learning_rate": 3.320033040579082e-05, + "loss": 0.9222, + "step": 2135 + }, + { + "epoch": 1.18, + "learning_rate": 3.3131901520869565e-05, + "loss": 0.9648, + "step": 2140 + }, + { + "epoch": 1.19, + "learning_rate": 3.306340443471951e-05, + "loss": 0.9538, + "step": 2145 + }, + { + "epoch": 1.19, + "learning_rate": 3.299483972181708e-05, + "loss": 0.9314, + "step": 2150 + }, + { + "epoch": 1.19, + "learning_rate": 3.292620795720583e-05, + "loss": 0.9576, + "step": 2155 + }, + { + "epoch": 1.19, + "learning_rate": 3.285750971649167e-05, + "loss": 0.9499, + "step": 2160 + }, + { + "epoch": 1.2, + "learning_rate": 3.278874557583807e-05, + "loss": 0.9568, + "step": 2165 + }, + { + "epoch": 1.2, + "learning_rate": 3.271991611196117e-05, + "loss": 0.9642, + "step": 2170 + }, + { + "epoch": 1.2, + "learning_rate": 3.265102190212497e-05, + "loss": 0.9526, + "step": 2175 + }, + { + "epoch": 1.21, + "learning_rate": 3.258206352413648e-05, + "loss": 0.933, + "step": 2180 + }, + { + "epoch": 1.21, + "learning_rate": 3.2513041556340887e-05, + "loss": 0.9683, + "step": 2185 + }, + { + "epoch": 1.21, + "learning_rate": 3.244395657761671e-05, + "loss": 0.9155, + "step": 2190 + }, + { + "epoch": 1.21, + "learning_rate": 3.2374809167370924e-05, + "loss": 0.9262, + "step": 2195 + }, + { + "epoch": 1.22, + "learning_rate": 3.230559990553409e-05, + "loss": 0.9778, + "step": 2200 + }, + { + "epoch": 1.22, + "learning_rate": 3.2236329372555544e-05, + "loss": 0.9577, + "step": 2205 + }, + { + "epoch": 1.22, + "learning_rate": 3.2166998149398465e-05, + "loss": 0.9286, + "step": 2210 + }, + { + "epoch": 1.22, + "learning_rate": 3.209760681753505e-05, + "loss": 0.9634, + "step": 2215 + }, + { + "epoch": 1.23, + "learning_rate": 3.2028155958941615e-05, + "loss": 0.9467, + "step": 2220 + }, + { + "epoch": 1.23, + "learning_rate": 3.195864615609373e-05, + "loss": 0.9543, + "step": 2225 + }, + { + "epoch": 1.23, + "learning_rate": 3.1889077991961304e-05, + "loss": 0.9914, + "step": 2230 + }, + { + "epoch": 1.24, + "learning_rate": 3.181945205000373e-05, + "loss": 0.9309, + "step": 2235 + }, + { + "epoch": 1.24, + "learning_rate": 3.1749768914164955e-05, + "loss": 0.9299, + "step": 2240 + }, + { + "epoch": 1.24, + "learning_rate": 3.168002916886864e-05, + "loss": 0.9462, + "step": 2245 + }, + { + "epoch": 1.24, + "learning_rate": 3.1610233399013194e-05, + "loss": 0.948, + "step": 2250 + }, + { + "epoch": 1.25, + "learning_rate": 3.15403821899669e-05, + "loss": 0.9335, + "step": 2255 + }, + { + "epoch": 1.25, + "learning_rate": 3.147047612756302e-05, + "loss": 0.952, + "step": 2260 + }, + { + "epoch": 1.25, + "learning_rate": 3.140051579809484e-05, + "loss": 0.9532, + "step": 2265 + }, + { + "epoch": 1.26, + "learning_rate": 3.133050178831079e-05, + "loss": 0.9853, + "step": 2270 + }, + { + "epoch": 1.26, + "learning_rate": 3.12604346854095e-05, + "loss": 0.8925, + "step": 2275 + }, + { + "epoch": 1.26, + "learning_rate": 3.119031507703491e-05, + "loss": 0.942, + "step": 2280 + }, + { + "epoch": 1.26, + "learning_rate": 3.112014355127129e-05, + "loss": 0.9132, + "step": 2285 + }, + { + "epoch": 1.27, + "learning_rate": 3.104992069663835e-05, + "loss": 0.9335, + "step": 2290 + }, + { + "epoch": 1.27, + "learning_rate": 3.0979647102086273e-05, + "loss": 0.9403, + "step": 2295 + }, + { + "epoch": 1.27, + "learning_rate": 3.090932335699081e-05, + "loss": 0.9246, + "step": 2300 + }, + { + "epoch": 1.27, + "learning_rate": 3.083895005114831e-05, + "loss": 0.912, + "step": 2305 + }, + { + "epoch": 1.28, + "learning_rate": 3.076852777477079e-05, + "loss": 0.9334, + "step": 2310 + }, + { + "epoch": 1.28, + "learning_rate": 3.069805711848096e-05, + "loss": 0.933, + "step": 2315 + }, + { + "epoch": 1.28, + "learning_rate": 3.062753867330729e-05, + "loss": 0.9348, + "step": 2320 + }, + { + "epoch": 1.29, + "learning_rate": 3.055697303067905e-05, + "loss": 0.9997, + "step": 2325 + }, + { + "epoch": 1.29, + "learning_rate": 3.048636078242137e-05, + "loss": 0.9196, + "step": 2330 + }, + { + "epoch": 1.29, + "learning_rate": 3.0415702520750235e-05, + "loss": 0.9735, + "step": 2335 + }, + { + "epoch": 1.29, + "learning_rate": 3.0344998838267525e-05, + "loss": 0.9395, + "step": 2340 + }, + { + "epoch": 1.3, + "learning_rate": 3.0274250327956093e-05, + "loss": 0.9455, + "step": 2345 + }, + { + "epoch": 1.3, + "learning_rate": 3.020345758317474e-05, + "loss": 0.968, + "step": 2350 + }, + { + "epoch": 1.3, + "learning_rate": 3.0132621197653245e-05, + "loss": 0.9403, + "step": 2355 + }, + { + "epoch": 1.3, + "learning_rate": 3.0061741765487418e-05, + "loss": 0.9267, + "step": 2360 + }, + { + "epoch": 1.31, + "learning_rate": 2.9990819881134073e-05, + "loss": 0.9734, + "step": 2365 + }, + { + "epoch": 1.31, + "learning_rate": 2.9919856139406093e-05, + "loss": 0.949, + "step": 2370 + }, + { + "epoch": 1.31, + "learning_rate": 2.9848851135467386e-05, + "loss": 0.9464, + "step": 2375 + }, + { + "epoch": 1.32, + "learning_rate": 2.977780546482794e-05, + "loss": 0.9709, + "step": 2380 + }, + { + "epoch": 1.32, + "learning_rate": 2.9706719723338795e-05, + "loss": 0.9202, + "step": 2385 + }, + { + "epoch": 1.32, + "learning_rate": 2.9635594507187074e-05, + "loss": 0.9625, + "step": 2390 + }, + { + "epoch": 1.32, + "learning_rate": 2.956443041289096e-05, + "loss": 0.9307, + "step": 2395 + }, + { + "epoch": 1.33, + "learning_rate": 2.9493228037294702e-05, + "loss": 0.907, + "step": 2400 + }, + { + "epoch": 1.33, + "learning_rate": 2.9421987977563613e-05, + "loss": 0.9487, + "step": 2405 + }, + { + "epoch": 1.33, + "learning_rate": 2.935071083117907e-05, + "loss": 0.9259, + "step": 2410 + }, + { + "epoch": 1.34, + "learning_rate": 2.9279397195933457e-05, + "loss": 0.9357, + "step": 2415 + }, + { + "epoch": 1.34, + "learning_rate": 2.920804766992521e-05, + "loss": 0.9149, + "step": 2420 + }, + { + "epoch": 1.34, + "learning_rate": 2.9136662851553787e-05, + "loss": 0.9664, + "step": 2425 + }, + { + "epoch": 1.34, + "learning_rate": 2.906524333951461e-05, + "loss": 0.973, + "step": 2430 + }, + { + "epoch": 1.35, + "learning_rate": 2.899378973279409e-05, + "loss": 0.9797, + "step": 2435 + }, + { + "epoch": 1.35, + "learning_rate": 2.892230263066459e-05, + "loss": 0.9604, + "step": 2440 + }, + { + "epoch": 1.35, + "learning_rate": 2.885078263267938e-05, + "loss": 0.9147, + "step": 2445 + }, + { + "epoch": 1.35, + "learning_rate": 2.8779230338667634e-05, + "loss": 0.9827, + "step": 2450 + }, + { + "epoch": 1.36, + "learning_rate": 2.870764634872939e-05, + "loss": 0.9362, + "step": 2455 + }, + { + "epoch": 1.36, + "learning_rate": 2.86360312632305e-05, + "loss": 0.9373, + "step": 2460 + }, + { + "epoch": 1.36, + "learning_rate": 2.8564385682797622e-05, + "loss": 0.955, + "step": 2465 + }, + { + "epoch": 1.37, + "learning_rate": 2.8492710208313177e-05, + "loss": 0.9108, + "step": 2470 + }, + { + "epoch": 1.37, + "learning_rate": 2.8421005440910303e-05, + "loss": 0.9259, + "step": 2475 + }, + { + "epoch": 1.37, + "learning_rate": 2.8349271981967797e-05, + "loss": 0.9333, + "step": 2480 + }, + { + "epoch": 1.37, + "learning_rate": 2.8277510433105102e-05, + "loss": 0.9437, + "step": 2485 + }, + { + "epoch": 1.38, + "learning_rate": 2.820572139617725e-05, + "loss": 0.9189, + "step": 2490 + }, + { + "epoch": 1.38, + "learning_rate": 2.8133905473269802e-05, + "loss": 0.9522, + "step": 2495 + }, + { + "epoch": 1.38, + "learning_rate": 2.8062063266693818e-05, + "loss": 0.9352, + "step": 2500 + }, + { + "epoch": 1.39, + "learning_rate": 2.7990195378980784e-05, + "loss": 0.9232, + "step": 2505 + }, + { + "epoch": 1.39, + "learning_rate": 2.7918302412877583e-05, + "loss": 0.8807, + "step": 2510 + }, + { + "epoch": 1.39, + "learning_rate": 2.7846384971341427e-05, + "loss": 0.9231, + "step": 2515 + }, + { + "epoch": 1.39, + "learning_rate": 2.7774443657534788e-05, + "loss": 0.9253, + "step": 2520 + }, + { + "epoch": 1.4, + "learning_rate": 2.770247907482036e-05, + "loss": 0.9646, + "step": 2525 + }, + { + "epoch": 1.4, + "learning_rate": 2.763049182675599e-05, + "loss": 0.9575, + "step": 2530 + }, + { + "epoch": 1.4, + "learning_rate": 2.7558482517089617e-05, + "loss": 0.9234, + "step": 2535 + }, + { + "epoch": 1.4, + "learning_rate": 2.748645174975421e-05, + "loss": 0.9215, + "step": 2540 + }, + { + "epoch": 1.41, + "learning_rate": 2.74144001288627e-05, + "loss": 0.959, + "step": 2545 + }, + { + "epoch": 1.41, + "learning_rate": 2.7342328258702894e-05, + "loss": 0.9509, + "step": 2550 + }, + { + "epoch": 1.41, + "learning_rate": 2.727023674373246e-05, + "loss": 0.8989, + "step": 2555 + }, + { + "epoch": 1.42, + "learning_rate": 2.7198126188573807e-05, + "loss": 0.9653, + "step": 2560 + }, + { + "epoch": 1.42, + "learning_rate": 2.7125997198009028e-05, + "loss": 0.9046, + "step": 2565 + }, + { + "epoch": 1.42, + "learning_rate": 2.7053850376974848e-05, + "loss": 0.9318, + "step": 2570 + }, + { + "epoch": 1.42, + "learning_rate": 2.6981686330557516e-05, + "loss": 0.9292, + "step": 2575 + }, + { + "epoch": 1.43, + "learning_rate": 2.6909505663987756e-05, + "loss": 0.99, + "step": 2580 + }, + { + "epoch": 1.43, + "learning_rate": 2.6837308982635678e-05, + "loss": 0.9737, + "step": 2585 + }, + { + "epoch": 1.43, + "learning_rate": 2.6765096892005726e-05, + "loss": 0.9649, + "step": 2590 + }, + { + "epoch": 1.43, + "learning_rate": 2.6692869997731545e-05, + "loss": 0.9687, + "step": 2595 + }, + { + "epoch": 1.44, + "learning_rate": 2.6620628905570964e-05, + "loss": 0.9708, + "step": 2600 + }, + { + "epoch": 1.44, + "learning_rate": 2.6548374221400884e-05, + "loss": 0.9498, + "step": 2605 + }, + { + "epoch": 1.44, + "learning_rate": 2.6476106551212188e-05, + "loss": 0.9512, + "step": 2610 + }, + { + "epoch": 1.45, + "learning_rate": 2.6403826501104682e-05, + "loss": 0.964, + "step": 2615 + }, + { + "epoch": 1.45, + "learning_rate": 2.6331534677281998e-05, + "loss": 0.9321, + "step": 2620 + }, + { + "epoch": 1.45, + "learning_rate": 2.6259231686046508e-05, + "loss": 0.9032, + "step": 2625 + }, + { + "epoch": 1.45, + "learning_rate": 2.6186918133794252e-05, + "loss": 0.9543, + "step": 2630 + }, + { + "epoch": 1.46, + "learning_rate": 2.6114594627009847e-05, + "loss": 0.9355, + "step": 2635 + }, + { + "epoch": 1.46, + "learning_rate": 2.604226177226137e-05, + "loss": 0.9111, + "step": 2640 + }, + { + "epoch": 1.46, + "learning_rate": 2.596992017619534e-05, + "loss": 0.9896, + "step": 2645 + }, + { + "epoch": 1.47, + "learning_rate": 2.589757044553155e-05, + "loss": 0.9715, + "step": 2650 + }, + { + "epoch": 1.47, + "learning_rate": 2.5825213187058045e-05, + "loss": 0.8911, + "step": 2655 + }, + { + "epoch": 1.47, + "learning_rate": 2.5752849007625986e-05, + "loss": 0.9446, + "step": 2660 + }, + { + "epoch": 1.47, + "learning_rate": 2.568047851414459e-05, + "loss": 0.9453, + "step": 2665 + }, + { + "epoch": 1.48, + "learning_rate": 2.5608102313576027e-05, + "loss": 0.9244, + "step": 2670 + }, + { + "epoch": 1.48, + "learning_rate": 2.553572101293033e-05, + "loss": 0.9478, + "step": 2675 + }, + { + "epoch": 1.48, + "learning_rate": 2.546333521926031e-05, + "loss": 0.9708, + "step": 2680 + }, + { + "epoch": 1.48, + "learning_rate": 2.5390945539656445e-05, + "loss": 0.9266, + "step": 2685 + }, + { + "epoch": 1.49, + "learning_rate": 2.5318552581241822e-05, + "loss": 0.9199, + "step": 2690 + }, + { + "epoch": 1.49, + "learning_rate": 2.524615695116702e-05, + "loss": 0.9666, + "step": 2695 + }, + { + "epoch": 1.49, + "learning_rate": 2.5173759256605027e-05, + "loss": 0.9247, + "step": 2700 + }, + { + "epoch": 1.5, + "learning_rate": 2.510136010474614e-05, + "loss": 0.9534, + "step": 2705 + }, + { + "epoch": 1.5, + "learning_rate": 2.5028960102792887e-05, + "loss": 0.9502, + "step": 2710 + }, + { + "epoch": 1.5, + "learning_rate": 2.4971039897207112e-05, + "loss": 0.9331, + "step": 2715 + }, + { + "epoch": 1.5, + "learning_rate": 2.4898639895253865e-05, + "loss": 0.9806, + "step": 2720 + }, + { + "epoch": 1.51, + "learning_rate": 2.4826240743394982e-05, + "loss": 0.9525, + "step": 2725 + }, + { + "epoch": 1.51, + "learning_rate": 2.4753843048832985e-05, + "loss": 0.937, + "step": 2730 + }, + { + "epoch": 1.51, + "learning_rate": 2.4681447418758187e-05, + "loss": 0.935, + "step": 2735 + }, + { + "epoch": 1.52, + "learning_rate": 2.460905446034356e-05, + "loss": 0.9662, + "step": 2740 + }, + { + "epoch": 1.52, + "learning_rate": 2.45366647807397e-05, + "loss": 0.9249, + "step": 2745 + }, + { + "epoch": 1.52, + "learning_rate": 2.446427898706967e-05, + "loss": 0.9673, + "step": 2750 + }, + { + "epoch": 1.52, + "learning_rate": 2.439189768642398e-05, + "loss": 0.9316, + "step": 2755 + }, + { + "epoch": 1.53, + "learning_rate": 2.431952148585541e-05, + "loss": 0.9297, + "step": 2760 + }, + { + "epoch": 1.53, + "learning_rate": 2.424715099237402e-05, + "loss": 0.9278, + "step": 2765 + }, + { + "epoch": 1.53, + "learning_rate": 2.4174786812941968e-05, + "loss": 0.8954, + "step": 2770 + }, + { + "epoch": 1.53, + "learning_rate": 2.4102429554468456e-05, + "loss": 0.9586, + "step": 2775 + }, + { + "epoch": 1.54, + "learning_rate": 2.4030079823804673e-05, + "loss": 0.9119, + "step": 2780 + }, + { + "epoch": 1.54, + "learning_rate": 2.395773822773863e-05, + "loss": 0.8949, + "step": 2785 + }, + { + "epoch": 1.54, + "learning_rate": 2.3885405372990166e-05, + "loss": 0.9506, + "step": 2790 + }, + { + "epoch": 1.55, + "learning_rate": 2.3813081866205754e-05, + "loss": 0.9087, + "step": 2795 + }, + { + "epoch": 1.55, + "learning_rate": 2.3740768313953494e-05, + "loss": 0.923, + "step": 2800 + }, + { + "epoch": 1.55, + "learning_rate": 2.3668465322718004e-05, + "loss": 1.0212, + "step": 2805 + }, + { + "epoch": 1.55, + "learning_rate": 2.359617349889532e-05, + "loss": 0.9182, + "step": 2810 + }, + { + "epoch": 1.56, + "learning_rate": 2.3523893448787818e-05, + "loss": 0.9316, + "step": 2815 + }, + { + "epoch": 1.56, + "learning_rate": 2.3451625778599122e-05, + "loss": 0.94, + "step": 2820 + }, + { + "epoch": 1.56, + "learning_rate": 2.3379371094429038e-05, + "loss": 0.9181, + "step": 2825 + }, + { + "epoch": 1.56, + "learning_rate": 2.3307130002268457e-05, + "loss": 0.937, + "step": 2830 + }, + { + "epoch": 1.57, + "learning_rate": 2.3234903107994287e-05, + "loss": 0.9026, + "step": 2835 + }, + { + "epoch": 1.57, + "learning_rate": 2.3162691017364317e-05, + "loss": 0.954, + "step": 2840 + }, + { + "epoch": 1.57, + "learning_rate": 2.3090494336012253e-05, + "loss": 0.9661, + "step": 2845 + }, + { + "epoch": 1.58, + "learning_rate": 2.3018313669442483e-05, + "loss": 0.9127, + "step": 2850 + }, + { + "epoch": 1.58, + "learning_rate": 2.2946149623025158e-05, + "loss": 0.9317, + "step": 2855 + }, + { + "epoch": 1.58, + "learning_rate": 2.2874002801990978e-05, + "loss": 0.9856, + "step": 2860 + }, + { + "epoch": 1.58, + "learning_rate": 2.28018738114262e-05, + "loss": 1.0021, + "step": 2865 + }, + { + "epoch": 1.59, + "learning_rate": 2.272976325626755e-05, + "loss": 0.9655, + "step": 2870 + }, + { + "epoch": 1.59, + "learning_rate": 2.265767174129711e-05, + "loss": 0.9619, + "step": 2875 + }, + { + "epoch": 1.59, + "learning_rate": 2.2585599871137313e-05, + "loss": 0.9383, + "step": 2880 + }, + { + "epoch": 1.6, + "learning_rate": 2.251354825024579e-05, + "loss": 0.9332, + "step": 2885 + }, + { + "epoch": 1.6, + "learning_rate": 2.244151748291039e-05, + "loss": 0.9544, + "step": 2890 + }, + { + "epoch": 1.6, + "learning_rate": 2.236950817324401e-05, + "loss": 0.9343, + "step": 2895 + }, + { + "epoch": 1.6, + "learning_rate": 2.2297520925179647e-05, + "loss": 0.9189, + "step": 2900 + }, + { + "epoch": 1.61, + "learning_rate": 2.222555634246521e-05, + "loss": 0.9341, + "step": 2905 + }, + { + "epoch": 1.61, + "learning_rate": 2.215361502865858e-05, + "loss": 0.9567, + "step": 2910 + }, + { + "epoch": 1.61, + "learning_rate": 2.2081697587122423e-05, + "loss": 0.9047, + "step": 2915 + }, + { + "epoch": 1.61, + "learning_rate": 2.200980462101922e-05, + "loss": 0.9126, + "step": 2920 + }, + { + "epoch": 1.62, + "learning_rate": 2.1937936733306195e-05, + "loss": 0.9523, + "step": 2925 + }, + { + "epoch": 1.62, + "learning_rate": 2.18660945267302e-05, + "loss": 0.8802, + "step": 2930 + }, + { + "epoch": 1.62, + "learning_rate": 2.179427860382276e-05, + "loss": 0.9197, + "step": 2935 + }, + { + "epoch": 1.63, + "learning_rate": 2.1722489566894903e-05, + "loss": 0.9255, + "step": 2940 + }, + { + "epoch": 1.63, + "learning_rate": 2.1650728018032206e-05, + "loss": 0.8921, + "step": 2945 + }, + { + "epoch": 1.63, + "learning_rate": 2.15789945590897e-05, + "loss": 0.9607, + "step": 2950 + }, + { + "epoch": 1.63, + "learning_rate": 2.150728979168683e-05, + "loss": 0.9755, + "step": 2955 + }, + { + "epoch": 1.64, + "learning_rate": 2.1435614317202384e-05, + "loss": 0.9943, + "step": 2960 + }, + { + "epoch": 1.64, + "learning_rate": 2.1363968736769508e-05, + "loss": 0.9463, + "step": 2965 + }, + { + "epoch": 1.64, + "learning_rate": 2.1292353651270617e-05, + "loss": 0.9107, + "step": 2970 + }, + { + "epoch": 1.65, + "learning_rate": 2.1220769661332365e-05, + "loss": 0.9311, + "step": 2975 + }, + { + "epoch": 1.65, + "learning_rate": 2.1149217367320622e-05, + "loss": 0.9459, + "step": 2980 + }, + { + "epoch": 1.65, + "learning_rate": 2.107769736933541e-05, + "loss": 0.9439, + "step": 2985 + }, + { + "epoch": 1.65, + "learning_rate": 2.100621026720591e-05, + "loss": 0.9719, + "step": 2990 + }, + { + "epoch": 1.66, + "learning_rate": 2.093475666048539e-05, + "loss": 0.9569, + "step": 2995 + }, + { + "epoch": 1.66, + "learning_rate": 2.0863337148446222e-05, + "loss": 0.9308, + "step": 3000 + }, + { + "epoch": 1.66, + "learning_rate": 2.07919523300748e-05, + "loss": 0.9269, + "step": 3005 + }, + { + "epoch": 1.66, + "learning_rate": 2.0720602804066552e-05, + "loss": 0.9358, + "step": 3010 + }, + { + "epoch": 1.67, + "learning_rate": 2.0649289168820943e-05, + "loss": 0.9291, + "step": 3015 + }, + { + "epoch": 1.67, + "learning_rate": 2.0578012022436386e-05, + "loss": 0.969, + "step": 3020 + }, + { + "epoch": 1.67, + "learning_rate": 2.0506771962705304e-05, + "loss": 0.9319, + "step": 3025 + }, + { + "epoch": 1.68, + "learning_rate": 2.0435569587109042e-05, + "loss": 0.9574, + "step": 3030 + }, + { + "epoch": 1.68, + "learning_rate": 2.036440549281293e-05, + "loss": 0.9654, + "step": 3035 + }, + { + "epoch": 1.68, + "learning_rate": 2.0293280276661204e-05, + "loss": 0.9742, + "step": 3040 + }, + { + "epoch": 1.68, + "learning_rate": 2.0222194535172067e-05, + "loss": 0.9014, + "step": 3045 + }, + { + "epoch": 1.69, + "learning_rate": 2.0151148864532623e-05, + "loss": 0.9246, + "step": 3050 + }, + { + "epoch": 1.69, + "learning_rate": 2.0080143860593913e-05, + "loss": 0.9522, + "step": 3055 + }, + { + "epoch": 1.69, + "learning_rate": 2.0009180118865933e-05, + "loss": 0.9567, + "step": 3060 + }, + { + "epoch": 1.69, + "learning_rate": 1.9938258234512588e-05, + "loss": 0.983, + "step": 3065 + }, + { + "epoch": 1.7, + "learning_rate": 1.9867378802346764e-05, + "loss": 0.8722, + "step": 3070 + }, + { + "epoch": 1.7, + "learning_rate": 1.979654241682527e-05, + "loss": 0.9122, + "step": 3075 + }, + { + "epoch": 1.7, + "learning_rate": 1.972574967204391e-05, + "loss": 0.9362, + "step": 3080 + }, + { + "epoch": 1.71, + "learning_rate": 1.9655001161732478e-05, + "loss": 0.8944, + "step": 3085 + }, + { + "epoch": 1.71, + "learning_rate": 1.9584297479249774e-05, + "loss": 0.9329, + "step": 3090 + }, + { + "epoch": 1.71, + "learning_rate": 1.9513639217578636e-05, + "loss": 0.9707, + "step": 3095 + }, + { + "epoch": 1.71, + "learning_rate": 1.9443026969320955e-05, + "loss": 0.9367, + "step": 3100 + }, + { + "epoch": 1.72, + "learning_rate": 1.937246132669272e-05, + "loss": 0.896, + "step": 3105 + }, + { + "epoch": 1.72, + "learning_rate": 1.9301942881519047e-05, + "loss": 0.97, + "step": 3110 + }, + { + "epoch": 1.72, + "learning_rate": 1.9231472225229216e-05, + "loss": 0.9638, + "step": 3115 + }, + { + "epoch": 1.73, + "learning_rate": 1.9161049948851684e-05, + "loss": 0.9561, + "step": 3120 + }, + { + "epoch": 1.73, + "learning_rate": 1.9090676643009193e-05, + "loss": 0.9734, + "step": 3125 + }, + { + "epoch": 1.73, + "learning_rate": 1.902035289791373e-05, + "loss": 0.9651, + "step": 3130 + }, + { + "epoch": 1.73, + "learning_rate": 1.8950079303361658e-05, + "loss": 0.9489, + "step": 3135 + }, + { + "epoch": 1.74, + "learning_rate": 1.8879856448728723e-05, + "loss": 0.9893, + "step": 3140 + }, + { + "epoch": 1.74, + "learning_rate": 1.8809684922965097e-05, + "loss": 0.9549, + "step": 3145 + }, + { + "epoch": 1.74, + "learning_rate": 1.8739565314590507e-05, + "loss": 0.9196, + "step": 3150 + }, + { + "epoch": 1.74, + "learning_rate": 1.8669498211689216e-05, + "loss": 0.9568, + "step": 3155 + }, + { + "epoch": 1.75, + "learning_rate": 1.859948420190517e-05, + "loss": 0.904, + "step": 3160 + }, + { + "epoch": 1.75, + "learning_rate": 1.852952387243698e-05, + "loss": 0.9375, + "step": 3165 + }, + { + "epoch": 1.75, + "learning_rate": 1.8459617810033096e-05, + "loss": 0.9048, + "step": 3170 + }, + { + "epoch": 1.76, + "learning_rate": 1.83897666009868e-05, + "loss": 0.9514, + "step": 3175 + }, + { + "epoch": 1.76, + "learning_rate": 1.8319970831131363e-05, + "loss": 0.8855, + "step": 3180 + }, + { + "epoch": 1.76, + "learning_rate": 1.825023108583505e-05, + "loss": 0.9119, + "step": 3185 + }, + { + "epoch": 1.76, + "learning_rate": 1.818054794999628e-05, + "loss": 0.9733, + "step": 3190 + }, + { + "epoch": 1.77, + "learning_rate": 1.8110922008038705e-05, + "loss": 0.9597, + "step": 3195 + }, + { + "epoch": 1.77, + "learning_rate": 1.8041353843906275e-05, + "loss": 0.8952, + "step": 3200 + }, + { + "epoch": 1.77, + "learning_rate": 1.797184404105839e-05, + "loss": 0.8934, + "step": 3205 + }, + { + "epoch": 1.77, + "learning_rate": 1.7902393182464955e-05, + "loss": 0.9299, + "step": 3210 + }, + { + "epoch": 1.78, + "learning_rate": 1.7833001850601544e-05, + "loss": 0.9247, + "step": 3215 + }, + { + "epoch": 1.78, + "learning_rate": 1.7763670627444465e-05, + "loss": 0.9672, + "step": 3220 + }, + { + "epoch": 1.78, + "learning_rate": 1.7694400094465913e-05, + "loss": 0.9451, + "step": 3225 + }, + { + "epoch": 1.79, + "learning_rate": 1.7625190832629085e-05, + "loss": 0.9294, + "step": 3230 + }, + { + "epoch": 1.79, + "learning_rate": 1.7556043422383293e-05, + "loss": 0.9145, + "step": 3235 + }, + { + "epoch": 1.79, + "learning_rate": 1.7486958443659112e-05, + "loss": 0.9508, + "step": 3240 + }, + { + "epoch": 1.79, + "learning_rate": 1.7417936475863526e-05, + "loss": 0.8725, + "step": 3245 + }, + { + "epoch": 1.8, + "learning_rate": 1.7348978097875036e-05, + "loss": 0.9195, + "step": 3250 + }, + { + "epoch": 1.8, + "learning_rate": 1.728008388803883e-05, + "loss": 0.933, + "step": 3255 + }, + { + "epoch": 1.8, + "learning_rate": 1.7211254424161933e-05, + "loss": 0.9747, + "step": 3260 + }, + { + "epoch": 1.81, + "learning_rate": 1.7142490283508324e-05, + "loss": 0.9168, + "step": 3265 + }, + { + "epoch": 1.81, + "learning_rate": 1.707379204279418e-05, + "loss": 0.9844, + "step": 3270 + }, + { + "epoch": 1.81, + "learning_rate": 1.700516027818293e-05, + "loss": 0.9071, + "step": 3275 + }, + { + "epoch": 1.81, + "learning_rate": 1.6936595565280488e-05, + "loss": 0.9311, + "step": 3280 + }, + { + "epoch": 1.82, + "learning_rate": 1.686809847913045e-05, + "loss": 0.9419, + "step": 3285 + }, + { + "epoch": 1.82, + "learning_rate": 1.679966959420918e-05, + "loss": 0.9679, + "step": 3290 + }, + { + "epoch": 1.82, + "learning_rate": 1.67313094844211e-05, + "loss": 0.9601, + "step": 3295 + }, + { + "epoch": 1.82, + "learning_rate": 1.6663018723093774e-05, + "loss": 0.9022, + "step": 3300 + }, + { + "epoch": 1.83, + "learning_rate": 1.6594797882973196e-05, + "loss": 0.8925, + "step": 3305 + }, + { + "epoch": 1.83, + "learning_rate": 1.6526647536218894e-05, + "loss": 0.9238, + "step": 3310 + }, + { + "epoch": 1.83, + "learning_rate": 1.6458568254399225e-05, + "loss": 0.9632, + "step": 3315 + }, + { + "epoch": 1.84, + "learning_rate": 1.6390560608486496e-05, + "loss": 0.9164, + "step": 3320 + }, + { + "epoch": 1.84, + "learning_rate": 1.6322625168852217e-05, + "loss": 0.9505, + "step": 3325 + }, + { + "epoch": 1.84, + "learning_rate": 1.6254762505262338e-05, + "loss": 0.9622, + "step": 3330 + }, + { + "epoch": 1.84, + "learning_rate": 1.618697318687241e-05, + "loss": 0.9204, + "step": 3335 + }, + { + "epoch": 1.85, + "learning_rate": 1.6119257782222895e-05, + "loss": 0.9504, + "step": 3340 + }, + { + "epoch": 1.85, + "learning_rate": 1.6051616859234285e-05, + "loss": 0.9384, + "step": 3345 + }, + { + "epoch": 1.85, + "learning_rate": 1.5984050985202474e-05, + "loss": 0.9374, + "step": 3350 + }, + { + "epoch": 1.86, + "learning_rate": 1.591656072679387e-05, + "loss": 0.9937, + "step": 3355 + }, + { + "epoch": 1.86, + "learning_rate": 1.5849146650040737e-05, + "loss": 0.9587, + "step": 3360 + }, + { + "epoch": 1.86, + "learning_rate": 1.5781809320336412e-05, + "loss": 0.9312, + "step": 3365 + }, + { + "epoch": 1.86, + "learning_rate": 1.5714549302430536e-05, + "loss": 0.9343, + "step": 3370 + }, + { + "epoch": 1.87, + "learning_rate": 1.5647367160424393e-05, + "loss": 0.9419, + "step": 3375 + }, + { + "epoch": 1.87, + "learning_rate": 1.558026345776608e-05, + "loss": 0.9371, + "step": 3380 + }, + { + "epoch": 1.87, + "learning_rate": 1.551323875724587e-05, + "loss": 0.913, + "step": 3385 + }, + { + "epoch": 1.87, + "learning_rate": 1.5446293620991437e-05, + "loss": 0.9195, + "step": 3390 + }, + { + "epoch": 1.88, + "learning_rate": 1.5379428610463174e-05, + "loss": 0.9164, + "step": 3395 + }, + { + "epoch": 1.88, + "learning_rate": 1.531264428644945e-05, + "loss": 0.9256, + "step": 3400 + }, + { + "epoch": 1.88, + "learning_rate": 1.5245941209061953e-05, + "loss": 0.9658, + "step": 3405 + }, + { + "epoch": 1.89, + "learning_rate": 1.517931993773094e-05, + "loss": 0.9397, + "step": 3410 + }, + { + "epoch": 1.89, + "learning_rate": 1.5112781031200569e-05, + "loss": 0.915, + "step": 3415 + }, + { + "epoch": 1.89, + "learning_rate": 1.5046325047524251e-05, + "loss": 0.9116, + "step": 3420 + }, + { + "epoch": 1.89, + "learning_rate": 1.4979952544059888e-05, + "loss": 0.9192, + "step": 3425 + }, + { + "epoch": 1.9, + "learning_rate": 1.4913664077465289e-05, + "loss": 0.924, + "step": 3430 + }, + { + "epoch": 1.9, + "learning_rate": 1.4847460203693408e-05, + "loss": 0.9579, + "step": 3435 + }, + { + "epoch": 1.9, + "learning_rate": 1.4781341477987776e-05, + "loss": 0.9816, + "step": 3440 + }, + { + "epoch": 1.9, + "learning_rate": 1.4715308454877758e-05, + "loss": 0.9412, + "step": 3445 + }, + { + "epoch": 1.91, + "learning_rate": 1.4649361688173979e-05, + "loss": 0.9413, + "step": 3450 + }, + { + "epoch": 1.91, + "learning_rate": 1.458350173096361e-05, + "loss": 0.9425, + "step": 3455 + }, + { + "epoch": 1.91, + "learning_rate": 1.4517729135605795e-05, + "loss": 0.9132, + "step": 3460 + }, + { + "epoch": 1.92, + "learning_rate": 1.4452044453726942e-05, + "loss": 0.9455, + "step": 3465 + }, + { + "epoch": 1.92, + "learning_rate": 1.4386448236216174e-05, + "loss": 0.9543, + "step": 3470 + }, + { + "epoch": 1.92, + "learning_rate": 1.4320941033220667e-05, + "loss": 0.9118, + "step": 3475 + }, + { + "epoch": 1.92, + "learning_rate": 1.4255523394141041e-05, + "loss": 0.9425, + "step": 3480 + }, + { + "epoch": 1.93, + "learning_rate": 1.4190195867626749e-05, + "loss": 0.8958, + "step": 3485 + }, + { + "epoch": 1.93, + "learning_rate": 1.4124959001571497e-05, + "loss": 0.944, + "step": 3490 + }, + { + "epoch": 1.93, + "learning_rate": 1.4059813343108616e-05, + "loss": 0.9611, + "step": 3495 + }, + { + "epoch": 1.94, + "learning_rate": 1.3994759438606501e-05, + "loss": 0.9448, + "step": 3500 + }, + { + "epoch": 1.94, + "learning_rate": 1.3929797833664013e-05, + "loss": 0.9659, + "step": 3505 + }, + { + "epoch": 1.94, + "learning_rate": 1.3864929073105922e-05, + "loss": 0.9178, + "step": 3510 + }, + { + "epoch": 1.94, + "learning_rate": 1.3800153700978282e-05, + "loss": 0.8965, + "step": 3515 + }, + { + "epoch": 1.95, + "learning_rate": 1.373547226054398e-05, + "loss": 0.9198, + "step": 3520 + }, + { + "epoch": 1.95, + "learning_rate": 1.367088529427803e-05, + "loss": 0.9398, + "step": 3525 + }, + { + "epoch": 1.95, + "learning_rate": 1.3606393343863182e-05, + "loss": 0.9423, + "step": 3530 + }, + { + "epoch": 1.95, + "learning_rate": 1.3541996950185227e-05, + "loss": 0.9592, + "step": 3535 + }, + { + "epoch": 1.96, + "learning_rate": 1.3477696653328598e-05, + "loss": 0.9489, + "step": 3540 + }, + { + "epoch": 1.96, + "learning_rate": 1.3413492992571713e-05, + "loss": 0.963, + "step": 3545 + }, + { + "epoch": 1.96, + "learning_rate": 1.3349386506382586e-05, + "loss": 0.9449, + "step": 3550 + }, + { + "epoch": 1.97, + "learning_rate": 1.3285377732414172e-05, + "loss": 0.9043, + "step": 3555 + }, + { + "epoch": 1.97, + "learning_rate": 1.3221467207499972e-05, + "loss": 0.9362, + "step": 3560 + }, + { + "epoch": 1.97, + "learning_rate": 1.3157655467649463e-05, + "loss": 0.945, + "step": 3565 + }, + { + "epoch": 1.97, + "learning_rate": 1.3093943048043634e-05, + "loss": 0.8693, + "step": 3570 + }, + { + "epoch": 1.98, + "learning_rate": 1.3030330483030479e-05, + "loss": 0.9659, + "step": 3575 + }, + { + "epoch": 1.98, + "learning_rate": 1.2966818306120535e-05, + "loss": 0.9036, + "step": 3580 + }, + { + "epoch": 1.98, + "learning_rate": 1.2903407049982386e-05, + "loss": 0.9282, + "step": 3585 + }, + { + "epoch": 1.99, + "learning_rate": 1.2840097246438215e-05, + "loss": 0.8747, + "step": 3590 + }, + { + "epoch": 1.99, + "learning_rate": 1.277688942645934e-05, + "loss": 0.936, + "step": 3595 + }, + { + "epoch": 1.99, + "learning_rate": 1.2713784120161725e-05, + "loss": 0.8945, + "step": 3600 + }, + { + "epoch": 1.99, + "learning_rate": 1.2650781856801598e-05, + "loss": 0.9273, + "step": 3605 + }, + { + "epoch": 2.0, + "learning_rate": 1.258788316477097e-05, + "loss": 0.9632, + "step": 3610 + }, + { + "epoch": 2.0, + "learning_rate": 1.2525088571593202e-05, + "loss": 0.8641, + "step": 3615 + }, + { + "epoch": 2.0, + "learning_rate": 1.2462398603918607e-05, + "loss": 0.9314, + "step": 3620 + }, + { + "epoch": 2.0, + "learning_rate": 1.2399813787520006e-05, + "loss": 0.9169, + "step": 3625 + }, + { + "epoch": 2.01, + "learning_rate": 1.2337334647288334e-05, + "loss": 0.9089, + "step": 3630 + }, + { + "epoch": 2.01, + "learning_rate": 1.2274961707228228e-05, + "loss": 0.9039, + "step": 3635 + }, + { + "epoch": 2.01, + "learning_rate": 1.2212695490453646e-05, + "loss": 0.9378, + "step": 3640 + }, + { + "epoch": 2.02, + "learning_rate": 1.2150536519183475e-05, + "loss": 0.9297, + "step": 3645 + }, + { + "epoch": 2.02, + "learning_rate": 1.2088485314737108e-05, + "loss": 0.9488, + "step": 3650 + }, + { + "epoch": 2.02, + "learning_rate": 1.2026542397530186e-05, + "loss": 0.9625, + "step": 3655 + }, + { + "epoch": 2.02, + "learning_rate": 1.1964708287070073e-05, + "loss": 0.8874, + "step": 3660 + }, + { + "epoch": 2.03, + "learning_rate": 1.1902983501951666e-05, + "loss": 0.9224, + "step": 3665 + }, + { + "epoch": 2.03, + "learning_rate": 1.1841368559852892e-05, + "loss": 0.9442, + "step": 3670 + }, + { + "epoch": 2.03, + "learning_rate": 1.17798639775305e-05, + "loss": 0.8688, + "step": 3675 + }, + { + "epoch": 2.03, + "learning_rate": 1.1718470270815608e-05, + "loss": 0.8912, + "step": 3680 + }, + { + "epoch": 2.04, + "learning_rate": 1.1657187954609496e-05, + "loss": 0.9131, + "step": 3685 + }, + { + "epoch": 2.04, + "learning_rate": 1.1596017542879168e-05, + "loss": 0.9827, + "step": 3690 + }, + { + "epoch": 2.04, + "learning_rate": 1.1534959548653132e-05, + "loss": 0.8588, + "step": 3695 + }, + { + "epoch": 2.05, + "learning_rate": 1.147401448401706e-05, + "loss": 0.9359, + "step": 3700 + }, + { + "epoch": 2.05, + "learning_rate": 1.1413182860109491e-05, + "loss": 0.9375, + "step": 3705 + }, + { + "epoch": 2.05, + "learning_rate": 1.1352465187117562e-05, + "loss": 0.9242, + "step": 3710 + }, + { + "epoch": 2.05, + "learning_rate": 1.1291861974272703e-05, + "loss": 0.9132, + "step": 3715 + }, + { + "epoch": 2.06, + "learning_rate": 1.1231373729846393e-05, + "loss": 0.9142, + "step": 3720 + }, + { + "epoch": 2.06, + "learning_rate": 1.1171000961145883e-05, + "loss": 0.9132, + "step": 3725 + }, + { + "epoch": 2.06, + "learning_rate": 1.1110744174509952e-05, + "loss": 0.9603, + "step": 3730 + }, + { + "epoch": 2.07, + "learning_rate": 1.1050603875304622e-05, + "loss": 0.9442, + "step": 3735 + }, + { + "epoch": 2.07, + "learning_rate": 1.0990580567918979e-05, + "loss": 0.9392, + "step": 3740 + }, + { + "epoch": 2.07, + "learning_rate": 1.0930674755760908e-05, + "loss": 0.9203, + "step": 3745 + }, + { + "epoch": 2.07, + "learning_rate": 1.0870886941252872e-05, + "loss": 0.9468, + "step": 3750 + }, + { + "epoch": 2.08, + "learning_rate": 1.0811217625827705e-05, + "loss": 0.9605, + "step": 3755 + }, + { + "epoch": 2.08, + "learning_rate": 1.0751667309924399e-05, + "loss": 0.9166, + "step": 3760 + }, + { + "epoch": 2.08, + "learning_rate": 1.0692236492983918e-05, + "loss": 0.939, + "step": 3765 + }, + { + "epoch": 2.08, + "learning_rate": 1.0632925673445001e-05, + "loss": 0.9409, + "step": 3770 + }, + { + "epoch": 2.09, + "learning_rate": 1.0573735348739987e-05, + "loss": 0.9523, + "step": 3775 + }, + { + "epoch": 2.09, + "learning_rate": 1.0514666015290645e-05, + "loss": 0.8867, + "step": 3780 + }, + { + "epoch": 2.09, + "learning_rate": 1.0455718168503967e-05, + "loss": 0.9816, + "step": 3785 + }, + { + "epoch": 2.1, + "learning_rate": 1.0396892302768127e-05, + "loss": 0.9465, + "step": 3790 + }, + { + "epoch": 2.1, + "learning_rate": 1.033818891144817e-05, + "loss": 0.9352, + "step": 3795 + }, + { + "epoch": 2.1, + "learning_rate": 1.0279608486882054e-05, + "loss": 0.8877, + "step": 3800 + }, + { + "epoch": 2.1, + "learning_rate": 1.0221151520376343e-05, + "loss": 0.9098, + "step": 3805 + }, + { + "epoch": 2.11, + "learning_rate": 1.0162818502202251e-05, + "loss": 0.8687, + "step": 3810 + }, + { + "epoch": 2.11, + "learning_rate": 1.0104609921591387e-05, + "loss": 0.9256, + "step": 3815 + }, + { + "epoch": 2.11, + "learning_rate": 1.0046526266731782e-05, + "loss": 0.92, + "step": 3820 + }, + { + "epoch": 2.12, + "learning_rate": 9.988568024763673e-06, + "loss": 0.8813, + "step": 3825 + }, + { + "epoch": 2.12, + "learning_rate": 9.930735681775505e-06, + "loss": 0.891, + "step": 3830 + }, + { + "epoch": 2.12, + "learning_rate": 9.87302972279982e-06, + "loss": 0.9479, + "step": 3835 + }, + { + "epoch": 2.12, + "learning_rate": 9.815450631809191e-06, + "loss": 0.8963, + "step": 3840 + }, + { + "epoch": 2.13, + "learning_rate": 9.757998891712172e-06, + "loss": 0.9178, + "step": 3845 + }, + { + "epoch": 2.13, + "learning_rate": 9.700674984349228e-06, + "loss": 0.9072, + "step": 3850 + }, + { + "epoch": 2.13, + "learning_rate": 9.643479390488717e-06, + "loss": 0.8879, + "step": 3855 + }, + { + "epoch": 2.13, + "learning_rate": 9.586412589822846e-06, + "loss": 0.9484, + "step": 3860 + }, + { + "epoch": 2.14, + "learning_rate": 9.529475060963649e-06, + "loss": 0.9173, + "step": 3865 + }, + { + "epoch": 2.14, + "learning_rate": 9.472667281438982e-06, + "loss": 0.9318, + "step": 3870 + }, + { + "epoch": 2.14, + "learning_rate": 9.415989727688484e-06, + "loss": 0.9242, + "step": 3875 + }, + { + "epoch": 2.15, + "learning_rate": 9.359442875059631e-06, + "loss": 0.9314, + "step": 3880 + }, + { + "epoch": 2.15, + "learning_rate": 9.303027197803726e-06, + "loss": 0.9227, + "step": 3885 + }, + { + "epoch": 2.15, + "learning_rate": 9.246743169071906e-06, + "loss": 0.9253, + "step": 3890 + }, + { + "epoch": 2.15, + "learning_rate": 9.190591260911201e-06, + "loss": 0.9457, + "step": 3895 + }, + { + "epoch": 2.16, + "learning_rate": 9.134571944260554e-06, + "loss": 0.9214, + "step": 3900 + }, + { + "epoch": 2.16, + "learning_rate": 9.078685688946884e-06, + "loss": 0.9534, + "step": 3905 + }, + { + "epoch": 2.16, + "learning_rate": 9.022932963681141e-06, + "loss": 0.9236, + "step": 3910 + }, + { + "epoch": 2.16, + "learning_rate": 8.967314236054384e-06, + "loss": 0.8905, + "step": 3915 + }, + { + "epoch": 2.17, + "learning_rate": 8.911829972533817e-06, + "loss": 0.9319, + "step": 3920 + }, + { + "epoch": 2.17, + "learning_rate": 8.856480638458966e-06, + "loss": 0.8989, + "step": 3925 + }, + { + "epoch": 2.17, + "learning_rate": 8.80126669803766e-06, + "loss": 0.9234, + "step": 3930 + }, + { + "epoch": 2.18, + "learning_rate": 8.746188614342263e-06, + "loss": 0.9619, + "step": 3935 + }, + { + "epoch": 2.18, + "learning_rate": 8.691246849305653e-06, + "loss": 0.9537, + "step": 3940 + }, + { + "epoch": 2.18, + "learning_rate": 8.636441863717499e-06, + "loss": 0.9809, + "step": 3945 + }, + { + "epoch": 2.18, + "learning_rate": 8.581774117220238e-06, + "loss": 0.9219, + "step": 3950 + }, + { + "epoch": 2.19, + "learning_rate": 8.52724406830538e-06, + "loss": 0.9502, + "step": 3955 + }, + { + "epoch": 2.19, + "learning_rate": 8.472852174309514e-06, + "loss": 0.9277, + "step": 3960 + }, + { + "epoch": 2.19, + "learning_rate": 8.418598891410581e-06, + "loss": 0.8978, + "step": 3965 + }, + { + "epoch": 2.2, + "learning_rate": 8.364484674624e-06, + "loss": 0.9026, + "step": 3970 + }, + { + "epoch": 2.2, + "learning_rate": 8.31050997779885e-06, + "loss": 0.9658, + "step": 3975 + }, + { + "epoch": 2.2, + "learning_rate": 8.25667525361409e-06, + "loss": 0.9288, + "step": 3980 + }, + { + "epoch": 2.2, + "learning_rate": 8.202980953574735e-06, + "loss": 0.9009, + "step": 3985 + }, + { + "epoch": 2.21, + "learning_rate": 8.14942752800808e-06, + "loss": 0.934, + "step": 3990 + }, + { + "epoch": 2.21, + "learning_rate": 8.09601542605993e-06, + "loss": 0.9261, + "step": 3995 + }, + { + "epoch": 2.21, + "learning_rate": 8.042745095690826e-06, + "loss": 0.9086, + "step": 4000 + }, + { + "epoch": 2.21, + "learning_rate": 7.989616983672288e-06, + "loss": 0.9397, + "step": 4005 + }, + { + "epoch": 2.22, + "learning_rate": 7.936631535583055e-06, + "loss": 0.8661, + "step": 4010 + }, + { + "epoch": 2.22, + "learning_rate": 7.88378919580538e-06, + "loss": 0.9433, + "step": 4015 + }, + { + "epoch": 2.22, + "learning_rate": 7.83109040752128e-06, + "loss": 0.9473, + "step": 4020 + }, + { + "epoch": 2.23, + "learning_rate": 7.778535612708824e-06, + "loss": 0.9177, + "step": 4025 + }, + { + "epoch": 2.23, + "learning_rate": 7.726125252138417e-06, + "loss": 0.8932, + "step": 4030 + }, + { + "epoch": 2.23, + "learning_rate": 7.673859765369126e-06, + "loss": 0.9187, + "step": 4035 + }, + { + "epoch": 2.23, + "learning_rate": 7.62173959074497e-06, + "loss": 0.9009, + "step": 4040 + }, + { + "epoch": 2.24, + "learning_rate": 7.569765165391257e-06, + "loss": 0.9348, + "step": 4045 + }, + { + "epoch": 2.24, + "learning_rate": 7.517936925210917e-06, + "loss": 0.8827, + "step": 4050 + }, + { + "epoch": 2.24, + "learning_rate": 7.466255304880834e-06, + "loss": 0.9668, + "step": 4055 + }, + { + "epoch": 2.24, + "learning_rate": 7.4147207378482295e-06, + "loss": 0.934, + "step": 4060 + }, + { + "epoch": 2.25, + "learning_rate": 7.36333365632697e-06, + "loss": 0.9251, + "step": 4065 + }, + { + "epoch": 2.25, + "learning_rate": 7.312094491294033e-06, + "loss": 0.9225, + "step": 4070 + }, + { + "epoch": 2.25, + "learning_rate": 7.261003672485783e-06, + "loss": 0.9198, + "step": 4075 + }, + { + "epoch": 2.26, + "learning_rate": 7.210061628394477e-06, + "loss": 0.9023, + "step": 4080 + }, + { + "epoch": 2.26, + "learning_rate": 7.159268786264564e-06, + "loss": 0.9378, + "step": 4085 + }, + { + "epoch": 2.26, + "learning_rate": 7.108625572089209e-06, + "loss": 0.9148, + "step": 4090 + }, + { + "epoch": 2.26, + "learning_rate": 7.05813241060661e-06, + "loss": 0.9308, + "step": 4095 + }, + { + "epoch": 2.27, + "learning_rate": 7.007789725296557e-06, + "loss": 0.8979, + "step": 4100 + }, + { + "epoch": 2.27, + "learning_rate": 6.957597938376748e-06, + "loss": 0.9329, + "step": 4105 + }, + { + "epoch": 2.27, + "learning_rate": 6.907557470799358e-06, + "loss": 0.8849, + "step": 4110 + }, + { + "epoch": 2.28, + "learning_rate": 6.857668742247458e-06, + "loss": 0.8879, + "step": 4115 + }, + { + "epoch": 2.28, + "learning_rate": 6.807932171131498e-06, + "loss": 0.9132, + "step": 4120 + }, + { + "epoch": 2.28, + "learning_rate": 6.758348174585804e-06, + "loss": 0.9273, + "step": 4125 + }, + { + "epoch": 2.28, + "learning_rate": 6.7089171684650785e-06, + "loss": 0.9115, + "step": 4130 + }, + { + "epoch": 2.29, + "learning_rate": 6.659639567340914e-06, + "loss": 0.9431, + "step": 4135 + }, + { + "epoch": 2.29, + "learning_rate": 6.610515784498314e-06, + "loss": 0.9515, + "step": 4140 + }, + { + "epoch": 2.29, + "learning_rate": 6.561546231932228e-06, + "loss": 0.9069, + "step": 4145 + }, + { + "epoch": 2.29, + "learning_rate": 6.512731320344101e-06, + "loss": 0.9178, + "step": 4150 + }, + { + "epoch": 2.3, + "learning_rate": 6.464071459138405e-06, + "loss": 0.9156, + "step": 4155 + }, + { + "epoch": 2.3, + "learning_rate": 6.415567056419244e-06, + "loss": 0.9637, + "step": 4160 + }, + { + "epoch": 2.3, + "learning_rate": 6.3672185189869e-06, + "loss": 0.9446, + "step": 4165 + }, + { + "epoch": 2.31, + "learning_rate": 6.319026252334445e-06, + "loss": 0.8726, + "step": 4170 + }, + { + "epoch": 2.31, + "learning_rate": 6.270990660644313e-06, + "loss": 0.9324, + "step": 4175 + }, + { + "epoch": 2.31, + "learning_rate": 6.223112146784935e-06, + "loss": 0.9252, + "step": 4180 + }, + { + "epoch": 2.31, + "learning_rate": 6.1753911123073435e-06, + "loss": 0.8924, + "step": 4185 + }, + { + "epoch": 2.32, + "learning_rate": 6.127827957441817e-06, + "loss": 0.9344, + "step": 4190 + }, + { + "epoch": 2.32, + "learning_rate": 6.080423081094522e-06, + "loss": 0.9341, + "step": 4195 + }, + { + "epoch": 2.32, + "learning_rate": 6.033176880844133e-06, + "loss": 0.905, + "step": 4200 + } + ], + "max_steps": 5424, + "num_train_epochs": 3, + "total_flos": 1.3648346884551475e+18, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-4200/training_args.bin b/checkpoint-4200/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..77433459daca6603950afc463ff989e817f81ac3 --- /dev/null +++ b/checkpoint-4200/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0f6a8853599a52b1f15984d75ce4b6434c335fdc67d4509d0b2ca0d8c613590a +size 3362 diff --git a/checkpoint-4300/README.md b/checkpoint-4300/README.md new file mode 100644 index 0000000000000000000000000000000000000000..53c0fe6b436905ad7a2c060e98fe1ebdeac1a2fe --- /dev/null +++ b/checkpoint-4300/README.md @@ -0,0 +1,15 @@ +--- +library_name: peft +--- +## Training procedure + + +The following `bitsandbytes` quantization config was used during training: +- load_in_8bit: True +- llm_int8_threshold: 6.0 +- llm_int8_skip_modules: None +- llm_int8_enable_fp32_cpu_offload: False +### Framework versions + + +- PEFT 0.5.0 diff --git a/checkpoint-4300/adapter_config.json b/checkpoint-4300/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..514ae99ac7471cc26016f7575e44dd8304f29b07 --- /dev/null +++ b/checkpoint-4300/adapter_config.json @@ -0,0 +1,21 @@ +{ + "auto_mapping": null, + "base_model_name_or_path": "meta-llama/Llama-2-7b-chat-hf", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "q_proj", + "v_proj" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-4300/adapter_model.bin b/checkpoint-4300/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..64d4374e3d82497686a4d3455f7c02fb6e148f50 --- /dev/null +++ b/checkpoint-4300/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:350b1c7ba1ce04d2cd3262d13010bcca9d53674142f26ff95c01708e5d734d04 +size 16821197 diff --git a/checkpoint-4300/finetuning_args.json b/checkpoint-4300/finetuning_args.json new file mode 100644 index 0000000000000000000000000000000000000000..d01efc206b59c6f88548e8f3940579f2ed2af33b --- /dev/null +++ b/checkpoint-4300/finetuning_args.json @@ -0,0 +1,16 @@ +{ + "dpo_beta": 0.1, + "finetuning_type": "lora", + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "lora_rank": 8, + "lora_target": [ + "q_proj", + "v_proj" + ], + "name_module_trainable": "mlp", + "num_hidden_layers": 32, + "num_layer_trainable": 3, + "ppo_score_norm": false, + "resume_lora_training": true +} diff --git a/checkpoint-4300/optimizer.pt b/checkpoint-4300/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..750bb93b7141b9d2c1780af4a77c1ddd5576c242 --- /dev/null +++ b/checkpoint-4300/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1f1e6b6d17e2711d4b63df13ca3ca18df7b1b1e6ecba266c182356152f67d859 +size 33629893 diff --git a/checkpoint-4300/rng_state.pth b/checkpoint-4300/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..ed28fb53f150f3c205b775d80f83b9b164e696b5 --- /dev/null +++ b/checkpoint-4300/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6768681755680606342226e2100975165558c3ab43ff83e02391d350e8e15012 +size 14575 diff --git a/checkpoint-4300/scaler.pt b/checkpoint-4300/scaler.pt new file mode 100644 index 0000000000000000000000000000000000000000..b61c03fe1f370b1fe1b0bf2493dc4d8568e8c4dc --- /dev/null +++ b/checkpoint-4300/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8ebd45e136c500174307628ef9e73bbb34e367df01d31cbf296acad066dbd082 +size 557 diff --git a/checkpoint-4300/scheduler.pt b/checkpoint-4300/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..d9567b662f74cc8a80dd0be712e323ac6bdfaaeb --- /dev/null +++ b/checkpoint-4300/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7eefac0bcc62ca3d6f6d6b52767548f3841a18b091456bcbaaad48310d55ee73 +size 627 diff --git a/checkpoint-4300/trainer_state.json b/checkpoint-4300/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..d371b6e582e37f55d79609414a24a7209dc17dc9 --- /dev/null +++ b/checkpoint-4300/trainer_state.json @@ -0,0 +1,5176 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 2.377661045064971, + "global_step": 4300, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.9999895164082156e-05, + "loss": 1.6349, + "step": 5 + }, + { + "epoch": 0.01, + "learning_rate": 4.999958065720787e-05, + "loss": 1.6199, + "step": 10 + }, + { + "epoch": 0.01, + "learning_rate": 4.999905648201487e-05, + "loss": 1.4834, + "step": 15 + }, + { + "epoch": 0.01, + "learning_rate": 4.999832264289934e-05, + "loss": 1.3882, + "step": 20 + }, + { + "epoch": 0.01, + "learning_rate": 4.999737914601591e-05, + "loss": 1.3679, + "step": 25 + }, + { + "epoch": 0.02, + "learning_rate": 4.999622599927756e-05, + "loss": 1.2396, + "step": 30 + }, + { + "epoch": 0.02, + "learning_rate": 4.999486321235559e-05, + "loss": 1.321, + "step": 35 + }, + { + "epoch": 0.02, + "learning_rate": 4.9993290796679516e-05, + "loss": 1.2874, + "step": 40 + }, + { + "epoch": 0.02, + "learning_rate": 4.999150876543699e-05, + "loss": 1.2607, + "step": 45 + }, + { + "epoch": 0.03, + "learning_rate": 4.9989517133573694e-05, + "loss": 1.2454, + "step": 50 + }, + { + "epoch": 0.03, + "learning_rate": 4.9987315917793174e-05, + "loss": 1.2799, + "step": 55 + }, + { + "epoch": 0.03, + "learning_rate": 4.998490513655676e-05, + "loss": 1.2575, + "step": 60 + }, + { + "epoch": 0.04, + "learning_rate": 4.998228481008337e-05, + "loss": 1.2404, + "step": 65 + }, + { + "epoch": 0.04, + "learning_rate": 4.997945496034934e-05, + "loss": 1.2219, + "step": 70 + }, + { + "epoch": 0.04, + "learning_rate": 4.9976415611088267e-05, + "loss": 1.2241, + "step": 75 + }, + { + "epoch": 0.04, + "learning_rate": 4.997316678779079e-05, + "loss": 1.1716, + "step": 80 + }, + { + "epoch": 0.05, + "learning_rate": 4.996970851770438e-05, + "loss": 1.1883, + "step": 85 + }, + { + "epoch": 0.05, + "learning_rate": 4.9966040829833115e-05, + "loss": 1.205, + "step": 90 + }, + { + "epoch": 0.05, + "learning_rate": 4.9962163754937426e-05, + "loss": 1.1246, + "step": 95 + }, + { + "epoch": 0.06, + "learning_rate": 4.995807732553384e-05, + "loss": 1.1636, + "step": 100 + }, + { + "epoch": 0.06, + "learning_rate": 4.9953781575894723e-05, + "loss": 1.158, + "step": 105 + }, + { + "epoch": 0.06, + "learning_rate": 4.9949276542048e-05, + "loss": 1.1477, + "step": 110 + }, + { + "epoch": 0.06, + "learning_rate": 4.9944562261776805e-05, + "loss": 1.1678, + "step": 115 + }, + { + "epoch": 0.07, + "learning_rate": 4.9939638774619216e-05, + "loss": 1.1501, + "step": 120 + }, + { + "epoch": 0.07, + "learning_rate": 4.99345061218679e-05, + "loss": 1.1955, + "step": 125 + }, + { + "epoch": 0.07, + "learning_rate": 4.9929164346569756e-05, + "loss": 1.1724, + "step": 130 + }, + { + "epoch": 0.07, + "learning_rate": 4.9923613493525576e-05, + "loss": 1.177, + "step": 135 + }, + { + "epoch": 0.08, + "learning_rate": 4.991785360928968e-05, + "loss": 1.1418, + "step": 140 + }, + { + "epoch": 0.08, + "learning_rate": 4.991188474216947e-05, + "loss": 1.1898, + "step": 145 + }, + { + "epoch": 0.08, + "learning_rate": 4.9905706942225094e-05, + "loss": 1.1479, + "step": 150 + }, + { + "epoch": 0.09, + "learning_rate": 4.9899320261268966e-05, + "loss": 1.1356, + "step": 155 + }, + { + "epoch": 0.09, + "learning_rate": 4.989272475286537e-05, + "loss": 1.1397, + "step": 160 + }, + { + "epoch": 0.09, + "learning_rate": 4.9885920472330004e-05, + "loss": 1.1215, + "step": 165 + }, + { + "epoch": 0.09, + "learning_rate": 4.9878907476729516e-05, + "loss": 1.167, + "step": 170 + }, + { + "epoch": 0.1, + "learning_rate": 4.9871685824881e-05, + "loss": 1.1219, + "step": 175 + }, + { + "epoch": 0.1, + "learning_rate": 4.9864255577351534e-05, + "loss": 1.0835, + "step": 180 + }, + { + "epoch": 0.1, + "learning_rate": 4.985661679645769e-05, + "loss": 1.0721, + "step": 185 + }, + { + "epoch": 0.11, + "learning_rate": 4.9848769546264915e-05, + "loss": 1.0692, + "step": 190 + }, + { + "epoch": 0.11, + "learning_rate": 4.9840713892587146e-05, + "loss": 1.0488, + "step": 195 + }, + { + "epoch": 0.11, + "learning_rate": 4.983244990298609e-05, + "loss": 1.1285, + "step": 200 + }, + { + "epoch": 0.11, + "learning_rate": 4.982397764677081e-05, + "loss": 1.0832, + "step": 205 + }, + { + "epoch": 0.12, + "learning_rate": 4.981529719499704e-05, + "loss": 1.0652, + "step": 210 + }, + { + "epoch": 0.12, + "learning_rate": 4.980640862046663e-05, + "loss": 1.1043, + "step": 215 + }, + { + "epoch": 0.12, + "learning_rate": 4.979731199772693e-05, + "loss": 1.112, + "step": 220 + }, + { + "epoch": 0.12, + "learning_rate": 4.9788007403070146e-05, + "loss": 1.1029, + "step": 225 + }, + { + "epoch": 0.13, + "learning_rate": 4.977849491453277e-05, + "loss": 1.0869, + "step": 230 + }, + { + "epoch": 0.13, + "learning_rate": 4.976877461189481e-05, + "loss": 1.0843, + "step": 235 + }, + { + "epoch": 0.13, + "learning_rate": 4.975884657667922e-05, + "loss": 1.0789, + "step": 240 + }, + { + "epoch": 0.14, + "learning_rate": 4.974871089215118e-05, + "loss": 1.0449, + "step": 245 + }, + { + "epoch": 0.14, + "learning_rate": 4.9738367643317405e-05, + "loss": 1.1053, + "step": 250 + }, + { + "epoch": 0.14, + "learning_rate": 4.9727816916925395e-05, + "loss": 1.0651, + "step": 255 + }, + { + "epoch": 0.14, + "learning_rate": 4.971705880146276e-05, + "loss": 1.0828, + "step": 260 + }, + { + "epoch": 0.15, + "learning_rate": 4.970609338715646e-05, + "loss": 1.0932, + "step": 265 + }, + { + "epoch": 0.15, + "learning_rate": 4.969492076597203e-05, + "loss": 1.0648, + "step": 270 + }, + { + "epoch": 0.15, + "learning_rate": 4.968354103161283e-05, + "loss": 1.0948, + "step": 275 + }, + { + "epoch": 0.15, + "learning_rate": 4.967195427951926e-05, + "loss": 1.0721, + "step": 280 + }, + { + "epoch": 0.16, + "learning_rate": 4.9660160606867936e-05, + "loss": 1.124, + "step": 285 + }, + { + "epoch": 0.16, + "learning_rate": 4.9648160112570896e-05, + "loss": 1.0963, + "step": 290 + }, + { + "epoch": 0.16, + "learning_rate": 4.9635952897274773e-05, + "loss": 1.1078, + "step": 295 + }, + { + "epoch": 0.17, + "learning_rate": 4.9623539063359925e-05, + "loss": 1.1059, + "step": 300 + }, + { + "epoch": 0.17, + "learning_rate": 4.961091871493962e-05, + "loss": 1.1032, + "step": 305 + }, + { + "epoch": 0.17, + "learning_rate": 4.959809195785912e-05, + "loss": 1.0595, + "step": 310 + }, + { + "epoch": 0.17, + "learning_rate": 4.958505889969481e-05, + "loss": 1.1096, + "step": 315 + }, + { + "epoch": 0.18, + "learning_rate": 4.957181964975329e-05, + "loss": 1.0589, + "step": 320 + }, + { + "epoch": 0.18, + "learning_rate": 4.955837431907049e-05, + "loss": 1.0608, + "step": 325 + }, + { + "epoch": 0.18, + "learning_rate": 4.954472302041069e-05, + "loss": 1.0819, + "step": 330 + }, + { + "epoch": 0.19, + "learning_rate": 4.9530865868265605e-05, + "loss": 1.0759, + "step": 335 + }, + { + "epoch": 0.19, + "learning_rate": 4.951680297885342e-05, + "loss": 1.0515, + "step": 340 + }, + { + "epoch": 0.19, + "learning_rate": 4.950253447011779e-05, + "loss": 1.0371, + "step": 345 + }, + { + "epoch": 0.19, + "learning_rate": 4.948806046172691e-05, + "loss": 1.0619, + "step": 350 + }, + { + "epoch": 0.2, + "learning_rate": 4.947338107507245e-05, + "loss": 1.0757, + "step": 355 + }, + { + "epoch": 0.2, + "learning_rate": 4.945849643326857e-05, + "loss": 1.0686, + "step": 360 + }, + { + "epoch": 0.2, + "learning_rate": 4.9443406661150874e-05, + "loss": 1.0809, + "step": 365 + }, + { + "epoch": 0.2, + "learning_rate": 4.942811188527537e-05, + "loss": 1.0704, + "step": 370 + }, + { + "epoch": 0.21, + "learning_rate": 4.941261223391742e-05, + "loss": 1.0655, + "step": 375 + }, + { + "epoch": 0.21, + "learning_rate": 4.939690783707063e-05, + "loss": 1.1182, + "step": 380 + }, + { + "epoch": 0.21, + "learning_rate": 4.938099882644578e-05, + "loss": 1.081, + "step": 385 + }, + { + "epoch": 0.22, + "learning_rate": 4.9364885335469734e-05, + "loss": 1.0792, + "step": 390 + }, + { + "epoch": 0.22, + "learning_rate": 4.93485674992843e-05, + "loss": 1.0244, + "step": 395 + }, + { + "epoch": 0.22, + "learning_rate": 4.933204545474511e-05, + "loss": 1.0531, + "step": 400 + }, + { + "epoch": 0.22, + "learning_rate": 4.9315319340420465e-05, + "loss": 1.048, + "step": 405 + }, + { + "epoch": 0.23, + "learning_rate": 4.929838929659015e-05, + "loss": 1.04, + "step": 410 + }, + { + "epoch": 0.23, + "learning_rate": 4.9281255465244314e-05, + "loss": 1.0686, + "step": 415 + }, + { + "epoch": 0.23, + "learning_rate": 4.926391799008223e-05, + "loss": 1.0933, + "step": 420 + }, + { + "epoch": 0.24, + "learning_rate": 4.924637701651111e-05, + "loss": 1.0313, + "step": 425 + }, + { + "epoch": 0.24, + "learning_rate": 4.9228632691644874e-05, + "loss": 1.0811, + "step": 430 + }, + { + "epoch": 0.24, + "learning_rate": 4.921068516430293e-05, + "loss": 1.0617, + "step": 435 + }, + { + "epoch": 0.24, + "learning_rate": 4.919253458500892e-05, + "loss": 1.0482, + "step": 440 + }, + { + "epoch": 0.25, + "learning_rate": 4.9174181105989445e-05, + "loss": 1.0681, + "step": 445 + }, + { + "epoch": 0.25, + "learning_rate": 4.9155624881172834e-05, + "loss": 1.0476, + "step": 450 + }, + { + "epoch": 0.25, + "learning_rate": 4.913686606618777e-05, + "loss": 1.0463, + "step": 455 + }, + { + "epoch": 0.25, + "learning_rate": 4.911790481836208e-05, + "loss": 1.0213, + "step": 460 + }, + { + "epoch": 0.26, + "learning_rate": 4.909874129672133e-05, + "loss": 0.9855, + "step": 465 + }, + { + "epoch": 0.26, + "learning_rate": 4.907937566198757e-05, + "loss": 1.045, + "step": 470 + }, + { + "epoch": 0.26, + "learning_rate": 4.9059808076577914e-05, + "loss": 1.0442, + "step": 475 + }, + { + "epoch": 0.27, + "learning_rate": 4.904003870460323e-05, + "loss": 1.0354, + "step": 480 + }, + { + "epoch": 0.27, + "learning_rate": 4.9020067711866735e-05, + "loss": 1.0331, + "step": 485 + }, + { + "epoch": 0.27, + "learning_rate": 4.899989526586261e-05, + "loss": 1.0585, + "step": 490 + }, + { + "epoch": 0.27, + "learning_rate": 4.8979521535774636e-05, + "loss": 1.0223, + "step": 495 + }, + { + "epoch": 0.28, + "learning_rate": 4.895894669247468e-05, + "loss": 1.0118, + "step": 500 + }, + { + "epoch": 0.28, + "learning_rate": 4.8938170908521356e-05, + "loss": 1.0508, + "step": 505 + }, + { + "epoch": 0.28, + "learning_rate": 4.8917194358158534e-05, + "loss": 1.0656, + "step": 510 + }, + { + "epoch": 0.28, + "learning_rate": 4.8896017217313886e-05, + "loss": 1.0655, + "step": 515 + }, + { + "epoch": 0.29, + "learning_rate": 4.887463966359741e-05, + "loss": 1.0833, + "step": 520 + }, + { + "epoch": 0.29, + "learning_rate": 4.8853061876299956e-05, + "loss": 1.068, + "step": 525 + }, + { + "epoch": 0.29, + "learning_rate": 4.8831284036391684e-05, + "loss": 1.0487, + "step": 530 + }, + { + "epoch": 0.3, + "learning_rate": 4.880930632652058e-05, + "loss": 1.059, + "step": 535 + }, + { + "epoch": 0.3, + "learning_rate": 4.878712893101092e-05, + "loss": 1.0408, + "step": 540 + }, + { + "epoch": 0.3, + "learning_rate": 4.876475203586171e-05, + "loss": 0.9976, + "step": 545 + }, + { + "epoch": 0.3, + "learning_rate": 4.874217582874514e-05, + "loss": 1.0358, + "step": 550 + }, + { + "epoch": 0.31, + "learning_rate": 4.8719400499005e-05, + "loss": 1.0073, + "step": 555 + }, + { + "epoch": 0.31, + "learning_rate": 4.869642623765509e-05, + "loss": 1.0742, + "step": 560 + }, + { + "epoch": 0.31, + "learning_rate": 4.867325323737765e-05, + "loss": 1.0463, + "step": 565 + }, + { + "epoch": 0.32, + "learning_rate": 4.864988169252168e-05, + "loss": 1.0242, + "step": 570 + }, + { + "epoch": 0.32, + "learning_rate": 4.8626311799101375e-05, + "loss": 1.0773, + "step": 575 + }, + { + "epoch": 0.32, + "learning_rate": 4.860254375479446e-05, + "loss": 1.0371, + "step": 580 + }, + { + "epoch": 0.32, + "learning_rate": 4.8578577758940504e-05, + "loss": 1.0399, + "step": 585 + }, + { + "epoch": 0.33, + "learning_rate": 4.855441401253928e-05, + "loss": 1.0411, + "step": 590 + }, + { + "epoch": 0.33, + "learning_rate": 4.8530052718249076e-05, + "loss": 1.0476, + "step": 595 + }, + { + "epoch": 0.33, + "learning_rate": 4.850549408038498e-05, + "loss": 1.0075, + "step": 600 + }, + { + "epoch": 0.33, + "learning_rate": 4.848073830491717e-05, + "loss": 1.0045, + "step": 605 + }, + { + "epoch": 0.34, + "learning_rate": 4.845578559946923e-05, + "loss": 1.0368, + "step": 610 + }, + { + "epoch": 0.34, + "learning_rate": 4.8430636173316306e-05, + "loss": 1.0156, + "step": 615 + }, + { + "epoch": 0.34, + "learning_rate": 4.840529023738348e-05, + "loss": 1.0334, + "step": 620 + }, + { + "epoch": 0.35, + "learning_rate": 4.837974800424389e-05, + "loss": 0.9994, + "step": 625 + }, + { + "epoch": 0.35, + "learning_rate": 4.8354009688117026e-05, + "loss": 1.0467, + "step": 630 + }, + { + "epoch": 0.35, + "learning_rate": 4.8328075504866874e-05, + "loss": 0.9968, + "step": 635 + }, + { + "epoch": 0.35, + "learning_rate": 4.8301945672000164e-05, + "loss": 1.041, + "step": 640 + }, + { + "epoch": 0.36, + "learning_rate": 4.8275620408664487e-05, + "loss": 0.9811, + "step": 645 + }, + { + "epoch": 0.36, + "learning_rate": 4.8249099935646494e-05, + "loss": 1.0235, + "step": 650 + }, + { + "epoch": 0.36, + "learning_rate": 4.822238447537003e-05, + "loss": 1.0152, + "step": 655 + }, + { + "epoch": 0.36, + "learning_rate": 4.819547425189429e-05, + "loss": 1.038, + "step": 660 + }, + { + "epoch": 0.37, + "learning_rate": 4.81683694909119e-05, + "loss": 1.0387, + "step": 665 + }, + { + "epoch": 0.37, + "learning_rate": 4.814107041974707e-05, + "loss": 1.019, + "step": 670 + }, + { + "epoch": 0.37, + "learning_rate": 4.811357726735366e-05, + "loss": 1.0403, + "step": 675 + }, + { + "epoch": 0.38, + "learning_rate": 4.808589026431324e-05, + "loss": 1.0828, + "step": 680 + }, + { + "epoch": 0.38, + "learning_rate": 4.805800964283322e-05, + "loss": 1.0128, + "step": 685 + }, + { + "epoch": 0.38, + "learning_rate": 4.802993563674483e-05, + "loss": 1.0502, + "step": 690 + }, + { + "epoch": 0.38, + "learning_rate": 4.80016684815012e-05, + "loss": 1.0169, + "step": 695 + }, + { + "epoch": 0.39, + "learning_rate": 4.7973208414175406e-05, + "loss": 1.0316, + "step": 700 + }, + { + "epoch": 0.39, + "learning_rate": 4.794455567345842e-05, + "loss": 1.0263, + "step": 705 + }, + { + "epoch": 0.39, + "learning_rate": 4.791571049965714e-05, + "loss": 1.0801, + "step": 710 + }, + { + "epoch": 0.4, + "learning_rate": 4.7886673134692404e-05, + "loss": 1.0575, + "step": 715 + }, + { + "epoch": 0.4, + "learning_rate": 4.7857443822096905e-05, + "loss": 0.9767, + "step": 720 + }, + { + "epoch": 0.4, + "learning_rate": 4.782802280701319e-05, + "loss": 0.9666, + "step": 725 + }, + { + "epoch": 0.4, + "learning_rate": 4.77984103361916e-05, + "loss": 0.9983, + "step": 730 + }, + { + "epoch": 0.41, + "learning_rate": 4.776860665798816e-05, + "loss": 1.0219, + "step": 735 + }, + { + "epoch": 0.41, + "learning_rate": 4.773861202236257e-05, + "loss": 0.9963, + "step": 740 + }, + { + "epoch": 0.41, + "learning_rate": 4.770842668087602e-05, + "loss": 1.0219, + "step": 745 + }, + { + "epoch": 0.41, + "learning_rate": 4.767805088668916e-05, + "loss": 1.0029, + "step": 750 + }, + { + "epoch": 0.42, + "learning_rate": 4.7647484894559936e-05, + "loss": 1.0077, + "step": 755 + }, + { + "epoch": 0.42, + "learning_rate": 4.7616728960841444e-05, + "loss": 0.9908, + "step": 760 + }, + { + "epoch": 0.42, + "learning_rate": 4.758578334347981e-05, + "loss": 1.0092, + "step": 765 + }, + { + "epoch": 0.43, + "learning_rate": 4.7554648302012015e-05, + "loss": 0.9874, + "step": 770 + }, + { + "epoch": 0.43, + "learning_rate": 4.7523324097563706e-05, + "loss": 1.0185, + "step": 775 + }, + { + "epoch": 0.43, + "learning_rate": 4.749181099284703e-05, + "loss": 1.0533, + "step": 780 + }, + { + "epoch": 0.43, + "learning_rate": 4.746010925215839e-05, + "loss": 1.0083, + "step": 785 + }, + { + "epoch": 0.44, + "learning_rate": 4.74282191413763e-05, + "loss": 1.0191, + "step": 790 + }, + { + "epoch": 0.44, + "learning_rate": 4.7396140927959045e-05, + "loss": 0.9786, + "step": 795 + }, + { + "epoch": 0.44, + "learning_rate": 4.7363874880942574e-05, + "loss": 1.0197, + "step": 800 + }, + { + "epoch": 0.45, + "learning_rate": 4.733142127093813e-05, + "loss": 1.0471, + "step": 805 + }, + { + "epoch": 0.45, + "learning_rate": 4.7298780370130014e-05, + "loss": 1.0134, + "step": 810 + }, + { + "epoch": 0.45, + "learning_rate": 4.726595245227336e-05, + "loss": 0.9801, + "step": 815 + }, + { + "epoch": 0.45, + "learning_rate": 4.723293779269173e-05, + "loss": 0.9944, + "step": 820 + }, + { + "epoch": 0.46, + "learning_rate": 4.7199736668274924e-05, + "loss": 1.0358, + "step": 825 + }, + { + "epoch": 0.46, + "learning_rate": 4.716634935747655e-05, + "loss": 0.9907, + "step": 830 + }, + { + "epoch": 0.46, + "learning_rate": 4.713277614031177e-05, + "loss": 1.0698, + "step": 835 + }, + { + "epoch": 0.46, + "learning_rate": 4.70990172983549e-05, + "loss": 1.0466, + "step": 840 + }, + { + "epoch": 0.47, + "learning_rate": 4.706507311473707e-05, + "loss": 0.9489, + "step": 845 + }, + { + "epoch": 0.47, + "learning_rate": 4.703094387414385e-05, + "loss": 0.9936, + "step": 850 + }, + { + "epoch": 0.47, + "learning_rate": 4.699662986281288e-05, + "loss": 0.9672, + "step": 855 + }, + { + "epoch": 0.48, + "learning_rate": 4.696213136853141e-05, + "loss": 1.0164, + "step": 860 + }, + { + "epoch": 0.48, + "learning_rate": 4.6927448680633954e-05, + "loss": 0.9817, + "step": 865 + }, + { + "epoch": 0.48, + "learning_rate": 4.689258208999983e-05, + "loss": 1.028, + "step": 870 + }, + { + "epoch": 0.48, + "learning_rate": 4.6857531889050716e-05, + "loss": 1.0239, + "step": 875 + }, + { + "epoch": 0.49, + "learning_rate": 4.682229837174821e-05, + "loss": 1.0378, + "step": 880 + }, + { + "epoch": 0.49, + "learning_rate": 4.678688183359135e-05, + "loss": 0.9908, + "step": 885 + }, + { + "epoch": 0.49, + "learning_rate": 4.675128257161418e-05, + "loss": 0.9384, + "step": 890 + }, + { + "epoch": 0.49, + "learning_rate": 4.671550088438319e-05, + "loss": 0.9481, + "step": 895 + }, + { + "epoch": 0.5, + "learning_rate": 4.6679537071994874e-05, + "loss": 0.9541, + "step": 900 + }, + { + "epoch": 0.5, + "learning_rate": 4.6643391436073165e-05, + "loss": 0.9787, + "step": 905 + }, + { + "epoch": 0.5, + "learning_rate": 4.660706427976693e-05, + "loss": 0.9897, + "step": 910 + }, + { + "epoch": 0.51, + "learning_rate": 4.657055590774745e-05, + "loss": 0.9947, + "step": 915 + }, + { + "epoch": 0.51, + "learning_rate": 4.6533866626205805e-05, + "loss": 0.9614, + "step": 920 + }, + { + "epoch": 0.51, + "learning_rate": 4.649699674285036e-05, + "loss": 1.0323, + "step": 925 + }, + { + "epoch": 0.51, + "learning_rate": 4.645994656690417e-05, + "loss": 1.0345, + "step": 930 + }, + { + "epoch": 0.52, + "learning_rate": 4.642271640910235e-05, + "loss": 1.0432, + "step": 935 + }, + { + "epoch": 0.52, + "learning_rate": 4.638530658168954e-05, + "loss": 1.0102, + "step": 940 + }, + { + "epoch": 0.52, + "learning_rate": 4.6347717398417203e-05, + "loss": 1.0361, + "step": 945 + }, + { + "epoch": 0.53, + "learning_rate": 4.6309949174541096e-05, + "loss": 0.9818, + "step": 950 + }, + { + "epoch": 0.53, + "learning_rate": 4.627200222681851e-05, + "loss": 1.0293, + "step": 955 + }, + { + "epoch": 0.53, + "learning_rate": 4.6233876873505694e-05, + "loss": 1.0125, + "step": 960 + }, + { + "epoch": 0.53, + "learning_rate": 4.619557343435516e-05, + "loss": 1.0039, + "step": 965 + }, + { + "epoch": 0.54, + "learning_rate": 4.615709223061302e-05, + "loss": 1.0146, + "step": 970 + }, + { + "epoch": 0.54, + "learning_rate": 4.611843358501624e-05, + "loss": 1.0352, + "step": 975 + }, + { + "epoch": 0.54, + "learning_rate": 4.6079597821789993e-05, + "loss": 1.0405, + "step": 980 + }, + { + "epoch": 0.54, + "learning_rate": 4.604058526664491e-05, + "loss": 0.9834, + "step": 985 + }, + { + "epoch": 0.55, + "learning_rate": 4.600139624677436e-05, + "loss": 1.0252, + "step": 990 + }, + { + "epoch": 0.55, + "learning_rate": 4.596203109085168e-05, + "loss": 1.0516, + "step": 995 + }, + { + "epoch": 0.55, + "learning_rate": 4.5922490129027464e-05, + "loss": 0.9762, + "step": 1000 + }, + { + "epoch": 0.56, + "learning_rate": 4.588277369292674e-05, + "loss": 0.9801, + "step": 1005 + }, + { + "epoch": 0.56, + "learning_rate": 4.5842882115646234e-05, + "loss": 1.0127, + "step": 1010 + }, + { + "epoch": 0.56, + "learning_rate": 4.580281573175157e-05, + "loss": 1.0415, + "step": 1015 + }, + { + "epoch": 0.56, + "learning_rate": 4.576257487727442e-05, + "loss": 0.9974, + "step": 1020 + }, + { + "epoch": 0.57, + "learning_rate": 4.572215988970974e-05, + "loss": 1.0097, + "step": 1025 + }, + { + "epoch": 0.57, + "learning_rate": 4.568157110801293e-05, + "loss": 1.0004, + "step": 1030 + }, + { + "epoch": 0.57, + "learning_rate": 4.5640808872596944e-05, + "loss": 0.9932, + "step": 1035 + }, + { + "epoch": 0.58, + "learning_rate": 4.5599873525329505e-05, + "loss": 0.9437, + "step": 1040 + }, + { + "epoch": 0.58, + "learning_rate": 4.555876540953019e-05, + "loss": 1.0004, + "step": 1045 + }, + { + "epoch": 0.58, + "learning_rate": 4.551748486996755e-05, + "loss": 0.9743, + "step": 1050 + }, + { + "epoch": 0.58, + "learning_rate": 4.547603225285626e-05, + "loss": 1.0303, + "step": 1055 + }, + { + "epoch": 0.59, + "learning_rate": 4.543440790585417e-05, + "loss": 1.0225, + "step": 1060 + }, + { + "epoch": 0.59, + "learning_rate": 4.539261217805939e-05, + "loss": 1.0102, + "step": 1065 + }, + { + "epoch": 0.59, + "learning_rate": 4.535064542000743e-05, + "loss": 1.0087, + "step": 1070 + }, + { + "epoch": 0.59, + "learning_rate": 4.5308507983668165e-05, + "loss": 0.952, + "step": 1075 + }, + { + "epoch": 0.6, + "learning_rate": 4.526620022244293e-05, + "loss": 0.9767, + "step": 1080 + }, + { + "epoch": 0.6, + "learning_rate": 4.522372249116158e-05, + "loss": 1.0049, + "step": 1085 + }, + { + "epoch": 0.6, + "learning_rate": 4.5181075146079456e-05, + "loss": 1.0397, + "step": 1090 + }, + { + "epoch": 0.61, + "learning_rate": 4.5138258544874455e-05, + "loss": 1.0167, + "step": 1095 + }, + { + "epoch": 0.61, + "learning_rate": 4.5095273046643985e-05, + "loss": 0.9966, + "step": 1100 + }, + { + "epoch": 0.61, + "learning_rate": 4.5052119011901986e-05, + "loss": 0.983, + "step": 1105 + }, + { + "epoch": 0.61, + "learning_rate": 4.500879680257587e-05, + "loss": 1.0301, + "step": 1110 + }, + { + "epoch": 0.62, + "learning_rate": 4.4965306782003535e-05, + "loss": 1.0371, + "step": 1115 + }, + { + "epoch": 0.62, + "learning_rate": 4.492164931493028e-05, + "loss": 1.0083, + "step": 1120 + }, + { + "epoch": 0.62, + "learning_rate": 4.487782476750575e-05, + "loss": 0.9728, + "step": 1125 + }, + { + "epoch": 0.62, + "learning_rate": 4.4833833507280884e-05, + "loss": 0.9892, + "step": 1130 + }, + { + "epoch": 0.63, + "learning_rate": 4.4789675903204805e-05, + "loss": 0.9847, + "step": 1135 + }, + { + "epoch": 0.63, + "learning_rate": 4.474535232562176e-05, + "loss": 1.0545, + "step": 1140 + }, + { + "epoch": 0.63, + "learning_rate": 4.470086314626797e-05, + "loss": 1.0204, + "step": 1145 + }, + { + "epoch": 0.64, + "learning_rate": 4.465620873826856e-05, + "loss": 0.9648, + "step": 1150 + }, + { + "epoch": 0.64, + "learning_rate": 4.46113894761344e-05, + "loss": 0.9545, + "step": 1155 + }, + { + "epoch": 0.64, + "learning_rate": 4.456640573575896e-05, + "loss": 1.0256, + "step": 1160 + }, + { + "epoch": 0.64, + "learning_rate": 4.4521257894415183e-05, + "loss": 0.99, + "step": 1165 + }, + { + "epoch": 0.65, + "learning_rate": 4.44759463307523e-05, + "loss": 0.9906, + "step": 1170 + }, + { + "epoch": 0.65, + "learning_rate": 4.443047142479266e-05, + "loss": 0.9876, + "step": 1175 + }, + { + "epoch": 0.65, + "learning_rate": 4.4384833557928553e-05, + "loss": 1.0495, + "step": 1180 + }, + { + "epoch": 0.66, + "learning_rate": 4.4339033112918966e-05, + "loss": 0.9869, + "step": 1185 + }, + { + "epoch": 0.66, + "learning_rate": 4.4293070473886456e-05, + "loss": 1.0299, + "step": 1190 + }, + { + "epoch": 0.66, + "learning_rate": 4.424694602631385e-05, + "loss": 1.0073, + "step": 1195 + }, + { + "epoch": 0.66, + "learning_rate": 4.420066015704105e-05, + "loss": 1.0023, + "step": 1200 + }, + { + "epoch": 0.67, + "learning_rate": 4.41542132542618e-05, + "loss": 0.9462, + "step": 1205 + }, + { + "epoch": 0.67, + "learning_rate": 4.410760570752037e-05, + "loss": 1.0116, + "step": 1210 + }, + { + "epoch": 0.67, + "learning_rate": 4.4060837907708375e-05, + "loss": 0.9652, + "step": 1215 + }, + { + "epoch": 0.67, + "learning_rate": 4.401391024706142e-05, + "loss": 1.0411, + "step": 1220 + }, + { + "epoch": 0.68, + "learning_rate": 4.396682311915586e-05, + "loss": 0.9691, + "step": 1225 + }, + { + "epoch": 0.68, + "learning_rate": 4.3919576918905495e-05, + "loss": 0.961, + "step": 1230 + }, + { + "epoch": 0.68, + "learning_rate": 4.387217204255819e-05, + "loss": 0.9602, + "step": 1235 + }, + { + "epoch": 0.69, + "learning_rate": 4.3824608887692666e-05, + "loss": 1.0197, + "step": 1240 + }, + { + "epoch": 0.69, + "learning_rate": 4.377688785321507e-05, + "loss": 0.9982, + "step": 1245 + }, + { + "epoch": 0.69, + "learning_rate": 4.372900933935569e-05, + "loss": 0.986, + "step": 1250 + }, + { + "epoch": 0.69, + "learning_rate": 4.368097374766556e-05, + "loss": 0.9744, + "step": 1255 + }, + { + "epoch": 0.7, + "learning_rate": 4.3632781481013105e-05, + "loss": 1.0078, + "step": 1260 + }, + { + "epoch": 0.7, + "learning_rate": 4.358443294358077e-05, + "loss": 0.9865, + "step": 1265 + }, + { + "epoch": 0.7, + "learning_rate": 4.35359285408616e-05, + "loss": 0.976, + "step": 1270 + }, + { + "epoch": 0.71, + "learning_rate": 4.348726867965591e-05, + "loss": 1.0141, + "step": 1275 + }, + { + "epoch": 0.71, + "learning_rate": 4.343845376806777e-05, + "loss": 0.952, + "step": 1280 + }, + { + "epoch": 0.71, + "learning_rate": 4.338948421550169e-05, + "loss": 1.0239, + "step": 1285 + }, + { + "epoch": 0.71, + "learning_rate": 4.334036043265909e-05, + "loss": 0.994, + "step": 1290 + }, + { + "epoch": 0.72, + "learning_rate": 4.329108283153492e-05, + "loss": 1.0505, + "step": 1295 + }, + { + "epoch": 0.72, + "learning_rate": 4.3241651825414195e-05, + "loss": 0.9919, + "step": 1300 + }, + { + "epoch": 0.72, + "learning_rate": 4.31920678288685e-05, + "loss": 1.0169, + "step": 1305 + }, + { + "epoch": 0.72, + "learning_rate": 4.3142331257752546e-05, + "loss": 0.9938, + "step": 1310 + }, + { + "epoch": 0.73, + "learning_rate": 4.309244252920064e-05, + "loss": 0.92, + "step": 1315 + }, + { + "epoch": 0.73, + "learning_rate": 4.304240206162326e-05, + "loss": 0.9537, + "step": 1320 + }, + { + "epoch": 0.73, + "learning_rate": 4.299221027470345e-05, + "loss": 0.996, + "step": 1325 + }, + { + "epoch": 0.74, + "learning_rate": 4.294186758939339e-05, + "loss": 0.9652, + "step": 1330 + }, + { + "epoch": 0.74, + "learning_rate": 4.2891374427910795e-05, + "loss": 1.0138, + "step": 1335 + }, + { + "epoch": 0.74, + "learning_rate": 4.284073121373544e-05, + "loss": 0.9267, + "step": 1340 + }, + { + "epoch": 0.74, + "learning_rate": 4.278993837160553e-05, + "loss": 0.9899, + "step": 1345 + }, + { + "epoch": 0.75, + "learning_rate": 4.273899632751422e-05, + "loss": 0.963, + "step": 1350 + }, + { + "epoch": 0.75, + "learning_rate": 4.2687905508705974e-05, + "loss": 1.0066, + "step": 1355 + }, + { + "epoch": 0.75, + "learning_rate": 4.263666634367303e-05, + "loss": 0.9939, + "step": 1360 + }, + { + "epoch": 0.75, + "learning_rate": 4.258527926215178e-05, + "loss": 0.9667, + "step": 1365 + }, + { + "epoch": 0.76, + "learning_rate": 4.253374469511917e-05, + "loss": 0.9999, + "step": 1370 + }, + { + "epoch": 0.76, + "learning_rate": 4.248206307478909e-05, + "loss": 0.9738, + "step": 1375 + }, + { + "epoch": 0.76, + "learning_rate": 4.243023483460875e-05, + "loss": 0.9829, + "step": 1380 + }, + { + "epoch": 0.77, + "learning_rate": 4.237826040925503e-05, + "loss": 1.0107, + "step": 1385 + }, + { + "epoch": 0.77, + "learning_rate": 4.232614023463088e-05, + "loss": 0.9992, + "step": 1390 + }, + { + "epoch": 0.77, + "learning_rate": 4.227387474786159e-05, + "loss": 0.9566, + "step": 1395 + }, + { + "epoch": 0.77, + "learning_rate": 4.222146438729119e-05, + "loss": 0.9722, + "step": 1400 + }, + { + "epoch": 0.78, + "learning_rate": 4.216890959247873e-05, + "loss": 0.988, + "step": 1405 + }, + { + "epoch": 0.78, + "learning_rate": 4.211621080419463e-05, + "loss": 1.0103, + "step": 1410 + }, + { + "epoch": 0.78, + "learning_rate": 4.206336846441695e-05, + "loss": 0.9805, + "step": 1415 + }, + { + "epoch": 0.79, + "learning_rate": 4.201038301632772e-05, + "loss": 1.0177, + "step": 1420 + }, + { + "epoch": 0.79, + "learning_rate": 4.195725490430917e-05, + "loss": 0.9938, + "step": 1425 + }, + { + "epoch": 0.79, + "learning_rate": 4.190398457394007e-05, + "loss": 1.0276, + "step": 1430 + }, + { + "epoch": 0.79, + "learning_rate": 4.1850572471991924e-05, + "loss": 0.9909, + "step": 1435 + }, + { + "epoch": 0.8, + "learning_rate": 4.1797019046425264e-05, + "loss": 0.9794, + "step": 1440 + }, + { + "epoch": 0.8, + "learning_rate": 4.1743324746385914e-05, + "loss": 1.0073, + "step": 1445 + }, + { + "epoch": 0.8, + "learning_rate": 4.1689490022201154e-05, + "loss": 1.0106, + "step": 1450 + }, + { + "epoch": 0.8, + "learning_rate": 4.163551532537601e-05, + "loss": 1.0241, + "step": 1455 + }, + { + "epoch": 0.81, + "learning_rate": 4.1581401108589425e-05, + "loss": 1.0178, + "step": 1460 + }, + { + "epoch": 0.81, + "learning_rate": 4.1527147825690495e-05, + "loss": 0.9426, + "step": 1465 + }, + { + "epoch": 0.81, + "learning_rate": 4.1472755931694626e-05, + "loss": 0.9642, + "step": 1470 + }, + { + "epoch": 0.82, + "learning_rate": 4.141822588277976e-05, + "loss": 0.9678, + "step": 1475 + }, + { + "epoch": 0.82, + "learning_rate": 4.136355813628251e-05, + "loss": 0.9654, + "step": 1480 + }, + { + "epoch": 0.82, + "learning_rate": 4.130875315069435e-05, + "loss": 0.9748, + "step": 1485 + }, + { + "epoch": 0.82, + "learning_rate": 4.125381138565775e-05, + "loss": 0.9321, + "step": 1490 + }, + { + "epoch": 0.83, + "learning_rate": 4.1198733301962346e-05, + "loss": 0.9885, + "step": 1495 + }, + { + "epoch": 0.83, + "learning_rate": 4.114351936154105e-05, + "loss": 0.9385, + "step": 1500 + }, + { + "epoch": 0.83, + "learning_rate": 4.108817002746619e-05, + "loss": 0.9362, + "step": 1505 + }, + { + "epoch": 0.83, + "learning_rate": 4.1032685763945625e-05, + "loss": 0.9764, + "step": 1510 + }, + { + "epoch": 0.84, + "learning_rate": 4.097706703631886e-05, + "loss": 0.961, + "step": 1515 + }, + { + "epoch": 0.84, + "learning_rate": 4.092131431105312e-05, + "loss": 0.9818, + "step": 1520 + }, + { + "epoch": 0.84, + "learning_rate": 4.086542805573945e-05, + "loss": 0.9855, + "step": 1525 + }, + { + "epoch": 0.85, + "learning_rate": 4.080940873908881e-05, + "loss": 0.9618, + "step": 1530 + }, + { + "epoch": 0.85, + "learning_rate": 4.07532568309281e-05, + "loss": 1.0102, + "step": 1535 + }, + { + "epoch": 0.85, + "learning_rate": 4.069697280219628e-05, + "loss": 0.9773, + "step": 1540 + }, + { + "epoch": 0.85, + "learning_rate": 4.0640557124940376e-05, + "loss": 0.9687, + "step": 1545 + }, + { + "epoch": 0.86, + "learning_rate": 4.058401027231152e-05, + "loss": 0.9818, + "step": 1550 + }, + { + "epoch": 0.86, + "learning_rate": 4.052733271856103e-05, + "loss": 0.9911, + "step": 1555 + }, + { + "epoch": 0.86, + "learning_rate": 4.0470524939036355e-05, + "loss": 0.9694, + "step": 1560 + }, + { + "epoch": 0.87, + "learning_rate": 4.0413587410177155e-05, + "loss": 0.9374, + "step": 1565 + }, + { + "epoch": 0.87, + "learning_rate": 4.035652060951128e-05, + "loss": 0.956, + "step": 1570 + }, + { + "epoch": 0.87, + "learning_rate": 4.0299325015650774e-05, + "loss": 0.9491, + "step": 1575 + }, + { + "epoch": 0.87, + "learning_rate": 4.024200110828783e-05, + "loss": 0.9569, + "step": 1580 + }, + { + "epoch": 0.88, + "learning_rate": 4.018454936819082e-05, + "loss": 0.9627, + "step": 1585 + }, + { + "epoch": 0.88, + "learning_rate": 4.012697027720018e-05, + "loss": 0.9922, + "step": 1590 + }, + { + "epoch": 0.88, + "learning_rate": 4.0069264318224506e-05, + "loss": 0.9703, + "step": 1595 + }, + { + "epoch": 0.88, + "learning_rate": 4.0011431975236337e-05, + "loss": 0.9436, + "step": 1600 + }, + { + "epoch": 0.89, + "learning_rate": 3.995347373326822e-05, + "loss": 0.961, + "step": 1605 + }, + { + "epoch": 0.89, + "learning_rate": 3.989539007840861e-05, + "loss": 0.9247, + "step": 1610 + }, + { + "epoch": 0.89, + "learning_rate": 3.983718149779775e-05, + "loss": 0.9537, + "step": 1615 + }, + { + "epoch": 0.9, + "learning_rate": 3.9778848479623656e-05, + "loss": 0.9415, + "step": 1620 + }, + { + "epoch": 0.9, + "learning_rate": 3.972039151311795e-05, + "loss": 0.9954, + "step": 1625 + }, + { + "epoch": 0.9, + "learning_rate": 3.966181108855183e-05, + "loss": 0.9451, + "step": 1630 + }, + { + "epoch": 0.9, + "learning_rate": 3.960310769723189e-05, + "loss": 0.9977, + "step": 1635 + }, + { + "epoch": 0.91, + "learning_rate": 3.9544281831496034e-05, + "loss": 0.9806, + "step": 1640 + }, + { + "epoch": 0.91, + "learning_rate": 3.9485333984709374e-05, + "loss": 0.9851, + "step": 1645 + }, + { + "epoch": 0.91, + "learning_rate": 3.942626465126001e-05, + "loss": 1.0089, + "step": 1650 + }, + { + "epoch": 0.92, + "learning_rate": 3.9367074326555e-05, + "loss": 0.9562, + "step": 1655 + }, + { + "epoch": 0.92, + "learning_rate": 3.930776350701609e-05, + "loss": 0.9892, + "step": 1660 + }, + { + "epoch": 0.92, + "learning_rate": 3.92483326900756e-05, + "loss": 0.9875, + "step": 1665 + }, + { + "epoch": 0.92, + "learning_rate": 3.91887823741723e-05, + "loss": 1.0012, + "step": 1670 + }, + { + "epoch": 0.93, + "learning_rate": 3.9129113058747136e-05, + "loss": 0.97, + "step": 1675 + }, + { + "epoch": 0.93, + "learning_rate": 3.9069325244239095e-05, + "loss": 0.9793, + "step": 1680 + }, + { + "epoch": 0.93, + "learning_rate": 3.900941943208103e-05, + "loss": 0.9864, + "step": 1685 + }, + { + "epoch": 0.93, + "learning_rate": 3.894939612469539e-05, + "loss": 0.9596, + "step": 1690 + }, + { + "epoch": 0.94, + "learning_rate": 3.888925582549006e-05, + "loss": 0.9679, + "step": 1695 + }, + { + "epoch": 0.94, + "learning_rate": 3.882899903885412e-05, + "loss": 0.9395, + "step": 1700 + }, + { + "epoch": 0.94, + "learning_rate": 3.876862627015361e-05, + "loss": 0.9774, + "step": 1705 + }, + { + "epoch": 0.95, + "learning_rate": 3.87081380257273e-05, + "loss": 0.9686, + "step": 1710 + }, + { + "epoch": 0.95, + "learning_rate": 3.864753481288244e-05, + "loss": 0.953, + "step": 1715 + }, + { + "epoch": 0.95, + "learning_rate": 3.8586817139890515e-05, + "loss": 0.9719, + "step": 1720 + }, + { + "epoch": 0.95, + "learning_rate": 3.852598551598294e-05, + "loss": 0.9748, + "step": 1725 + }, + { + "epoch": 0.96, + "learning_rate": 3.8465040451346874e-05, + "loss": 0.9741, + "step": 1730 + }, + { + "epoch": 0.96, + "learning_rate": 3.8403982457120836e-05, + "loss": 0.9747, + "step": 1735 + }, + { + "epoch": 0.96, + "learning_rate": 3.834281204539051e-05, + "loss": 0.9791, + "step": 1740 + }, + { + "epoch": 0.96, + "learning_rate": 3.828152972918438e-05, + "loss": 0.9704, + "step": 1745 + }, + { + "epoch": 0.97, + "learning_rate": 3.82201360224695e-05, + "loss": 0.9556, + "step": 1750 + }, + { + "epoch": 0.97, + "learning_rate": 3.815863144014711e-05, + "loss": 0.9552, + "step": 1755 + }, + { + "epoch": 0.97, + "learning_rate": 3.809701649804834e-05, + "loss": 0.9594, + "step": 1760 + }, + { + "epoch": 0.98, + "learning_rate": 3.8035291712929926e-05, + "loss": 0.959, + "step": 1765 + }, + { + "epoch": 0.98, + "learning_rate": 3.797345760246982e-05, + "loss": 1.0254, + "step": 1770 + }, + { + "epoch": 0.98, + "learning_rate": 3.791151468526289e-05, + "loss": 0.9925, + "step": 1775 + }, + { + "epoch": 0.98, + "learning_rate": 3.784946348081654e-05, + "loss": 0.9516, + "step": 1780 + }, + { + "epoch": 0.99, + "learning_rate": 3.7787304509546365e-05, + "loss": 0.954, + "step": 1785 + }, + { + "epoch": 0.99, + "learning_rate": 3.7725038292771774e-05, + "loss": 0.9465, + "step": 1790 + }, + { + "epoch": 0.99, + "learning_rate": 3.766266535271167e-05, + "loss": 0.9792, + "step": 1795 + }, + { + "epoch": 1.0, + "learning_rate": 3.760018621248e-05, + "loss": 0.964, + "step": 1800 + }, + { + "epoch": 1.0, + "learning_rate": 3.75376013960814e-05, + "loss": 0.9419, + "step": 1805 + }, + { + "epoch": 1.0, + "learning_rate": 3.747491142840681e-05, + "loss": 0.9818, + "step": 1810 + }, + { + "epoch": 1.0, + "learning_rate": 3.741211683522904e-05, + "loss": 0.9153, + "step": 1815 + }, + { + "epoch": 1.01, + "learning_rate": 3.734921814319841e-05, + "loss": 0.955, + "step": 1820 + }, + { + "epoch": 1.01, + "learning_rate": 3.728621587983828e-05, + "loss": 0.9467, + "step": 1825 + }, + { + "epoch": 1.01, + "learning_rate": 3.722311057354067e-05, + "loss": 0.9816, + "step": 1830 + }, + { + "epoch": 1.01, + "learning_rate": 3.715990275356178e-05, + "loss": 0.9727, + "step": 1835 + }, + { + "epoch": 1.02, + "learning_rate": 3.7096592950017617e-05, + "loss": 0.9822, + "step": 1840 + }, + { + "epoch": 1.02, + "learning_rate": 3.703318169387947e-05, + "loss": 0.942, + "step": 1845 + }, + { + "epoch": 1.02, + "learning_rate": 3.696966951696952e-05, + "loss": 0.9306, + "step": 1850 + }, + { + "epoch": 1.03, + "learning_rate": 3.690605695195637e-05, + "loss": 0.9654, + "step": 1855 + }, + { + "epoch": 1.03, + "learning_rate": 3.684234453235054e-05, + "loss": 0.9592, + "step": 1860 + }, + { + "epoch": 1.03, + "learning_rate": 3.677853279250003e-05, + "loss": 0.988, + "step": 1865 + }, + { + "epoch": 1.03, + "learning_rate": 3.671462226758583e-05, + "loss": 0.9462, + "step": 1870 + }, + { + "epoch": 1.04, + "learning_rate": 3.665061349361742e-05, + "loss": 0.9685, + "step": 1875 + }, + { + "epoch": 1.04, + "learning_rate": 3.658650700742828e-05, + "loss": 0.9772, + "step": 1880 + }, + { + "epoch": 1.04, + "learning_rate": 3.6522303346671404e-05, + "loss": 0.9482, + "step": 1885 + }, + { + "epoch": 1.05, + "learning_rate": 3.645800304981477e-05, + "loss": 1.0069, + "step": 1890 + }, + { + "epoch": 1.05, + "learning_rate": 3.639360665613683e-05, + "loss": 0.9003, + "step": 1895 + }, + { + "epoch": 1.05, + "learning_rate": 3.632911470572197e-05, + "loss": 0.9279, + "step": 1900 + }, + { + "epoch": 1.05, + "learning_rate": 3.626452773945603e-05, + "loss": 0.9237, + "step": 1905 + }, + { + "epoch": 1.06, + "learning_rate": 3.619984629902172e-05, + "loss": 1.0086, + "step": 1910 + }, + { + "epoch": 1.06, + "learning_rate": 3.613507092689409e-05, + "loss": 0.9625, + "step": 1915 + }, + { + "epoch": 1.06, + "learning_rate": 3.607020216633599e-05, + "loss": 0.9297, + "step": 1920 + }, + { + "epoch": 1.06, + "learning_rate": 3.60052405613935e-05, + "loss": 0.9718, + "step": 1925 + }, + { + "epoch": 1.07, + "learning_rate": 3.594018665689139e-05, + "loss": 0.9512, + "step": 1930 + }, + { + "epoch": 1.07, + "learning_rate": 3.5875040998428513e-05, + "loss": 0.9923, + "step": 1935 + }, + { + "epoch": 1.07, + "learning_rate": 3.5809804132373253e-05, + "loss": 0.9518, + "step": 1940 + }, + { + "epoch": 1.08, + "learning_rate": 3.574447660585897e-05, + "loss": 0.9324, + "step": 1945 + }, + { + "epoch": 1.08, + "learning_rate": 3.5679058966779344e-05, + "loss": 0.9724, + "step": 1950 + }, + { + "epoch": 1.08, + "learning_rate": 3.561355176378384e-05, + "loss": 0.978, + "step": 1955 + }, + { + "epoch": 1.08, + "learning_rate": 3.554795554627307e-05, + "loss": 0.9893, + "step": 1960 + }, + { + "epoch": 1.09, + "learning_rate": 3.548227086439422e-05, + "loss": 0.967, + "step": 1965 + }, + { + "epoch": 1.09, + "learning_rate": 3.541649826903639e-05, + "loss": 0.9529, + "step": 1970 + }, + { + "epoch": 1.09, + "learning_rate": 3.535063831182602e-05, + "loss": 0.9506, + "step": 1975 + }, + { + "epoch": 1.09, + "learning_rate": 3.528469154512224e-05, + "loss": 0.9525, + "step": 1980 + }, + { + "epoch": 1.1, + "learning_rate": 3.521865852201223e-05, + "loss": 0.9258, + "step": 1985 + }, + { + "epoch": 1.1, + "learning_rate": 3.5152539796306596e-05, + "loss": 0.9417, + "step": 1990 + }, + { + "epoch": 1.1, + "learning_rate": 3.508633592253472e-05, + "loss": 0.9465, + "step": 1995 + }, + { + "epoch": 1.11, + "learning_rate": 3.502004745594011e-05, + "loss": 0.9494, + "step": 2000 + }, + { + "epoch": 1.11, + "learning_rate": 3.4953674952475755e-05, + "loss": 0.9709, + "step": 2005 + }, + { + "epoch": 1.11, + "learning_rate": 3.488721896879943e-05, + "loss": 0.9581, + "step": 2010 + }, + { + "epoch": 1.11, + "learning_rate": 3.4820680062269074e-05, + "loss": 0.974, + "step": 2015 + }, + { + "epoch": 1.12, + "learning_rate": 3.4754058790938046e-05, + "loss": 0.9768, + "step": 2020 + }, + { + "epoch": 1.12, + "learning_rate": 3.468735571355055e-05, + "loss": 0.982, + "step": 2025 + }, + { + "epoch": 1.12, + "learning_rate": 3.4620571389536825e-05, + "loss": 0.984, + "step": 2030 + }, + { + "epoch": 1.13, + "learning_rate": 3.455370637900856e-05, + "loss": 0.9604, + "step": 2035 + }, + { + "epoch": 1.13, + "learning_rate": 3.448676124275414e-05, + "loss": 0.897, + "step": 2040 + }, + { + "epoch": 1.13, + "learning_rate": 3.4419736542233925e-05, + "loss": 0.9968, + "step": 2045 + }, + { + "epoch": 1.13, + "learning_rate": 3.4352632839575616e-05, + "loss": 0.9479, + "step": 2050 + }, + { + "epoch": 1.14, + "learning_rate": 3.428545069756946e-05, + "loss": 0.9724, + "step": 2055 + }, + { + "epoch": 1.14, + "learning_rate": 3.42181906796636e-05, + "loss": 0.9493, + "step": 2060 + }, + { + "epoch": 1.14, + "learning_rate": 3.415085334995927e-05, + "loss": 0.9348, + "step": 2065 + }, + { + "epoch": 1.14, + "learning_rate": 3.408343927320613e-05, + "loss": 0.9702, + "step": 2070 + }, + { + "epoch": 1.15, + "learning_rate": 3.401594901479753e-05, + "loss": 0.9089, + "step": 2075 + }, + { + "epoch": 1.15, + "learning_rate": 3.394838314076572e-05, + "loss": 0.9606, + "step": 2080 + }, + { + "epoch": 1.15, + "learning_rate": 3.3880742217777115e-05, + "loss": 0.9743, + "step": 2085 + }, + { + "epoch": 1.16, + "learning_rate": 3.381302681312759e-05, + "loss": 0.9469, + "step": 2090 + }, + { + "epoch": 1.16, + "learning_rate": 3.374523749473767e-05, + "loss": 0.949, + "step": 2095 + }, + { + "epoch": 1.16, + "learning_rate": 3.367737483114779e-05, + "loss": 0.9567, + "step": 2100 + }, + { + "epoch": 1.16, + "learning_rate": 3.360943939151351e-05, + "loss": 0.9718, + "step": 2105 + }, + { + "epoch": 1.17, + "learning_rate": 3.354143174560078e-05, + "loss": 0.9626, + "step": 2110 + }, + { + "epoch": 1.17, + "learning_rate": 3.3473352463781105e-05, + "loss": 0.9346, + "step": 2115 + }, + { + "epoch": 1.17, + "learning_rate": 3.340520211702681e-05, + "loss": 0.9208, + "step": 2120 + }, + { + "epoch": 1.18, + "learning_rate": 3.333698127690623e-05, + "loss": 0.9856, + "step": 2125 + }, + { + "epoch": 1.18, + "learning_rate": 3.326869051557891e-05, + "loss": 0.9049, + "step": 2130 + }, + { + "epoch": 1.18, + "learning_rate": 3.320033040579082e-05, + "loss": 0.9222, + "step": 2135 + }, + { + "epoch": 1.18, + "learning_rate": 3.3131901520869565e-05, + "loss": 0.9648, + "step": 2140 + }, + { + "epoch": 1.19, + "learning_rate": 3.306340443471951e-05, + "loss": 0.9538, + "step": 2145 + }, + { + "epoch": 1.19, + "learning_rate": 3.299483972181708e-05, + "loss": 0.9314, + "step": 2150 + }, + { + "epoch": 1.19, + "learning_rate": 3.292620795720583e-05, + "loss": 0.9576, + "step": 2155 + }, + { + "epoch": 1.19, + "learning_rate": 3.285750971649167e-05, + "loss": 0.9499, + "step": 2160 + }, + { + "epoch": 1.2, + "learning_rate": 3.278874557583807e-05, + "loss": 0.9568, + "step": 2165 + }, + { + "epoch": 1.2, + "learning_rate": 3.271991611196117e-05, + "loss": 0.9642, + "step": 2170 + }, + { + "epoch": 1.2, + "learning_rate": 3.265102190212497e-05, + "loss": 0.9526, + "step": 2175 + }, + { + "epoch": 1.21, + "learning_rate": 3.258206352413648e-05, + "loss": 0.933, + "step": 2180 + }, + { + "epoch": 1.21, + "learning_rate": 3.2513041556340887e-05, + "loss": 0.9683, + "step": 2185 + }, + { + "epoch": 1.21, + "learning_rate": 3.244395657761671e-05, + "loss": 0.9155, + "step": 2190 + }, + { + "epoch": 1.21, + "learning_rate": 3.2374809167370924e-05, + "loss": 0.9262, + "step": 2195 + }, + { + "epoch": 1.22, + "learning_rate": 3.230559990553409e-05, + "loss": 0.9778, + "step": 2200 + }, + { + "epoch": 1.22, + "learning_rate": 3.2236329372555544e-05, + "loss": 0.9577, + "step": 2205 + }, + { + "epoch": 1.22, + "learning_rate": 3.2166998149398465e-05, + "loss": 0.9286, + "step": 2210 + }, + { + "epoch": 1.22, + "learning_rate": 3.209760681753505e-05, + "loss": 0.9634, + "step": 2215 + }, + { + "epoch": 1.23, + "learning_rate": 3.2028155958941615e-05, + "loss": 0.9467, + "step": 2220 + }, + { + "epoch": 1.23, + "learning_rate": 3.195864615609373e-05, + "loss": 0.9543, + "step": 2225 + }, + { + "epoch": 1.23, + "learning_rate": 3.1889077991961304e-05, + "loss": 0.9914, + "step": 2230 + }, + { + "epoch": 1.24, + "learning_rate": 3.181945205000373e-05, + "loss": 0.9309, + "step": 2235 + }, + { + "epoch": 1.24, + "learning_rate": 3.1749768914164955e-05, + "loss": 0.9299, + "step": 2240 + }, + { + "epoch": 1.24, + "learning_rate": 3.168002916886864e-05, + "loss": 0.9462, + "step": 2245 + }, + { + "epoch": 1.24, + "learning_rate": 3.1610233399013194e-05, + "loss": 0.948, + "step": 2250 + }, + { + "epoch": 1.25, + "learning_rate": 3.15403821899669e-05, + "loss": 0.9335, + "step": 2255 + }, + { + "epoch": 1.25, + "learning_rate": 3.147047612756302e-05, + "loss": 0.952, + "step": 2260 + }, + { + "epoch": 1.25, + "learning_rate": 3.140051579809484e-05, + "loss": 0.9532, + "step": 2265 + }, + { + "epoch": 1.26, + "learning_rate": 3.133050178831079e-05, + "loss": 0.9853, + "step": 2270 + }, + { + "epoch": 1.26, + "learning_rate": 3.12604346854095e-05, + "loss": 0.8925, + "step": 2275 + }, + { + "epoch": 1.26, + "learning_rate": 3.119031507703491e-05, + "loss": 0.942, + "step": 2280 + }, + { + "epoch": 1.26, + "learning_rate": 3.112014355127129e-05, + "loss": 0.9132, + "step": 2285 + }, + { + "epoch": 1.27, + "learning_rate": 3.104992069663835e-05, + "loss": 0.9335, + "step": 2290 + }, + { + "epoch": 1.27, + "learning_rate": 3.0979647102086273e-05, + "loss": 0.9403, + "step": 2295 + }, + { + "epoch": 1.27, + "learning_rate": 3.090932335699081e-05, + "loss": 0.9246, + "step": 2300 + }, + { + "epoch": 1.27, + "learning_rate": 3.083895005114831e-05, + "loss": 0.912, + "step": 2305 + }, + { + "epoch": 1.28, + "learning_rate": 3.076852777477079e-05, + "loss": 0.9334, + "step": 2310 + }, + { + "epoch": 1.28, + "learning_rate": 3.069805711848096e-05, + "loss": 0.933, + "step": 2315 + }, + { + "epoch": 1.28, + "learning_rate": 3.062753867330729e-05, + "loss": 0.9348, + "step": 2320 + }, + { + "epoch": 1.29, + "learning_rate": 3.055697303067905e-05, + "loss": 0.9997, + "step": 2325 + }, + { + "epoch": 1.29, + "learning_rate": 3.048636078242137e-05, + "loss": 0.9196, + "step": 2330 + }, + { + "epoch": 1.29, + "learning_rate": 3.0415702520750235e-05, + "loss": 0.9735, + "step": 2335 + }, + { + "epoch": 1.29, + "learning_rate": 3.0344998838267525e-05, + "loss": 0.9395, + "step": 2340 + }, + { + "epoch": 1.3, + "learning_rate": 3.0274250327956093e-05, + "loss": 0.9455, + "step": 2345 + }, + { + "epoch": 1.3, + "learning_rate": 3.020345758317474e-05, + "loss": 0.968, + "step": 2350 + }, + { + "epoch": 1.3, + "learning_rate": 3.0132621197653245e-05, + "loss": 0.9403, + "step": 2355 + }, + { + "epoch": 1.3, + "learning_rate": 3.0061741765487418e-05, + "loss": 0.9267, + "step": 2360 + }, + { + "epoch": 1.31, + "learning_rate": 2.9990819881134073e-05, + "loss": 0.9734, + "step": 2365 + }, + { + "epoch": 1.31, + "learning_rate": 2.9919856139406093e-05, + "loss": 0.949, + "step": 2370 + }, + { + "epoch": 1.31, + "learning_rate": 2.9848851135467386e-05, + "loss": 0.9464, + "step": 2375 + }, + { + "epoch": 1.32, + "learning_rate": 2.977780546482794e-05, + "loss": 0.9709, + "step": 2380 + }, + { + "epoch": 1.32, + "learning_rate": 2.9706719723338795e-05, + "loss": 0.9202, + "step": 2385 + }, + { + "epoch": 1.32, + "learning_rate": 2.9635594507187074e-05, + "loss": 0.9625, + "step": 2390 + }, + { + "epoch": 1.32, + "learning_rate": 2.956443041289096e-05, + "loss": 0.9307, + "step": 2395 + }, + { + "epoch": 1.33, + "learning_rate": 2.9493228037294702e-05, + "loss": 0.907, + "step": 2400 + }, + { + "epoch": 1.33, + "learning_rate": 2.9421987977563613e-05, + "loss": 0.9487, + "step": 2405 + }, + { + "epoch": 1.33, + "learning_rate": 2.935071083117907e-05, + "loss": 0.9259, + "step": 2410 + }, + { + "epoch": 1.34, + "learning_rate": 2.9279397195933457e-05, + "loss": 0.9357, + "step": 2415 + }, + { + "epoch": 1.34, + "learning_rate": 2.920804766992521e-05, + "loss": 0.9149, + "step": 2420 + }, + { + "epoch": 1.34, + "learning_rate": 2.9136662851553787e-05, + "loss": 0.9664, + "step": 2425 + }, + { + "epoch": 1.34, + "learning_rate": 2.906524333951461e-05, + "loss": 0.973, + "step": 2430 + }, + { + "epoch": 1.35, + "learning_rate": 2.899378973279409e-05, + "loss": 0.9797, + "step": 2435 + }, + { + "epoch": 1.35, + "learning_rate": 2.892230263066459e-05, + "loss": 0.9604, + "step": 2440 + }, + { + "epoch": 1.35, + "learning_rate": 2.885078263267938e-05, + "loss": 0.9147, + "step": 2445 + }, + { + "epoch": 1.35, + "learning_rate": 2.8779230338667634e-05, + "loss": 0.9827, + "step": 2450 + }, + { + "epoch": 1.36, + "learning_rate": 2.870764634872939e-05, + "loss": 0.9362, + "step": 2455 + }, + { + "epoch": 1.36, + "learning_rate": 2.86360312632305e-05, + "loss": 0.9373, + "step": 2460 + }, + { + "epoch": 1.36, + "learning_rate": 2.8564385682797622e-05, + "loss": 0.955, + "step": 2465 + }, + { + "epoch": 1.37, + "learning_rate": 2.8492710208313177e-05, + "loss": 0.9108, + "step": 2470 + }, + { + "epoch": 1.37, + "learning_rate": 2.8421005440910303e-05, + "loss": 0.9259, + "step": 2475 + }, + { + "epoch": 1.37, + "learning_rate": 2.8349271981967797e-05, + "loss": 0.9333, + "step": 2480 + }, + { + "epoch": 1.37, + "learning_rate": 2.8277510433105102e-05, + "loss": 0.9437, + "step": 2485 + }, + { + "epoch": 1.38, + "learning_rate": 2.820572139617725e-05, + "loss": 0.9189, + "step": 2490 + }, + { + "epoch": 1.38, + "learning_rate": 2.8133905473269802e-05, + "loss": 0.9522, + "step": 2495 + }, + { + "epoch": 1.38, + "learning_rate": 2.8062063266693818e-05, + "loss": 0.9352, + "step": 2500 + }, + { + "epoch": 1.39, + "learning_rate": 2.7990195378980784e-05, + "loss": 0.9232, + "step": 2505 + }, + { + "epoch": 1.39, + "learning_rate": 2.7918302412877583e-05, + "loss": 0.8807, + "step": 2510 + }, + { + "epoch": 1.39, + "learning_rate": 2.7846384971341427e-05, + "loss": 0.9231, + "step": 2515 + }, + { + "epoch": 1.39, + "learning_rate": 2.7774443657534788e-05, + "loss": 0.9253, + "step": 2520 + }, + { + "epoch": 1.4, + "learning_rate": 2.770247907482036e-05, + "loss": 0.9646, + "step": 2525 + }, + { + "epoch": 1.4, + "learning_rate": 2.763049182675599e-05, + "loss": 0.9575, + "step": 2530 + }, + { + "epoch": 1.4, + "learning_rate": 2.7558482517089617e-05, + "loss": 0.9234, + "step": 2535 + }, + { + "epoch": 1.4, + "learning_rate": 2.748645174975421e-05, + "loss": 0.9215, + "step": 2540 + }, + { + "epoch": 1.41, + "learning_rate": 2.74144001288627e-05, + "loss": 0.959, + "step": 2545 + }, + { + "epoch": 1.41, + "learning_rate": 2.7342328258702894e-05, + "loss": 0.9509, + "step": 2550 + }, + { + "epoch": 1.41, + "learning_rate": 2.727023674373246e-05, + "loss": 0.8989, + "step": 2555 + }, + { + "epoch": 1.42, + "learning_rate": 2.7198126188573807e-05, + "loss": 0.9653, + "step": 2560 + }, + { + "epoch": 1.42, + "learning_rate": 2.7125997198009028e-05, + "loss": 0.9046, + "step": 2565 + }, + { + "epoch": 1.42, + "learning_rate": 2.7053850376974848e-05, + "loss": 0.9318, + "step": 2570 + }, + { + "epoch": 1.42, + "learning_rate": 2.6981686330557516e-05, + "loss": 0.9292, + "step": 2575 + }, + { + "epoch": 1.43, + "learning_rate": 2.6909505663987756e-05, + "loss": 0.99, + "step": 2580 + }, + { + "epoch": 1.43, + "learning_rate": 2.6837308982635678e-05, + "loss": 0.9737, + "step": 2585 + }, + { + "epoch": 1.43, + "learning_rate": 2.6765096892005726e-05, + "loss": 0.9649, + "step": 2590 + }, + { + "epoch": 1.43, + "learning_rate": 2.6692869997731545e-05, + "loss": 0.9687, + "step": 2595 + }, + { + "epoch": 1.44, + "learning_rate": 2.6620628905570964e-05, + "loss": 0.9708, + "step": 2600 + }, + { + "epoch": 1.44, + "learning_rate": 2.6548374221400884e-05, + "loss": 0.9498, + "step": 2605 + }, + { + "epoch": 1.44, + "learning_rate": 2.6476106551212188e-05, + "loss": 0.9512, + "step": 2610 + }, + { + "epoch": 1.45, + "learning_rate": 2.6403826501104682e-05, + "loss": 0.964, + "step": 2615 + }, + { + "epoch": 1.45, + "learning_rate": 2.6331534677281998e-05, + "loss": 0.9321, + "step": 2620 + }, + { + "epoch": 1.45, + "learning_rate": 2.6259231686046508e-05, + "loss": 0.9032, + "step": 2625 + }, + { + "epoch": 1.45, + "learning_rate": 2.6186918133794252e-05, + "loss": 0.9543, + "step": 2630 + }, + { + "epoch": 1.46, + "learning_rate": 2.6114594627009847e-05, + "loss": 0.9355, + "step": 2635 + }, + { + "epoch": 1.46, + "learning_rate": 2.604226177226137e-05, + "loss": 0.9111, + "step": 2640 + }, + { + "epoch": 1.46, + "learning_rate": 2.596992017619534e-05, + "loss": 0.9896, + "step": 2645 + }, + { + "epoch": 1.47, + "learning_rate": 2.589757044553155e-05, + "loss": 0.9715, + "step": 2650 + }, + { + "epoch": 1.47, + "learning_rate": 2.5825213187058045e-05, + "loss": 0.8911, + "step": 2655 + }, + { + "epoch": 1.47, + "learning_rate": 2.5752849007625986e-05, + "loss": 0.9446, + "step": 2660 + }, + { + "epoch": 1.47, + "learning_rate": 2.568047851414459e-05, + "loss": 0.9453, + "step": 2665 + }, + { + "epoch": 1.48, + "learning_rate": 2.5608102313576027e-05, + "loss": 0.9244, + "step": 2670 + }, + { + "epoch": 1.48, + "learning_rate": 2.553572101293033e-05, + "loss": 0.9478, + "step": 2675 + }, + { + "epoch": 1.48, + "learning_rate": 2.546333521926031e-05, + "loss": 0.9708, + "step": 2680 + }, + { + "epoch": 1.48, + "learning_rate": 2.5390945539656445e-05, + "loss": 0.9266, + "step": 2685 + }, + { + "epoch": 1.49, + "learning_rate": 2.5318552581241822e-05, + "loss": 0.9199, + "step": 2690 + }, + { + "epoch": 1.49, + "learning_rate": 2.524615695116702e-05, + "loss": 0.9666, + "step": 2695 + }, + { + "epoch": 1.49, + "learning_rate": 2.5173759256605027e-05, + "loss": 0.9247, + "step": 2700 + }, + { + "epoch": 1.5, + "learning_rate": 2.510136010474614e-05, + "loss": 0.9534, + "step": 2705 + }, + { + "epoch": 1.5, + "learning_rate": 2.5028960102792887e-05, + "loss": 0.9502, + "step": 2710 + }, + { + "epoch": 1.5, + "learning_rate": 2.4971039897207112e-05, + "loss": 0.9331, + "step": 2715 + }, + { + "epoch": 1.5, + "learning_rate": 2.4898639895253865e-05, + "loss": 0.9806, + "step": 2720 + }, + { + "epoch": 1.51, + "learning_rate": 2.4826240743394982e-05, + "loss": 0.9525, + "step": 2725 + }, + { + "epoch": 1.51, + "learning_rate": 2.4753843048832985e-05, + "loss": 0.937, + "step": 2730 + }, + { + "epoch": 1.51, + "learning_rate": 2.4681447418758187e-05, + "loss": 0.935, + "step": 2735 + }, + { + "epoch": 1.52, + "learning_rate": 2.460905446034356e-05, + "loss": 0.9662, + "step": 2740 + }, + { + "epoch": 1.52, + "learning_rate": 2.45366647807397e-05, + "loss": 0.9249, + "step": 2745 + }, + { + "epoch": 1.52, + "learning_rate": 2.446427898706967e-05, + "loss": 0.9673, + "step": 2750 + }, + { + "epoch": 1.52, + "learning_rate": 2.439189768642398e-05, + "loss": 0.9316, + "step": 2755 + }, + { + "epoch": 1.53, + "learning_rate": 2.431952148585541e-05, + "loss": 0.9297, + "step": 2760 + }, + { + "epoch": 1.53, + "learning_rate": 2.424715099237402e-05, + "loss": 0.9278, + "step": 2765 + }, + { + "epoch": 1.53, + "learning_rate": 2.4174786812941968e-05, + "loss": 0.8954, + "step": 2770 + }, + { + "epoch": 1.53, + "learning_rate": 2.4102429554468456e-05, + "loss": 0.9586, + "step": 2775 + }, + { + "epoch": 1.54, + "learning_rate": 2.4030079823804673e-05, + "loss": 0.9119, + "step": 2780 + }, + { + "epoch": 1.54, + "learning_rate": 2.395773822773863e-05, + "loss": 0.8949, + "step": 2785 + }, + { + "epoch": 1.54, + "learning_rate": 2.3885405372990166e-05, + "loss": 0.9506, + "step": 2790 + }, + { + "epoch": 1.55, + "learning_rate": 2.3813081866205754e-05, + "loss": 0.9087, + "step": 2795 + }, + { + "epoch": 1.55, + "learning_rate": 2.3740768313953494e-05, + "loss": 0.923, + "step": 2800 + }, + { + "epoch": 1.55, + "learning_rate": 2.3668465322718004e-05, + "loss": 1.0212, + "step": 2805 + }, + { + "epoch": 1.55, + "learning_rate": 2.359617349889532e-05, + "loss": 0.9182, + "step": 2810 + }, + { + "epoch": 1.56, + "learning_rate": 2.3523893448787818e-05, + "loss": 0.9316, + "step": 2815 + }, + { + "epoch": 1.56, + "learning_rate": 2.3451625778599122e-05, + "loss": 0.94, + "step": 2820 + }, + { + "epoch": 1.56, + "learning_rate": 2.3379371094429038e-05, + "loss": 0.9181, + "step": 2825 + }, + { + "epoch": 1.56, + "learning_rate": 2.3307130002268457e-05, + "loss": 0.937, + "step": 2830 + }, + { + "epoch": 1.57, + "learning_rate": 2.3234903107994287e-05, + "loss": 0.9026, + "step": 2835 + }, + { + "epoch": 1.57, + "learning_rate": 2.3162691017364317e-05, + "loss": 0.954, + "step": 2840 + }, + { + "epoch": 1.57, + "learning_rate": 2.3090494336012253e-05, + "loss": 0.9661, + "step": 2845 + }, + { + "epoch": 1.58, + "learning_rate": 2.3018313669442483e-05, + "loss": 0.9127, + "step": 2850 + }, + { + "epoch": 1.58, + "learning_rate": 2.2946149623025158e-05, + "loss": 0.9317, + "step": 2855 + }, + { + "epoch": 1.58, + "learning_rate": 2.2874002801990978e-05, + "loss": 0.9856, + "step": 2860 + }, + { + "epoch": 1.58, + "learning_rate": 2.28018738114262e-05, + "loss": 1.0021, + "step": 2865 + }, + { + "epoch": 1.59, + "learning_rate": 2.272976325626755e-05, + "loss": 0.9655, + "step": 2870 + }, + { + "epoch": 1.59, + "learning_rate": 2.265767174129711e-05, + "loss": 0.9619, + "step": 2875 + }, + { + "epoch": 1.59, + "learning_rate": 2.2585599871137313e-05, + "loss": 0.9383, + "step": 2880 + }, + { + "epoch": 1.6, + "learning_rate": 2.251354825024579e-05, + "loss": 0.9332, + "step": 2885 + }, + { + "epoch": 1.6, + "learning_rate": 2.244151748291039e-05, + "loss": 0.9544, + "step": 2890 + }, + { + "epoch": 1.6, + "learning_rate": 2.236950817324401e-05, + "loss": 0.9343, + "step": 2895 + }, + { + "epoch": 1.6, + "learning_rate": 2.2297520925179647e-05, + "loss": 0.9189, + "step": 2900 + }, + { + "epoch": 1.61, + "learning_rate": 2.222555634246521e-05, + "loss": 0.9341, + "step": 2905 + }, + { + "epoch": 1.61, + "learning_rate": 2.215361502865858e-05, + "loss": 0.9567, + "step": 2910 + }, + { + "epoch": 1.61, + "learning_rate": 2.2081697587122423e-05, + "loss": 0.9047, + "step": 2915 + }, + { + "epoch": 1.61, + "learning_rate": 2.200980462101922e-05, + "loss": 0.9126, + "step": 2920 + }, + { + "epoch": 1.62, + "learning_rate": 2.1937936733306195e-05, + "loss": 0.9523, + "step": 2925 + }, + { + "epoch": 1.62, + "learning_rate": 2.18660945267302e-05, + "loss": 0.8802, + "step": 2930 + }, + { + "epoch": 1.62, + "learning_rate": 2.179427860382276e-05, + "loss": 0.9197, + "step": 2935 + }, + { + "epoch": 1.63, + "learning_rate": 2.1722489566894903e-05, + "loss": 0.9255, + "step": 2940 + }, + { + "epoch": 1.63, + "learning_rate": 2.1650728018032206e-05, + "loss": 0.8921, + "step": 2945 + }, + { + "epoch": 1.63, + "learning_rate": 2.15789945590897e-05, + "loss": 0.9607, + "step": 2950 + }, + { + "epoch": 1.63, + "learning_rate": 2.150728979168683e-05, + "loss": 0.9755, + "step": 2955 + }, + { + "epoch": 1.64, + "learning_rate": 2.1435614317202384e-05, + "loss": 0.9943, + "step": 2960 + }, + { + "epoch": 1.64, + "learning_rate": 2.1363968736769508e-05, + "loss": 0.9463, + "step": 2965 + }, + { + "epoch": 1.64, + "learning_rate": 2.1292353651270617e-05, + "loss": 0.9107, + "step": 2970 + }, + { + "epoch": 1.65, + "learning_rate": 2.1220769661332365e-05, + "loss": 0.9311, + "step": 2975 + }, + { + "epoch": 1.65, + "learning_rate": 2.1149217367320622e-05, + "loss": 0.9459, + "step": 2980 + }, + { + "epoch": 1.65, + "learning_rate": 2.107769736933541e-05, + "loss": 0.9439, + "step": 2985 + }, + { + "epoch": 1.65, + "learning_rate": 2.100621026720591e-05, + "loss": 0.9719, + "step": 2990 + }, + { + "epoch": 1.66, + "learning_rate": 2.093475666048539e-05, + "loss": 0.9569, + "step": 2995 + }, + { + "epoch": 1.66, + "learning_rate": 2.0863337148446222e-05, + "loss": 0.9308, + "step": 3000 + }, + { + "epoch": 1.66, + "learning_rate": 2.07919523300748e-05, + "loss": 0.9269, + "step": 3005 + }, + { + "epoch": 1.66, + "learning_rate": 2.0720602804066552e-05, + "loss": 0.9358, + "step": 3010 + }, + { + "epoch": 1.67, + "learning_rate": 2.0649289168820943e-05, + "loss": 0.9291, + "step": 3015 + }, + { + "epoch": 1.67, + "learning_rate": 2.0578012022436386e-05, + "loss": 0.969, + "step": 3020 + }, + { + "epoch": 1.67, + "learning_rate": 2.0506771962705304e-05, + "loss": 0.9319, + "step": 3025 + }, + { + "epoch": 1.68, + "learning_rate": 2.0435569587109042e-05, + "loss": 0.9574, + "step": 3030 + }, + { + "epoch": 1.68, + "learning_rate": 2.036440549281293e-05, + "loss": 0.9654, + "step": 3035 + }, + { + "epoch": 1.68, + "learning_rate": 2.0293280276661204e-05, + "loss": 0.9742, + "step": 3040 + }, + { + "epoch": 1.68, + "learning_rate": 2.0222194535172067e-05, + "loss": 0.9014, + "step": 3045 + }, + { + "epoch": 1.69, + "learning_rate": 2.0151148864532623e-05, + "loss": 0.9246, + "step": 3050 + }, + { + "epoch": 1.69, + "learning_rate": 2.0080143860593913e-05, + "loss": 0.9522, + "step": 3055 + }, + { + "epoch": 1.69, + "learning_rate": 2.0009180118865933e-05, + "loss": 0.9567, + "step": 3060 + }, + { + "epoch": 1.69, + "learning_rate": 1.9938258234512588e-05, + "loss": 0.983, + "step": 3065 + }, + { + "epoch": 1.7, + "learning_rate": 1.9867378802346764e-05, + "loss": 0.8722, + "step": 3070 + }, + { + "epoch": 1.7, + "learning_rate": 1.979654241682527e-05, + "loss": 0.9122, + "step": 3075 + }, + { + "epoch": 1.7, + "learning_rate": 1.972574967204391e-05, + "loss": 0.9362, + "step": 3080 + }, + { + "epoch": 1.71, + "learning_rate": 1.9655001161732478e-05, + "loss": 0.8944, + "step": 3085 + }, + { + "epoch": 1.71, + "learning_rate": 1.9584297479249774e-05, + "loss": 0.9329, + "step": 3090 + }, + { + "epoch": 1.71, + "learning_rate": 1.9513639217578636e-05, + "loss": 0.9707, + "step": 3095 + }, + { + "epoch": 1.71, + "learning_rate": 1.9443026969320955e-05, + "loss": 0.9367, + "step": 3100 + }, + { + "epoch": 1.72, + "learning_rate": 1.937246132669272e-05, + "loss": 0.896, + "step": 3105 + }, + { + "epoch": 1.72, + "learning_rate": 1.9301942881519047e-05, + "loss": 0.97, + "step": 3110 + }, + { + "epoch": 1.72, + "learning_rate": 1.9231472225229216e-05, + "loss": 0.9638, + "step": 3115 + }, + { + "epoch": 1.73, + "learning_rate": 1.9161049948851684e-05, + "loss": 0.9561, + "step": 3120 + }, + { + "epoch": 1.73, + "learning_rate": 1.9090676643009193e-05, + "loss": 0.9734, + "step": 3125 + }, + { + "epoch": 1.73, + "learning_rate": 1.902035289791373e-05, + "loss": 0.9651, + "step": 3130 + }, + { + "epoch": 1.73, + "learning_rate": 1.8950079303361658e-05, + "loss": 0.9489, + "step": 3135 + }, + { + "epoch": 1.74, + "learning_rate": 1.8879856448728723e-05, + "loss": 0.9893, + "step": 3140 + }, + { + "epoch": 1.74, + "learning_rate": 1.8809684922965097e-05, + "loss": 0.9549, + "step": 3145 + }, + { + "epoch": 1.74, + "learning_rate": 1.8739565314590507e-05, + "loss": 0.9196, + "step": 3150 + }, + { + "epoch": 1.74, + "learning_rate": 1.8669498211689216e-05, + "loss": 0.9568, + "step": 3155 + }, + { + "epoch": 1.75, + "learning_rate": 1.859948420190517e-05, + "loss": 0.904, + "step": 3160 + }, + { + "epoch": 1.75, + "learning_rate": 1.852952387243698e-05, + "loss": 0.9375, + "step": 3165 + }, + { + "epoch": 1.75, + "learning_rate": 1.8459617810033096e-05, + "loss": 0.9048, + "step": 3170 + }, + { + "epoch": 1.76, + "learning_rate": 1.83897666009868e-05, + "loss": 0.9514, + "step": 3175 + }, + { + "epoch": 1.76, + "learning_rate": 1.8319970831131363e-05, + "loss": 0.8855, + "step": 3180 + }, + { + "epoch": 1.76, + "learning_rate": 1.825023108583505e-05, + "loss": 0.9119, + "step": 3185 + }, + { + "epoch": 1.76, + "learning_rate": 1.818054794999628e-05, + "loss": 0.9733, + "step": 3190 + }, + { + "epoch": 1.77, + "learning_rate": 1.8110922008038705e-05, + "loss": 0.9597, + "step": 3195 + }, + { + "epoch": 1.77, + "learning_rate": 1.8041353843906275e-05, + "loss": 0.8952, + "step": 3200 + }, + { + "epoch": 1.77, + "learning_rate": 1.797184404105839e-05, + "loss": 0.8934, + "step": 3205 + }, + { + "epoch": 1.77, + "learning_rate": 1.7902393182464955e-05, + "loss": 0.9299, + "step": 3210 + }, + { + "epoch": 1.78, + "learning_rate": 1.7833001850601544e-05, + "loss": 0.9247, + "step": 3215 + }, + { + "epoch": 1.78, + "learning_rate": 1.7763670627444465e-05, + "loss": 0.9672, + "step": 3220 + }, + { + "epoch": 1.78, + "learning_rate": 1.7694400094465913e-05, + "loss": 0.9451, + "step": 3225 + }, + { + "epoch": 1.79, + "learning_rate": 1.7625190832629085e-05, + "loss": 0.9294, + "step": 3230 + }, + { + "epoch": 1.79, + "learning_rate": 1.7556043422383293e-05, + "loss": 0.9145, + "step": 3235 + }, + { + "epoch": 1.79, + "learning_rate": 1.7486958443659112e-05, + "loss": 0.9508, + "step": 3240 + }, + { + "epoch": 1.79, + "learning_rate": 1.7417936475863526e-05, + "loss": 0.8725, + "step": 3245 + }, + { + "epoch": 1.8, + "learning_rate": 1.7348978097875036e-05, + "loss": 0.9195, + "step": 3250 + }, + { + "epoch": 1.8, + "learning_rate": 1.728008388803883e-05, + "loss": 0.933, + "step": 3255 + }, + { + "epoch": 1.8, + "learning_rate": 1.7211254424161933e-05, + "loss": 0.9747, + "step": 3260 + }, + { + "epoch": 1.81, + "learning_rate": 1.7142490283508324e-05, + "loss": 0.9168, + "step": 3265 + }, + { + "epoch": 1.81, + "learning_rate": 1.707379204279418e-05, + "loss": 0.9844, + "step": 3270 + }, + { + "epoch": 1.81, + "learning_rate": 1.700516027818293e-05, + "loss": 0.9071, + "step": 3275 + }, + { + "epoch": 1.81, + "learning_rate": 1.6936595565280488e-05, + "loss": 0.9311, + "step": 3280 + }, + { + "epoch": 1.82, + "learning_rate": 1.686809847913045e-05, + "loss": 0.9419, + "step": 3285 + }, + { + "epoch": 1.82, + "learning_rate": 1.679966959420918e-05, + "loss": 0.9679, + "step": 3290 + }, + { + "epoch": 1.82, + "learning_rate": 1.67313094844211e-05, + "loss": 0.9601, + "step": 3295 + }, + { + "epoch": 1.82, + "learning_rate": 1.6663018723093774e-05, + "loss": 0.9022, + "step": 3300 + }, + { + "epoch": 1.83, + "learning_rate": 1.6594797882973196e-05, + "loss": 0.8925, + "step": 3305 + }, + { + "epoch": 1.83, + "learning_rate": 1.6526647536218894e-05, + "loss": 0.9238, + "step": 3310 + }, + { + "epoch": 1.83, + "learning_rate": 1.6458568254399225e-05, + "loss": 0.9632, + "step": 3315 + }, + { + "epoch": 1.84, + "learning_rate": 1.6390560608486496e-05, + "loss": 0.9164, + "step": 3320 + }, + { + "epoch": 1.84, + "learning_rate": 1.6322625168852217e-05, + "loss": 0.9505, + "step": 3325 + }, + { + "epoch": 1.84, + "learning_rate": 1.6254762505262338e-05, + "loss": 0.9622, + "step": 3330 + }, + { + "epoch": 1.84, + "learning_rate": 1.618697318687241e-05, + "loss": 0.9204, + "step": 3335 + }, + { + "epoch": 1.85, + "learning_rate": 1.6119257782222895e-05, + "loss": 0.9504, + "step": 3340 + }, + { + "epoch": 1.85, + "learning_rate": 1.6051616859234285e-05, + "loss": 0.9384, + "step": 3345 + }, + { + "epoch": 1.85, + "learning_rate": 1.5984050985202474e-05, + "loss": 0.9374, + "step": 3350 + }, + { + "epoch": 1.86, + "learning_rate": 1.591656072679387e-05, + "loss": 0.9937, + "step": 3355 + }, + { + "epoch": 1.86, + "learning_rate": 1.5849146650040737e-05, + "loss": 0.9587, + "step": 3360 + }, + { + "epoch": 1.86, + "learning_rate": 1.5781809320336412e-05, + "loss": 0.9312, + "step": 3365 + }, + { + "epoch": 1.86, + "learning_rate": 1.5714549302430536e-05, + "loss": 0.9343, + "step": 3370 + }, + { + "epoch": 1.87, + "learning_rate": 1.5647367160424393e-05, + "loss": 0.9419, + "step": 3375 + }, + { + "epoch": 1.87, + "learning_rate": 1.558026345776608e-05, + "loss": 0.9371, + "step": 3380 + }, + { + "epoch": 1.87, + "learning_rate": 1.551323875724587e-05, + "loss": 0.913, + "step": 3385 + }, + { + "epoch": 1.87, + "learning_rate": 1.5446293620991437e-05, + "loss": 0.9195, + "step": 3390 + }, + { + "epoch": 1.88, + "learning_rate": 1.5379428610463174e-05, + "loss": 0.9164, + "step": 3395 + }, + { + "epoch": 1.88, + "learning_rate": 1.531264428644945e-05, + "loss": 0.9256, + "step": 3400 + }, + { + "epoch": 1.88, + "learning_rate": 1.5245941209061953e-05, + "loss": 0.9658, + "step": 3405 + }, + { + "epoch": 1.89, + "learning_rate": 1.517931993773094e-05, + "loss": 0.9397, + "step": 3410 + }, + { + "epoch": 1.89, + "learning_rate": 1.5112781031200569e-05, + "loss": 0.915, + "step": 3415 + }, + { + "epoch": 1.89, + "learning_rate": 1.5046325047524251e-05, + "loss": 0.9116, + "step": 3420 + }, + { + "epoch": 1.89, + "learning_rate": 1.4979952544059888e-05, + "loss": 0.9192, + "step": 3425 + }, + { + "epoch": 1.9, + "learning_rate": 1.4913664077465289e-05, + "loss": 0.924, + "step": 3430 + }, + { + "epoch": 1.9, + "learning_rate": 1.4847460203693408e-05, + "loss": 0.9579, + "step": 3435 + }, + { + "epoch": 1.9, + "learning_rate": 1.4781341477987776e-05, + "loss": 0.9816, + "step": 3440 + }, + { + "epoch": 1.9, + "learning_rate": 1.4715308454877758e-05, + "loss": 0.9412, + "step": 3445 + }, + { + "epoch": 1.91, + "learning_rate": 1.4649361688173979e-05, + "loss": 0.9413, + "step": 3450 + }, + { + "epoch": 1.91, + "learning_rate": 1.458350173096361e-05, + "loss": 0.9425, + "step": 3455 + }, + { + "epoch": 1.91, + "learning_rate": 1.4517729135605795e-05, + "loss": 0.9132, + "step": 3460 + }, + { + "epoch": 1.92, + "learning_rate": 1.4452044453726942e-05, + "loss": 0.9455, + "step": 3465 + }, + { + "epoch": 1.92, + "learning_rate": 1.4386448236216174e-05, + "loss": 0.9543, + "step": 3470 + }, + { + "epoch": 1.92, + "learning_rate": 1.4320941033220667e-05, + "loss": 0.9118, + "step": 3475 + }, + { + "epoch": 1.92, + "learning_rate": 1.4255523394141041e-05, + "loss": 0.9425, + "step": 3480 + }, + { + "epoch": 1.93, + "learning_rate": 1.4190195867626749e-05, + "loss": 0.8958, + "step": 3485 + }, + { + "epoch": 1.93, + "learning_rate": 1.4124959001571497e-05, + "loss": 0.944, + "step": 3490 + }, + { + "epoch": 1.93, + "learning_rate": 1.4059813343108616e-05, + "loss": 0.9611, + "step": 3495 + }, + { + "epoch": 1.94, + "learning_rate": 1.3994759438606501e-05, + "loss": 0.9448, + "step": 3500 + }, + { + "epoch": 1.94, + "learning_rate": 1.3929797833664013e-05, + "loss": 0.9659, + "step": 3505 + }, + { + "epoch": 1.94, + "learning_rate": 1.3864929073105922e-05, + "loss": 0.9178, + "step": 3510 + }, + { + "epoch": 1.94, + "learning_rate": 1.3800153700978282e-05, + "loss": 0.8965, + "step": 3515 + }, + { + "epoch": 1.95, + "learning_rate": 1.373547226054398e-05, + "loss": 0.9198, + "step": 3520 + }, + { + "epoch": 1.95, + "learning_rate": 1.367088529427803e-05, + "loss": 0.9398, + "step": 3525 + }, + { + "epoch": 1.95, + "learning_rate": 1.3606393343863182e-05, + "loss": 0.9423, + "step": 3530 + }, + { + "epoch": 1.95, + "learning_rate": 1.3541996950185227e-05, + "loss": 0.9592, + "step": 3535 + }, + { + "epoch": 1.96, + "learning_rate": 1.3477696653328598e-05, + "loss": 0.9489, + "step": 3540 + }, + { + "epoch": 1.96, + "learning_rate": 1.3413492992571713e-05, + "loss": 0.963, + "step": 3545 + }, + { + "epoch": 1.96, + "learning_rate": 1.3349386506382586e-05, + "loss": 0.9449, + "step": 3550 + }, + { + "epoch": 1.97, + "learning_rate": 1.3285377732414172e-05, + "loss": 0.9043, + "step": 3555 + }, + { + "epoch": 1.97, + "learning_rate": 1.3221467207499972e-05, + "loss": 0.9362, + "step": 3560 + }, + { + "epoch": 1.97, + "learning_rate": 1.3157655467649463e-05, + "loss": 0.945, + "step": 3565 + }, + { + "epoch": 1.97, + "learning_rate": 1.3093943048043634e-05, + "loss": 0.8693, + "step": 3570 + }, + { + "epoch": 1.98, + "learning_rate": 1.3030330483030479e-05, + "loss": 0.9659, + "step": 3575 + }, + { + "epoch": 1.98, + "learning_rate": 1.2966818306120535e-05, + "loss": 0.9036, + "step": 3580 + }, + { + "epoch": 1.98, + "learning_rate": 1.2903407049982386e-05, + "loss": 0.9282, + "step": 3585 + }, + { + "epoch": 1.99, + "learning_rate": 1.2840097246438215e-05, + "loss": 0.8747, + "step": 3590 + }, + { + "epoch": 1.99, + "learning_rate": 1.277688942645934e-05, + "loss": 0.936, + "step": 3595 + }, + { + "epoch": 1.99, + "learning_rate": 1.2713784120161725e-05, + "loss": 0.8945, + "step": 3600 + }, + { + "epoch": 1.99, + "learning_rate": 1.2650781856801598e-05, + "loss": 0.9273, + "step": 3605 + }, + { + "epoch": 2.0, + "learning_rate": 1.258788316477097e-05, + "loss": 0.9632, + "step": 3610 + }, + { + "epoch": 2.0, + "learning_rate": 1.2525088571593202e-05, + "loss": 0.8641, + "step": 3615 + }, + { + "epoch": 2.0, + "learning_rate": 1.2462398603918607e-05, + "loss": 0.9314, + "step": 3620 + }, + { + "epoch": 2.0, + "learning_rate": 1.2399813787520006e-05, + "loss": 0.9169, + "step": 3625 + }, + { + "epoch": 2.01, + "learning_rate": 1.2337334647288334e-05, + "loss": 0.9089, + "step": 3630 + }, + { + "epoch": 2.01, + "learning_rate": 1.2274961707228228e-05, + "loss": 0.9039, + "step": 3635 + }, + { + "epoch": 2.01, + "learning_rate": 1.2212695490453646e-05, + "loss": 0.9378, + "step": 3640 + }, + { + "epoch": 2.02, + "learning_rate": 1.2150536519183475e-05, + "loss": 0.9297, + "step": 3645 + }, + { + "epoch": 2.02, + "learning_rate": 1.2088485314737108e-05, + "loss": 0.9488, + "step": 3650 + }, + { + "epoch": 2.02, + "learning_rate": 1.2026542397530186e-05, + "loss": 0.9625, + "step": 3655 + }, + { + "epoch": 2.02, + "learning_rate": 1.1964708287070073e-05, + "loss": 0.8874, + "step": 3660 + }, + { + "epoch": 2.03, + "learning_rate": 1.1902983501951666e-05, + "loss": 0.9224, + "step": 3665 + }, + { + "epoch": 2.03, + "learning_rate": 1.1841368559852892e-05, + "loss": 0.9442, + "step": 3670 + }, + { + "epoch": 2.03, + "learning_rate": 1.17798639775305e-05, + "loss": 0.8688, + "step": 3675 + }, + { + "epoch": 2.03, + "learning_rate": 1.1718470270815608e-05, + "loss": 0.8912, + "step": 3680 + }, + { + "epoch": 2.04, + "learning_rate": 1.1657187954609496e-05, + "loss": 0.9131, + "step": 3685 + }, + { + "epoch": 2.04, + "learning_rate": 1.1596017542879168e-05, + "loss": 0.9827, + "step": 3690 + }, + { + "epoch": 2.04, + "learning_rate": 1.1534959548653132e-05, + "loss": 0.8588, + "step": 3695 + }, + { + "epoch": 2.05, + "learning_rate": 1.147401448401706e-05, + "loss": 0.9359, + "step": 3700 + }, + { + "epoch": 2.05, + "learning_rate": 1.1413182860109491e-05, + "loss": 0.9375, + "step": 3705 + }, + { + "epoch": 2.05, + "learning_rate": 1.1352465187117562e-05, + "loss": 0.9242, + "step": 3710 + }, + { + "epoch": 2.05, + "learning_rate": 1.1291861974272703e-05, + "loss": 0.9132, + "step": 3715 + }, + { + "epoch": 2.06, + "learning_rate": 1.1231373729846393e-05, + "loss": 0.9142, + "step": 3720 + }, + { + "epoch": 2.06, + "learning_rate": 1.1171000961145883e-05, + "loss": 0.9132, + "step": 3725 + }, + { + "epoch": 2.06, + "learning_rate": 1.1110744174509952e-05, + "loss": 0.9603, + "step": 3730 + }, + { + "epoch": 2.07, + "learning_rate": 1.1050603875304622e-05, + "loss": 0.9442, + "step": 3735 + }, + { + "epoch": 2.07, + "learning_rate": 1.0990580567918979e-05, + "loss": 0.9392, + "step": 3740 + }, + { + "epoch": 2.07, + "learning_rate": 1.0930674755760908e-05, + "loss": 0.9203, + "step": 3745 + }, + { + "epoch": 2.07, + "learning_rate": 1.0870886941252872e-05, + "loss": 0.9468, + "step": 3750 + }, + { + "epoch": 2.08, + "learning_rate": 1.0811217625827705e-05, + "loss": 0.9605, + "step": 3755 + }, + { + "epoch": 2.08, + "learning_rate": 1.0751667309924399e-05, + "loss": 0.9166, + "step": 3760 + }, + { + "epoch": 2.08, + "learning_rate": 1.0692236492983918e-05, + "loss": 0.939, + "step": 3765 + }, + { + "epoch": 2.08, + "learning_rate": 1.0632925673445001e-05, + "loss": 0.9409, + "step": 3770 + }, + { + "epoch": 2.09, + "learning_rate": 1.0573735348739987e-05, + "loss": 0.9523, + "step": 3775 + }, + { + "epoch": 2.09, + "learning_rate": 1.0514666015290645e-05, + "loss": 0.8867, + "step": 3780 + }, + { + "epoch": 2.09, + "learning_rate": 1.0455718168503967e-05, + "loss": 0.9816, + "step": 3785 + }, + { + "epoch": 2.1, + "learning_rate": 1.0396892302768127e-05, + "loss": 0.9465, + "step": 3790 + }, + { + "epoch": 2.1, + "learning_rate": 1.033818891144817e-05, + "loss": 0.9352, + "step": 3795 + }, + { + "epoch": 2.1, + "learning_rate": 1.0279608486882054e-05, + "loss": 0.8877, + "step": 3800 + }, + { + "epoch": 2.1, + "learning_rate": 1.0221151520376343e-05, + "loss": 0.9098, + "step": 3805 + }, + { + "epoch": 2.11, + "learning_rate": 1.0162818502202251e-05, + "loss": 0.8687, + "step": 3810 + }, + { + "epoch": 2.11, + "learning_rate": 1.0104609921591387e-05, + "loss": 0.9256, + "step": 3815 + }, + { + "epoch": 2.11, + "learning_rate": 1.0046526266731782e-05, + "loss": 0.92, + "step": 3820 + }, + { + "epoch": 2.12, + "learning_rate": 9.988568024763673e-06, + "loss": 0.8813, + "step": 3825 + }, + { + "epoch": 2.12, + "learning_rate": 9.930735681775505e-06, + "loss": 0.891, + "step": 3830 + }, + { + "epoch": 2.12, + "learning_rate": 9.87302972279982e-06, + "loss": 0.9479, + "step": 3835 + }, + { + "epoch": 2.12, + "learning_rate": 9.815450631809191e-06, + "loss": 0.8963, + "step": 3840 + }, + { + "epoch": 2.13, + "learning_rate": 9.757998891712172e-06, + "loss": 0.9178, + "step": 3845 + }, + { + "epoch": 2.13, + "learning_rate": 9.700674984349228e-06, + "loss": 0.9072, + "step": 3850 + }, + { + "epoch": 2.13, + "learning_rate": 9.643479390488717e-06, + "loss": 0.8879, + "step": 3855 + }, + { + "epoch": 2.13, + "learning_rate": 9.586412589822846e-06, + "loss": 0.9484, + "step": 3860 + }, + { + "epoch": 2.14, + "learning_rate": 9.529475060963649e-06, + "loss": 0.9173, + "step": 3865 + }, + { + "epoch": 2.14, + "learning_rate": 9.472667281438982e-06, + "loss": 0.9318, + "step": 3870 + }, + { + "epoch": 2.14, + "learning_rate": 9.415989727688484e-06, + "loss": 0.9242, + "step": 3875 + }, + { + "epoch": 2.15, + "learning_rate": 9.359442875059631e-06, + "loss": 0.9314, + "step": 3880 + }, + { + "epoch": 2.15, + "learning_rate": 9.303027197803726e-06, + "loss": 0.9227, + "step": 3885 + }, + { + "epoch": 2.15, + "learning_rate": 9.246743169071906e-06, + "loss": 0.9253, + "step": 3890 + }, + { + "epoch": 2.15, + "learning_rate": 9.190591260911201e-06, + "loss": 0.9457, + "step": 3895 + }, + { + "epoch": 2.16, + "learning_rate": 9.134571944260554e-06, + "loss": 0.9214, + "step": 3900 + }, + { + "epoch": 2.16, + "learning_rate": 9.078685688946884e-06, + "loss": 0.9534, + "step": 3905 + }, + { + "epoch": 2.16, + "learning_rate": 9.022932963681141e-06, + "loss": 0.9236, + "step": 3910 + }, + { + "epoch": 2.16, + "learning_rate": 8.967314236054384e-06, + "loss": 0.8905, + "step": 3915 + }, + { + "epoch": 2.17, + "learning_rate": 8.911829972533817e-06, + "loss": 0.9319, + "step": 3920 + }, + { + "epoch": 2.17, + "learning_rate": 8.856480638458966e-06, + "loss": 0.8989, + "step": 3925 + }, + { + "epoch": 2.17, + "learning_rate": 8.80126669803766e-06, + "loss": 0.9234, + "step": 3930 + }, + { + "epoch": 2.18, + "learning_rate": 8.746188614342263e-06, + "loss": 0.9619, + "step": 3935 + }, + { + "epoch": 2.18, + "learning_rate": 8.691246849305653e-06, + "loss": 0.9537, + "step": 3940 + }, + { + "epoch": 2.18, + "learning_rate": 8.636441863717499e-06, + "loss": 0.9809, + "step": 3945 + }, + { + "epoch": 2.18, + "learning_rate": 8.581774117220238e-06, + "loss": 0.9219, + "step": 3950 + }, + { + "epoch": 2.19, + "learning_rate": 8.52724406830538e-06, + "loss": 0.9502, + "step": 3955 + }, + { + "epoch": 2.19, + "learning_rate": 8.472852174309514e-06, + "loss": 0.9277, + "step": 3960 + }, + { + "epoch": 2.19, + "learning_rate": 8.418598891410581e-06, + "loss": 0.8978, + "step": 3965 + }, + { + "epoch": 2.2, + "learning_rate": 8.364484674624e-06, + "loss": 0.9026, + "step": 3970 + }, + { + "epoch": 2.2, + "learning_rate": 8.31050997779885e-06, + "loss": 0.9658, + "step": 3975 + }, + { + "epoch": 2.2, + "learning_rate": 8.25667525361409e-06, + "loss": 0.9288, + "step": 3980 + }, + { + "epoch": 2.2, + "learning_rate": 8.202980953574735e-06, + "loss": 0.9009, + "step": 3985 + }, + { + "epoch": 2.21, + "learning_rate": 8.14942752800808e-06, + "loss": 0.934, + "step": 3990 + }, + { + "epoch": 2.21, + "learning_rate": 8.09601542605993e-06, + "loss": 0.9261, + "step": 3995 + }, + { + "epoch": 2.21, + "learning_rate": 8.042745095690826e-06, + "loss": 0.9086, + "step": 4000 + }, + { + "epoch": 2.21, + "learning_rate": 7.989616983672288e-06, + "loss": 0.9397, + "step": 4005 + }, + { + "epoch": 2.22, + "learning_rate": 7.936631535583055e-06, + "loss": 0.8661, + "step": 4010 + }, + { + "epoch": 2.22, + "learning_rate": 7.88378919580538e-06, + "loss": 0.9433, + "step": 4015 + }, + { + "epoch": 2.22, + "learning_rate": 7.83109040752128e-06, + "loss": 0.9473, + "step": 4020 + }, + { + "epoch": 2.23, + "learning_rate": 7.778535612708824e-06, + "loss": 0.9177, + "step": 4025 + }, + { + "epoch": 2.23, + "learning_rate": 7.726125252138417e-06, + "loss": 0.8932, + "step": 4030 + }, + { + "epoch": 2.23, + "learning_rate": 7.673859765369126e-06, + "loss": 0.9187, + "step": 4035 + }, + { + "epoch": 2.23, + "learning_rate": 7.62173959074497e-06, + "loss": 0.9009, + "step": 4040 + }, + { + "epoch": 2.24, + "learning_rate": 7.569765165391257e-06, + "loss": 0.9348, + "step": 4045 + }, + { + "epoch": 2.24, + "learning_rate": 7.517936925210917e-06, + "loss": 0.8827, + "step": 4050 + }, + { + "epoch": 2.24, + "learning_rate": 7.466255304880834e-06, + "loss": 0.9668, + "step": 4055 + }, + { + "epoch": 2.24, + "learning_rate": 7.4147207378482295e-06, + "loss": 0.934, + "step": 4060 + }, + { + "epoch": 2.25, + "learning_rate": 7.36333365632697e-06, + "loss": 0.9251, + "step": 4065 + }, + { + "epoch": 2.25, + "learning_rate": 7.312094491294033e-06, + "loss": 0.9225, + "step": 4070 + }, + { + "epoch": 2.25, + "learning_rate": 7.261003672485783e-06, + "loss": 0.9198, + "step": 4075 + }, + { + "epoch": 2.26, + "learning_rate": 7.210061628394477e-06, + "loss": 0.9023, + "step": 4080 + }, + { + "epoch": 2.26, + "learning_rate": 7.159268786264564e-06, + "loss": 0.9378, + "step": 4085 + }, + { + "epoch": 2.26, + "learning_rate": 7.108625572089209e-06, + "loss": 0.9148, + "step": 4090 + }, + { + "epoch": 2.26, + "learning_rate": 7.05813241060661e-06, + "loss": 0.9308, + "step": 4095 + }, + { + "epoch": 2.27, + "learning_rate": 7.007789725296557e-06, + "loss": 0.8979, + "step": 4100 + }, + { + "epoch": 2.27, + "learning_rate": 6.957597938376748e-06, + "loss": 0.9329, + "step": 4105 + }, + { + "epoch": 2.27, + "learning_rate": 6.907557470799358e-06, + "loss": 0.8849, + "step": 4110 + }, + { + "epoch": 2.28, + "learning_rate": 6.857668742247458e-06, + "loss": 0.8879, + "step": 4115 + }, + { + "epoch": 2.28, + "learning_rate": 6.807932171131498e-06, + "loss": 0.9132, + "step": 4120 + }, + { + "epoch": 2.28, + "learning_rate": 6.758348174585804e-06, + "loss": 0.9273, + "step": 4125 + }, + { + "epoch": 2.28, + "learning_rate": 6.7089171684650785e-06, + "loss": 0.9115, + "step": 4130 + }, + { + "epoch": 2.29, + "learning_rate": 6.659639567340914e-06, + "loss": 0.9431, + "step": 4135 + }, + { + "epoch": 2.29, + "learning_rate": 6.610515784498314e-06, + "loss": 0.9515, + "step": 4140 + }, + { + "epoch": 2.29, + "learning_rate": 6.561546231932228e-06, + "loss": 0.9069, + "step": 4145 + }, + { + "epoch": 2.29, + "learning_rate": 6.512731320344101e-06, + "loss": 0.9178, + "step": 4150 + }, + { + "epoch": 2.3, + "learning_rate": 6.464071459138405e-06, + "loss": 0.9156, + "step": 4155 + }, + { + "epoch": 2.3, + "learning_rate": 6.415567056419244e-06, + "loss": 0.9637, + "step": 4160 + }, + { + "epoch": 2.3, + "learning_rate": 6.3672185189869e-06, + "loss": 0.9446, + "step": 4165 + }, + { + "epoch": 2.31, + "learning_rate": 6.319026252334445e-06, + "loss": 0.8726, + "step": 4170 + }, + { + "epoch": 2.31, + "learning_rate": 6.270990660644313e-06, + "loss": 0.9324, + "step": 4175 + }, + { + "epoch": 2.31, + "learning_rate": 6.223112146784935e-06, + "loss": 0.9252, + "step": 4180 + }, + { + "epoch": 2.31, + "learning_rate": 6.1753911123073435e-06, + "loss": 0.8924, + "step": 4185 + }, + { + "epoch": 2.32, + "learning_rate": 6.127827957441817e-06, + "loss": 0.9344, + "step": 4190 + }, + { + "epoch": 2.32, + "learning_rate": 6.080423081094522e-06, + "loss": 0.9341, + "step": 4195 + }, + { + "epoch": 2.32, + "learning_rate": 6.033176880844133e-06, + "loss": 0.905, + "step": 4200 + }, + { + "epoch": 2.33, + "learning_rate": 5.986089752938584e-06, + "loss": 0.9231, + "step": 4205 + }, + { + "epoch": 2.33, + "learning_rate": 5.939162092291622e-06, + "loss": 0.906, + "step": 4210 + }, + { + "epoch": 2.33, + "learning_rate": 5.892394292479633e-06, + "loss": 0.8957, + "step": 4215 + }, + { + "epoch": 2.33, + "learning_rate": 5.8457867457382024e-06, + "loss": 0.867, + "step": 4220 + }, + { + "epoch": 2.34, + "learning_rate": 5.7993398429589506e-06, + "loss": 0.9022, + "step": 4225 + }, + { + "epoch": 2.34, + "learning_rate": 5.753053973686148e-06, + "loss": 0.9064, + "step": 4230 + }, + { + "epoch": 2.34, + "learning_rate": 5.7069295261135525e-06, + "loss": 0.9019, + "step": 4235 + }, + { + "epoch": 2.34, + "learning_rate": 5.66096688708104e-06, + "loss": 0.9397, + "step": 4240 + }, + { + "epoch": 2.35, + "learning_rate": 5.615166442071457e-06, + "loss": 0.8971, + "step": 4245 + }, + { + "epoch": 2.35, + "learning_rate": 5.569528575207339e-06, + "loss": 0.8902, + "step": 4250 + }, + { + "epoch": 2.35, + "learning_rate": 5.5240536692477e-06, + "loss": 0.8982, + "step": 4255 + }, + { + "epoch": 2.36, + "learning_rate": 5.4787421055848164e-06, + "loss": 0.9135, + "step": 4260 + }, + { + "epoch": 2.36, + "learning_rate": 5.433594264241043e-06, + "loss": 0.8969, + "step": 4265 + }, + { + "epoch": 2.36, + "learning_rate": 5.3886105238656055e-06, + "loss": 0.9036, + "step": 4270 + }, + { + "epoch": 2.36, + "learning_rate": 5.3437912617314425e-06, + "loss": 0.9313, + "step": 4275 + }, + { + "epoch": 2.37, + "learning_rate": 5.299136853732034e-06, + "loss": 0.9034, + "step": 4280 + }, + { + "epoch": 2.37, + "learning_rate": 5.254647674378252e-06, + "loss": 0.9673, + "step": 4285 + }, + { + "epoch": 2.37, + "learning_rate": 5.210324096795202e-06, + "loss": 0.9044, + "step": 4290 + }, + { + "epoch": 2.37, + "learning_rate": 5.166166492719124e-06, + "loss": 0.937, + "step": 4295 + }, + { + "epoch": 2.38, + "learning_rate": 5.122175232494255e-06, + "loss": 0.9095, + "step": 4300 + } + ], + "max_steps": 5424, + "num_train_epochs": 3, + "total_flos": 1.3973317196395315e+18, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-4300/training_args.bin b/checkpoint-4300/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..77433459daca6603950afc463ff989e817f81ac3 --- /dev/null +++ b/checkpoint-4300/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0f6a8853599a52b1f15984d75ce4b6434c335fdc67d4509d0b2ca0d8c613590a +size 3362 diff --git a/checkpoint-4400/README.md b/checkpoint-4400/README.md new file mode 100644 index 0000000000000000000000000000000000000000..53c0fe6b436905ad7a2c060e98fe1ebdeac1a2fe --- /dev/null +++ b/checkpoint-4400/README.md @@ -0,0 +1,15 @@ +--- +library_name: peft +--- +## Training procedure + + +The following `bitsandbytes` quantization config was used during training: +- load_in_8bit: True +- llm_int8_threshold: 6.0 +- llm_int8_skip_modules: None +- llm_int8_enable_fp32_cpu_offload: False +### Framework versions + + +- PEFT 0.5.0 diff --git a/checkpoint-4400/adapter_config.json b/checkpoint-4400/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..514ae99ac7471cc26016f7575e44dd8304f29b07 --- /dev/null +++ b/checkpoint-4400/adapter_config.json @@ -0,0 +1,21 @@ +{ + "auto_mapping": null, + "base_model_name_or_path": "meta-llama/Llama-2-7b-chat-hf", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "q_proj", + "v_proj" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-4400/adapter_model.bin b/checkpoint-4400/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..10fc63509af4b884d6fd6a4de78d6ad861f66da6 --- /dev/null +++ b/checkpoint-4400/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ba429d93e8e586cb1676d1c5ccbc3bc40cf5913fa7f9c536f2771f6873f9d765 +size 16821197 diff --git a/checkpoint-4400/finetuning_args.json b/checkpoint-4400/finetuning_args.json new file mode 100644 index 0000000000000000000000000000000000000000..d01efc206b59c6f88548e8f3940579f2ed2af33b --- /dev/null +++ b/checkpoint-4400/finetuning_args.json @@ -0,0 +1,16 @@ +{ + "dpo_beta": 0.1, + "finetuning_type": "lora", + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "lora_rank": 8, + "lora_target": [ + "q_proj", + "v_proj" + ], + "name_module_trainable": "mlp", + "num_hidden_layers": 32, + "num_layer_trainable": 3, + "ppo_score_norm": false, + "resume_lora_training": true +} diff --git a/checkpoint-4400/optimizer.pt b/checkpoint-4400/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..7cf7baa142197027646f7ac33c0a86b96f33fa75 --- /dev/null +++ b/checkpoint-4400/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:327552735038ce0fb834873cf4c309c135c8adad8478c51361170f48a407730a +size 33629893 diff --git a/checkpoint-4400/rng_state.pth b/checkpoint-4400/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..4642dd09b24fdbd2ab698d7ff28bebe410987279 --- /dev/null +++ b/checkpoint-4400/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d3d83fc2a6c35718c4bc740941af1d366842e9d4745bbba1cf5d173638a693ce +size 14575 diff --git a/checkpoint-4400/scaler.pt b/checkpoint-4400/scaler.pt new file mode 100644 index 0000000000000000000000000000000000000000..e57064f7b15a75f5281b6ebe7e5a078c9d102c39 --- /dev/null +++ b/checkpoint-4400/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e18e6f827cad7e02c4a5af7fad84c04a9f52239b5ecac9f555a6714369c9b1ea +size 557 diff --git a/checkpoint-4400/scheduler.pt b/checkpoint-4400/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..dd3781a37746168fc2d0027fd1f6cda0412468c0 --- /dev/null +++ b/checkpoint-4400/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be4644ce5cc62c35e247f59f3e25b426a3db7e7270feb884b27e7662319fbea7 +size 627 diff --git a/checkpoint-4400/trainer_state.json b/checkpoint-4400/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..da9af0f2d0d7747b322ed80a0f89622ad637d2a2 --- /dev/null +++ b/checkpoint-4400/trainer_state.json @@ -0,0 +1,5296 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 2.4329554879734587, + "global_step": 4400, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.9999895164082156e-05, + "loss": 1.6349, + "step": 5 + }, + { + "epoch": 0.01, + "learning_rate": 4.999958065720787e-05, + "loss": 1.6199, + "step": 10 + }, + { + "epoch": 0.01, + "learning_rate": 4.999905648201487e-05, + "loss": 1.4834, + "step": 15 + }, + { + "epoch": 0.01, + "learning_rate": 4.999832264289934e-05, + "loss": 1.3882, + "step": 20 + }, + { + "epoch": 0.01, + "learning_rate": 4.999737914601591e-05, + "loss": 1.3679, + "step": 25 + }, + { + "epoch": 0.02, + "learning_rate": 4.999622599927756e-05, + "loss": 1.2396, + "step": 30 + }, + { + "epoch": 0.02, + "learning_rate": 4.999486321235559e-05, + "loss": 1.321, + "step": 35 + }, + { + "epoch": 0.02, + "learning_rate": 4.9993290796679516e-05, + "loss": 1.2874, + "step": 40 + }, + { + "epoch": 0.02, + "learning_rate": 4.999150876543699e-05, + "loss": 1.2607, + "step": 45 + }, + { + "epoch": 0.03, + "learning_rate": 4.9989517133573694e-05, + "loss": 1.2454, + "step": 50 + }, + { + "epoch": 0.03, + "learning_rate": 4.9987315917793174e-05, + "loss": 1.2799, + "step": 55 + }, + { + "epoch": 0.03, + "learning_rate": 4.998490513655676e-05, + "loss": 1.2575, + "step": 60 + }, + { + "epoch": 0.04, + "learning_rate": 4.998228481008337e-05, + "loss": 1.2404, + "step": 65 + }, + { + "epoch": 0.04, + "learning_rate": 4.997945496034934e-05, + "loss": 1.2219, + "step": 70 + }, + { + "epoch": 0.04, + "learning_rate": 4.9976415611088267e-05, + "loss": 1.2241, + "step": 75 + }, + { + "epoch": 0.04, + "learning_rate": 4.997316678779079e-05, + "loss": 1.1716, + "step": 80 + }, + { + "epoch": 0.05, + "learning_rate": 4.996970851770438e-05, + "loss": 1.1883, + "step": 85 + }, + { + "epoch": 0.05, + "learning_rate": 4.9966040829833115e-05, + "loss": 1.205, + "step": 90 + }, + { + "epoch": 0.05, + "learning_rate": 4.9962163754937426e-05, + "loss": 1.1246, + "step": 95 + }, + { + "epoch": 0.06, + "learning_rate": 4.995807732553384e-05, + "loss": 1.1636, + "step": 100 + }, + { + "epoch": 0.06, + "learning_rate": 4.9953781575894723e-05, + "loss": 1.158, + "step": 105 + }, + { + "epoch": 0.06, + "learning_rate": 4.9949276542048e-05, + "loss": 1.1477, + "step": 110 + }, + { + "epoch": 0.06, + "learning_rate": 4.9944562261776805e-05, + "loss": 1.1678, + "step": 115 + }, + { + "epoch": 0.07, + "learning_rate": 4.9939638774619216e-05, + "loss": 1.1501, + "step": 120 + }, + { + "epoch": 0.07, + "learning_rate": 4.99345061218679e-05, + "loss": 1.1955, + "step": 125 + }, + { + "epoch": 0.07, + "learning_rate": 4.9929164346569756e-05, + "loss": 1.1724, + "step": 130 + }, + { + "epoch": 0.07, + "learning_rate": 4.9923613493525576e-05, + "loss": 1.177, + "step": 135 + }, + { + "epoch": 0.08, + "learning_rate": 4.991785360928968e-05, + "loss": 1.1418, + "step": 140 + }, + { + "epoch": 0.08, + "learning_rate": 4.991188474216947e-05, + "loss": 1.1898, + "step": 145 + }, + { + "epoch": 0.08, + "learning_rate": 4.9905706942225094e-05, + "loss": 1.1479, + "step": 150 + }, + { + "epoch": 0.09, + "learning_rate": 4.9899320261268966e-05, + "loss": 1.1356, + "step": 155 + }, + { + "epoch": 0.09, + "learning_rate": 4.989272475286537e-05, + "loss": 1.1397, + "step": 160 + }, + { + "epoch": 0.09, + "learning_rate": 4.9885920472330004e-05, + "loss": 1.1215, + "step": 165 + }, + { + "epoch": 0.09, + "learning_rate": 4.9878907476729516e-05, + "loss": 1.167, + "step": 170 + }, + { + "epoch": 0.1, + "learning_rate": 4.9871685824881e-05, + "loss": 1.1219, + "step": 175 + }, + { + "epoch": 0.1, + "learning_rate": 4.9864255577351534e-05, + "loss": 1.0835, + "step": 180 + }, + { + "epoch": 0.1, + "learning_rate": 4.985661679645769e-05, + "loss": 1.0721, + "step": 185 + }, + { + "epoch": 0.11, + "learning_rate": 4.9848769546264915e-05, + "loss": 1.0692, + "step": 190 + }, + { + "epoch": 0.11, + "learning_rate": 4.9840713892587146e-05, + "loss": 1.0488, + "step": 195 + }, + { + "epoch": 0.11, + "learning_rate": 4.983244990298609e-05, + "loss": 1.1285, + "step": 200 + }, + { + "epoch": 0.11, + "learning_rate": 4.982397764677081e-05, + "loss": 1.0832, + "step": 205 + }, + { + "epoch": 0.12, + "learning_rate": 4.981529719499704e-05, + "loss": 1.0652, + "step": 210 + }, + { + "epoch": 0.12, + "learning_rate": 4.980640862046663e-05, + "loss": 1.1043, + "step": 215 + }, + { + "epoch": 0.12, + "learning_rate": 4.979731199772693e-05, + "loss": 1.112, + "step": 220 + }, + { + "epoch": 0.12, + "learning_rate": 4.9788007403070146e-05, + "loss": 1.1029, + "step": 225 + }, + { + "epoch": 0.13, + "learning_rate": 4.977849491453277e-05, + "loss": 1.0869, + "step": 230 + }, + { + "epoch": 0.13, + "learning_rate": 4.976877461189481e-05, + "loss": 1.0843, + "step": 235 + }, + { + "epoch": 0.13, + "learning_rate": 4.975884657667922e-05, + "loss": 1.0789, + "step": 240 + }, + { + "epoch": 0.14, + "learning_rate": 4.974871089215118e-05, + "loss": 1.0449, + "step": 245 + }, + { + "epoch": 0.14, + "learning_rate": 4.9738367643317405e-05, + "loss": 1.1053, + "step": 250 + }, + { + "epoch": 0.14, + "learning_rate": 4.9727816916925395e-05, + "loss": 1.0651, + "step": 255 + }, + { + "epoch": 0.14, + "learning_rate": 4.971705880146276e-05, + "loss": 1.0828, + "step": 260 + }, + { + "epoch": 0.15, + "learning_rate": 4.970609338715646e-05, + "loss": 1.0932, + "step": 265 + }, + { + "epoch": 0.15, + "learning_rate": 4.969492076597203e-05, + "loss": 1.0648, + "step": 270 + }, + { + "epoch": 0.15, + "learning_rate": 4.968354103161283e-05, + "loss": 1.0948, + "step": 275 + }, + { + "epoch": 0.15, + "learning_rate": 4.967195427951926e-05, + "loss": 1.0721, + "step": 280 + }, + { + "epoch": 0.16, + "learning_rate": 4.9660160606867936e-05, + "loss": 1.124, + "step": 285 + }, + { + "epoch": 0.16, + "learning_rate": 4.9648160112570896e-05, + "loss": 1.0963, + "step": 290 + }, + { + "epoch": 0.16, + "learning_rate": 4.9635952897274773e-05, + "loss": 1.1078, + "step": 295 + }, + { + "epoch": 0.17, + "learning_rate": 4.9623539063359925e-05, + "loss": 1.1059, + "step": 300 + }, + { + "epoch": 0.17, + "learning_rate": 4.961091871493962e-05, + "loss": 1.1032, + "step": 305 + }, + { + "epoch": 0.17, + "learning_rate": 4.959809195785912e-05, + "loss": 1.0595, + "step": 310 + }, + { + "epoch": 0.17, + "learning_rate": 4.958505889969481e-05, + "loss": 1.1096, + "step": 315 + }, + { + "epoch": 0.18, + "learning_rate": 4.957181964975329e-05, + "loss": 1.0589, + "step": 320 + }, + { + "epoch": 0.18, + "learning_rate": 4.955837431907049e-05, + "loss": 1.0608, + "step": 325 + }, + { + "epoch": 0.18, + "learning_rate": 4.954472302041069e-05, + "loss": 1.0819, + "step": 330 + }, + { + "epoch": 0.19, + "learning_rate": 4.9530865868265605e-05, + "loss": 1.0759, + "step": 335 + }, + { + "epoch": 0.19, + "learning_rate": 4.951680297885342e-05, + "loss": 1.0515, + "step": 340 + }, + { + "epoch": 0.19, + "learning_rate": 4.950253447011779e-05, + "loss": 1.0371, + "step": 345 + }, + { + "epoch": 0.19, + "learning_rate": 4.948806046172691e-05, + "loss": 1.0619, + "step": 350 + }, + { + "epoch": 0.2, + "learning_rate": 4.947338107507245e-05, + "loss": 1.0757, + "step": 355 + }, + { + "epoch": 0.2, + "learning_rate": 4.945849643326857e-05, + "loss": 1.0686, + "step": 360 + }, + { + "epoch": 0.2, + "learning_rate": 4.9443406661150874e-05, + "loss": 1.0809, + "step": 365 + }, + { + "epoch": 0.2, + "learning_rate": 4.942811188527537e-05, + "loss": 1.0704, + "step": 370 + }, + { + "epoch": 0.21, + "learning_rate": 4.941261223391742e-05, + "loss": 1.0655, + "step": 375 + }, + { + "epoch": 0.21, + "learning_rate": 4.939690783707063e-05, + "loss": 1.1182, + "step": 380 + }, + { + "epoch": 0.21, + "learning_rate": 4.938099882644578e-05, + "loss": 1.081, + "step": 385 + }, + { + "epoch": 0.22, + "learning_rate": 4.9364885335469734e-05, + "loss": 1.0792, + "step": 390 + }, + { + "epoch": 0.22, + "learning_rate": 4.93485674992843e-05, + "loss": 1.0244, + "step": 395 + }, + { + "epoch": 0.22, + "learning_rate": 4.933204545474511e-05, + "loss": 1.0531, + "step": 400 + }, + { + "epoch": 0.22, + "learning_rate": 4.9315319340420465e-05, + "loss": 1.048, + "step": 405 + }, + { + "epoch": 0.23, + "learning_rate": 4.929838929659015e-05, + "loss": 1.04, + "step": 410 + }, + { + "epoch": 0.23, + "learning_rate": 4.9281255465244314e-05, + "loss": 1.0686, + "step": 415 + }, + { + "epoch": 0.23, + "learning_rate": 4.926391799008223e-05, + "loss": 1.0933, + "step": 420 + }, + { + "epoch": 0.24, + "learning_rate": 4.924637701651111e-05, + "loss": 1.0313, + "step": 425 + }, + { + "epoch": 0.24, + "learning_rate": 4.9228632691644874e-05, + "loss": 1.0811, + "step": 430 + }, + { + "epoch": 0.24, + "learning_rate": 4.921068516430293e-05, + "loss": 1.0617, + "step": 435 + }, + { + "epoch": 0.24, + "learning_rate": 4.919253458500892e-05, + "loss": 1.0482, + "step": 440 + }, + { + "epoch": 0.25, + "learning_rate": 4.9174181105989445e-05, + "loss": 1.0681, + "step": 445 + }, + { + "epoch": 0.25, + "learning_rate": 4.9155624881172834e-05, + "loss": 1.0476, + "step": 450 + }, + { + "epoch": 0.25, + "learning_rate": 4.913686606618777e-05, + "loss": 1.0463, + "step": 455 + }, + { + "epoch": 0.25, + "learning_rate": 4.911790481836208e-05, + "loss": 1.0213, + "step": 460 + }, + { + "epoch": 0.26, + "learning_rate": 4.909874129672133e-05, + "loss": 0.9855, + "step": 465 + }, + { + "epoch": 0.26, + "learning_rate": 4.907937566198757e-05, + "loss": 1.045, + "step": 470 + }, + { + "epoch": 0.26, + "learning_rate": 4.9059808076577914e-05, + "loss": 1.0442, + "step": 475 + }, + { + "epoch": 0.27, + "learning_rate": 4.904003870460323e-05, + "loss": 1.0354, + "step": 480 + }, + { + "epoch": 0.27, + "learning_rate": 4.9020067711866735e-05, + "loss": 1.0331, + "step": 485 + }, + { + "epoch": 0.27, + "learning_rate": 4.899989526586261e-05, + "loss": 1.0585, + "step": 490 + }, + { + "epoch": 0.27, + "learning_rate": 4.8979521535774636e-05, + "loss": 1.0223, + "step": 495 + }, + { + "epoch": 0.28, + "learning_rate": 4.895894669247468e-05, + "loss": 1.0118, + "step": 500 + }, + { + "epoch": 0.28, + "learning_rate": 4.8938170908521356e-05, + "loss": 1.0508, + "step": 505 + }, + { + "epoch": 0.28, + "learning_rate": 4.8917194358158534e-05, + "loss": 1.0656, + "step": 510 + }, + { + "epoch": 0.28, + "learning_rate": 4.8896017217313886e-05, + "loss": 1.0655, + "step": 515 + }, + { + "epoch": 0.29, + "learning_rate": 4.887463966359741e-05, + "loss": 1.0833, + "step": 520 + }, + { + "epoch": 0.29, + "learning_rate": 4.8853061876299956e-05, + "loss": 1.068, + "step": 525 + }, + { + "epoch": 0.29, + "learning_rate": 4.8831284036391684e-05, + "loss": 1.0487, + "step": 530 + }, + { + "epoch": 0.3, + "learning_rate": 4.880930632652058e-05, + "loss": 1.059, + "step": 535 + }, + { + "epoch": 0.3, + "learning_rate": 4.878712893101092e-05, + "loss": 1.0408, + "step": 540 + }, + { + "epoch": 0.3, + "learning_rate": 4.876475203586171e-05, + "loss": 0.9976, + "step": 545 + }, + { + "epoch": 0.3, + "learning_rate": 4.874217582874514e-05, + "loss": 1.0358, + "step": 550 + }, + { + "epoch": 0.31, + "learning_rate": 4.8719400499005e-05, + "loss": 1.0073, + "step": 555 + }, + { + "epoch": 0.31, + "learning_rate": 4.869642623765509e-05, + "loss": 1.0742, + "step": 560 + }, + { + "epoch": 0.31, + "learning_rate": 4.867325323737765e-05, + "loss": 1.0463, + "step": 565 + }, + { + "epoch": 0.32, + "learning_rate": 4.864988169252168e-05, + "loss": 1.0242, + "step": 570 + }, + { + "epoch": 0.32, + "learning_rate": 4.8626311799101375e-05, + "loss": 1.0773, + "step": 575 + }, + { + "epoch": 0.32, + "learning_rate": 4.860254375479446e-05, + "loss": 1.0371, + "step": 580 + }, + { + "epoch": 0.32, + "learning_rate": 4.8578577758940504e-05, + "loss": 1.0399, + "step": 585 + }, + { + "epoch": 0.33, + "learning_rate": 4.855441401253928e-05, + "loss": 1.0411, + "step": 590 + }, + { + "epoch": 0.33, + "learning_rate": 4.8530052718249076e-05, + "loss": 1.0476, + "step": 595 + }, + { + "epoch": 0.33, + "learning_rate": 4.850549408038498e-05, + "loss": 1.0075, + "step": 600 + }, + { + "epoch": 0.33, + "learning_rate": 4.848073830491717e-05, + "loss": 1.0045, + "step": 605 + }, + { + "epoch": 0.34, + "learning_rate": 4.845578559946923e-05, + "loss": 1.0368, + "step": 610 + }, + { + "epoch": 0.34, + "learning_rate": 4.8430636173316306e-05, + "loss": 1.0156, + "step": 615 + }, + { + "epoch": 0.34, + "learning_rate": 4.840529023738348e-05, + "loss": 1.0334, + "step": 620 + }, + { + "epoch": 0.35, + "learning_rate": 4.837974800424389e-05, + "loss": 0.9994, + "step": 625 + }, + { + "epoch": 0.35, + "learning_rate": 4.8354009688117026e-05, + "loss": 1.0467, + "step": 630 + }, + { + "epoch": 0.35, + "learning_rate": 4.8328075504866874e-05, + "loss": 0.9968, + "step": 635 + }, + { + "epoch": 0.35, + "learning_rate": 4.8301945672000164e-05, + "loss": 1.041, + "step": 640 + }, + { + "epoch": 0.36, + "learning_rate": 4.8275620408664487e-05, + "loss": 0.9811, + "step": 645 + }, + { + "epoch": 0.36, + "learning_rate": 4.8249099935646494e-05, + "loss": 1.0235, + "step": 650 + }, + { + "epoch": 0.36, + "learning_rate": 4.822238447537003e-05, + "loss": 1.0152, + "step": 655 + }, + { + "epoch": 0.36, + "learning_rate": 4.819547425189429e-05, + "loss": 1.038, + "step": 660 + }, + { + "epoch": 0.37, + "learning_rate": 4.81683694909119e-05, + "loss": 1.0387, + "step": 665 + }, + { + "epoch": 0.37, + "learning_rate": 4.814107041974707e-05, + "loss": 1.019, + "step": 670 + }, + { + "epoch": 0.37, + "learning_rate": 4.811357726735366e-05, + "loss": 1.0403, + "step": 675 + }, + { + "epoch": 0.38, + "learning_rate": 4.808589026431324e-05, + "loss": 1.0828, + "step": 680 + }, + { + "epoch": 0.38, + "learning_rate": 4.805800964283322e-05, + "loss": 1.0128, + "step": 685 + }, + { + "epoch": 0.38, + "learning_rate": 4.802993563674483e-05, + "loss": 1.0502, + "step": 690 + }, + { + "epoch": 0.38, + "learning_rate": 4.80016684815012e-05, + "loss": 1.0169, + "step": 695 + }, + { + "epoch": 0.39, + "learning_rate": 4.7973208414175406e-05, + "loss": 1.0316, + "step": 700 + }, + { + "epoch": 0.39, + "learning_rate": 4.794455567345842e-05, + "loss": 1.0263, + "step": 705 + }, + { + "epoch": 0.39, + "learning_rate": 4.791571049965714e-05, + "loss": 1.0801, + "step": 710 + }, + { + "epoch": 0.4, + "learning_rate": 4.7886673134692404e-05, + "loss": 1.0575, + "step": 715 + }, + { + "epoch": 0.4, + "learning_rate": 4.7857443822096905e-05, + "loss": 0.9767, + "step": 720 + }, + { + "epoch": 0.4, + "learning_rate": 4.782802280701319e-05, + "loss": 0.9666, + "step": 725 + }, + { + "epoch": 0.4, + "learning_rate": 4.77984103361916e-05, + "loss": 0.9983, + "step": 730 + }, + { + "epoch": 0.41, + "learning_rate": 4.776860665798816e-05, + "loss": 1.0219, + "step": 735 + }, + { + "epoch": 0.41, + "learning_rate": 4.773861202236257e-05, + "loss": 0.9963, + "step": 740 + }, + { + "epoch": 0.41, + "learning_rate": 4.770842668087602e-05, + "loss": 1.0219, + "step": 745 + }, + { + "epoch": 0.41, + "learning_rate": 4.767805088668916e-05, + "loss": 1.0029, + "step": 750 + }, + { + "epoch": 0.42, + "learning_rate": 4.7647484894559936e-05, + "loss": 1.0077, + "step": 755 + }, + { + "epoch": 0.42, + "learning_rate": 4.7616728960841444e-05, + "loss": 0.9908, + "step": 760 + }, + { + "epoch": 0.42, + "learning_rate": 4.758578334347981e-05, + "loss": 1.0092, + "step": 765 + }, + { + "epoch": 0.43, + "learning_rate": 4.7554648302012015e-05, + "loss": 0.9874, + "step": 770 + }, + { + "epoch": 0.43, + "learning_rate": 4.7523324097563706e-05, + "loss": 1.0185, + "step": 775 + }, + { + "epoch": 0.43, + "learning_rate": 4.749181099284703e-05, + "loss": 1.0533, + "step": 780 + }, + { + "epoch": 0.43, + "learning_rate": 4.746010925215839e-05, + "loss": 1.0083, + "step": 785 + }, + { + "epoch": 0.44, + "learning_rate": 4.74282191413763e-05, + "loss": 1.0191, + "step": 790 + }, + { + "epoch": 0.44, + "learning_rate": 4.7396140927959045e-05, + "loss": 0.9786, + "step": 795 + }, + { + "epoch": 0.44, + "learning_rate": 4.7363874880942574e-05, + "loss": 1.0197, + "step": 800 + }, + { + "epoch": 0.45, + "learning_rate": 4.733142127093813e-05, + "loss": 1.0471, + "step": 805 + }, + { + "epoch": 0.45, + "learning_rate": 4.7298780370130014e-05, + "loss": 1.0134, + "step": 810 + }, + { + "epoch": 0.45, + "learning_rate": 4.726595245227336e-05, + "loss": 0.9801, + "step": 815 + }, + { + "epoch": 0.45, + "learning_rate": 4.723293779269173e-05, + "loss": 0.9944, + "step": 820 + }, + { + "epoch": 0.46, + "learning_rate": 4.7199736668274924e-05, + "loss": 1.0358, + "step": 825 + }, + { + "epoch": 0.46, + "learning_rate": 4.716634935747655e-05, + "loss": 0.9907, + "step": 830 + }, + { + "epoch": 0.46, + "learning_rate": 4.713277614031177e-05, + "loss": 1.0698, + "step": 835 + }, + { + "epoch": 0.46, + "learning_rate": 4.70990172983549e-05, + "loss": 1.0466, + "step": 840 + }, + { + "epoch": 0.47, + "learning_rate": 4.706507311473707e-05, + "loss": 0.9489, + "step": 845 + }, + { + "epoch": 0.47, + "learning_rate": 4.703094387414385e-05, + "loss": 0.9936, + "step": 850 + }, + { + "epoch": 0.47, + "learning_rate": 4.699662986281288e-05, + "loss": 0.9672, + "step": 855 + }, + { + "epoch": 0.48, + "learning_rate": 4.696213136853141e-05, + "loss": 1.0164, + "step": 860 + }, + { + "epoch": 0.48, + "learning_rate": 4.6927448680633954e-05, + "loss": 0.9817, + "step": 865 + }, + { + "epoch": 0.48, + "learning_rate": 4.689258208999983e-05, + "loss": 1.028, + "step": 870 + }, + { + "epoch": 0.48, + "learning_rate": 4.6857531889050716e-05, + "loss": 1.0239, + "step": 875 + }, + { + "epoch": 0.49, + "learning_rate": 4.682229837174821e-05, + "loss": 1.0378, + "step": 880 + }, + { + "epoch": 0.49, + "learning_rate": 4.678688183359135e-05, + "loss": 0.9908, + "step": 885 + }, + { + "epoch": 0.49, + "learning_rate": 4.675128257161418e-05, + "loss": 0.9384, + "step": 890 + }, + { + "epoch": 0.49, + "learning_rate": 4.671550088438319e-05, + "loss": 0.9481, + "step": 895 + }, + { + "epoch": 0.5, + "learning_rate": 4.6679537071994874e-05, + "loss": 0.9541, + "step": 900 + }, + { + "epoch": 0.5, + "learning_rate": 4.6643391436073165e-05, + "loss": 0.9787, + "step": 905 + }, + { + "epoch": 0.5, + "learning_rate": 4.660706427976693e-05, + "loss": 0.9897, + "step": 910 + }, + { + "epoch": 0.51, + "learning_rate": 4.657055590774745e-05, + "loss": 0.9947, + "step": 915 + }, + { + "epoch": 0.51, + "learning_rate": 4.6533866626205805e-05, + "loss": 0.9614, + "step": 920 + }, + { + "epoch": 0.51, + "learning_rate": 4.649699674285036e-05, + "loss": 1.0323, + "step": 925 + }, + { + "epoch": 0.51, + "learning_rate": 4.645994656690417e-05, + "loss": 1.0345, + "step": 930 + }, + { + "epoch": 0.52, + "learning_rate": 4.642271640910235e-05, + "loss": 1.0432, + "step": 935 + }, + { + "epoch": 0.52, + "learning_rate": 4.638530658168954e-05, + "loss": 1.0102, + "step": 940 + }, + { + "epoch": 0.52, + "learning_rate": 4.6347717398417203e-05, + "loss": 1.0361, + "step": 945 + }, + { + "epoch": 0.53, + "learning_rate": 4.6309949174541096e-05, + "loss": 0.9818, + "step": 950 + }, + { + "epoch": 0.53, + "learning_rate": 4.627200222681851e-05, + "loss": 1.0293, + "step": 955 + }, + { + "epoch": 0.53, + "learning_rate": 4.6233876873505694e-05, + "loss": 1.0125, + "step": 960 + }, + { + "epoch": 0.53, + "learning_rate": 4.619557343435516e-05, + "loss": 1.0039, + "step": 965 + }, + { + "epoch": 0.54, + "learning_rate": 4.615709223061302e-05, + "loss": 1.0146, + "step": 970 + }, + { + "epoch": 0.54, + "learning_rate": 4.611843358501624e-05, + "loss": 1.0352, + "step": 975 + }, + { + "epoch": 0.54, + "learning_rate": 4.6079597821789993e-05, + "loss": 1.0405, + "step": 980 + }, + { + "epoch": 0.54, + "learning_rate": 4.604058526664491e-05, + "loss": 0.9834, + "step": 985 + }, + { + "epoch": 0.55, + "learning_rate": 4.600139624677436e-05, + "loss": 1.0252, + "step": 990 + }, + { + "epoch": 0.55, + "learning_rate": 4.596203109085168e-05, + "loss": 1.0516, + "step": 995 + }, + { + "epoch": 0.55, + "learning_rate": 4.5922490129027464e-05, + "loss": 0.9762, + "step": 1000 + }, + { + "epoch": 0.56, + "learning_rate": 4.588277369292674e-05, + "loss": 0.9801, + "step": 1005 + }, + { + "epoch": 0.56, + "learning_rate": 4.5842882115646234e-05, + "loss": 1.0127, + "step": 1010 + }, + { + "epoch": 0.56, + "learning_rate": 4.580281573175157e-05, + "loss": 1.0415, + "step": 1015 + }, + { + "epoch": 0.56, + "learning_rate": 4.576257487727442e-05, + "loss": 0.9974, + "step": 1020 + }, + { + "epoch": 0.57, + "learning_rate": 4.572215988970974e-05, + "loss": 1.0097, + "step": 1025 + }, + { + "epoch": 0.57, + "learning_rate": 4.568157110801293e-05, + "loss": 1.0004, + "step": 1030 + }, + { + "epoch": 0.57, + "learning_rate": 4.5640808872596944e-05, + "loss": 0.9932, + "step": 1035 + }, + { + "epoch": 0.58, + "learning_rate": 4.5599873525329505e-05, + "loss": 0.9437, + "step": 1040 + }, + { + "epoch": 0.58, + "learning_rate": 4.555876540953019e-05, + "loss": 1.0004, + "step": 1045 + }, + { + "epoch": 0.58, + "learning_rate": 4.551748486996755e-05, + "loss": 0.9743, + "step": 1050 + }, + { + "epoch": 0.58, + "learning_rate": 4.547603225285626e-05, + "loss": 1.0303, + "step": 1055 + }, + { + "epoch": 0.59, + "learning_rate": 4.543440790585417e-05, + "loss": 1.0225, + "step": 1060 + }, + { + "epoch": 0.59, + "learning_rate": 4.539261217805939e-05, + "loss": 1.0102, + "step": 1065 + }, + { + "epoch": 0.59, + "learning_rate": 4.535064542000743e-05, + "loss": 1.0087, + "step": 1070 + }, + { + "epoch": 0.59, + "learning_rate": 4.5308507983668165e-05, + "loss": 0.952, + "step": 1075 + }, + { + "epoch": 0.6, + "learning_rate": 4.526620022244293e-05, + "loss": 0.9767, + "step": 1080 + }, + { + "epoch": 0.6, + "learning_rate": 4.522372249116158e-05, + "loss": 1.0049, + "step": 1085 + }, + { + "epoch": 0.6, + "learning_rate": 4.5181075146079456e-05, + "loss": 1.0397, + "step": 1090 + }, + { + "epoch": 0.61, + "learning_rate": 4.5138258544874455e-05, + "loss": 1.0167, + "step": 1095 + }, + { + "epoch": 0.61, + "learning_rate": 4.5095273046643985e-05, + "loss": 0.9966, + "step": 1100 + }, + { + "epoch": 0.61, + "learning_rate": 4.5052119011901986e-05, + "loss": 0.983, + "step": 1105 + }, + { + "epoch": 0.61, + "learning_rate": 4.500879680257587e-05, + "loss": 1.0301, + "step": 1110 + }, + { + "epoch": 0.62, + "learning_rate": 4.4965306782003535e-05, + "loss": 1.0371, + "step": 1115 + }, + { + "epoch": 0.62, + "learning_rate": 4.492164931493028e-05, + "loss": 1.0083, + "step": 1120 + }, + { + "epoch": 0.62, + "learning_rate": 4.487782476750575e-05, + "loss": 0.9728, + "step": 1125 + }, + { + "epoch": 0.62, + "learning_rate": 4.4833833507280884e-05, + "loss": 0.9892, + "step": 1130 + }, + { + "epoch": 0.63, + "learning_rate": 4.4789675903204805e-05, + "loss": 0.9847, + "step": 1135 + }, + { + "epoch": 0.63, + "learning_rate": 4.474535232562176e-05, + "loss": 1.0545, + "step": 1140 + }, + { + "epoch": 0.63, + "learning_rate": 4.470086314626797e-05, + "loss": 1.0204, + "step": 1145 + }, + { + "epoch": 0.64, + "learning_rate": 4.465620873826856e-05, + "loss": 0.9648, + "step": 1150 + }, + { + "epoch": 0.64, + "learning_rate": 4.46113894761344e-05, + "loss": 0.9545, + "step": 1155 + }, + { + "epoch": 0.64, + "learning_rate": 4.456640573575896e-05, + "loss": 1.0256, + "step": 1160 + }, + { + "epoch": 0.64, + "learning_rate": 4.4521257894415183e-05, + "loss": 0.99, + "step": 1165 + }, + { + "epoch": 0.65, + "learning_rate": 4.44759463307523e-05, + "loss": 0.9906, + "step": 1170 + }, + { + "epoch": 0.65, + "learning_rate": 4.443047142479266e-05, + "loss": 0.9876, + "step": 1175 + }, + { + "epoch": 0.65, + "learning_rate": 4.4384833557928553e-05, + "loss": 1.0495, + "step": 1180 + }, + { + "epoch": 0.66, + "learning_rate": 4.4339033112918966e-05, + "loss": 0.9869, + "step": 1185 + }, + { + "epoch": 0.66, + "learning_rate": 4.4293070473886456e-05, + "loss": 1.0299, + "step": 1190 + }, + { + "epoch": 0.66, + "learning_rate": 4.424694602631385e-05, + "loss": 1.0073, + "step": 1195 + }, + { + "epoch": 0.66, + "learning_rate": 4.420066015704105e-05, + "loss": 1.0023, + "step": 1200 + }, + { + "epoch": 0.67, + "learning_rate": 4.41542132542618e-05, + "loss": 0.9462, + "step": 1205 + }, + { + "epoch": 0.67, + "learning_rate": 4.410760570752037e-05, + "loss": 1.0116, + "step": 1210 + }, + { + "epoch": 0.67, + "learning_rate": 4.4060837907708375e-05, + "loss": 0.9652, + "step": 1215 + }, + { + "epoch": 0.67, + "learning_rate": 4.401391024706142e-05, + "loss": 1.0411, + "step": 1220 + }, + { + "epoch": 0.68, + "learning_rate": 4.396682311915586e-05, + "loss": 0.9691, + "step": 1225 + }, + { + "epoch": 0.68, + "learning_rate": 4.3919576918905495e-05, + "loss": 0.961, + "step": 1230 + }, + { + "epoch": 0.68, + "learning_rate": 4.387217204255819e-05, + "loss": 0.9602, + "step": 1235 + }, + { + "epoch": 0.69, + "learning_rate": 4.3824608887692666e-05, + "loss": 1.0197, + "step": 1240 + }, + { + "epoch": 0.69, + "learning_rate": 4.377688785321507e-05, + "loss": 0.9982, + "step": 1245 + }, + { + "epoch": 0.69, + "learning_rate": 4.372900933935569e-05, + "loss": 0.986, + "step": 1250 + }, + { + "epoch": 0.69, + "learning_rate": 4.368097374766556e-05, + "loss": 0.9744, + "step": 1255 + }, + { + "epoch": 0.7, + "learning_rate": 4.3632781481013105e-05, + "loss": 1.0078, + "step": 1260 + }, + { + "epoch": 0.7, + "learning_rate": 4.358443294358077e-05, + "loss": 0.9865, + "step": 1265 + }, + { + "epoch": 0.7, + "learning_rate": 4.35359285408616e-05, + "loss": 0.976, + "step": 1270 + }, + { + "epoch": 0.71, + "learning_rate": 4.348726867965591e-05, + "loss": 1.0141, + "step": 1275 + }, + { + "epoch": 0.71, + "learning_rate": 4.343845376806777e-05, + "loss": 0.952, + "step": 1280 + }, + { + "epoch": 0.71, + "learning_rate": 4.338948421550169e-05, + "loss": 1.0239, + "step": 1285 + }, + { + "epoch": 0.71, + "learning_rate": 4.334036043265909e-05, + "loss": 0.994, + "step": 1290 + }, + { + "epoch": 0.72, + "learning_rate": 4.329108283153492e-05, + "loss": 1.0505, + "step": 1295 + }, + { + "epoch": 0.72, + "learning_rate": 4.3241651825414195e-05, + "loss": 0.9919, + "step": 1300 + }, + { + "epoch": 0.72, + "learning_rate": 4.31920678288685e-05, + "loss": 1.0169, + "step": 1305 + }, + { + "epoch": 0.72, + "learning_rate": 4.3142331257752546e-05, + "loss": 0.9938, + "step": 1310 + }, + { + "epoch": 0.73, + "learning_rate": 4.309244252920064e-05, + "loss": 0.92, + "step": 1315 + }, + { + "epoch": 0.73, + "learning_rate": 4.304240206162326e-05, + "loss": 0.9537, + "step": 1320 + }, + { + "epoch": 0.73, + "learning_rate": 4.299221027470345e-05, + "loss": 0.996, + "step": 1325 + }, + { + "epoch": 0.74, + "learning_rate": 4.294186758939339e-05, + "loss": 0.9652, + "step": 1330 + }, + { + "epoch": 0.74, + "learning_rate": 4.2891374427910795e-05, + "loss": 1.0138, + "step": 1335 + }, + { + "epoch": 0.74, + "learning_rate": 4.284073121373544e-05, + "loss": 0.9267, + "step": 1340 + }, + { + "epoch": 0.74, + "learning_rate": 4.278993837160553e-05, + "loss": 0.9899, + "step": 1345 + }, + { + "epoch": 0.75, + "learning_rate": 4.273899632751422e-05, + "loss": 0.963, + "step": 1350 + }, + { + "epoch": 0.75, + "learning_rate": 4.2687905508705974e-05, + "loss": 1.0066, + "step": 1355 + }, + { + "epoch": 0.75, + "learning_rate": 4.263666634367303e-05, + "loss": 0.9939, + "step": 1360 + }, + { + "epoch": 0.75, + "learning_rate": 4.258527926215178e-05, + "loss": 0.9667, + "step": 1365 + }, + { + "epoch": 0.76, + "learning_rate": 4.253374469511917e-05, + "loss": 0.9999, + "step": 1370 + }, + { + "epoch": 0.76, + "learning_rate": 4.248206307478909e-05, + "loss": 0.9738, + "step": 1375 + }, + { + "epoch": 0.76, + "learning_rate": 4.243023483460875e-05, + "loss": 0.9829, + "step": 1380 + }, + { + "epoch": 0.77, + "learning_rate": 4.237826040925503e-05, + "loss": 1.0107, + "step": 1385 + }, + { + "epoch": 0.77, + "learning_rate": 4.232614023463088e-05, + "loss": 0.9992, + "step": 1390 + }, + { + "epoch": 0.77, + "learning_rate": 4.227387474786159e-05, + "loss": 0.9566, + "step": 1395 + }, + { + "epoch": 0.77, + "learning_rate": 4.222146438729119e-05, + "loss": 0.9722, + "step": 1400 + }, + { + "epoch": 0.78, + "learning_rate": 4.216890959247873e-05, + "loss": 0.988, + "step": 1405 + }, + { + "epoch": 0.78, + "learning_rate": 4.211621080419463e-05, + "loss": 1.0103, + "step": 1410 + }, + { + "epoch": 0.78, + "learning_rate": 4.206336846441695e-05, + "loss": 0.9805, + "step": 1415 + }, + { + "epoch": 0.79, + "learning_rate": 4.201038301632772e-05, + "loss": 1.0177, + "step": 1420 + }, + { + "epoch": 0.79, + "learning_rate": 4.195725490430917e-05, + "loss": 0.9938, + "step": 1425 + }, + { + "epoch": 0.79, + "learning_rate": 4.190398457394007e-05, + "loss": 1.0276, + "step": 1430 + }, + { + "epoch": 0.79, + "learning_rate": 4.1850572471991924e-05, + "loss": 0.9909, + "step": 1435 + }, + { + "epoch": 0.8, + "learning_rate": 4.1797019046425264e-05, + "loss": 0.9794, + "step": 1440 + }, + { + "epoch": 0.8, + "learning_rate": 4.1743324746385914e-05, + "loss": 1.0073, + "step": 1445 + }, + { + "epoch": 0.8, + "learning_rate": 4.1689490022201154e-05, + "loss": 1.0106, + "step": 1450 + }, + { + "epoch": 0.8, + "learning_rate": 4.163551532537601e-05, + "loss": 1.0241, + "step": 1455 + }, + { + "epoch": 0.81, + "learning_rate": 4.1581401108589425e-05, + "loss": 1.0178, + "step": 1460 + }, + { + "epoch": 0.81, + "learning_rate": 4.1527147825690495e-05, + "loss": 0.9426, + "step": 1465 + }, + { + "epoch": 0.81, + "learning_rate": 4.1472755931694626e-05, + "loss": 0.9642, + "step": 1470 + }, + { + "epoch": 0.82, + "learning_rate": 4.141822588277976e-05, + "loss": 0.9678, + "step": 1475 + }, + { + "epoch": 0.82, + "learning_rate": 4.136355813628251e-05, + "loss": 0.9654, + "step": 1480 + }, + { + "epoch": 0.82, + "learning_rate": 4.130875315069435e-05, + "loss": 0.9748, + "step": 1485 + }, + { + "epoch": 0.82, + "learning_rate": 4.125381138565775e-05, + "loss": 0.9321, + "step": 1490 + }, + { + "epoch": 0.83, + "learning_rate": 4.1198733301962346e-05, + "loss": 0.9885, + "step": 1495 + }, + { + "epoch": 0.83, + "learning_rate": 4.114351936154105e-05, + "loss": 0.9385, + "step": 1500 + }, + { + "epoch": 0.83, + "learning_rate": 4.108817002746619e-05, + "loss": 0.9362, + "step": 1505 + }, + { + "epoch": 0.83, + "learning_rate": 4.1032685763945625e-05, + "loss": 0.9764, + "step": 1510 + }, + { + "epoch": 0.84, + "learning_rate": 4.097706703631886e-05, + "loss": 0.961, + "step": 1515 + }, + { + "epoch": 0.84, + "learning_rate": 4.092131431105312e-05, + "loss": 0.9818, + "step": 1520 + }, + { + "epoch": 0.84, + "learning_rate": 4.086542805573945e-05, + "loss": 0.9855, + "step": 1525 + }, + { + "epoch": 0.85, + "learning_rate": 4.080940873908881e-05, + "loss": 0.9618, + "step": 1530 + }, + { + "epoch": 0.85, + "learning_rate": 4.07532568309281e-05, + "loss": 1.0102, + "step": 1535 + }, + { + "epoch": 0.85, + "learning_rate": 4.069697280219628e-05, + "loss": 0.9773, + "step": 1540 + }, + { + "epoch": 0.85, + "learning_rate": 4.0640557124940376e-05, + "loss": 0.9687, + "step": 1545 + }, + { + "epoch": 0.86, + "learning_rate": 4.058401027231152e-05, + "loss": 0.9818, + "step": 1550 + }, + { + "epoch": 0.86, + "learning_rate": 4.052733271856103e-05, + "loss": 0.9911, + "step": 1555 + }, + { + "epoch": 0.86, + "learning_rate": 4.0470524939036355e-05, + "loss": 0.9694, + "step": 1560 + }, + { + "epoch": 0.87, + "learning_rate": 4.0413587410177155e-05, + "loss": 0.9374, + "step": 1565 + }, + { + "epoch": 0.87, + "learning_rate": 4.035652060951128e-05, + "loss": 0.956, + "step": 1570 + }, + { + "epoch": 0.87, + "learning_rate": 4.0299325015650774e-05, + "loss": 0.9491, + "step": 1575 + }, + { + "epoch": 0.87, + "learning_rate": 4.024200110828783e-05, + "loss": 0.9569, + "step": 1580 + }, + { + "epoch": 0.88, + "learning_rate": 4.018454936819082e-05, + "loss": 0.9627, + "step": 1585 + }, + { + "epoch": 0.88, + "learning_rate": 4.012697027720018e-05, + "loss": 0.9922, + "step": 1590 + }, + { + "epoch": 0.88, + "learning_rate": 4.0069264318224506e-05, + "loss": 0.9703, + "step": 1595 + }, + { + "epoch": 0.88, + "learning_rate": 4.0011431975236337e-05, + "loss": 0.9436, + "step": 1600 + }, + { + "epoch": 0.89, + "learning_rate": 3.995347373326822e-05, + "loss": 0.961, + "step": 1605 + }, + { + "epoch": 0.89, + "learning_rate": 3.989539007840861e-05, + "loss": 0.9247, + "step": 1610 + }, + { + "epoch": 0.89, + "learning_rate": 3.983718149779775e-05, + "loss": 0.9537, + "step": 1615 + }, + { + "epoch": 0.9, + "learning_rate": 3.9778848479623656e-05, + "loss": 0.9415, + "step": 1620 + }, + { + "epoch": 0.9, + "learning_rate": 3.972039151311795e-05, + "loss": 0.9954, + "step": 1625 + }, + { + "epoch": 0.9, + "learning_rate": 3.966181108855183e-05, + "loss": 0.9451, + "step": 1630 + }, + { + "epoch": 0.9, + "learning_rate": 3.960310769723189e-05, + "loss": 0.9977, + "step": 1635 + }, + { + "epoch": 0.91, + "learning_rate": 3.9544281831496034e-05, + "loss": 0.9806, + "step": 1640 + }, + { + "epoch": 0.91, + "learning_rate": 3.9485333984709374e-05, + "loss": 0.9851, + "step": 1645 + }, + { + "epoch": 0.91, + "learning_rate": 3.942626465126001e-05, + "loss": 1.0089, + "step": 1650 + }, + { + "epoch": 0.92, + "learning_rate": 3.9367074326555e-05, + "loss": 0.9562, + "step": 1655 + }, + { + "epoch": 0.92, + "learning_rate": 3.930776350701609e-05, + "loss": 0.9892, + "step": 1660 + }, + { + "epoch": 0.92, + "learning_rate": 3.92483326900756e-05, + "loss": 0.9875, + "step": 1665 + }, + { + "epoch": 0.92, + "learning_rate": 3.91887823741723e-05, + "loss": 1.0012, + "step": 1670 + }, + { + "epoch": 0.93, + "learning_rate": 3.9129113058747136e-05, + "loss": 0.97, + "step": 1675 + }, + { + "epoch": 0.93, + "learning_rate": 3.9069325244239095e-05, + "loss": 0.9793, + "step": 1680 + }, + { + "epoch": 0.93, + "learning_rate": 3.900941943208103e-05, + "loss": 0.9864, + "step": 1685 + }, + { + "epoch": 0.93, + "learning_rate": 3.894939612469539e-05, + "loss": 0.9596, + "step": 1690 + }, + { + "epoch": 0.94, + "learning_rate": 3.888925582549006e-05, + "loss": 0.9679, + "step": 1695 + }, + { + "epoch": 0.94, + "learning_rate": 3.882899903885412e-05, + "loss": 0.9395, + "step": 1700 + }, + { + "epoch": 0.94, + "learning_rate": 3.876862627015361e-05, + "loss": 0.9774, + "step": 1705 + }, + { + "epoch": 0.95, + "learning_rate": 3.87081380257273e-05, + "loss": 0.9686, + "step": 1710 + }, + { + "epoch": 0.95, + "learning_rate": 3.864753481288244e-05, + "loss": 0.953, + "step": 1715 + }, + { + "epoch": 0.95, + "learning_rate": 3.8586817139890515e-05, + "loss": 0.9719, + "step": 1720 + }, + { + "epoch": 0.95, + "learning_rate": 3.852598551598294e-05, + "loss": 0.9748, + "step": 1725 + }, + { + "epoch": 0.96, + "learning_rate": 3.8465040451346874e-05, + "loss": 0.9741, + "step": 1730 + }, + { + "epoch": 0.96, + "learning_rate": 3.8403982457120836e-05, + "loss": 0.9747, + "step": 1735 + }, + { + "epoch": 0.96, + "learning_rate": 3.834281204539051e-05, + "loss": 0.9791, + "step": 1740 + }, + { + "epoch": 0.96, + "learning_rate": 3.828152972918438e-05, + "loss": 0.9704, + "step": 1745 + }, + { + "epoch": 0.97, + "learning_rate": 3.82201360224695e-05, + "loss": 0.9556, + "step": 1750 + }, + { + "epoch": 0.97, + "learning_rate": 3.815863144014711e-05, + "loss": 0.9552, + "step": 1755 + }, + { + "epoch": 0.97, + "learning_rate": 3.809701649804834e-05, + "loss": 0.9594, + "step": 1760 + }, + { + "epoch": 0.98, + "learning_rate": 3.8035291712929926e-05, + "loss": 0.959, + "step": 1765 + }, + { + "epoch": 0.98, + "learning_rate": 3.797345760246982e-05, + "loss": 1.0254, + "step": 1770 + }, + { + "epoch": 0.98, + "learning_rate": 3.791151468526289e-05, + "loss": 0.9925, + "step": 1775 + }, + { + "epoch": 0.98, + "learning_rate": 3.784946348081654e-05, + "loss": 0.9516, + "step": 1780 + }, + { + "epoch": 0.99, + "learning_rate": 3.7787304509546365e-05, + "loss": 0.954, + "step": 1785 + }, + { + "epoch": 0.99, + "learning_rate": 3.7725038292771774e-05, + "loss": 0.9465, + "step": 1790 + }, + { + "epoch": 0.99, + "learning_rate": 3.766266535271167e-05, + "loss": 0.9792, + "step": 1795 + }, + { + "epoch": 1.0, + "learning_rate": 3.760018621248e-05, + "loss": 0.964, + "step": 1800 + }, + { + "epoch": 1.0, + "learning_rate": 3.75376013960814e-05, + "loss": 0.9419, + "step": 1805 + }, + { + "epoch": 1.0, + "learning_rate": 3.747491142840681e-05, + "loss": 0.9818, + "step": 1810 + }, + { + "epoch": 1.0, + "learning_rate": 3.741211683522904e-05, + "loss": 0.9153, + "step": 1815 + }, + { + "epoch": 1.01, + "learning_rate": 3.734921814319841e-05, + "loss": 0.955, + "step": 1820 + }, + { + "epoch": 1.01, + "learning_rate": 3.728621587983828e-05, + "loss": 0.9467, + "step": 1825 + }, + { + "epoch": 1.01, + "learning_rate": 3.722311057354067e-05, + "loss": 0.9816, + "step": 1830 + }, + { + "epoch": 1.01, + "learning_rate": 3.715990275356178e-05, + "loss": 0.9727, + "step": 1835 + }, + { + "epoch": 1.02, + "learning_rate": 3.7096592950017617e-05, + "loss": 0.9822, + "step": 1840 + }, + { + "epoch": 1.02, + "learning_rate": 3.703318169387947e-05, + "loss": 0.942, + "step": 1845 + }, + { + "epoch": 1.02, + "learning_rate": 3.696966951696952e-05, + "loss": 0.9306, + "step": 1850 + }, + { + "epoch": 1.03, + "learning_rate": 3.690605695195637e-05, + "loss": 0.9654, + "step": 1855 + }, + { + "epoch": 1.03, + "learning_rate": 3.684234453235054e-05, + "loss": 0.9592, + "step": 1860 + }, + { + "epoch": 1.03, + "learning_rate": 3.677853279250003e-05, + "loss": 0.988, + "step": 1865 + }, + { + "epoch": 1.03, + "learning_rate": 3.671462226758583e-05, + "loss": 0.9462, + "step": 1870 + }, + { + "epoch": 1.04, + "learning_rate": 3.665061349361742e-05, + "loss": 0.9685, + "step": 1875 + }, + { + "epoch": 1.04, + "learning_rate": 3.658650700742828e-05, + "loss": 0.9772, + "step": 1880 + }, + { + "epoch": 1.04, + "learning_rate": 3.6522303346671404e-05, + "loss": 0.9482, + "step": 1885 + }, + { + "epoch": 1.05, + "learning_rate": 3.645800304981477e-05, + "loss": 1.0069, + "step": 1890 + }, + { + "epoch": 1.05, + "learning_rate": 3.639360665613683e-05, + "loss": 0.9003, + "step": 1895 + }, + { + "epoch": 1.05, + "learning_rate": 3.632911470572197e-05, + "loss": 0.9279, + "step": 1900 + }, + { + "epoch": 1.05, + "learning_rate": 3.626452773945603e-05, + "loss": 0.9237, + "step": 1905 + }, + { + "epoch": 1.06, + "learning_rate": 3.619984629902172e-05, + "loss": 1.0086, + "step": 1910 + }, + { + "epoch": 1.06, + "learning_rate": 3.613507092689409e-05, + "loss": 0.9625, + "step": 1915 + }, + { + "epoch": 1.06, + "learning_rate": 3.607020216633599e-05, + "loss": 0.9297, + "step": 1920 + }, + { + "epoch": 1.06, + "learning_rate": 3.60052405613935e-05, + "loss": 0.9718, + "step": 1925 + }, + { + "epoch": 1.07, + "learning_rate": 3.594018665689139e-05, + "loss": 0.9512, + "step": 1930 + }, + { + "epoch": 1.07, + "learning_rate": 3.5875040998428513e-05, + "loss": 0.9923, + "step": 1935 + }, + { + "epoch": 1.07, + "learning_rate": 3.5809804132373253e-05, + "loss": 0.9518, + "step": 1940 + }, + { + "epoch": 1.08, + "learning_rate": 3.574447660585897e-05, + "loss": 0.9324, + "step": 1945 + }, + { + "epoch": 1.08, + "learning_rate": 3.5679058966779344e-05, + "loss": 0.9724, + "step": 1950 + }, + { + "epoch": 1.08, + "learning_rate": 3.561355176378384e-05, + "loss": 0.978, + "step": 1955 + }, + { + "epoch": 1.08, + "learning_rate": 3.554795554627307e-05, + "loss": 0.9893, + "step": 1960 + }, + { + "epoch": 1.09, + "learning_rate": 3.548227086439422e-05, + "loss": 0.967, + "step": 1965 + }, + { + "epoch": 1.09, + "learning_rate": 3.541649826903639e-05, + "loss": 0.9529, + "step": 1970 + }, + { + "epoch": 1.09, + "learning_rate": 3.535063831182602e-05, + "loss": 0.9506, + "step": 1975 + }, + { + "epoch": 1.09, + "learning_rate": 3.528469154512224e-05, + "loss": 0.9525, + "step": 1980 + }, + { + "epoch": 1.1, + "learning_rate": 3.521865852201223e-05, + "loss": 0.9258, + "step": 1985 + }, + { + "epoch": 1.1, + "learning_rate": 3.5152539796306596e-05, + "loss": 0.9417, + "step": 1990 + }, + { + "epoch": 1.1, + "learning_rate": 3.508633592253472e-05, + "loss": 0.9465, + "step": 1995 + }, + { + "epoch": 1.11, + "learning_rate": 3.502004745594011e-05, + "loss": 0.9494, + "step": 2000 + }, + { + "epoch": 1.11, + "learning_rate": 3.4953674952475755e-05, + "loss": 0.9709, + "step": 2005 + }, + { + "epoch": 1.11, + "learning_rate": 3.488721896879943e-05, + "loss": 0.9581, + "step": 2010 + }, + { + "epoch": 1.11, + "learning_rate": 3.4820680062269074e-05, + "loss": 0.974, + "step": 2015 + }, + { + "epoch": 1.12, + "learning_rate": 3.4754058790938046e-05, + "loss": 0.9768, + "step": 2020 + }, + { + "epoch": 1.12, + "learning_rate": 3.468735571355055e-05, + "loss": 0.982, + "step": 2025 + }, + { + "epoch": 1.12, + "learning_rate": 3.4620571389536825e-05, + "loss": 0.984, + "step": 2030 + }, + { + "epoch": 1.13, + "learning_rate": 3.455370637900856e-05, + "loss": 0.9604, + "step": 2035 + }, + { + "epoch": 1.13, + "learning_rate": 3.448676124275414e-05, + "loss": 0.897, + "step": 2040 + }, + { + "epoch": 1.13, + "learning_rate": 3.4419736542233925e-05, + "loss": 0.9968, + "step": 2045 + }, + { + "epoch": 1.13, + "learning_rate": 3.4352632839575616e-05, + "loss": 0.9479, + "step": 2050 + }, + { + "epoch": 1.14, + "learning_rate": 3.428545069756946e-05, + "loss": 0.9724, + "step": 2055 + }, + { + "epoch": 1.14, + "learning_rate": 3.42181906796636e-05, + "loss": 0.9493, + "step": 2060 + }, + { + "epoch": 1.14, + "learning_rate": 3.415085334995927e-05, + "loss": 0.9348, + "step": 2065 + }, + { + "epoch": 1.14, + "learning_rate": 3.408343927320613e-05, + "loss": 0.9702, + "step": 2070 + }, + { + "epoch": 1.15, + "learning_rate": 3.401594901479753e-05, + "loss": 0.9089, + "step": 2075 + }, + { + "epoch": 1.15, + "learning_rate": 3.394838314076572e-05, + "loss": 0.9606, + "step": 2080 + }, + { + "epoch": 1.15, + "learning_rate": 3.3880742217777115e-05, + "loss": 0.9743, + "step": 2085 + }, + { + "epoch": 1.16, + "learning_rate": 3.381302681312759e-05, + "loss": 0.9469, + "step": 2090 + }, + { + "epoch": 1.16, + "learning_rate": 3.374523749473767e-05, + "loss": 0.949, + "step": 2095 + }, + { + "epoch": 1.16, + "learning_rate": 3.367737483114779e-05, + "loss": 0.9567, + "step": 2100 + }, + { + "epoch": 1.16, + "learning_rate": 3.360943939151351e-05, + "loss": 0.9718, + "step": 2105 + }, + { + "epoch": 1.17, + "learning_rate": 3.354143174560078e-05, + "loss": 0.9626, + "step": 2110 + }, + { + "epoch": 1.17, + "learning_rate": 3.3473352463781105e-05, + "loss": 0.9346, + "step": 2115 + }, + { + "epoch": 1.17, + "learning_rate": 3.340520211702681e-05, + "loss": 0.9208, + "step": 2120 + }, + { + "epoch": 1.18, + "learning_rate": 3.333698127690623e-05, + "loss": 0.9856, + "step": 2125 + }, + { + "epoch": 1.18, + "learning_rate": 3.326869051557891e-05, + "loss": 0.9049, + "step": 2130 + }, + { + "epoch": 1.18, + "learning_rate": 3.320033040579082e-05, + "loss": 0.9222, + "step": 2135 + }, + { + "epoch": 1.18, + "learning_rate": 3.3131901520869565e-05, + "loss": 0.9648, + "step": 2140 + }, + { + "epoch": 1.19, + "learning_rate": 3.306340443471951e-05, + "loss": 0.9538, + "step": 2145 + }, + { + "epoch": 1.19, + "learning_rate": 3.299483972181708e-05, + "loss": 0.9314, + "step": 2150 + }, + { + "epoch": 1.19, + "learning_rate": 3.292620795720583e-05, + "loss": 0.9576, + "step": 2155 + }, + { + "epoch": 1.19, + "learning_rate": 3.285750971649167e-05, + "loss": 0.9499, + "step": 2160 + }, + { + "epoch": 1.2, + "learning_rate": 3.278874557583807e-05, + "loss": 0.9568, + "step": 2165 + }, + { + "epoch": 1.2, + "learning_rate": 3.271991611196117e-05, + "loss": 0.9642, + "step": 2170 + }, + { + "epoch": 1.2, + "learning_rate": 3.265102190212497e-05, + "loss": 0.9526, + "step": 2175 + }, + { + "epoch": 1.21, + "learning_rate": 3.258206352413648e-05, + "loss": 0.933, + "step": 2180 + }, + { + "epoch": 1.21, + "learning_rate": 3.2513041556340887e-05, + "loss": 0.9683, + "step": 2185 + }, + { + "epoch": 1.21, + "learning_rate": 3.244395657761671e-05, + "loss": 0.9155, + "step": 2190 + }, + { + "epoch": 1.21, + "learning_rate": 3.2374809167370924e-05, + "loss": 0.9262, + "step": 2195 + }, + { + "epoch": 1.22, + "learning_rate": 3.230559990553409e-05, + "loss": 0.9778, + "step": 2200 + }, + { + "epoch": 1.22, + "learning_rate": 3.2236329372555544e-05, + "loss": 0.9577, + "step": 2205 + }, + { + "epoch": 1.22, + "learning_rate": 3.2166998149398465e-05, + "loss": 0.9286, + "step": 2210 + }, + { + "epoch": 1.22, + "learning_rate": 3.209760681753505e-05, + "loss": 0.9634, + "step": 2215 + }, + { + "epoch": 1.23, + "learning_rate": 3.2028155958941615e-05, + "loss": 0.9467, + "step": 2220 + }, + { + "epoch": 1.23, + "learning_rate": 3.195864615609373e-05, + "loss": 0.9543, + "step": 2225 + }, + { + "epoch": 1.23, + "learning_rate": 3.1889077991961304e-05, + "loss": 0.9914, + "step": 2230 + }, + { + "epoch": 1.24, + "learning_rate": 3.181945205000373e-05, + "loss": 0.9309, + "step": 2235 + }, + { + "epoch": 1.24, + "learning_rate": 3.1749768914164955e-05, + "loss": 0.9299, + "step": 2240 + }, + { + "epoch": 1.24, + "learning_rate": 3.168002916886864e-05, + "loss": 0.9462, + "step": 2245 + }, + { + "epoch": 1.24, + "learning_rate": 3.1610233399013194e-05, + "loss": 0.948, + "step": 2250 + }, + { + "epoch": 1.25, + "learning_rate": 3.15403821899669e-05, + "loss": 0.9335, + "step": 2255 + }, + { + "epoch": 1.25, + "learning_rate": 3.147047612756302e-05, + "loss": 0.952, + "step": 2260 + }, + { + "epoch": 1.25, + "learning_rate": 3.140051579809484e-05, + "loss": 0.9532, + "step": 2265 + }, + { + "epoch": 1.26, + "learning_rate": 3.133050178831079e-05, + "loss": 0.9853, + "step": 2270 + }, + { + "epoch": 1.26, + "learning_rate": 3.12604346854095e-05, + "loss": 0.8925, + "step": 2275 + }, + { + "epoch": 1.26, + "learning_rate": 3.119031507703491e-05, + "loss": 0.942, + "step": 2280 + }, + { + "epoch": 1.26, + "learning_rate": 3.112014355127129e-05, + "loss": 0.9132, + "step": 2285 + }, + { + "epoch": 1.27, + "learning_rate": 3.104992069663835e-05, + "loss": 0.9335, + "step": 2290 + }, + { + "epoch": 1.27, + "learning_rate": 3.0979647102086273e-05, + "loss": 0.9403, + "step": 2295 + }, + { + "epoch": 1.27, + "learning_rate": 3.090932335699081e-05, + "loss": 0.9246, + "step": 2300 + }, + { + "epoch": 1.27, + "learning_rate": 3.083895005114831e-05, + "loss": 0.912, + "step": 2305 + }, + { + "epoch": 1.28, + "learning_rate": 3.076852777477079e-05, + "loss": 0.9334, + "step": 2310 + }, + { + "epoch": 1.28, + "learning_rate": 3.069805711848096e-05, + "loss": 0.933, + "step": 2315 + }, + { + "epoch": 1.28, + "learning_rate": 3.062753867330729e-05, + "loss": 0.9348, + "step": 2320 + }, + { + "epoch": 1.29, + "learning_rate": 3.055697303067905e-05, + "loss": 0.9997, + "step": 2325 + }, + { + "epoch": 1.29, + "learning_rate": 3.048636078242137e-05, + "loss": 0.9196, + "step": 2330 + }, + { + "epoch": 1.29, + "learning_rate": 3.0415702520750235e-05, + "loss": 0.9735, + "step": 2335 + }, + { + "epoch": 1.29, + "learning_rate": 3.0344998838267525e-05, + "loss": 0.9395, + "step": 2340 + }, + { + "epoch": 1.3, + "learning_rate": 3.0274250327956093e-05, + "loss": 0.9455, + "step": 2345 + }, + { + "epoch": 1.3, + "learning_rate": 3.020345758317474e-05, + "loss": 0.968, + "step": 2350 + }, + { + "epoch": 1.3, + "learning_rate": 3.0132621197653245e-05, + "loss": 0.9403, + "step": 2355 + }, + { + "epoch": 1.3, + "learning_rate": 3.0061741765487418e-05, + "loss": 0.9267, + "step": 2360 + }, + { + "epoch": 1.31, + "learning_rate": 2.9990819881134073e-05, + "loss": 0.9734, + "step": 2365 + }, + { + "epoch": 1.31, + "learning_rate": 2.9919856139406093e-05, + "loss": 0.949, + "step": 2370 + }, + { + "epoch": 1.31, + "learning_rate": 2.9848851135467386e-05, + "loss": 0.9464, + "step": 2375 + }, + { + "epoch": 1.32, + "learning_rate": 2.977780546482794e-05, + "loss": 0.9709, + "step": 2380 + }, + { + "epoch": 1.32, + "learning_rate": 2.9706719723338795e-05, + "loss": 0.9202, + "step": 2385 + }, + { + "epoch": 1.32, + "learning_rate": 2.9635594507187074e-05, + "loss": 0.9625, + "step": 2390 + }, + { + "epoch": 1.32, + "learning_rate": 2.956443041289096e-05, + "loss": 0.9307, + "step": 2395 + }, + { + "epoch": 1.33, + "learning_rate": 2.9493228037294702e-05, + "loss": 0.907, + "step": 2400 + }, + { + "epoch": 1.33, + "learning_rate": 2.9421987977563613e-05, + "loss": 0.9487, + "step": 2405 + }, + { + "epoch": 1.33, + "learning_rate": 2.935071083117907e-05, + "loss": 0.9259, + "step": 2410 + }, + { + "epoch": 1.34, + "learning_rate": 2.9279397195933457e-05, + "loss": 0.9357, + "step": 2415 + }, + { + "epoch": 1.34, + "learning_rate": 2.920804766992521e-05, + "loss": 0.9149, + "step": 2420 + }, + { + "epoch": 1.34, + "learning_rate": 2.9136662851553787e-05, + "loss": 0.9664, + "step": 2425 + }, + { + "epoch": 1.34, + "learning_rate": 2.906524333951461e-05, + "loss": 0.973, + "step": 2430 + }, + { + "epoch": 1.35, + "learning_rate": 2.899378973279409e-05, + "loss": 0.9797, + "step": 2435 + }, + { + "epoch": 1.35, + "learning_rate": 2.892230263066459e-05, + "loss": 0.9604, + "step": 2440 + }, + { + "epoch": 1.35, + "learning_rate": 2.885078263267938e-05, + "loss": 0.9147, + "step": 2445 + }, + { + "epoch": 1.35, + "learning_rate": 2.8779230338667634e-05, + "loss": 0.9827, + "step": 2450 + }, + { + "epoch": 1.36, + "learning_rate": 2.870764634872939e-05, + "loss": 0.9362, + "step": 2455 + }, + { + "epoch": 1.36, + "learning_rate": 2.86360312632305e-05, + "loss": 0.9373, + "step": 2460 + }, + { + "epoch": 1.36, + "learning_rate": 2.8564385682797622e-05, + "loss": 0.955, + "step": 2465 + }, + { + "epoch": 1.37, + "learning_rate": 2.8492710208313177e-05, + "loss": 0.9108, + "step": 2470 + }, + { + "epoch": 1.37, + "learning_rate": 2.8421005440910303e-05, + "loss": 0.9259, + "step": 2475 + }, + { + "epoch": 1.37, + "learning_rate": 2.8349271981967797e-05, + "loss": 0.9333, + "step": 2480 + }, + { + "epoch": 1.37, + "learning_rate": 2.8277510433105102e-05, + "loss": 0.9437, + "step": 2485 + }, + { + "epoch": 1.38, + "learning_rate": 2.820572139617725e-05, + "loss": 0.9189, + "step": 2490 + }, + { + "epoch": 1.38, + "learning_rate": 2.8133905473269802e-05, + "loss": 0.9522, + "step": 2495 + }, + { + "epoch": 1.38, + "learning_rate": 2.8062063266693818e-05, + "loss": 0.9352, + "step": 2500 + }, + { + "epoch": 1.39, + "learning_rate": 2.7990195378980784e-05, + "loss": 0.9232, + "step": 2505 + }, + { + "epoch": 1.39, + "learning_rate": 2.7918302412877583e-05, + "loss": 0.8807, + "step": 2510 + }, + { + "epoch": 1.39, + "learning_rate": 2.7846384971341427e-05, + "loss": 0.9231, + "step": 2515 + }, + { + "epoch": 1.39, + "learning_rate": 2.7774443657534788e-05, + "loss": 0.9253, + "step": 2520 + }, + { + "epoch": 1.4, + "learning_rate": 2.770247907482036e-05, + "loss": 0.9646, + "step": 2525 + }, + { + "epoch": 1.4, + "learning_rate": 2.763049182675599e-05, + "loss": 0.9575, + "step": 2530 + }, + { + "epoch": 1.4, + "learning_rate": 2.7558482517089617e-05, + "loss": 0.9234, + "step": 2535 + }, + { + "epoch": 1.4, + "learning_rate": 2.748645174975421e-05, + "loss": 0.9215, + "step": 2540 + }, + { + "epoch": 1.41, + "learning_rate": 2.74144001288627e-05, + "loss": 0.959, + "step": 2545 + }, + { + "epoch": 1.41, + "learning_rate": 2.7342328258702894e-05, + "loss": 0.9509, + "step": 2550 + }, + { + "epoch": 1.41, + "learning_rate": 2.727023674373246e-05, + "loss": 0.8989, + "step": 2555 + }, + { + "epoch": 1.42, + "learning_rate": 2.7198126188573807e-05, + "loss": 0.9653, + "step": 2560 + }, + { + "epoch": 1.42, + "learning_rate": 2.7125997198009028e-05, + "loss": 0.9046, + "step": 2565 + }, + { + "epoch": 1.42, + "learning_rate": 2.7053850376974848e-05, + "loss": 0.9318, + "step": 2570 + }, + { + "epoch": 1.42, + "learning_rate": 2.6981686330557516e-05, + "loss": 0.9292, + "step": 2575 + }, + { + "epoch": 1.43, + "learning_rate": 2.6909505663987756e-05, + "loss": 0.99, + "step": 2580 + }, + { + "epoch": 1.43, + "learning_rate": 2.6837308982635678e-05, + "loss": 0.9737, + "step": 2585 + }, + { + "epoch": 1.43, + "learning_rate": 2.6765096892005726e-05, + "loss": 0.9649, + "step": 2590 + }, + { + "epoch": 1.43, + "learning_rate": 2.6692869997731545e-05, + "loss": 0.9687, + "step": 2595 + }, + { + "epoch": 1.44, + "learning_rate": 2.6620628905570964e-05, + "loss": 0.9708, + "step": 2600 + }, + { + "epoch": 1.44, + "learning_rate": 2.6548374221400884e-05, + "loss": 0.9498, + "step": 2605 + }, + { + "epoch": 1.44, + "learning_rate": 2.6476106551212188e-05, + "loss": 0.9512, + "step": 2610 + }, + { + "epoch": 1.45, + "learning_rate": 2.6403826501104682e-05, + "loss": 0.964, + "step": 2615 + }, + { + "epoch": 1.45, + "learning_rate": 2.6331534677281998e-05, + "loss": 0.9321, + "step": 2620 + }, + { + "epoch": 1.45, + "learning_rate": 2.6259231686046508e-05, + "loss": 0.9032, + "step": 2625 + }, + { + "epoch": 1.45, + "learning_rate": 2.6186918133794252e-05, + "loss": 0.9543, + "step": 2630 + }, + { + "epoch": 1.46, + "learning_rate": 2.6114594627009847e-05, + "loss": 0.9355, + "step": 2635 + }, + { + "epoch": 1.46, + "learning_rate": 2.604226177226137e-05, + "loss": 0.9111, + "step": 2640 + }, + { + "epoch": 1.46, + "learning_rate": 2.596992017619534e-05, + "loss": 0.9896, + "step": 2645 + }, + { + "epoch": 1.47, + "learning_rate": 2.589757044553155e-05, + "loss": 0.9715, + "step": 2650 + }, + { + "epoch": 1.47, + "learning_rate": 2.5825213187058045e-05, + "loss": 0.8911, + "step": 2655 + }, + { + "epoch": 1.47, + "learning_rate": 2.5752849007625986e-05, + "loss": 0.9446, + "step": 2660 + }, + { + "epoch": 1.47, + "learning_rate": 2.568047851414459e-05, + "loss": 0.9453, + "step": 2665 + }, + { + "epoch": 1.48, + "learning_rate": 2.5608102313576027e-05, + "loss": 0.9244, + "step": 2670 + }, + { + "epoch": 1.48, + "learning_rate": 2.553572101293033e-05, + "loss": 0.9478, + "step": 2675 + }, + { + "epoch": 1.48, + "learning_rate": 2.546333521926031e-05, + "loss": 0.9708, + "step": 2680 + }, + { + "epoch": 1.48, + "learning_rate": 2.5390945539656445e-05, + "loss": 0.9266, + "step": 2685 + }, + { + "epoch": 1.49, + "learning_rate": 2.5318552581241822e-05, + "loss": 0.9199, + "step": 2690 + }, + { + "epoch": 1.49, + "learning_rate": 2.524615695116702e-05, + "loss": 0.9666, + "step": 2695 + }, + { + "epoch": 1.49, + "learning_rate": 2.5173759256605027e-05, + "loss": 0.9247, + "step": 2700 + }, + { + "epoch": 1.5, + "learning_rate": 2.510136010474614e-05, + "loss": 0.9534, + "step": 2705 + }, + { + "epoch": 1.5, + "learning_rate": 2.5028960102792887e-05, + "loss": 0.9502, + "step": 2710 + }, + { + "epoch": 1.5, + "learning_rate": 2.4971039897207112e-05, + "loss": 0.9331, + "step": 2715 + }, + { + "epoch": 1.5, + "learning_rate": 2.4898639895253865e-05, + "loss": 0.9806, + "step": 2720 + }, + { + "epoch": 1.51, + "learning_rate": 2.4826240743394982e-05, + "loss": 0.9525, + "step": 2725 + }, + { + "epoch": 1.51, + "learning_rate": 2.4753843048832985e-05, + "loss": 0.937, + "step": 2730 + }, + { + "epoch": 1.51, + "learning_rate": 2.4681447418758187e-05, + "loss": 0.935, + "step": 2735 + }, + { + "epoch": 1.52, + "learning_rate": 2.460905446034356e-05, + "loss": 0.9662, + "step": 2740 + }, + { + "epoch": 1.52, + "learning_rate": 2.45366647807397e-05, + "loss": 0.9249, + "step": 2745 + }, + { + "epoch": 1.52, + "learning_rate": 2.446427898706967e-05, + "loss": 0.9673, + "step": 2750 + }, + { + "epoch": 1.52, + "learning_rate": 2.439189768642398e-05, + "loss": 0.9316, + "step": 2755 + }, + { + "epoch": 1.53, + "learning_rate": 2.431952148585541e-05, + "loss": 0.9297, + "step": 2760 + }, + { + "epoch": 1.53, + "learning_rate": 2.424715099237402e-05, + "loss": 0.9278, + "step": 2765 + }, + { + "epoch": 1.53, + "learning_rate": 2.4174786812941968e-05, + "loss": 0.8954, + "step": 2770 + }, + { + "epoch": 1.53, + "learning_rate": 2.4102429554468456e-05, + "loss": 0.9586, + "step": 2775 + }, + { + "epoch": 1.54, + "learning_rate": 2.4030079823804673e-05, + "loss": 0.9119, + "step": 2780 + }, + { + "epoch": 1.54, + "learning_rate": 2.395773822773863e-05, + "loss": 0.8949, + "step": 2785 + }, + { + "epoch": 1.54, + "learning_rate": 2.3885405372990166e-05, + "loss": 0.9506, + "step": 2790 + }, + { + "epoch": 1.55, + "learning_rate": 2.3813081866205754e-05, + "loss": 0.9087, + "step": 2795 + }, + { + "epoch": 1.55, + "learning_rate": 2.3740768313953494e-05, + "loss": 0.923, + "step": 2800 + }, + { + "epoch": 1.55, + "learning_rate": 2.3668465322718004e-05, + "loss": 1.0212, + "step": 2805 + }, + { + "epoch": 1.55, + "learning_rate": 2.359617349889532e-05, + "loss": 0.9182, + "step": 2810 + }, + { + "epoch": 1.56, + "learning_rate": 2.3523893448787818e-05, + "loss": 0.9316, + "step": 2815 + }, + { + "epoch": 1.56, + "learning_rate": 2.3451625778599122e-05, + "loss": 0.94, + "step": 2820 + }, + { + "epoch": 1.56, + "learning_rate": 2.3379371094429038e-05, + "loss": 0.9181, + "step": 2825 + }, + { + "epoch": 1.56, + "learning_rate": 2.3307130002268457e-05, + "loss": 0.937, + "step": 2830 + }, + { + "epoch": 1.57, + "learning_rate": 2.3234903107994287e-05, + "loss": 0.9026, + "step": 2835 + }, + { + "epoch": 1.57, + "learning_rate": 2.3162691017364317e-05, + "loss": 0.954, + "step": 2840 + }, + { + "epoch": 1.57, + "learning_rate": 2.3090494336012253e-05, + "loss": 0.9661, + "step": 2845 + }, + { + "epoch": 1.58, + "learning_rate": 2.3018313669442483e-05, + "loss": 0.9127, + "step": 2850 + }, + { + "epoch": 1.58, + "learning_rate": 2.2946149623025158e-05, + "loss": 0.9317, + "step": 2855 + }, + { + "epoch": 1.58, + "learning_rate": 2.2874002801990978e-05, + "loss": 0.9856, + "step": 2860 + }, + { + "epoch": 1.58, + "learning_rate": 2.28018738114262e-05, + "loss": 1.0021, + "step": 2865 + }, + { + "epoch": 1.59, + "learning_rate": 2.272976325626755e-05, + "loss": 0.9655, + "step": 2870 + }, + { + "epoch": 1.59, + "learning_rate": 2.265767174129711e-05, + "loss": 0.9619, + "step": 2875 + }, + { + "epoch": 1.59, + "learning_rate": 2.2585599871137313e-05, + "loss": 0.9383, + "step": 2880 + }, + { + "epoch": 1.6, + "learning_rate": 2.251354825024579e-05, + "loss": 0.9332, + "step": 2885 + }, + { + "epoch": 1.6, + "learning_rate": 2.244151748291039e-05, + "loss": 0.9544, + "step": 2890 + }, + { + "epoch": 1.6, + "learning_rate": 2.236950817324401e-05, + "loss": 0.9343, + "step": 2895 + }, + { + "epoch": 1.6, + "learning_rate": 2.2297520925179647e-05, + "loss": 0.9189, + "step": 2900 + }, + { + "epoch": 1.61, + "learning_rate": 2.222555634246521e-05, + "loss": 0.9341, + "step": 2905 + }, + { + "epoch": 1.61, + "learning_rate": 2.215361502865858e-05, + "loss": 0.9567, + "step": 2910 + }, + { + "epoch": 1.61, + "learning_rate": 2.2081697587122423e-05, + "loss": 0.9047, + "step": 2915 + }, + { + "epoch": 1.61, + "learning_rate": 2.200980462101922e-05, + "loss": 0.9126, + "step": 2920 + }, + { + "epoch": 1.62, + "learning_rate": 2.1937936733306195e-05, + "loss": 0.9523, + "step": 2925 + }, + { + "epoch": 1.62, + "learning_rate": 2.18660945267302e-05, + "loss": 0.8802, + "step": 2930 + }, + { + "epoch": 1.62, + "learning_rate": 2.179427860382276e-05, + "loss": 0.9197, + "step": 2935 + }, + { + "epoch": 1.63, + "learning_rate": 2.1722489566894903e-05, + "loss": 0.9255, + "step": 2940 + }, + { + "epoch": 1.63, + "learning_rate": 2.1650728018032206e-05, + "loss": 0.8921, + "step": 2945 + }, + { + "epoch": 1.63, + "learning_rate": 2.15789945590897e-05, + "loss": 0.9607, + "step": 2950 + }, + { + "epoch": 1.63, + "learning_rate": 2.150728979168683e-05, + "loss": 0.9755, + "step": 2955 + }, + { + "epoch": 1.64, + "learning_rate": 2.1435614317202384e-05, + "loss": 0.9943, + "step": 2960 + }, + { + "epoch": 1.64, + "learning_rate": 2.1363968736769508e-05, + "loss": 0.9463, + "step": 2965 + }, + { + "epoch": 1.64, + "learning_rate": 2.1292353651270617e-05, + "loss": 0.9107, + "step": 2970 + }, + { + "epoch": 1.65, + "learning_rate": 2.1220769661332365e-05, + "loss": 0.9311, + "step": 2975 + }, + { + "epoch": 1.65, + "learning_rate": 2.1149217367320622e-05, + "loss": 0.9459, + "step": 2980 + }, + { + "epoch": 1.65, + "learning_rate": 2.107769736933541e-05, + "loss": 0.9439, + "step": 2985 + }, + { + "epoch": 1.65, + "learning_rate": 2.100621026720591e-05, + "loss": 0.9719, + "step": 2990 + }, + { + "epoch": 1.66, + "learning_rate": 2.093475666048539e-05, + "loss": 0.9569, + "step": 2995 + }, + { + "epoch": 1.66, + "learning_rate": 2.0863337148446222e-05, + "loss": 0.9308, + "step": 3000 + }, + { + "epoch": 1.66, + "learning_rate": 2.07919523300748e-05, + "loss": 0.9269, + "step": 3005 + }, + { + "epoch": 1.66, + "learning_rate": 2.0720602804066552e-05, + "loss": 0.9358, + "step": 3010 + }, + { + "epoch": 1.67, + "learning_rate": 2.0649289168820943e-05, + "loss": 0.9291, + "step": 3015 + }, + { + "epoch": 1.67, + "learning_rate": 2.0578012022436386e-05, + "loss": 0.969, + "step": 3020 + }, + { + "epoch": 1.67, + "learning_rate": 2.0506771962705304e-05, + "loss": 0.9319, + "step": 3025 + }, + { + "epoch": 1.68, + "learning_rate": 2.0435569587109042e-05, + "loss": 0.9574, + "step": 3030 + }, + { + "epoch": 1.68, + "learning_rate": 2.036440549281293e-05, + "loss": 0.9654, + "step": 3035 + }, + { + "epoch": 1.68, + "learning_rate": 2.0293280276661204e-05, + "loss": 0.9742, + "step": 3040 + }, + { + "epoch": 1.68, + "learning_rate": 2.0222194535172067e-05, + "loss": 0.9014, + "step": 3045 + }, + { + "epoch": 1.69, + "learning_rate": 2.0151148864532623e-05, + "loss": 0.9246, + "step": 3050 + }, + { + "epoch": 1.69, + "learning_rate": 2.0080143860593913e-05, + "loss": 0.9522, + "step": 3055 + }, + { + "epoch": 1.69, + "learning_rate": 2.0009180118865933e-05, + "loss": 0.9567, + "step": 3060 + }, + { + "epoch": 1.69, + "learning_rate": 1.9938258234512588e-05, + "loss": 0.983, + "step": 3065 + }, + { + "epoch": 1.7, + "learning_rate": 1.9867378802346764e-05, + "loss": 0.8722, + "step": 3070 + }, + { + "epoch": 1.7, + "learning_rate": 1.979654241682527e-05, + "loss": 0.9122, + "step": 3075 + }, + { + "epoch": 1.7, + "learning_rate": 1.972574967204391e-05, + "loss": 0.9362, + "step": 3080 + }, + { + "epoch": 1.71, + "learning_rate": 1.9655001161732478e-05, + "loss": 0.8944, + "step": 3085 + }, + { + "epoch": 1.71, + "learning_rate": 1.9584297479249774e-05, + "loss": 0.9329, + "step": 3090 + }, + { + "epoch": 1.71, + "learning_rate": 1.9513639217578636e-05, + "loss": 0.9707, + "step": 3095 + }, + { + "epoch": 1.71, + "learning_rate": 1.9443026969320955e-05, + "loss": 0.9367, + "step": 3100 + }, + { + "epoch": 1.72, + "learning_rate": 1.937246132669272e-05, + "loss": 0.896, + "step": 3105 + }, + { + "epoch": 1.72, + "learning_rate": 1.9301942881519047e-05, + "loss": 0.97, + "step": 3110 + }, + { + "epoch": 1.72, + "learning_rate": 1.9231472225229216e-05, + "loss": 0.9638, + "step": 3115 + }, + { + "epoch": 1.73, + "learning_rate": 1.9161049948851684e-05, + "loss": 0.9561, + "step": 3120 + }, + { + "epoch": 1.73, + "learning_rate": 1.9090676643009193e-05, + "loss": 0.9734, + "step": 3125 + }, + { + "epoch": 1.73, + "learning_rate": 1.902035289791373e-05, + "loss": 0.9651, + "step": 3130 + }, + { + "epoch": 1.73, + "learning_rate": 1.8950079303361658e-05, + "loss": 0.9489, + "step": 3135 + }, + { + "epoch": 1.74, + "learning_rate": 1.8879856448728723e-05, + "loss": 0.9893, + "step": 3140 + }, + { + "epoch": 1.74, + "learning_rate": 1.8809684922965097e-05, + "loss": 0.9549, + "step": 3145 + }, + { + "epoch": 1.74, + "learning_rate": 1.8739565314590507e-05, + "loss": 0.9196, + "step": 3150 + }, + { + "epoch": 1.74, + "learning_rate": 1.8669498211689216e-05, + "loss": 0.9568, + "step": 3155 + }, + { + "epoch": 1.75, + "learning_rate": 1.859948420190517e-05, + "loss": 0.904, + "step": 3160 + }, + { + "epoch": 1.75, + "learning_rate": 1.852952387243698e-05, + "loss": 0.9375, + "step": 3165 + }, + { + "epoch": 1.75, + "learning_rate": 1.8459617810033096e-05, + "loss": 0.9048, + "step": 3170 + }, + { + "epoch": 1.76, + "learning_rate": 1.83897666009868e-05, + "loss": 0.9514, + "step": 3175 + }, + { + "epoch": 1.76, + "learning_rate": 1.8319970831131363e-05, + "loss": 0.8855, + "step": 3180 + }, + { + "epoch": 1.76, + "learning_rate": 1.825023108583505e-05, + "loss": 0.9119, + "step": 3185 + }, + { + "epoch": 1.76, + "learning_rate": 1.818054794999628e-05, + "loss": 0.9733, + "step": 3190 + }, + { + "epoch": 1.77, + "learning_rate": 1.8110922008038705e-05, + "loss": 0.9597, + "step": 3195 + }, + { + "epoch": 1.77, + "learning_rate": 1.8041353843906275e-05, + "loss": 0.8952, + "step": 3200 + }, + { + "epoch": 1.77, + "learning_rate": 1.797184404105839e-05, + "loss": 0.8934, + "step": 3205 + }, + { + "epoch": 1.77, + "learning_rate": 1.7902393182464955e-05, + "loss": 0.9299, + "step": 3210 + }, + { + "epoch": 1.78, + "learning_rate": 1.7833001850601544e-05, + "loss": 0.9247, + "step": 3215 + }, + { + "epoch": 1.78, + "learning_rate": 1.7763670627444465e-05, + "loss": 0.9672, + "step": 3220 + }, + { + "epoch": 1.78, + "learning_rate": 1.7694400094465913e-05, + "loss": 0.9451, + "step": 3225 + }, + { + "epoch": 1.79, + "learning_rate": 1.7625190832629085e-05, + "loss": 0.9294, + "step": 3230 + }, + { + "epoch": 1.79, + "learning_rate": 1.7556043422383293e-05, + "loss": 0.9145, + "step": 3235 + }, + { + "epoch": 1.79, + "learning_rate": 1.7486958443659112e-05, + "loss": 0.9508, + "step": 3240 + }, + { + "epoch": 1.79, + "learning_rate": 1.7417936475863526e-05, + "loss": 0.8725, + "step": 3245 + }, + { + "epoch": 1.8, + "learning_rate": 1.7348978097875036e-05, + "loss": 0.9195, + "step": 3250 + }, + { + "epoch": 1.8, + "learning_rate": 1.728008388803883e-05, + "loss": 0.933, + "step": 3255 + }, + { + "epoch": 1.8, + "learning_rate": 1.7211254424161933e-05, + "loss": 0.9747, + "step": 3260 + }, + { + "epoch": 1.81, + "learning_rate": 1.7142490283508324e-05, + "loss": 0.9168, + "step": 3265 + }, + { + "epoch": 1.81, + "learning_rate": 1.707379204279418e-05, + "loss": 0.9844, + "step": 3270 + }, + { + "epoch": 1.81, + "learning_rate": 1.700516027818293e-05, + "loss": 0.9071, + "step": 3275 + }, + { + "epoch": 1.81, + "learning_rate": 1.6936595565280488e-05, + "loss": 0.9311, + "step": 3280 + }, + { + "epoch": 1.82, + "learning_rate": 1.686809847913045e-05, + "loss": 0.9419, + "step": 3285 + }, + { + "epoch": 1.82, + "learning_rate": 1.679966959420918e-05, + "loss": 0.9679, + "step": 3290 + }, + { + "epoch": 1.82, + "learning_rate": 1.67313094844211e-05, + "loss": 0.9601, + "step": 3295 + }, + { + "epoch": 1.82, + "learning_rate": 1.6663018723093774e-05, + "loss": 0.9022, + "step": 3300 + }, + { + "epoch": 1.83, + "learning_rate": 1.6594797882973196e-05, + "loss": 0.8925, + "step": 3305 + }, + { + "epoch": 1.83, + "learning_rate": 1.6526647536218894e-05, + "loss": 0.9238, + "step": 3310 + }, + { + "epoch": 1.83, + "learning_rate": 1.6458568254399225e-05, + "loss": 0.9632, + "step": 3315 + }, + { + "epoch": 1.84, + "learning_rate": 1.6390560608486496e-05, + "loss": 0.9164, + "step": 3320 + }, + { + "epoch": 1.84, + "learning_rate": 1.6322625168852217e-05, + "loss": 0.9505, + "step": 3325 + }, + { + "epoch": 1.84, + "learning_rate": 1.6254762505262338e-05, + "loss": 0.9622, + "step": 3330 + }, + { + "epoch": 1.84, + "learning_rate": 1.618697318687241e-05, + "loss": 0.9204, + "step": 3335 + }, + { + "epoch": 1.85, + "learning_rate": 1.6119257782222895e-05, + "loss": 0.9504, + "step": 3340 + }, + { + "epoch": 1.85, + "learning_rate": 1.6051616859234285e-05, + "loss": 0.9384, + "step": 3345 + }, + { + "epoch": 1.85, + "learning_rate": 1.5984050985202474e-05, + "loss": 0.9374, + "step": 3350 + }, + { + "epoch": 1.86, + "learning_rate": 1.591656072679387e-05, + "loss": 0.9937, + "step": 3355 + }, + { + "epoch": 1.86, + "learning_rate": 1.5849146650040737e-05, + "loss": 0.9587, + "step": 3360 + }, + { + "epoch": 1.86, + "learning_rate": 1.5781809320336412e-05, + "loss": 0.9312, + "step": 3365 + }, + { + "epoch": 1.86, + "learning_rate": 1.5714549302430536e-05, + "loss": 0.9343, + "step": 3370 + }, + { + "epoch": 1.87, + "learning_rate": 1.5647367160424393e-05, + "loss": 0.9419, + "step": 3375 + }, + { + "epoch": 1.87, + "learning_rate": 1.558026345776608e-05, + "loss": 0.9371, + "step": 3380 + }, + { + "epoch": 1.87, + "learning_rate": 1.551323875724587e-05, + "loss": 0.913, + "step": 3385 + }, + { + "epoch": 1.87, + "learning_rate": 1.5446293620991437e-05, + "loss": 0.9195, + "step": 3390 + }, + { + "epoch": 1.88, + "learning_rate": 1.5379428610463174e-05, + "loss": 0.9164, + "step": 3395 + }, + { + "epoch": 1.88, + "learning_rate": 1.531264428644945e-05, + "loss": 0.9256, + "step": 3400 + }, + { + "epoch": 1.88, + "learning_rate": 1.5245941209061953e-05, + "loss": 0.9658, + "step": 3405 + }, + { + "epoch": 1.89, + "learning_rate": 1.517931993773094e-05, + "loss": 0.9397, + "step": 3410 + }, + { + "epoch": 1.89, + "learning_rate": 1.5112781031200569e-05, + "loss": 0.915, + "step": 3415 + }, + { + "epoch": 1.89, + "learning_rate": 1.5046325047524251e-05, + "loss": 0.9116, + "step": 3420 + }, + { + "epoch": 1.89, + "learning_rate": 1.4979952544059888e-05, + "loss": 0.9192, + "step": 3425 + }, + { + "epoch": 1.9, + "learning_rate": 1.4913664077465289e-05, + "loss": 0.924, + "step": 3430 + }, + { + "epoch": 1.9, + "learning_rate": 1.4847460203693408e-05, + "loss": 0.9579, + "step": 3435 + }, + { + "epoch": 1.9, + "learning_rate": 1.4781341477987776e-05, + "loss": 0.9816, + "step": 3440 + }, + { + "epoch": 1.9, + "learning_rate": 1.4715308454877758e-05, + "loss": 0.9412, + "step": 3445 + }, + { + "epoch": 1.91, + "learning_rate": 1.4649361688173979e-05, + "loss": 0.9413, + "step": 3450 + }, + { + "epoch": 1.91, + "learning_rate": 1.458350173096361e-05, + "loss": 0.9425, + "step": 3455 + }, + { + "epoch": 1.91, + "learning_rate": 1.4517729135605795e-05, + "loss": 0.9132, + "step": 3460 + }, + { + "epoch": 1.92, + "learning_rate": 1.4452044453726942e-05, + "loss": 0.9455, + "step": 3465 + }, + { + "epoch": 1.92, + "learning_rate": 1.4386448236216174e-05, + "loss": 0.9543, + "step": 3470 + }, + { + "epoch": 1.92, + "learning_rate": 1.4320941033220667e-05, + "loss": 0.9118, + "step": 3475 + }, + { + "epoch": 1.92, + "learning_rate": 1.4255523394141041e-05, + "loss": 0.9425, + "step": 3480 + }, + { + "epoch": 1.93, + "learning_rate": 1.4190195867626749e-05, + "loss": 0.8958, + "step": 3485 + }, + { + "epoch": 1.93, + "learning_rate": 1.4124959001571497e-05, + "loss": 0.944, + "step": 3490 + }, + { + "epoch": 1.93, + "learning_rate": 1.4059813343108616e-05, + "loss": 0.9611, + "step": 3495 + }, + { + "epoch": 1.94, + "learning_rate": 1.3994759438606501e-05, + "loss": 0.9448, + "step": 3500 + }, + { + "epoch": 1.94, + "learning_rate": 1.3929797833664013e-05, + "loss": 0.9659, + "step": 3505 + }, + { + "epoch": 1.94, + "learning_rate": 1.3864929073105922e-05, + "loss": 0.9178, + "step": 3510 + }, + { + "epoch": 1.94, + "learning_rate": 1.3800153700978282e-05, + "loss": 0.8965, + "step": 3515 + }, + { + "epoch": 1.95, + "learning_rate": 1.373547226054398e-05, + "loss": 0.9198, + "step": 3520 + }, + { + "epoch": 1.95, + "learning_rate": 1.367088529427803e-05, + "loss": 0.9398, + "step": 3525 + }, + { + "epoch": 1.95, + "learning_rate": 1.3606393343863182e-05, + "loss": 0.9423, + "step": 3530 + }, + { + "epoch": 1.95, + "learning_rate": 1.3541996950185227e-05, + "loss": 0.9592, + "step": 3535 + }, + { + "epoch": 1.96, + "learning_rate": 1.3477696653328598e-05, + "loss": 0.9489, + "step": 3540 + }, + { + "epoch": 1.96, + "learning_rate": 1.3413492992571713e-05, + "loss": 0.963, + "step": 3545 + }, + { + "epoch": 1.96, + "learning_rate": 1.3349386506382586e-05, + "loss": 0.9449, + "step": 3550 + }, + { + "epoch": 1.97, + "learning_rate": 1.3285377732414172e-05, + "loss": 0.9043, + "step": 3555 + }, + { + "epoch": 1.97, + "learning_rate": 1.3221467207499972e-05, + "loss": 0.9362, + "step": 3560 + }, + { + "epoch": 1.97, + "learning_rate": 1.3157655467649463e-05, + "loss": 0.945, + "step": 3565 + }, + { + "epoch": 1.97, + "learning_rate": 1.3093943048043634e-05, + "loss": 0.8693, + "step": 3570 + }, + { + "epoch": 1.98, + "learning_rate": 1.3030330483030479e-05, + "loss": 0.9659, + "step": 3575 + }, + { + "epoch": 1.98, + "learning_rate": 1.2966818306120535e-05, + "loss": 0.9036, + "step": 3580 + }, + { + "epoch": 1.98, + "learning_rate": 1.2903407049982386e-05, + "loss": 0.9282, + "step": 3585 + }, + { + "epoch": 1.99, + "learning_rate": 1.2840097246438215e-05, + "loss": 0.8747, + "step": 3590 + }, + { + "epoch": 1.99, + "learning_rate": 1.277688942645934e-05, + "loss": 0.936, + "step": 3595 + }, + { + "epoch": 1.99, + "learning_rate": 1.2713784120161725e-05, + "loss": 0.8945, + "step": 3600 + }, + { + "epoch": 1.99, + "learning_rate": 1.2650781856801598e-05, + "loss": 0.9273, + "step": 3605 + }, + { + "epoch": 2.0, + "learning_rate": 1.258788316477097e-05, + "loss": 0.9632, + "step": 3610 + }, + { + "epoch": 2.0, + "learning_rate": 1.2525088571593202e-05, + "loss": 0.8641, + "step": 3615 + }, + { + "epoch": 2.0, + "learning_rate": 1.2462398603918607e-05, + "loss": 0.9314, + "step": 3620 + }, + { + "epoch": 2.0, + "learning_rate": 1.2399813787520006e-05, + "loss": 0.9169, + "step": 3625 + }, + { + "epoch": 2.01, + "learning_rate": 1.2337334647288334e-05, + "loss": 0.9089, + "step": 3630 + }, + { + "epoch": 2.01, + "learning_rate": 1.2274961707228228e-05, + "loss": 0.9039, + "step": 3635 + }, + { + "epoch": 2.01, + "learning_rate": 1.2212695490453646e-05, + "loss": 0.9378, + "step": 3640 + }, + { + "epoch": 2.02, + "learning_rate": 1.2150536519183475e-05, + "loss": 0.9297, + "step": 3645 + }, + { + "epoch": 2.02, + "learning_rate": 1.2088485314737108e-05, + "loss": 0.9488, + "step": 3650 + }, + { + "epoch": 2.02, + "learning_rate": 1.2026542397530186e-05, + "loss": 0.9625, + "step": 3655 + }, + { + "epoch": 2.02, + "learning_rate": 1.1964708287070073e-05, + "loss": 0.8874, + "step": 3660 + }, + { + "epoch": 2.03, + "learning_rate": 1.1902983501951666e-05, + "loss": 0.9224, + "step": 3665 + }, + { + "epoch": 2.03, + "learning_rate": 1.1841368559852892e-05, + "loss": 0.9442, + "step": 3670 + }, + { + "epoch": 2.03, + "learning_rate": 1.17798639775305e-05, + "loss": 0.8688, + "step": 3675 + }, + { + "epoch": 2.03, + "learning_rate": 1.1718470270815608e-05, + "loss": 0.8912, + "step": 3680 + }, + { + "epoch": 2.04, + "learning_rate": 1.1657187954609496e-05, + "loss": 0.9131, + "step": 3685 + }, + { + "epoch": 2.04, + "learning_rate": 1.1596017542879168e-05, + "loss": 0.9827, + "step": 3690 + }, + { + "epoch": 2.04, + "learning_rate": 1.1534959548653132e-05, + "loss": 0.8588, + "step": 3695 + }, + { + "epoch": 2.05, + "learning_rate": 1.147401448401706e-05, + "loss": 0.9359, + "step": 3700 + }, + { + "epoch": 2.05, + "learning_rate": 1.1413182860109491e-05, + "loss": 0.9375, + "step": 3705 + }, + { + "epoch": 2.05, + "learning_rate": 1.1352465187117562e-05, + "loss": 0.9242, + "step": 3710 + }, + { + "epoch": 2.05, + "learning_rate": 1.1291861974272703e-05, + "loss": 0.9132, + "step": 3715 + }, + { + "epoch": 2.06, + "learning_rate": 1.1231373729846393e-05, + "loss": 0.9142, + "step": 3720 + }, + { + "epoch": 2.06, + "learning_rate": 1.1171000961145883e-05, + "loss": 0.9132, + "step": 3725 + }, + { + "epoch": 2.06, + "learning_rate": 1.1110744174509952e-05, + "loss": 0.9603, + "step": 3730 + }, + { + "epoch": 2.07, + "learning_rate": 1.1050603875304622e-05, + "loss": 0.9442, + "step": 3735 + }, + { + "epoch": 2.07, + "learning_rate": 1.0990580567918979e-05, + "loss": 0.9392, + "step": 3740 + }, + { + "epoch": 2.07, + "learning_rate": 1.0930674755760908e-05, + "loss": 0.9203, + "step": 3745 + }, + { + "epoch": 2.07, + "learning_rate": 1.0870886941252872e-05, + "loss": 0.9468, + "step": 3750 + }, + { + "epoch": 2.08, + "learning_rate": 1.0811217625827705e-05, + "loss": 0.9605, + "step": 3755 + }, + { + "epoch": 2.08, + "learning_rate": 1.0751667309924399e-05, + "loss": 0.9166, + "step": 3760 + }, + { + "epoch": 2.08, + "learning_rate": 1.0692236492983918e-05, + "loss": 0.939, + "step": 3765 + }, + { + "epoch": 2.08, + "learning_rate": 1.0632925673445001e-05, + "loss": 0.9409, + "step": 3770 + }, + { + "epoch": 2.09, + "learning_rate": 1.0573735348739987e-05, + "loss": 0.9523, + "step": 3775 + }, + { + "epoch": 2.09, + "learning_rate": 1.0514666015290645e-05, + "loss": 0.8867, + "step": 3780 + }, + { + "epoch": 2.09, + "learning_rate": 1.0455718168503967e-05, + "loss": 0.9816, + "step": 3785 + }, + { + "epoch": 2.1, + "learning_rate": 1.0396892302768127e-05, + "loss": 0.9465, + "step": 3790 + }, + { + "epoch": 2.1, + "learning_rate": 1.033818891144817e-05, + "loss": 0.9352, + "step": 3795 + }, + { + "epoch": 2.1, + "learning_rate": 1.0279608486882054e-05, + "loss": 0.8877, + "step": 3800 + }, + { + "epoch": 2.1, + "learning_rate": 1.0221151520376343e-05, + "loss": 0.9098, + "step": 3805 + }, + { + "epoch": 2.11, + "learning_rate": 1.0162818502202251e-05, + "loss": 0.8687, + "step": 3810 + }, + { + "epoch": 2.11, + "learning_rate": 1.0104609921591387e-05, + "loss": 0.9256, + "step": 3815 + }, + { + "epoch": 2.11, + "learning_rate": 1.0046526266731782e-05, + "loss": 0.92, + "step": 3820 + }, + { + "epoch": 2.12, + "learning_rate": 9.988568024763673e-06, + "loss": 0.8813, + "step": 3825 + }, + { + "epoch": 2.12, + "learning_rate": 9.930735681775505e-06, + "loss": 0.891, + "step": 3830 + }, + { + "epoch": 2.12, + "learning_rate": 9.87302972279982e-06, + "loss": 0.9479, + "step": 3835 + }, + { + "epoch": 2.12, + "learning_rate": 9.815450631809191e-06, + "loss": 0.8963, + "step": 3840 + }, + { + "epoch": 2.13, + "learning_rate": 9.757998891712172e-06, + "loss": 0.9178, + "step": 3845 + }, + { + "epoch": 2.13, + "learning_rate": 9.700674984349228e-06, + "loss": 0.9072, + "step": 3850 + }, + { + "epoch": 2.13, + "learning_rate": 9.643479390488717e-06, + "loss": 0.8879, + "step": 3855 + }, + { + "epoch": 2.13, + "learning_rate": 9.586412589822846e-06, + "loss": 0.9484, + "step": 3860 + }, + { + "epoch": 2.14, + "learning_rate": 9.529475060963649e-06, + "loss": 0.9173, + "step": 3865 + }, + { + "epoch": 2.14, + "learning_rate": 9.472667281438982e-06, + "loss": 0.9318, + "step": 3870 + }, + { + "epoch": 2.14, + "learning_rate": 9.415989727688484e-06, + "loss": 0.9242, + "step": 3875 + }, + { + "epoch": 2.15, + "learning_rate": 9.359442875059631e-06, + "loss": 0.9314, + "step": 3880 + }, + { + "epoch": 2.15, + "learning_rate": 9.303027197803726e-06, + "loss": 0.9227, + "step": 3885 + }, + { + "epoch": 2.15, + "learning_rate": 9.246743169071906e-06, + "loss": 0.9253, + "step": 3890 + }, + { + "epoch": 2.15, + "learning_rate": 9.190591260911201e-06, + "loss": 0.9457, + "step": 3895 + }, + { + "epoch": 2.16, + "learning_rate": 9.134571944260554e-06, + "loss": 0.9214, + "step": 3900 + }, + { + "epoch": 2.16, + "learning_rate": 9.078685688946884e-06, + "loss": 0.9534, + "step": 3905 + }, + { + "epoch": 2.16, + "learning_rate": 9.022932963681141e-06, + "loss": 0.9236, + "step": 3910 + }, + { + "epoch": 2.16, + "learning_rate": 8.967314236054384e-06, + "loss": 0.8905, + "step": 3915 + }, + { + "epoch": 2.17, + "learning_rate": 8.911829972533817e-06, + "loss": 0.9319, + "step": 3920 + }, + { + "epoch": 2.17, + "learning_rate": 8.856480638458966e-06, + "loss": 0.8989, + "step": 3925 + }, + { + "epoch": 2.17, + "learning_rate": 8.80126669803766e-06, + "loss": 0.9234, + "step": 3930 + }, + { + "epoch": 2.18, + "learning_rate": 8.746188614342263e-06, + "loss": 0.9619, + "step": 3935 + }, + { + "epoch": 2.18, + "learning_rate": 8.691246849305653e-06, + "loss": 0.9537, + "step": 3940 + }, + { + "epoch": 2.18, + "learning_rate": 8.636441863717499e-06, + "loss": 0.9809, + "step": 3945 + }, + { + "epoch": 2.18, + "learning_rate": 8.581774117220238e-06, + "loss": 0.9219, + "step": 3950 + }, + { + "epoch": 2.19, + "learning_rate": 8.52724406830538e-06, + "loss": 0.9502, + "step": 3955 + }, + { + "epoch": 2.19, + "learning_rate": 8.472852174309514e-06, + "loss": 0.9277, + "step": 3960 + }, + { + "epoch": 2.19, + "learning_rate": 8.418598891410581e-06, + "loss": 0.8978, + "step": 3965 + }, + { + "epoch": 2.2, + "learning_rate": 8.364484674624e-06, + "loss": 0.9026, + "step": 3970 + }, + { + "epoch": 2.2, + "learning_rate": 8.31050997779885e-06, + "loss": 0.9658, + "step": 3975 + }, + { + "epoch": 2.2, + "learning_rate": 8.25667525361409e-06, + "loss": 0.9288, + "step": 3980 + }, + { + "epoch": 2.2, + "learning_rate": 8.202980953574735e-06, + "loss": 0.9009, + "step": 3985 + }, + { + "epoch": 2.21, + "learning_rate": 8.14942752800808e-06, + "loss": 0.934, + "step": 3990 + }, + { + "epoch": 2.21, + "learning_rate": 8.09601542605993e-06, + "loss": 0.9261, + "step": 3995 + }, + { + "epoch": 2.21, + "learning_rate": 8.042745095690826e-06, + "loss": 0.9086, + "step": 4000 + }, + { + "epoch": 2.21, + "learning_rate": 7.989616983672288e-06, + "loss": 0.9397, + "step": 4005 + }, + { + "epoch": 2.22, + "learning_rate": 7.936631535583055e-06, + "loss": 0.8661, + "step": 4010 + }, + { + "epoch": 2.22, + "learning_rate": 7.88378919580538e-06, + "loss": 0.9433, + "step": 4015 + }, + { + "epoch": 2.22, + "learning_rate": 7.83109040752128e-06, + "loss": 0.9473, + "step": 4020 + }, + { + "epoch": 2.23, + "learning_rate": 7.778535612708824e-06, + "loss": 0.9177, + "step": 4025 + }, + { + "epoch": 2.23, + "learning_rate": 7.726125252138417e-06, + "loss": 0.8932, + "step": 4030 + }, + { + "epoch": 2.23, + "learning_rate": 7.673859765369126e-06, + "loss": 0.9187, + "step": 4035 + }, + { + "epoch": 2.23, + "learning_rate": 7.62173959074497e-06, + "loss": 0.9009, + "step": 4040 + }, + { + "epoch": 2.24, + "learning_rate": 7.569765165391257e-06, + "loss": 0.9348, + "step": 4045 + }, + { + "epoch": 2.24, + "learning_rate": 7.517936925210917e-06, + "loss": 0.8827, + "step": 4050 + }, + { + "epoch": 2.24, + "learning_rate": 7.466255304880834e-06, + "loss": 0.9668, + "step": 4055 + }, + { + "epoch": 2.24, + "learning_rate": 7.4147207378482295e-06, + "loss": 0.934, + "step": 4060 + }, + { + "epoch": 2.25, + "learning_rate": 7.36333365632697e-06, + "loss": 0.9251, + "step": 4065 + }, + { + "epoch": 2.25, + "learning_rate": 7.312094491294033e-06, + "loss": 0.9225, + "step": 4070 + }, + { + "epoch": 2.25, + "learning_rate": 7.261003672485783e-06, + "loss": 0.9198, + "step": 4075 + }, + { + "epoch": 2.26, + "learning_rate": 7.210061628394477e-06, + "loss": 0.9023, + "step": 4080 + }, + { + "epoch": 2.26, + "learning_rate": 7.159268786264564e-06, + "loss": 0.9378, + "step": 4085 + }, + { + "epoch": 2.26, + "learning_rate": 7.108625572089209e-06, + "loss": 0.9148, + "step": 4090 + }, + { + "epoch": 2.26, + "learning_rate": 7.05813241060661e-06, + "loss": 0.9308, + "step": 4095 + }, + { + "epoch": 2.27, + "learning_rate": 7.007789725296557e-06, + "loss": 0.8979, + "step": 4100 + }, + { + "epoch": 2.27, + "learning_rate": 6.957597938376748e-06, + "loss": 0.9329, + "step": 4105 + }, + { + "epoch": 2.27, + "learning_rate": 6.907557470799358e-06, + "loss": 0.8849, + "step": 4110 + }, + { + "epoch": 2.28, + "learning_rate": 6.857668742247458e-06, + "loss": 0.8879, + "step": 4115 + }, + { + "epoch": 2.28, + "learning_rate": 6.807932171131498e-06, + "loss": 0.9132, + "step": 4120 + }, + { + "epoch": 2.28, + "learning_rate": 6.758348174585804e-06, + "loss": 0.9273, + "step": 4125 + }, + { + "epoch": 2.28, + "learning_rate": 6.7089171684650785e-06, + "loss": 0.9115, + "step": 4130 + }, + { + "epoch": 2.29, + "learning_rate": 6.659639567340914e-06, + "loss": 0.9431, + "step": 4135 + }, + { + "epoch": 2.29, + "learning_rate": 6.610515784498314e-06, + "loss": 0.9515, + "step": 4140 + }, + { + "epoch": 2.29, + "learning_rate": 6.561546231932228e-06, + "loss": 0.9069, + "step": 4145 + }, + { + "epoch": 2.29, + "learning_rate": 6.512731320344101e-06, + "loss": 0.9178, + "step": 4150 + }, + { + "epoch": 2.3, + "learning_rate": 6.464071459138405e-06, + "loss": 0.9156, + "step": 4155 + }, + { + "epoch": 2.3, + "learning_rate": 6.415567056419244e-06, + "loss": 0.9637, + "step": 4160 + }, + { + "epoch": 2.3, + "learning_rate": 6.3672185189869e-06, + "loss": 0.9446, + "step": 4165 + }, + { + "epoch": 2.31, + "learning_rate": 6.319026252334445e-06, + "loss": 0.8726, + "step": 4170 + }, + { + "epoch": 2.31, + "learning_rate": 6.270990660644313e-06, + "loss": 0.9324, + "step": 4175 + }, + { + "epoch": 2.31, + "learning_rate": 6.223112146784935e-06, + "loss": 0.9252, + "step": 4180 + }, + { + "epoch": 2.31, + "learning_rate": 6.1753911123073435e-06, + "loss": 0.8924, + "step": 4185 + }, + { + "epoch": 2.32, + "learning_rate": 6.127827957441817e-06, + "loss": 0.9344, + "step": 4190 + }, + { + "epoch": 2.32, + "learning_rate": 6.080423081094522e-06, + "loss": 0.9341, + "step": 4195 + }, + { + "epoch": 2.32, + "learning_rate": 6.033176880844133e-06, + "loss": 0.905, + "step": 4200 + }, + { + "epoch": 2.33, + "learning_rate": 5.986089752938584e-06, + "loss": 0.9231, + "step": 4205 + }, + { + "epoch": 2.33, + "learning_rate": 5.939162092291622e-06, + "loss": 0.906, + "step": 4210 + }, + { + "epoch": 2.33, + "learning_rate": 5.892394292479633e-06, + "loss": 0.8957, + "step": 4215 + }, + { + "epoch": 2.33, + "learning_rate": 5.8457867457382024e-06, + "loss": 0.867, + "step": 4220 + }, + { + "epoch": 2.34, + "learning_rate": 5.7993398429589506e-06, + "loss": 0.9022, + "step": 4225 + }, + { + "epoch": 2.34, + "learning_rate": 5.753053973686148e-06, + "loss": 0.9064, + "step": 4230 + }, + { + "epoch": 2.34, + "learning_rate": 5.7069295261135525e-06, + "loss": 0.9019, + "step": 4235 + }, + { + "epoch": 2.34, + "learning_rate": 5.66096688708104e-06, + "loss": 0.9397, + "step": 4240 + }, + { + "epoch": 2.35, + "learning_rate": 5.615166442071457e-06, + "loss": 0.8971, + "step": 4245 + }, + { + "epoch": 2.35, + "learning_rate": 5.569528575207339e-06, + "loss": 0.8902, + "step": 4250 + }, + { + "epoch": 2.35, + "learning_rate": 5.5240536692477e-06, + "loss": 0.8982, + "step": 4255 + }, + { + "epoch": 2.36, + "learning_rate": 5.4787421055848164e-06, + "loss": 0.9135, + "step": 4260 + }, + { + "epoch": 2.36, + "learning_rate": 5.433594264241043e-06, + "loss": 0.8969, + "step": 4265 + }, + { + "epoch": 2.36, + "learning_rate": 5.3886105238656055e-06, + "loss": 0.9036, + "step": 4270 + }, + { + "epoch": 2.36, + "learning_rate": 5.3437912617314425e-06, + "loss": 0.9313, + "step": 4275 + }, + { + "epoch": 2.37, + "learning_rate": 5.299136853732034e-06, + "loss": 0.9034, + "step": 4280 + }, + { + "epoch": 2.37, + "learning_rate": 5.254647674378252e-06, + "loss": 0.9673, + "step": 4285 + }, + { + "epoch": 2.37, + "learning_rate": 5.210324096795202e-06, + "loss": 0.9044, + "step": 4290 + }, + { + "epoch": 2.37, + "learning_rate": 5.166166492719124e-06, + "loss": 0.937, + "step": 4295 + }, + { + "epoch": 2.38, + "learning_rate": 5.122175232494255e-06, + "loss": 0.9095, + "step": 4300 + }, + { + "epoch": 2.38, + "learning_rate": 5.078350685069727e-06, + "loss": 0.8787, + "step": 4305 + }, + { + "epoch": 2.38, + "learning_rate": 5.03469321799647e-06, + "loss": 0.8945, + "step": 4310 + }, + { + "epoch": 2.39, + "learning_rate": 4.9912031974241376e-06, + "loss": 0.9285, + "step": 4315 + }, + { + "epoch": 2.39, + "learning_rate": 4.947880988098025e-06, + "loss": 0.9318, + "step": 4320 + }, + { + "epoch": 2.39, + "learning_rate": 4.90472695335602e-06, + "loss": 0.9171, + "step": 4325 + }, + { + "epoch": 2.39, + "learning_rate": 4.8617414551255545e-06, + "loss": 0.923, + "step": 4330 + }, + { + "epoch": 2.4, + "learning_rate": 4.818924853920545e-06, + "loss": 0.9153, + "step": 4335 + }, + { + "epoch": 2.4, + "learning_rate": 4.776277508838428e-06, + "loss": 0.907, + "step": 4340 + }, + { + "epoch": 2.4, + "learning_rate": 4.733799777557069e-06, + "loss": 0.8869, + "step": 4345 + }, + { + "epoch": 2.41, + "learning_rate": 4.691492016331842e-06, + "loss": 0.95, + "step": 4350 + }, + { + "epoch": 2.41, + "learning_rate": 4.6493545799925655e-06, + "loss": 0.9145, + "step": 4355 + }, + { + "epoch": 2.41, + "learning_rate": 4.607387821940609e-06, + "loss": 0.9175, + "step": 4360 + }, + { + "epoch": 2.41, + "learning_rate": 4.565592094145835e-06, + "loss": 0.9125, + "step": 4365 + }, + { + "epoch": 2.42, + "learning_rate": 4.523967747143745e-06, + "loss": 0.8972, + "step": 4370 + }, + { + "epoch": 2.42, + "learning_rate": 4.482515130032453e-06, + "loss": 0.9113, + "step": 4375 + }, + { + "epoch": 2.42, + "learning_rate": 4.441234590469817e-06, + "loss": 0.9447, + "step": 4380 + }, + { + "epoch": 2.42, + "learning_rate": 4.400126474670499e-06, + "loss": 0.9472, + "step": 4385 + }, + { + "epoch": 2.43, + "learning_rate": 4.359191127403059e-06, + "loss": 0.9391, + "step": 4390 + }, + { + "epoch": 2.43, + "learning_rate": 4.318428891987078e-06, + "loss": 0.9171, + "step": 4395 + }, + { + "epoch": 2.43, + "learning_rate": 4.2778401102902595e-06, + "loss": 0.9468, + "step": 4400 + } + ], + "max_steps": 5424, + "num_train_epochs": 3, + "total_flos": 1.4298287508239155e+18, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-4400/training_args.bin b/checkpoint-4400/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..77433459daca6603950afc463ff989e817f81ac3 --- /dev/null +++ b/checkpoint-4400/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0f6a8853599a52b1f15984d75ce4b6434c335fdc67d4509d0b2ca0d8c613590a +size 3362 diff --git a/checkpoint-4500/README.md b/checkpoint-4500/README.md new file mode 100644 index 0000000000000000000000000000000000000000..53c0fe6b436905ad7a2c060e98fe1ebdeac1a2fe --- /dev/null +++ b/checkpoint-4500/README.md @@ -0,0 +1,15 @@ +--- +library_name: peft +--- +## Training procedure + + +The following `bitsandbytes` quantization config was used during training: +- load_in_8bit: True +- llm_int8_threshold: 6.0 +- llm_int8_skip_modules: None +- llm_int8_enable_fp32_cpu_offload: False +### Framework versions + + +- PEFT 0.5.0 diff --git a/checkpoint-4500/adapter_config.json b/checkpoint-4500/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..514ae99ac7471cc26016f7575e44dd8304f29b07 --- /dev/null +++ b/checkpoint-4500/adapter_config.json @@ -0,0 +1,21 @@ +{ + "auto_mapping": null, + "base_model_name_or_path": "meta-llama/Llama-2-7b-chat-hf", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "q_proj", + "v_proj" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-4500/adapter_model.bin b/checkpoint-4500/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..63af71a1c86d4dbb594316059878b6a592245d35 --- /dev/null +++ b/checkpoint-4500/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e931e56d34cb63be8466e83a637ab4be2776def0f51df733eb7733df56530cec +size 16821197 diff --git a/checkpoint-4500/finetuning_args.json b/checkpoint-4500/finetuning_args.json new file mode 100644 index 0000000000000000000000000000000000000000..d01efc206b59c6f88548e8f3940579f2ed2af33b --- /dev/null +++ b/checkpoint-4500/finetuning_args.json @@ -0,0 +1,16 @@ +{ + "dpo_beta": 0.1, + "finetuning_type": "lora", + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "lora_rank": 8, + "lora_target": [ + "q_proj", + "v_proj" + ], + "name_module_trainable": "mlp", + "num_hidden_layers": 32, + "num_layer_trainable": 3, + "ppo_score_norm": false, + "resume_lora_training": true +} diff --git a/checkpoint-4500/optimizer.pt b/checkpoint-4500/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..f475c9c14b335af91eabd89f7fc687cb12418d47 --- /dev/null +++ b/checkpoint-4500/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:05d011cebe944bfcd8308e5d48b112032544ac64967bc0c893a87aeeb0d97aa8 +size 33629893 diff --git a/checkpoint-4500/rng_state.pth b/checkpoint-4500/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..cfffb63d47c2e9b69c137d6d0339913b2f01af5c --- /dev/null +++ b/checkpoint-4500/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e8c56a0698ffdaaccf52a1df9b250ff68f5f4259770834971fe67ad7f83066ef +size 14575 diff --git a/checkpoint-4500/scaler.pt b/checkpoint-4500/scaler.pt new file mode 100644 index 0000000000000000000000000000000000000000..0c17efbbf682491b842f902ea02a7a9e7fb89d50 --- /dev/null +++ b/checkpoint-4500/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b1f7e92e93639e115202345f1a47faf12ba9bd1c37f275e106cd04ee3b4956d7 +size 557 diff --git a/checkpoint-4500/scheduler.pt b/checkpoint-4500/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..9466d575ab56c7a7695ca70672edc9ed1e4940d7 --- /dev/null +++ b/checkpoint-4500/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bbae179294463c5869be71e913026a580b5d053866ca51da2efff9fe582ab925 +size 627 diff --git a/checkpoint-4500/trainer_state.json b/checkpoint-4500/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..4aa514681c252248afd22207045a914bedbacea1 --- /dev/null +++ b/checkpoint-4500/trainer_state.json @@ -0,0 +1,5416 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 2.4882499308819463, + "global_step": 4500, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.9999895164082156e-05, + "loss": 1.6349, + "step": 5 + }, + { + "epoch": 0.01, + "learning_rate": 4.999958065720787e-05, + "loss": 1.6199, + "step": 10 + }, + { + "epoch": 0.01, + "learning_rate": 4.999905648201487e-05, + "loss": 1.4834, + "step": 15 + }, + { + "epoch": 0.01, + "learning_rate": 4.999832264289934e-05, + "loss": 1.3882, + "step": 20 + }, + { + "epoch": 0.01, + "learning_rate": 4.999737914601591e-05, + "loss": 1.3679, + "step": 25 + }, + { + "epoch": 0.02, + "learning_rate": 4.999622599927756e-05, + "loss": 1.2396, + "step": 30 + }, + { + "epoch": 0.02, + "learning_rate": 4.999486321235559e-05, + "loss": 1.321, + "step": 35 + }, + { + "epoch": 0.02, + "learning_rate": 4.9993290796679516e-05, + "loss": 1.2874, + "step": 40 + }, + { + "epoch": 0.02, + "learning_rate": 4.999150876543699e-05, + "loss": 1.2607, + "step": 45 + }, + { + "epoch": 0.03, + "learning_rate": 4.9989517133573694e-05, + "loss": 1.2454, + "step": 50 + }, + { + "epoch": 0.03, + "learning_rate": 4.9987315917793174e-05, + "loss": 1.2799, + "step": 55 + }, + { + "epoch": 0.03, + "learning_rate": 4.998490513655676e-05, + "loss": 1.2575, + "step": 60 + }, + { + "epoch": 0.04, + "learning_rate": 4.998228481008337e-05, + "loss": 1.2404, + "step": 65 + }, + { + "epoch": 0.04, + "learning_rate": 4.997945496034934e-05, + "loss": 1.2219, + "step": 70 + }, + { + "epoch": 0.04, + "learning_rate": 4.9976415611088267e-05, + "loss": 1.2241, + "step": 75 + }, + { + "epoch": 0.04, + "learning_rate": 4.997316678779079e-05, + "loss": 1.1716, + "step": 80 + }, + { + "epoch": 0.05, + "learning_rate": 4.996970851770438e-05, + "loss": 1.1883, + "step": 85 + }, + { + "epoch": 0.05, + "learning_rate": 4.9966040829833115e-05, + "loss": 1.205, + "step": 90 + }, + { + "epoch": 0.05, + "learning_rate": 4.9962163754937426e-05, + "loss": 1.1246, + "step": 95 + }, + { + "epoch": 0.06, + "learning_rate": 4.995807732553384e-05, + "loss": 1.1636, + "step": 100 + }, + { + "epoch": 0.06, + "learning_rate": 4.9953781575894723e-05, + "loss": 1.158, + "step": 105 + }, + { + "epoch": 0.06, + "learning_rate": 4.9949276542048e-05, + "loss": 1.1477, + "step": 110 + }, + { + "epoch": 0.06, + "learning_rate": 4.9944562261776805e-05, + "loss": 1.1678, + "step": 115 + }, + { + "epoch": 0.07, + "learning_rate": 4.9939638774619216e-05, + "loss": 1.1501, + "step": 120 + }, + { + "epoch": 0.07, + "learning_rate": 4.99345061218679e-05, + "loss": 1.1955, + "step": 125 + }, + { + "epoch": 0.07, + "learning_rate": 4.9929164346569756e-05, + "loss": 1.1724, + "step": 130 + }, + { + "epoch": 0.07, + "learning_rate": 4.9923613493525576e-05, + "loss": 1.177, + "step": 135 + }, + { + "epoch": 0.08, + "learning_rate": 4.991785360928968e-05, + "loss": 1.1418, + "step": 140 + }, + { + "epoch": 0.08, + "learning_rate": 4.991188474216947e-05, + "loss": 1.1898, + "step": 145 + }, + { + "epoch": 0.08, + "learning_rate": 4.9905706942225094e-05, + "loss": 1.1479, + "step": 150 + }, + { + "epoch": 0.09, + "learning_rate": 4.9899320261268966e-05, + "loss": 1.1356, + "step": 155 + }, + { + "epoch": 0.09, + "learning_rate": 4.989272475286537e-05, + "loss": 1.1397, + "step": 160 + }, + { + "epoch": 0.09, + "learning_rate": 4.9885920472330004e-05, + "loss": 1.1215, + "step": 165 + }, + { + "epoch": 0.09, + "learning_rate": 4.9878907476729516e-05, + "loss": 1.167, + "step": 170 + }, + { + "epoch": 0.1, + "learning_rate": 4.9871685824881e-05, + "loss": 1.1219, + "step": 175 + }, + { + "epoch": 0.1, + "learning_rate": 4.9864255577351534e-05, + "loss": 1.0835, + "step": 180 + }, + { + "epoch": 0.1, + "learning_rate": 4.985661679645769e-05, + "loss": 1.0721, + "step": 185 + }, + { + "epoch": 0.11, + "learning_rate": 4.9848769546264915e-05, + "loss": 1.0692, + "step": 190 + }, + { + "epoch": 0.11, + "learning_rate": 4.9840713892587146e-05, + "loss": 1.0488, + "step": 195 + }, + { + "epoch": 0.11, + "learning_rate": 4.983244990298609e-05, + "loss": 1.1285, + "step": 200 + }, + { + "epoch": 0.11, + "learning_rate": 4.982397764677081e-05, + "loss": 1.0832, + "step": 205 + }, + { + "epoch": 0.12, + "learning_rate": 4.981529719499704e-05, + "loss": 1.0652, + "step": 210 + }, + { + "epoch": 0.12, + "learning_rate": 4.980640862046663e-05, + "loss": 1.1043, + "step": 215 + }, + { + "epoch": 0.12, + "learning_rate": 4.979731199772693e-05, + "loss": 1.112, + "step": 220 + }, + { + "epoch": 0.12, + "learning_rate": 4.9788007403070146e-05, + "loss": 1.1029, + "step": 225 + }, + { + "epoch": 0.13, + "learning_rate": 4.977849491453277e-05, + "loss": 1.0869, + "step": 230 + }, + { + "epoch": 0.13, + "learning_rate": 4.976877461189481e-05, + "loss": 1.0843, + "step": 235 + }, + { + "epoch": 0.13, + "learning_rate": 4.975884657667922e-05, + "loss": 1.0789, + "step": 240 + }, + { + "epoch": 0.14, + "learning_rate": 4.974871089215118e-05, + "loss": 1.0449, + "step": 245 + }, + { + "epoch": 0.14, + "learning_rate": 4.9738367643317405e-05, + "loss": 1.1053, + "step": 250 + }, + { + "epoch": 0.14, + "learning_rate": 4.9727816916925395e-05, + "loss": 1.0651, + "step": 255 + }, + { + "epoch": 0.14, + "learning_rate": 4.971705880146276e-05, + "loss": 1.0828, + "step": 260 + }, + { + "epoch": 0.15, + "learning_rate": 4.970609338715646e-05, + "loss": 1.0932, + "step": 265 + }, + { + "epoch": 0.15, + "learning_rate": 4.969492076597203e-05, + "loss": 1.0648, + "step": 270 + }, + { + "epoch": 0.15, + "learning_rate": 4.968354103161283e-05, + "loss": 1.0948, + "step": 275 + }, + { + "epoch": 0.15, + "learning_rate": 4.967195427951926e-05, + "loss": 1.0721, + "step": 280 + }, + { + "epoch": 0.16, + "learning_rate": 4.9660160606867936e-05, + "loss": 1.124, + "step": 285 + }, + { + "epoch": 0.16, + "learning_rate": 4.9648160112570896e-05, + "loss": 1.0963, + "step": 290 + }, + { + "epoch": 0.16, + "learning_rate": 4.9635952897274773e-05, + "loss": 1.1078, + "step": 295 + }, + { + "epoch": 0.17, + "learning_rate": 4.9623539063359925e-05, + "loss": 1.1059, + "step": 300 + }, + { + "epoch": 0.17, + "learning_rate": 4.961091871493962e-05, + "loss": 1.1032, + "step": 305 + }, + { + "epoch": 0.17, + "learning_rate": 4.959809195785912e-05, + "loss": 1.0595, + "step": 310 + }, + { + "epoch": 0.17, + "learning_rate": 4.958505889969481e-05, + "loss": 1.1096, + "step": 315 + }, + { + "epoch": 0.18, + "learning_rate": 4.957181964975329e-05, + "loss": 1.0589, + "step": 320 + }, + { + "epoch": 0.18, + "learning_rate": 4.955837431907049e-05, + "loss": 1.0608, + "step": 325 + }, + { + "epoch": 0.18, + "learning_rate": 4.954472302041069e-05, + "loss": 1.0819, + "step": 330 + }, + { + "epoch": 0.19, + "learning_rate": 4.9530865868265605e-05, + "loss": 1.0759, + "step": 335 + }, + { + "epoch": 0.19, + "learning_rate": 4.951680297885342e-05, + "loss": 1.0515, + "step": 340 + }, + { + "epoch": 0.19, + "learning_rate": 4.950253447011779e-05, + "loss": 1.0371, + "step": 345 + }, + { + "epoch": 0.19, + "learning_rate": 4.948806046172691e-05, + "loss": 1.0619, + "step": 350 + }, + { + "epoch": 0.2, + "learning_rate": 4.947338107507245e-05, + "loss": 1.0757, + "step": 355 + }, + { + "epoch": 0.2, + "learning_rate": 4.945849643326857e-05, + "loss": 1.0686, + "step": 360 + }, + { + "epoch": 0.2, + "learning_rate": 4.9443406661150874e-05, + "loss": 1.0809, + "step": 365 + }, + { + "epoch": 0.2, + "learning_rate": 4.942811188527537e-05, + "loss": 1.0704, + "step": 370 + }, + { + "epoch": 0.21, + "learning_rate": 4.941261223391742e-05, + "loss": 1.0655, + "step": 375 + }, + { + "epoch": 0.21, + "learning_rate": 4.939690783707063e-05, + "loss": 1.1182, + "step": 380 + }, + { + "epoch": 0.21, + "learning_rate": 4.938099882644578e-05, + "loss": 1.081, + "step": 385 + }, + { + "epoch": 0.22, + "learning_rate": 4.9364885335469734e-05, + "loss": 1.0792, + "step": 390 + }, + { + "epoch": 0.22, + "learning_rate": 4.93485674992843e-05, + "loss": 1.0244, + "step": 395 + }, + { + "epoch": 0.22, + "learning_rate": 4.933204545474511e-05, + "loss": 1.0531, + "step": 400 + }, + { + "epoch": 0.22, + "learning_rate": 4.9315319340420465e-05, + "loss": 1.048, + "step": 405 + }, + { + "epoch": 0.23, + "learning_rate": 4.929838929659015e-05, + "loss": 1.04, + "step": 410 + }, + { + "epoch": 0.23, + "learning_rate": 4.9281255465244314e-05, + "loss": 1.0686, + "step": 415 + }, + { + "epoch": 0.23, + "learning_rate": 4.926391799008223e-05, + "loss": 1.0933, + "step": 420 + }, + { + "epoch": 0.24, + "learning_rate": 4.924637701651111e-05, + "loss": 1.0313, + "step": 425 + }, + { + "epoch": 0.24, + "learning_rate": 4.9228632691644874e-05, + "loss": 1.0811, + "step": 430 + }, + { + "epoch": 0.24, + "learning_rate": 4.921068516430293e-05, + "loss": 1.0617, + "step": 435 + }, + { + "epoch": 0.24, + "learning_rate": 4.919253458500892e-05, + "loss": 1.0482, + "step": 440 + }, + { + "epoch": 0.25, + "learning_rate": 4.9174181105989445e-05, + "loss": 1.0681, + "step": 445 + }, + { + "epoch": 0.25, + "learning_rate": 4.9155624881172834e-05, + "loss": 1.0476, + "step": 450 + }, + { + "epoch": 0.25, + "learning_rate": 4.913686606618777e-05, + "loss": 1.0463, + "step": 455 + }, + { + "epoch": 0.25, + "learning_rate": 4.911790481836208e-05, + "loss": 1.0213, + "step": 460 + }, + { + "epoch": 0.26, + "learning_rate": 4.909874129672133e-05, + "loss": 0.9855, + "step": 465 + }, + { + "epoch": 0.26, + "learning_rate": 4.907937566198757e-05, + "loss": 1.045, + "step": 470 + }, + { + "epoch": 0.26, + "learning_rate": 4.9059808076577914e-05, + "loss": 1.0442, + "step": 475 + }, + { + "epoch": 0.27, + "learning_rate": 4.904003870460323e-05, + "loss": 1.0354, + "step": 480 + }, + { + "epoch": 0.27, + "learning_rate": 4.9020067711866735e-05, + "loss": 1.0331, + "step": 485 + }, + { + "epoch": 0.27, + "learning_rate": 4.899989526586261e-05, + "loss": 1.0585, + "step": 490 + }, + { + "epoch": 0.27, + "learning_rate": 4.8979521535774636e-05, + "loss": 1.0223, + "step": 495 + }, + { + "epoch": 0.28, + "learning_rate": 4.895894669247468e-05, + "loss": 1.0118, + "step": 500 + }, + { + "epoch": 0.28, + "learning_rate": 4.8938170908521356e-05, + "loss": 1.0508, + "step": 505 + }, + { + "epoch": 0.28, + "learning_rate": 4.8917194358158534e-05, + "loss": 1.0656, + "step": 510 + }, + { + "epoch": 0.28, + "learning_rate": 4.8896017217313886e-05, + "loss": 1.0655, + "step": 515 + }, + { + "epoch": 0.29, + "learning_rate": 4.887463966359741e-05, + "loss": 1.0833, + "step": 520 + }, + { + "epoch": 0.29, + "learning_rate": 4.8853061876299956e-05, + "loss": 1.068, + "step": 525 + }, + { + "epoch": 0.29, + "learning_rate": 4.8831284036391684e-05, + "loss": 1.0487, + "step": 530 + }, + { + "epoch": 0.3, + "learning_rate": 4.880930632652058e-05, + "loss": 1.059, + "step": 535 + }, + { + "epoch": 0.3, + "learning_rate": 4.878712893101092e-05, + "loss": 1.0408, + "step": 540 + }, + { + "epoch": 0.3, + "learning_rate": 4.876475203586171e-05, + "loss": 0.9976, + "step": 545 + }, + { + "epoch": 0.3, + "learning_rate": 4.874217582874514e-05, + "loss": 1.0358, + "step": 550 + }, + { + "epoch": 0.31, + "learning_rate": 4.8719400499005e-05, + "loss": 1.0073, + "step": 555 + }, + { + "epoch": 0.31, + "learning_rate": 4.869642623765509e-05, + "loss": 1.0742, + "step": 560 + }, + { + "epoch": 0.31, + "learning_rate": 4.867325323737765e-05, + "loss": 1.0463, + "step": 565 + }, + { + "epoch": 0.32, + "learning_rate": 4.864988169252168e-05, + "loss": 1.0242, + "step": 570 + }, + { + "epoch": 0.32, + "learning_rate": 4.8626311799101375e-05, + "loss": 1.0773, + "step": 575 + }, + { + "epoch": 0.32, + "learning_rate": 4.860254375479446e-05, + "loss": 1.0371, + "step": 580 + }, + { + "epoch": 0.32, + "learning_rate": 4.8578577758940504e-05, + "loss": 1.0399, + "step": 585 + }, + { + "epoch": 0.33, + "learning_rate": 4.855441401253928e-05, + "loss": 1.0411, + "step": 590 + }, + { + "epoch": 0.33, + "learning_rate": 4.8530052718249076e-05, + "loss": 1.0476, + "step": 595 + }, + { + "epoch": 0.33, + "learning_rate": 4.850549408038498e-05, + "loss": 1.0075, + "step": 600 + }, + { + "epoch": 0.33, + "learning_rate": 4.848073830491717e-05, + "loss": 1.0045, + "step": 605 + }, + { + "epoch": 0.34, + "learning_rate": 4.845578559946923e-05, + "loss": 1.0368, + "step": 610 + }, + { + "epoch": 0.34, + "learning_rate": 4.8430636173316306e-05, + "loss": 1.0156, + "step": 615 + }, + { + "epoch": 0.34, + "learning_rate": 4.840529023738348e-05, + "loss": 1.0334, + "step": 620 + }, + { + "epoch": 0.35, + "learning_rate": 4.837974800424389e-05, + "loss": 0.9994, + "step": 625 + }, + { + "epoch": 0.35, + "learning_rate": 4.8354009688117026e-05, + "loss": 1.0467, + "step": 630 + }, + { + "epoch": 0.35, + "learning_rate": 4.8328075504866874e-05, + "loss": 0.9968, + "step": 635 + }, + { + "epoch": 0.35, + "learning_rate": 4.8301945672000164e-05, + "loss": 1.041, + "step": 640 + }, + { + "epoch": 0.36, + "learning_rate": 4.8275620408664487e-05, + "loss": 0.9811, + "step": 645 + }, + { + "epoch": 0.36, + "learning_rate": 4.8249099935646494e-05, + "loss": 1.0235, + "step": 650 + }, + { + "epoch": 0.36, + "learning_rate": 4.822238447537003e-05, + "loss": 1.0152, + "step": 655 + }, + { + "epoch": 0.36, + "learning_rate": 4.819547425189429e-05, + "loss": 1.038, + "step": 660 + }, + { + "epoch": 0.37, + "learning_rate": 4.81683694909119e-05, + "loss": 1.0387, + "step": 665 + }, + { + "epoch": 0.37, + "learning_rate": 4.814107041974707e-05, + "loss": 1.019, + "step": 670 + }, + { + "epoch": 0.37, + "learning_rate": 4.811357726735366e-05, + "loss": 1.0403, + "step": 675 + }, + { + "epoch": 0.38, + "learning_rate": 4.808589026431324e-05, + "loss": 1.0828, + "step": 680 + }, + { + "epoch": 0.38, + "learning_rate": 4.805800964283322e-05, + "loss": 1.0128, + "step": 685 + }, + { + "epoch": 0.38, + "learning_rate": 4.802993563674483e-05, + "loss": 1.0502, + "step": 690 + }, + { + "epoch": 0.38, + "learning_rate": 4.80016684815012e-05, + "loss": 1.0169, + "step": 695 + }, + { + "epoch": 0.39, + "learning_rate": 4.7973208414175406e-05, + "loss": 1.0316, + "step": 700 + }, + { + "epoch": 0.39, + "learning_rate": 4.794455567345842e-05, + "loss": 1.0263, + "step": 705 + }, + { + "epoch": 0.39, + "learning_rate": 4.791571049965714e-05, + "loss": 1.0801, + "step": 710 + }, + { + "epoch": 0.4, + "learning_rate": 4.7886673134692404e-05, + "loss": 1.0575, + "step": 715 + }, + { + "epoch": 0.4, + "learning_rate": 4.7857443822096905e-05, + "loss": 0.9767, + "step": 720 + }, + { + "epoch": 0.4, + "learning_rate": 4.782802280701319e-05, + "loss": 0.9666, + "step": 725 + }, + { + "epoch": 0.4, + "learning_rate": 4.77984103361916e-05, + "loss": 0.9983, + "step": 730 + }, + { + "epoch": 0.41, + "learning_rate": 4.776860665798816e-05, + "loss": 1.0219, + "step": 735 + }, + { + "epoch": 0.41, + "learning_rate": 4.773861202236257e-05, + "loss": 0.9963, + "step": 740 + }, + { + "epoch": 0.41, + "learning_rate": 4.770842668087602e-05, + "loss": 1.0219, + "step": 745 + }, + { + "epoch": 0.41, + "learning_rate": 4.767805088668916e-05, + "loss": 1.0029, + "step": 750 + }, + { + "epoch": 0.42, + "learning_rate": 4.7647484894559936e-05, + "loss": 1.0077, + "step": 755 + }, + { + "epoch": 0.42, + "learning_rate": 4.7616728960841444e-05, + "loss": 0.9908, + "step": 760 + }, + { + "epoch": 0.42, + "learning_rate": 4.758578334347981e-05, + "loss": 1.0092, + "step": 765 + }, + { + "epoch": 0.43, + "learning_rate": 4.7554648302012015e-05, + "loss": 0.9874, + "step": 770 + }, + { + "epoch": 0.43, + "learning_rate": 4.7523324097563706e-05, + "loss": 1.0185, + "step": 775 + }, + { + "epoch": 0.43, + "learning_rate": 4.749181099284703e-05, + "loss": 1.0533, + "step": 780 + }, + { + "epoch": 0.43, + "learning_rate": 4.746010925215839e-05, + "loss": 1.0083, + "step": 785 + }, + { + "epoch": 0.44, + "learning_rate": 4.74282191413763e-05, + "loss": 1.0191, + "step": 790 + }, + { + "epoch": 0.44, + "learning_rate": 4.7396140927959045e-05, + "loss": 0.9786, + "step": 795 + }, + { + "epoch": 0.44, + "learning_rate": 4.7363874880942574e-05, + "loss": 1.0197, + "step": 800 + }, + { + "epoch": 0.45, + "learning_rate": 4.733142127093813e-05, + "loss": 1.0471, + "step": 805 + }, + { + "epoch": 0.45, + "learning_rate": 4.7298780370130014e-05, + "loss": 1.0134, + "step": 810 + }, + { + "epoch": 0.45, + "learning_rate": 4.726595245227336e-05, + "loss": 0.9801, + "step": 815 + }, + { + "epoch": 0.45, + "learning_rate": 4.723293779269173e-05, + "loss": 0.9944, + "step": 820 + }, + { + "epoch": 0.46, + "learning_rate": 4.7199736668274924e-05, + "loss": 1.0358, + "step": 825 + }, + { + "epoch": 0.46, + "learning_rate": 4.716634935747655e-05, + "loss": 0.9907, + "step": 830 + }, + { + "epoch": 0.46, + "learning_rate": 4.713277614031177e-05, + "loss": 1.0698, + "step": 835 + }, + { + "epoch": 0.46, + "learning_rate": 4.70990172983549e-05, + "loss": 1.0466, + "step": 840 + }, + { + "epoch": 0.47, + "learning_rate": 4.706507311473707e-05, + "loss": 0.9489, + "step": 845 + }, + { + "epoch": 0.47, + "learning_rate": 4.703094387414385e-05, + "loss": 0.9936, + "step": 850 + }, + { + "epoch": 0.47, + "learning_rate": 4.699662986281288e-05, + "loss": 0.9672, + "step": 855 + }, + { + "epoch": 0.48, + "learning_rate": 4.696213136853141e-05, + "loss": 1.0164, + "step": 860 + }, + { + "epoch": 0.48, + "learning_rate": 4.6927448680633954e-05, + "loss": 0.9817, + "step": 865 + }, + { + "epoch": 0.48, + "learning_rate": 4.689258208999983e-05, + "loss": 1.028, + "step": 870 + }, + { + "epoch": 0.48, + "learning_rate": 4.6857531889050716e-05, + "loss": 1.0239, + "step": 875 + }, + { + "epoch": 0.49, + "learning_rate": 4.682229837174821e-05, + "loss": 1.0378, + "step": 880 + }, + { + "epoch": 0.49, + "learning_rate": 4.678688183359135e-05, + "loss": 0.9908, + "step": 885 + }, + { + "epoch": 0.49, + "learning_rate": 4.675128257161418e-05, + "loss": 0.9384, + "step": 890 + }, + { + "epoch": 0.49, + "learning_rate": 4.671550088438319e-05, + "loss": 0.9481, + "step": 895 + }, + { + "epoch": 0.5, + "learning_rate": 4.6679537071994874e-05, + "loss": 0.9541, + "step": 900 + }, + { + "epoch": 0.5, + "learning_rate": 4.6643391436073165e-05, + "loss": 0.9787, + "step": 905 + }, + { + "epoch": 0.5, + "learning_rate": 4.660706427976693e-05, + "loss": 0.9897, + "step": 910 + }, + { + "epoch": 0.51, + "learning_rate": 4.657055590774745e-05, + "loss": 0.9947, + "step": 915 + }, + { + "epoch": 0.51, + "learning_rate": 4.6533866626205805e-05, + "loss": 0.9614, + "step": 920 + }, + { + "epoch": 0.51, + "learning_rate": 4.649699674285036e-05, + "loss": 1.0323, + "step": 925 + }, + { + "epoch": 0.51, + "learning_rate": 4.645994656690417e-05, + "loss": 1.0345, + "step": 930 + }, + { + "epoch": 0.52, + "learning_rate": 4.642271640910235e-05, + "loss": 1.0432, + "step": 935 + }, + { + "epoch": 0.52, + "learning_rate": 4.638530658168954e-05, + "loss": 1.0102, + "step": 940 + }, + { + "epoch": 0.52, + "learning_rate": 4.6347717398417203e-05, + "loss": 1.0361, + "step": 945 + }, + { + "epoch": 0.53, + "learning_rate": 4.6309949174541096e-05, + "loss": 0.9818, + "step": 950 + }, + { + "epoch": 0.53, + "learning_rate": 4.627200222681851e-05, + "loss": 1.0293, + "step": 955 + }, + { + "epoch": 0.53, + "learning_rate": 4.6233876873505694e-05, + "loss": 1.0125, + "step": 960 + }, + { + "epoch": 0.53, + "learning_rate": 4.619557343435516e-05, + "loss": 1.0039, + "step": 965 + }, + { + "epoch": 0.54, + "learning_rate": 4.615709223061302e-05, + "loss": 1.0146, + "step": 970 + }, + { + "epoch": 0.54, + "learning_rate": 4.611843358501624e-05, + "loss": 1.0352, + "step": 975 + }, + { + "epoch": 0.54, + "learning_rate": 4.6079597821789993e-05, + "loss": 1.0405, + "step": 980 + }, + { + "epoch": 0.54, + "learning_rate": 4.604058526664491e-05, + "loss": 0.9834, + "step": 985 + }, + { + "epoch": 0.55, + "learning_rate": 4.600139624677436e-05, + "loss": 1.0252, + "step": 990 + }, + { + "epoch": 0.55, + "learning_rate": 4.596203109085168e-05, + "loss": 1.0516, + "step": 995 + }, + { + "epoch": 0.55, + "learning_rate": 4.5922490129027464e-05, + "loss": 0.9762, + "step": 1000 + }, + { + "epoch": 0.56, + "learning_rate": 4.588277369292674e-05, + "loss": 0.9801, + "step": 1005 + }, + { + "epoch": 0.56, + "learning_rate": 4.5842882115646234e-05, + "loss": 1.0127, + "step": 1010 + }, + { + "epoch": 0.56, + "learning_rate": 4.580281573175157e-05, + "loss": 1.0415, + "step": 1015 + }, + { + "epoch": 0.56, + "learning_rate": 4.576257487727442e-05, + "loss": 0.9974, + "step": 1020 + }, + { + "epoch": 0.57, + "learning_rate": 4.572215988970974e-05, + "loss": 1.0097, + "step": 1025 + }, + { + "epoch": 0.57, + "learning_rate": 4.568157110801293e-05, + "loss": 1.0004, + "step": 1030 + }, + { + "epoch": 0.57, + "learning_rate": 4.5640808872596944e-05, + "loss": 0.9932, + "step": 1035 + }, + { + "epoch": 0.58, + "learning_rate": 4.5599873525329505e-05, + "loss": 0.9437, + "step": 1040 + }, + { + "epoch": 0.58, + "learning_rate": 4.555876540953019e-05, + "loss": 1.0004, + "step": 1045 + }, + { + "epoch": 0.58, + "learning_rate": 4.551748486996755e-05, + "loss": 0.9743, + "step": 1050 + }, + { + "epoch": 0.58, + "learning_rate": 4.547603225285626e-05, + "loss": 1.0303, + "step": 1055 + }, + { + "epoch": 0.59, + "learning_rate": 4.543440790585417e-05, + "loss": 1.0225, + "step": 1060 + }, + { + "epoch": 0.59, + "learning_rate": 4.539261217805939e-05, + "loss": 1.0102, + "step": 1065 + }, + { + "epoch": 0.59, + "learning_rate": 4.535064542000743e-05, + "loss": 1.0087, + "step": 1070 + }, + { + "epoch": 0.59, + "learning_rate": 4.5308507983668165e-05, + "loss": 0.952, + "step": 1075 + }, + { + "epoch": 0.6, + "learning_rate": 4.526620022244293e-05, + "loss": 0.9767, + "step": 1080 + }, + { + "epoch": 0.6, + "learning_rate": 4.522372249116158e-05, + "loss": 1.0049, + "step": 1085 + }, + { + "epoch": 0.6, + "learning_rate": 4.5181075146079456e-05, + "loss": 1.0397, + "step": 1090 + }, + { + "epoch": 0.61, + "learning_rate": 4.5138258544874455e-05, + "loss": 1.0167, + "step": 1095 + }, + { + "epoch": 0.61, + "learning_rate": 4.5095273046643985e-05, + "loss": 0.9966, + "step": 1100 + }, + { + "epoch": 0.61, + "learning_rate": 4.5052119011901986e-05, + "loss": 0.983, + "step": 1105 + }, + { + "epoch": 0.61, + "learning_rate": 4.500879680257587e-05, + "loss": 1.0301, + "step": 1110 + }, + { + "epoch": 0.62, + "learning_rate": 4.4965306782003535e-05, + "loss": 1.0371, + "step": 1115 + }, + { + "epoch": 0.62, + "learning_rate": 4.492164931493028e-05, + "loss": 1.0083, + "step": 1120 + }, + { + "epoch": 0.62, + "learning_rate": 4.487782476750575e-05, + "loss": 0.9728, + "step": 1125 + }, + { + "epoch": 0.62, + "learning_rate": 4.4833833507280884e-05, + "loss": 0.9892, + "step": 1130 + }, + { + "epoch": 0.63, + "learning_rate": 4.4789675903204805e-05, + "loss": 0.9847, + "step": 1135 + }, + { + "epoch": 0.63, + "learning_rate": 4.474535232562176e-05, + "loss": 1.0545, + "step": 1140 + }, + { + "epoch": 0.63, + "learning_rate": 4.470086314626797e-05, + "loss": 1.0204, + "step": 1145 + }, + { + "epoch": 0.64, + "learning_rate": 4.465620873826856e-05, + "loss": 0.9648, + "step": 1150 + }, + { + "epoch": 0.64, + "learning_rate": 4.46113894761344e-05, + "loss": 0.9545, + "step": 1155 + }, + { + "epoch": 0.64, + "learning_rate": 4.456640573575896e-05, + "loss": 1.0256, + "step": 1160 + }, + { + "epoch": 0.64, + "learning_rate": 4.4521257894415183e-05, + "loss": 0.99, + "step": 1165 + }, + { + "epoch": 0.65, + "learning_rate": 4.44759463307523e-05, + "loss": 0.9906, + "step": 1170 + }, + { + "epoch": 0.65, + "learning_rate": 4.443047142479266e-05, + "loss": 0.9876, + "step": 1175 + }, + { + "epoch": 0.65, + "learning_rate": 4.4384833557928553e-05, + "loss": 1.0495, + "step": 1180 + }, + { + "epoch": 0.66, + "learning_rate": 4.4339033112918966e-05, + "loss": 0.9869, + "step": 1185 + }, + { + "epoch": 0.66, + "learning_rate": 4.4293070473886456e-05, + "loss": 1.0299, + "step": 1190 + }, + { + "epoch": 0.66, + "learning_rate": 4.424694602631385e-05, + "loss": 1.0073, + "step": 1195 + }, + { + "epoch": 0.66, + "learning_rate": 4.420066015704105e-05, + "loss": 1.0023, + "step": 1200 + }, + { + "epoch": 0.67, + "learning_rate": 4.41542132542618e-05, + "loss": 0.9462, + "step": 1205 + }, + { + "epoch": 0.67, + "learning_rate": 4.410760570752037e-05, + "loss": 1.0116, + "step": 1210 + }, + { + "epoch": 0.67, + "learning_rate": 4.4060837907708375e-05, + "loss": 0.9652, + "step": 1215 + }, + { + "epoch": 0.67, + "learning_rate": 4.401391024706142e-05, + "loss": 1.0411, + "step": 1220 + }, + { + "epoch": 0.68, + "learning_rate": 4.396682311915586e-05, + "loss": 0.9691, + "step": 1225 + }, + { + "epoch": 0.68, + "learning_rate": 4.3919576918905495e-05, + "loss": 0.961, + "step": 1230 + }, + { + "epoch": 0.68, + "learning_rate": 4.387217204255819e-05, + "loss": 0.9602, + "step": 1235 + }, + { + "epoch": 0.69, + "learning_rate": 4.3824608887692666e-05, + "loss": 1.0197, + "step": 1240 + }, + { + "epoch": 0.69, + "learning_rate": 4.377688785321507e-05, + "loss": 0.9982, + "step": 1245 + }, + { + "epoch": 0.69, + "learning_rate": 4.372900933935569e-05, + "loss": 0.986, + "step": 1250 + }, + { + "epoch": 0.69, + "learning_rate": 4.368097374766556e-05, + "loss": 0.9744, + "step": 1255 + }, + { + "epoch": 0.7, + "learning_rate": 4.3632781481013105e-05, + "loss": 1.0078, + "step": 1260 + }, + { + "epoch": 0.7, + "learning_rate": 4.358443294358077e-05, + "loss": 0.9865, + "step": 1265 + }, + { + "epoch": 0.7, + "learning_rate": 4.35359285408616e-05, + "loss": 0.976, + "step": 1270 + }, + { + "epoch": 0.71, + "learning_rate": 4.348726867965591e-05, + "loss": 1.0141, + "step": 1275 + }, + { + "epoch": 0.71, + "learning_rate": 4.343845376806777e-05, + "loss": 0.952, + "step": 1280 + }, + { + "epoch": 0.71, + "learning_rate": 4.338948421550169e-05, + "loss": 1.0239, + "step": 1285 + }, + { + "epoch": 0.71, + "learning_rate": 4.334036043265909e-05, + "loss": 0.994, + "step": 1290 + }, + { + "epoch": 0.72, + "learning_rate": 4.329108283153492e-05, + "loss": 1.0505, + "step": 1295 + }, + { + "epoch": 0.72, + "learning_rate": 4.3241651825414195e-05, + "loss": 0.9919, + "step": 1300 + }, + { + "epoch": 0.72, + "learning_rate": 4.31920678288685e-05, + "loss": 1.0169, + "step": 1305 + }, + { + "epoch": 0.72, + "learning_rate": 4.3142331257752546e-05, + "loss": 0.9938, + "step": 1310 + }, + { + "epoch": 0.73, + "learning_rate": 4.309244252920064e-05, + "loss": 0.92, + "step": 1315 + }, + { + "epoch": 0.73, + "learning_rate": 4.304240206162326e-05, + "loss": 0.9537, + "step": 1320 + }, + { + "epoch": 0.73, + "learning_rate": 4.299221027470345e-05, + "loss": 0.996, + "step": 1325 + }, + { + "epoch": 0.74, + "learning_rate": 4.294186758939339e-05, + "loss": 0.9652, + "step": 1330 + }, + { + "epoch": 0.74, + "learning_rate": 4.2891374427910795e-05, + "loss": 1.0138, + "step": 1335 + }, + { + "epoch": 0.74, + "learning_rate": 4.284073121373544e-05, + "loss": 0.9267, + "step": 1340 + }, + { + "epoch": 0.74, + "learning_rate": 4.278993837160553e-05, + "loss": 0.9899, + "step": 1345 + }, + { + "epoch": 0.75, + "learning_rate": 4.273899632751422e-05, + "loss": 0.963, + "step": 1350 + }, + { + "epoch": 0.75, + "learning_rate": 4.2687905508705974e-05, + "loss": 1.0066, + "step": 1355 + }, + { + "epoch": 0.75, + "learning_rate": 4.263666634367303e-05, + "loss": 0.9939, + "step": 1360 + }, + { + "epoch": 0.75, + "learning_rate": 4.258527926215178e-05, + "loss": 0.9667, + "step": 1365 + }, + { + "epoch": 0.76, + "learning_rate": 4.253374469511917e-05, + "loss": 0.9999, + "step": 1370 + }, + { + "epoch": 0.76, + "learning_rate": 4.248206307478909e-05, + "loss": 0.9738, + "step": 1375 + }, + { + "epoch": 0.76, + "learning_rate": 4.243023483460875e-05, + "loss": 0.9829, + "step": 1380 + }, + { + "epoch": 0.77, + "learning_rate": 4.237826040925503e-05, + "loss": 1.0107, + "step": 1385 + }, + { + "epoch": 0.77, + "learning_rate": 4.232614023463088e-05, + "loss": 0.9992, + "step": 1390 + }, + { + "epoch": 0.77, + "learning_rate": 4.227387474786159e-05, + "loss": 0.9566, + "step": 1395 + }, + { + "epoch": 0.77, + "learning_rate": 4.222146438729119e-05, + "loss": 0.9722, + "step": 1400 + }, + { + "epoch": 0.78, + "learning_rate": 4.216890959247873e-05, + "loss": 0.988, + "step": 1405 + }, + { + "epoch": 0.78, + "learning_rate": 4.211621080419463e-05, + "loss": 1.0103, + "step": 1410 + }, + { + "epoch": 0.78, + "learning_rate": 4.206336846441695e-05, + "loss": 0.9805, + "step": 1415 + }, + { + "epoch": 0.79, + "learning_rate": 4.201038301632772e-05, + "loss": 1.0177, + "step": 1420 + }, + { + "epoch": 0.79, + "learning_rate": 4.195725490430917e-05, + "loss": 0.9938, + "step": 1425 + }, + { + "epoch": 0.79, + "learning_rate": 4.190398457394007e-05, + "loss": 1.0276, + "step": 1430 + }, + { + "epoch": 0.79, + "learning_rate": 4.1850572471991924e-05, + "loss": 0.9909, + "step": 1435 + }, + { + "epoch": 0.8, + "learning_rate": 4.1797019046425264e-05, + "loss": 0.9794, + "step": 1440 + }, + { + "epoch": 0.8, + "learning_rate": 4.1743324746385914e-05, + "loss": 1.0073, + "step": 1445 + }, + { + "epoch": 0.8, + "learning_rate": 4.1689490022201154e-05, + "loss": 1.0106, + "step": 1450 + }, + { + "epoch": 0.8, + "learning_rate": 4.163551532537601e-05, + "loss": 1.0241, + "step": 1455 + }, + { + "epoch": 0.81, + "learning_rate": 4.1581401108589425e-05, + "loss": 1.0178, + "step": 1460 + }, + { + "epoch": 0.81, + "learning_rate": 4.1527147825690495e-05, + "loss": 0.9426, + "step": 1465 + }, + { + "epoch": 0.81, + "learning_rate": 4.1472755931694626e-05, + "loss": 0.9642, + "step": 1470 + }, + { + "epoch": 0.82, + "learning_rate": 4.141822588277976e-05, + "loss": 0.9678, + "step": 1475 + }, + { + "epoch": 0.82, + "learning_rate": 4.136355813628251e-05, + "loss": 0.9654, + "step": 1480 + }, + { + "epoch": 0.82, + "learning_rate": 4.130875315069435e-05, + "loss": 0.9748, + "step": 1485 + }, + { + "epoch": 0.82, + "learning_rate": 4.125381138565775e-05, + "loss": 0.9321, + "step": 1490 + }, + { + "epoch": 0.83, + "learning_rate": 4.1198733301962346e-05, + "loss": 0.9885, + "step": 1495 + }, + { + "epoch": 0.83, + "learning_rate": 4.114351936154105e-05, + "loss": 0.9385, + "step": 1500 + }, + { + "epoch": 0.83, + "learning_rate": 4.108817002746619e-05, + "loss": 0.9362, + "step": 1505 + }, + { + "epoch": 0.83, + "learning_rate": 4.1032685763945625e-05, + "loss": 0.9764, + "step": 1510 + }, + { + "epoch": 0.84, + "learning_rate": 4.097706703631886e-05, + "loss": 0.961, + "step": 1515 + }, + { + "epoch": 0.84, + "learning_rate": 4.092131431105312e-05, + "loss": 0.9818, + "step": 1520 + }, + { + "epoch": 0.84, + "learning_rate": 4.086542805573945e-05, + "loss": 0.9855, + "step": 1525 + }, + { + "epoch": 0.85, + "learning_rate": 4.080940873908881e-05, + "loss": 0.9618, + "step": 1530 + }, + { + "epoch": 0.85, + "learning_rate": 4.07532568309281e-05, + "loss": 1.0102, + "step": 1535 + }, + { + "epoch": 0.85, + "learning_rate": 4.069697280219628e-05, + "loss": 0.9773, + "step": 1540 + }, + { + "epoch": 0.85, + "learning_rate": 4.0640557124940376e-05, + "loss": 0.9687, + "step": 1545 + }, + { + "epoch": 0.86, + "learning_rate": 4.058401027231152e-05, + "loss": 0.9818, + "step": 1550 + }, + { + "epoch": 0.86, + "learning_rate": 4.052733271856103e-05, + "loss": 0.9911, + "step": 1555 + }, + { + "epoch": 0.86, + "learning_rate": 4.0470524939036355e-05, + "loss": 0.9694, + "step": 1560 + }, + { + "epoch": 0.87, + "learning_rate": 4.0413587410177155e-05, + "loss": 0.9374, + "step": 1565 + }, + { + "epoch": 0.87, + "learning_rate": 4.035652060951128e-05, + "loss": 0.956, + "step": 1570 + }, + { + "epoch": 0.87, + "learning_rate": 4.0299325015650774e-05, + "loss": 0.9491, + "step": 1575 + }, + { + "epoch": 0.87, + "learning_rate": 4.024200110828783e-05, + "loss": 0.9569, + "step": 1580 + }, + { + "epoch": 0.88, + "learning_rate": 4.018454936819082e-05, + "loss": 0.9627, + "step": 1585 + }, + { + "epoch": 0.88, + "learning_rate": 4.012697027720018e-05, + "loss": 0.9922, + "step": 1590 + }, + { + "epoch": 0.88, + "learning_rate": 4.0069264318224506e-05, + "loss": 0.9703, + "step": 1595 + }, + { + "epoch": 0.88, + "learning_rate": 4.0011431975236337e-05, + "loss": 0.9436, + "step": 1600 + }, + { + "epoch": 0.89, + "learning_rate": 3.995347373326822e-05, + "loss": 0.961, + "step": 1605 + }, + { + "epoch": 0.89, + "learning_rate": 3.989539007840861e-05, + "loss": 0.9247, + "step": 1610 + }, + { + "epoch": 0.89, + "learning_rate": 3.983718149779775e-05, + "loss": 0.9537, + "step": 1615 + }, + { + "epoch": 0.9, + "learning_rate": 3.9778848479623656e-05, + "loss": 0.9415, + "step": 1620 + }, + { + "epoch": 0.9, + "learning_rate": 3.972039151311795e-05, + "loss": 0.9954, + "step": 1625 + }, + { + "epoch": 0.9, + "learning_rate": 3.966181108855183e-05, + "loss": 0.9451, + "step": 1630 + }, + { + "epoch": 0.9, + "learning_rate": 3.960310769723189e-05, + "loss": 0.9977, + "step": 1635 + }, + { + "epoch": 0.91, + "learning_rate": 3.9544281831496034e-05, + "loss": 0.9806, + "step": 1640 + }, + { + "epoch": 0.91, + "learning_rate": 3.9485333984709374e-05, + "loss": 0.9851, + "step": 1645 + }, + { + "epoch": 0.91, + "learning_rate": 3.942626465126001e-05, + "loss": 1.0089, + "step": 1650 + }, + { + "epoch": 0.92, + "learning_rate": 3.9367074326555e-05, + "loss": 0.9562, + "step": 1655 + }, + { + "epoch": 0.92, + "learning_rate": 3.930776350701609e-05, + "loss": 0.9892, + "step": 1660 + }, + { + "epoch": 0.92, + "learning_rate": 3.92483326900756e-05, + "loss": 0.9875, + "step": 1665 + }, + { + "epoch": 0.92, + "learning_rate": 3.91887823741723e-05, + "loss": 1.0012, + "step": 1670 + }, + { + "epoch": 0.93, + "learning_rate": 3.9129113058747136e-05, + "loss": 0.97, + "step": 1675 + }, + { + "epoch": 0.93, + "learning_rate": 3.9069325244239095e-05, + "loss": 0.9793, + "step": 1680 + }, + { + "epoch": 0.93, + "learning_rate": 3.900941943208103e-05, + "loss": 0.9864, + "step": 1685 + }, + { + "epoch": 0.93, + "learning_rate": 3.894939612469539e-05, + "loss": 0.9596, + "step": 1690 + }, + { + "epoch": 0.94, + "learning_rate": 3.888925582549006e-05, + "loss": 0.9679, + "step": 1695 + }, + { + "epoch": 0.94, + "learning_rate": 3.882899903885412e-05, + "loss": 0.9395, + "step": 1700 + }, + { + "epoch": 0.94, + "learning_rate": 3.876862627015361e-05, + "loss": 0.9774, + "step": 1705 + }, + { + "epoch": 0.95, + "learning_rate": 3.87081380257273e-05, + "loss": 0.9686, + "step": 1710 + }, + { + "epoch": 0.95, + "learning_rate": 3.864753481288244e-05, + "loss": 0.953, + "step": 1715 + }, + { + "epoch": 0.95, + "learning_rate": 3.8586817139890515e-05, + "loss": 0.9719, + "step": 1720 + }, + { + "epoch": 0.95, + "learning_rate": 3.852598551598294e-05, + "loss": 0.9748, + "step": 1725 + }, + { + "epoch": 0.96, + "learning_rate": 3.8465040451346874e-05, + "loss": 0.9741, + "step": 1730 + }, + { + "epoch": 0.96, + "learning_rate": 3.8403982457120836e-05, + "loss": 0.9747, + "step": 1735 + }, + { + "epoch": 0.96, + "learning_rate": 3.834281204539051e-05, + "loss": 0.9791, + "step": 1740 + }, + { + "epoch": 0.96, + "learning_rate": 3.828152972918438e-05, + "loss": 0.9704, + "step": 1745 + }, + { + "epoch": 0.97, + "learning_rate": 3.82201360224695e-05, + "loss": 0.9556, + "step": 1750 + }, + { + "epoch": 0.97, + "learning_rate": 3.815863144014711e-05, + "loss": 0.9552, + "step": 1755 + }, + { + "epoch": 0.97, + "learning_rate": 3.809701649804834e-05, + "loss": 0.9594, + "step": 1760 + }, + { + "epoch": 0.98, + "learning_rate": 3.8035291712929926e-05, + "loss": 0.959, + "step": 1765 + }, + { + "epoch": 0.98, + "learning_rate": 3.797345760246982e-05, + "loss": 1.0254, + "step": 1770 + }, + { + "epoch": 0.98, + "learning_rate": 3.791151468526289e-05, + "loss": 0.9925, + "step": 1775 + }, + { + "epoch": 0.98, + "learning_rate": 3.784946348081654e-05, + "loss": 0.9516, + "step": 1780 + }, + { + "epoch": 0.99, + "learning_rate": 3.7787304509546365e-05, + "loss": 0.954, + "step": 1785 + }, + { + "epoch": 0.99, + "learning_rate": 3.7725038292771774e-05, + "loss": 0.9465, + "step": 1790 + }, + { + "epoch": 0.99, + "learning_rate": 3.766266535271167e-05, + "loss": 0.9792, + "step": 1795 + }, + { + "epoch": 1.0, + "learning_rate": 3.760018621248e-05, + "loss": 0.964, + "step": 1800 + }, + { + "epoch": 1.0, + "learning_rate": 3.75376013960814e-05, + "loss": 0.9419, + "step": 1805 + }, + { + "epoch": 1.0, + "learning_rate": 3.747491142840681e-05, + "loss": 0.9818, + "step": 1810 + }, + { + "epoch": 1.0, + "learning_rate": 3.741211683522904e-05, + "loss": 0.9153, + "step": 1815 + }, + { + "epoch": 1.01, + "learning_rate": 3.734921814319841e-05, + "loss": 0.955, + "step": 1820 + }, + { + "epoch": 1.01, + "learning_rate": 3.728621587983828e-05, + "loss": 0.9467, + "step": 1825 + }, + { + "epoch": 1.01, + "learning_rate": 3.722311057354067e-05, + "loss": 0.9816, + "step": 1830 + }, + { + "epoch": 1.01, + "learning_rate": 3.715990275356178e-05, + "loss": 0.9727, + "step": 1835 + }, + { + "epoch": 1.02, + "learning_rate": 3.7096592950017617e-05, + "loss": 0.9822, + "step": 1840 + }, + { + "epoch": 1.02, + "learning_rate": 3.703318169387947e-05, + "loss": 0.942, + "step": 1845 + }, + { + "epoch": 1.02, + "learning_rate": 3.696966951696952e-05, + "loss": 0.9306, + "step": 1850 + }, + { + "epoch": 1.03, + "learning_rate": 3.690605695195637e-05, + "loss": 0.9654, + "step": 1855 + }, + { + "epoch": 1.03, + "learning_rate": 3.684234453235054e-05, + "loss": 0.9592, + "step": 1860 + }, + { + "epoch": 1.03, + "learning_rate": 3.677853279250003e-05, + "loss": 0.988, + "step": 1865 + }, + { + "epoch": 1.03, + "learning_rate": 3.671462226758583e-05, + "loss": 0.9462, + "step": 1870 + }, + { + "epoch": 1.04, + "learning_rate": 3.665061349361742e-05, + "loss": 0.9685, + "step": 1875 + }, + { + "epoch": 1.04, + "learning_rate": 3.658650700742828e-05, + "loss": 0.9772, + "step": 1880 + }, + { + "epoch": 1.04, + "learning_rate": 3.6522303346671404e-05, + "loss": 0.9482, + "step": 1885 + }, + { + "epoch": 1.05, + "learning_rate": 3.645800304981477e-05, + "loss": 1.0069, + "step": 1890 + }, + { + "epoch": 1.05, + "learning_rate": 3.639360665613683e-05, + "loss": 0.9003, + "step": 1895 + }, + { + "epoch": 1.05, + "learning_rate": 3.632911470572197e-05, + "loss": 0.9279, + "step": 1900 + }, + { + "epoch": 1.05, + "learning_rate": 3.626452773945603e-05, + "loss": 0.9237, + "step": 1905 + }, + { + "epoch": 1.06, + "learning_rate": 3.619984629902172e-05, + "loss": 1.0086, + "step": 1910 + }, + { + "epoch": 1.06, + "learning_rate": 3.613507092689409e-05, + "loss": 0.9625, + "step": 1915 + }, + { + "epoch": 1.06, + "learning_rate": 3.607020216633599e-05, + "loss": 0.9297, + "step": 1920 + }, + { + "epoch": 1.06, + "learning_rate": 3.60052405613935e-05, + "loss": 0.9718, + "step": 1925 + }, + { + "epoch": 1.07, + "learning_rate": 3.594018665689139e-05, + "loss": 0.9512, + "step": 1930 + }, + { + "epoch": 1.07, + "learning_rate": 3.5875040998428513e-05, + "loss": 0.9923, + "step": 1935 + }, + { + "epoch": 1.07, + "learning_rate": 3.5809804132373253e-05, + "loss": 0.9518, + "step": 1940 + }, + { + "epoch": 1.08, + "learning_rate": 3.574447660585897e-05, + "loss": 0.9324, + "step": 1945 + }, + { + "epoch": 1.08, + "learning_rate": 3.5679058966779344e-05, + "loss": 0.9724, + "step": 1950 + }, + { + "epoch": 1.08, + "learning_rate": 3.561355176378384e-05, + "loss": 0.978, + "step": 1955 + }, + { + "epoch": 1.08, + "learning_rate": 3.554795554627307e-05, + "loss": 0.9893, + "step": 1960 + }, + { + "epoch": 1.09, + "learning_rate": 3.548227086439422e-05, + "loss": 0.967, + "step": 1965 + }, + { + "epoch": 1.09, + "learning_rate": 3.541649826903639e-05, + "loss": 0.9529, + "step": 1970 + }, + { + "epoch": 1.09, + "learning_rate": 3.535063831182602e-05, + "loss": 0.9506, + "step": 1975 + }, + { + "epoch": 1.09, + "learning_rate": 3.528469154512224e-05, + "loss": 0.9525, + "step": 1980 + }, + { + "epoch": 1.1, + "learning_rate": 3.521865852201223e-05, + "loss": 0.9258, + "step": 1985 + }, + { + "epoch": 1.1, + "learning_rate": 3.5152539796306596e-05, + "loss": 0.9417, + "step": 1990 + }, + { + "epoch": 1.1, + "learning_rate": 3.508633592253472e-05, + "loss": 0.9465, + "step": 1995 + }, + { + "epoch": 1.11, + "learning_rate": 3.502004745594011e-05, + "loss": 0.9494, + "step": 2000 + }, + { + "epoch": 1.11, + "learning_rate": 3.4953674952475755e-05, + "loss": 0.9709, + "step": 2005 + }, + { + "epoch": 1.11, + "learning_rate": 3.488721896879943e-05, + "loss": 0.9581, + "step": 2010 + }, + { + "epoch": 1.11, + "learning_rate": 3.4820680062269074e-05, + "loss": 0.974, + "step": 2015 + }, + { + "epoch": 1.12, + "learning_rate": 3.4754058790938046e-05, + "loss": 0.9768, + "step": 2020 + }, + { + "epoch": 1.12, + "learning_rate": 3.468735571355055e-05, + "loss": 0.982, + "step": 2025 + }, + { + "epoch": 1.12, + "learning_rate": 3.4620571389536825e-05, + "loss": 0.984, + "step": 2030 + }, + { + "epoch": 1.13, + "learning_rate": 3.455370637900856e-05, + "loss": 0.9604, + "step": 2035 + }, + { + "epoch": 1.13, + "learning_rate": 3.448676124275414e-05, + "loss": 0.897, + "step": 2040 + }, + { + "epoch": 1.13, + "learning_rate": 3.4419736542233925e-05, + "loss": 0.9968, + "step": 2045 + }, + { + "epoch": 1.13, + "learning_rate": 3.4352632839575616e-05, + "loss": 0.9479, + "step": 2050 + }, + { + "epoch": 1.14, + "learning_rate": 3.428545069756946e-05, + "loss": 0.9724, + "step": 2055 + }, + { + "epoch": 1.14, + "learning_rate": 3.42181906796636e-05, + "loss": 0.9493, + "step": 2060 + }, + { + "epoch": 1.14, + "learning_rate": 3.415085334995927e-05, + "loss": 0.9348, + "step": 2065 + }, + { + "epoch": 1.14, + "learning_rate": 3.408343927320613e-05, + "loss": 0.9702, + "step": 2070 + }, + { + "epoch": 1.15, + "learning_rate": 3.401594901479753e-05, + "loss": 0.9089, + "step": 2075 + }, + { + "epoch": 1.15, + "learning_rate": 3.394838314076572e-05, + "loss": 0.9606, + "step": 2080 + }, + { + "epoch": 1.15, + "learning_rate": 3.3880742217777115e-05, + "loss": 0.9743, + "step": 2085 + }, + { + "epoch": 1.16, + "learning_rate": 3.381302681312759e-05, + "loss": 0.9469, + "step": 2090 + }, + { + "epoch": 1.16, + "learning_rate": 3.374523749473767e-05, + "loss": 0.949, + "step": 2095 + }, + { + "epoch": 1.16, + "learning_rate": 3.367737483114779e-05, + "loss": 0.9567, + "step": 2100 + }, + { + "epoch": 1.16, + "learning_rate": 3.360943939151351e-05, + "loss": 0.9718, + "step": 2105 + }, + { + "epoch": 1.17, + "learning_rate": 3.354143174560078e-05, + "loss": 0.9626, + "step": 2110 + }, + { + "epoch": 1.17, + "learning_rate": 3.3473352463781105e-05, + "loss": 0.9346, + "step": 2115 + }, + { + "epoch": 1.17, + "learning_rate": 3.340520211702681e-05, + "loss": 0.9208, + "step": 2120 + }, + { + "epoch": 1.18, + "learning_rate": 3.333698127690623e-05, + "loss": 0.9856, + "step": 2125 + }, + { + "epoch": 1.18, + "learning_rate": 3.326869051557891e-05, + "loss": 0.9049, + "step": 2130 + }, + { + "epoch": 1.18, + "learning_rate": 3.320033040579082e-05, + "loss": 0.9222, + "step": 2135 + }, + { + "epoch": 1.18, + "learning_rate": 3.3131901520869565e-05, + "loss": 0.9648, + "step": 2140 + }, + { + "epoch": 1.19, + "learning_rate": 3.306340443471951e-05, + "loss": 0.9538, + "step": 2145 + }, + { + "epoch": 1.19, + "learning_rate": 3.299483972181708e-05, + "loss": 0.9314, + "step": 2150 + }, + { + "epoch": 1.19, + "learning_rate": 3.292620795720583e-05, + "loss": 0.9576, + "step": 2155 + }, + { + "epoch": 1.19, + "learning_rate": 3.285750971649167e-05, + "loss": 0.9499, + "step": 2160 + }, + { + "epoch": 1.2, + "learning_rate": 3.278874557583807e-05, + "loss": 0.9568, + "step": 2165 + }, + { + "epoch": 1.2, + "learning_rate": 3.271991611196117e-05, + "loss": 0.9642, + "step": 2170 + }, + { + "epoch": 1.2, + "learning_rate": 3.265102190212497e-05, + "loss": 0.9526, + "step": 2175 + }, + { + "epoch": 1.21, + "learning_rate": 3.258206352413648e-05, + "loss": 0.933, + "step": 2180 + }, + { + "epoch": 1.21, + "learning_rate": 3.2513041556340887e-05, + "loss": 0.9683, + "step": 2185 + }, + { + "epoch": 1.21, + "learning_rate": 3.244395657761671e-05, + "loss": 0.9155, + "step": 2190 + }, + { + "epoch": 1.21, + "learning_rate": 3.2374809167370924e-05, + "loss": 0.9262, + "step": 2195 + }, + { + "epoch": 1.22, + "learning_rate": 3.230559990553409e-05, + "loss": 0.9778, + "step": 2200 + }, + { + "epoch": 1.22, + "learning_rate": 3.2236329372555544e-05, + "loss": 0.9577, + "step": 2205 + }, + { + "epoch": 1.22, + "learning_rate": 3.2166998149398465e-05, + "loss": 0.9286, + "step": 2210 + }, + { + "epoch": 1.22, + "learning_rate": 3.209760681753505e-05, + "loss": 0.9634, + "step": 2215 + }, + { + "epoch": 1.23, + "learning_rate": 3.2028155958941615e-05, + "loss": 0.9467, + "step": 2220 + }, + { + "epoch": 1.23, + "learning_rate": 3.195864615609373e-05, + "loss": 0.9543, + "step": 2225 + }, + { + "epoch": 1.23, + "learning_rate": 3.1889077991961304e-05, + "loss": 0.9914, + "step": 2230 + }, + { + "epoch": 1.24, + "learning_rate": 3.181945205000373e-05, + "loss": 0.9309, + "step": 2235 + }, + { + "epoch": 1.24, + "learning_rate": 3.1749768914164955e-05, + "loss": 0.9299, + "step": 2240 + }, + { + "epoch": 1.24, + "learning_rate": 3.168002916886864e-05, + "loss": 0.9462, + "step": 2245 + }, + { + "epoch": 1.24, + "learning_rate": 3.1610233399013194e-05, + "loss": 0.948, + "step": 2250 + }, + { + "epoch": 1.25, + "learning_rate": 3.15403821899669e-05, + "loss": 0.9335, + "step": 2255 + }, + { + "epoch": 1.25, + "learning_rate": 3.147047612756302e-05, + "loss": 0.952, + "step": 2260 + }, + { + "epoch": 1.25, + "learning_rate": 3.140051579809484e-05, + "loss": 0.9532, + "step": 2265 + }, + { + "epoch": 1.26, + "learning_rate": 3.133050178831079e-05, + "loss": 0.9853, + "step": 2270 + }, + { + "epoch": 1.26, + "learning_rate": 3.12604346854095e-05, + "loss": 0.8925, + "step": 2275 + }, + { + "epoch": 1.26, + "learning_rate": 3.119031507703491e-05, + "loss": 0.942, + "step": 2280 + }, + { + "epoch": 1.26, + "learning_rate": 3.112014355127129e-05, + "loss": 0.9132, + "step": 2285 + }, + { + "epoch": 1.27, + "learning_rate": 3.104992069663835e-05, + "loss": 0.9335, + "step": 2290 + }, + { + "epoch": 1.27, + "learning_rate": 3.0979647102086273e-05, + "loss": 0.9403, + "step": 2295 + }, + { + "epoch": 1.27, + "learning_rate": 3.090932335699081e-05, + "loss": 0.9246, + "step": 2300 + }, + { + "epoch": 1.27, + "learning_rate": 3.083895005114831e-05, + "loss": 0.912, + "step": 2305 + }, + { + "epoch": 1.28, + "learning_rate": 3.076852777477079e-05, + "loss": 0.9334, + "step": 2310 + }, + { + "epoch": 1.28, + "learning_rate": 3.069805711848096e-05, + "loss": 0.933, + "step": 2315 + }, + { + "epoch": 1.28, + "learning_rate": 3.062753867330729e-05, + "loss": 0.9348, + "step": 2320 + }, + { + "epoch": 1.29, + "learning_rate": 3.055697303067905e-05, + "loss": 0.9997, + "step": 2325 + }, + { + "epoch": 1.29, + "learning_rate": 3.048636078242137e-05, + "loss": 0.9196, + "step": 2330 + }, + { + "epoch": 1.29, + "learning_rate": 3.0415702520750235e-05, + "loss": 0.9735, + "step": 2335 + }, + { + "epoch": 1.29, + "learning_rate": 3.0344998838267525e-05, + "loss": 0.9395, + "step": 2340 + }, + { + "epoch": 1.3, + "learning_rate": 3.0274250327956093e-05, + "loss": 0.9455, + "step": 2345 + }, + { + "epoch": 1.3, + "learning_rate": 3.020345758317474e-05, + "loss": 0.968, + "step": 2350 + }, + { + "epoch": 1.3, + "learning_rate": 3.0132621197653245e-05, + "loss": 0.9403, + "step": 2355 + }, + { + "epoch": 1.3, + "learning_rate": 3.0061741765487418e-05, + "loss": 0.9267, + "step": 2360 + }, + { + "epoch": 1.31, + "learning_rate": 2.9990819881134073e-05, + "loss": 0.9734, + "step": 2365 + }, + { + "epoch": 1.31, + "learning_rate": 2.9919856139406093e-05, + "loss": 0.949, + "step": 2370 + }, + { + "epoch": 1.31, + "learning_rate": 2.9848851135467386e-05, + "loss": 0.9464, + "step": 2375 + }, + { + "epoch": 1.32, + "learning_rate": 2.977780546482794e-05, + "loss": 0.9709, + "step": 2380 + }, + { + "epoch": 1.32, + "learning_rate": 2.9706719723338795e-05, + "loss": 0.9202, + "step": 2385 + }, + { + "epoch": 1.32, + "learning_rate": 2.9635594507187074e-05, + "loss": 0.9625, + "step": 2390 + }, + { + "epoch": 1.32, + "learning_rate": 2.956443041289096e-05, + "loss": 0.9307, + "step": 2395 + }, + { + "epoch": 1.33, + "learning_rate": 2.9493228037294702e-05, + "loss": 0.907, + "step": 2400 + }, + { + "epoch": 1.33, + "learning_rate": 2.9421987977563613e-05, + "loss": 0.9487, + "step": 2405 + }, + { + "epoch": 1.33, + "learning_rate": 2.935071083117907e-05, + "loss": 0.9259, + "step": 2410 + }, + { + "epoch": 1.34, + "learning_rate": 2.9279397195933457e-05, + "loss": 0.9357, + "step": 2415 + }, + { + "epoch": 1.34, + "learning_rate": 2.920804766992521e-05, + "loss": 0.9149, + "step": 2420 + }, + { + "epoch": 1.34, + "learning_rate": 2.9136662851553787e-05, + "loss": 0.9664, + "step": 2425 + }, + { + "epoch": 1.34, + "learning_rate": 2.906524333951461e-05, + "loss": 0.973, + "step": 2430 + }, + { + "epoch": 1.35, + "learning_rate": 2.899378973279409e-05, + "loss": 0.9797, + "step": 2435 + }, + { + "epoch": 1.35, + "learning_rate": 2.892230263066459e-05, + "loss": 0.9604, + "step": 2440 + }, + { + "epoch": 1.35, + "learning_rate": 2.885078263267938e-05, + "loss": 0.9147, + "step": 2445 + }, + { + "epoch": 1.35, + "learning_rate": 2.8779230338667634e-05, + "loss": 0.9827, + "step": 2450 + }, + { + "epoch": 1.36, + "learning_rate": 2.870764634872939e-05, + "loss": 0.9362, + "step": 2455 + }, + { + "epoch": 1.36, + "learning_rate": 2.86360312632305e-05, + "loss": 0.9373, + "step": 2460 + }, + { + "epoch": 1.36, + "learning_rate": 2.8564385682797622e-05, + "loss": 0.955, + "step": 2465 + }, + { + "epoch": 1.37, + "learning_rate": 2.8492710208313177e-05, + "loss": 0.9108, + "step": 2470 + }, + { + "epoch": 1.37, + "learning_rate": 2.8421005440910303e-05, + "loss": 0.9259, + "step": 2475 + }, + { + "epoch": 1.37, + "learning_rate": 2.8349271981967797e-05, + "loss": 0.9333, + "step": 2480 + }, + { + "epoch": 1.37, + "learning_rate": 2.8277510433105102e-05, + "loss": 0.9437, + "step": 2485 + }, + { + "epoch": 1.38, + "learning_rate": 2.820572139617725e-05, + "loss": 0.9189, + "step": 2490 + }, + { + "epoch": 1.38, + "learning_rate": 2.8133905473269802e-05, + "loss": 0.9522, + "step": 2495 + }, + { + "epoch": 1.38, + "learning_rate": 2.8062063266693818e-05, + "loss": 0.9352, + "step": 2500 + }, + { + "epoch": 1.39, + "learning_rate": 2.7990195378980784e-05, + "loss": 0.9232, + "step": 2505 + }, + { + "epoch": 1.39, + "learning_rate": 2.7918302412877583e-05, + "loss": 0.8807, + "step": 2510 + }, + { + "epoch": 1.39, + "learning_rate": 2.7846384971341427e-05, + "loss": 0.9231, + "step": 2515 + }, + { + "epoch": 1.39, + "learning_rate": 2.7774443657534788e-05, + "loss": 0.9253, + "step": 2520 + }, + { + "epoch": 1.4, + "learning_rate": 2.770247907482036e-05, + "loss": 0.9646, + "step": 2525 + }, + { + "epoch": 1.4, + "learning_rate": 2.763049182675599e-05, + "loss": 0.9575, + "step": 2530 + }, + { + "epoch": 1.4, + "learning_rate": 2.7558482517089617e-05, + "loss": 0.9234, + "step": 2535 + }, + { + "epoch": 1.4, + "learning_rate": 2.748645174975421e-05, + "loss": 0.9215, + "step": 2540 + }, + { + "epoch": 1.41, + "learning_rate": 2.74144001288627e-05, + "loss": 0.959, + "step": 2545 + }, + { + "epoch": 1.41, + "learning_rate": 2.7342328258702894e-05, + "loss": 0.9509, + "step": 2550 + }, + { + "epoch": 1.41, + "learning_rate": 2.727023674373246e-05, + "loss": 0.8989, + "step": 2555 + }, + { + "epoch": 1.42, + "learning_rate": 2.7198126188573807e-05, + "loss": 0.9653, + "step": 2560 + }, + { + "epoch": 1.42, + "learning_rate": 2.7125997198009028e-05, + "loss": 0.9046, + "step": 2565 + }, + { + "epoch": 1.42, + "learning_rate": 2.7053850376974848e-05, + "loss": 0.9318, + "step": 2570 + }, + { + "epoch": 1.42, + "learning_rate": 2.6981686330557516e-05, + "loss": 0.9292, + "step": 2575 + }, + { + "epoch": 1.43, + "learning_rate": 2.6909505663987756e-05, + "loss": 0.99, + "step": 2580 + }, + { + "epoch": 1.43, + "learning_rate": 2.6837308982635678e-05, + "loss": 0.9737, + "step": 2585 + }, + { + "epoch": 1.43, + "learning_rate": 2.6765096892005726e-05, + "loss": 0.9649, + "step": 2590 + }, + { + "epoch": 1.43, + "learning_rate": 2.6692869997731545e-05, + "loss": 0.9687, + "step": 2595 + }, + { + "epoch": 1.44, + "learning_rate": 2.6620628905570964e-05, + "loss": 0.9708, + "step": 2600 + }, + { + "epoch": 1.44, + "learning_rate": 2.6548374221400884e-05, + "loss": 0.9498, + "step": 2605 + }, + { + "epoch": 1.44, + "learning_rate": 2.6476106551212188e-05, + "loss": 0.9512, + "step": 2610 + }, + { + "epoch": 1.45, + "learning_rate": 2.6403826501104682e-05, + "loss": 0.964, + "step": 2615 + }, + { + "epoch": 1.45, + "learning_rate": 2.6331534677281998e-05, + "loss": 0.9321, + "step": 2620 + }, + { + "epoch": 1.45, + "learning_rate": 2.6259231686046508e-05, + "loss": 0.9032, + "step": 2625 + }, + { + "epoch": 1.45, + "learning_rate": 2.6186918133794252e-05, + "loss": 0.9543, + "step": 2630 + }, + { + "epoch": 1.46, + "learning_rate": 2.6114594627009847e-05, + "loss": 0.9355, + "step": 2635 + }, + { + "epoch": 1.46, + "learning_rate": 2.604226177226137e-05, + "loss": 0.9111, + "step": 2640 + }, + { + "epoch": 1.46, + "learning_rate": 2.596992017619534e-05, + "loss": 0.9896, + "step": 2645 + }, + { + "epoch": 1.47, + "learning_rate": 2.589757044553155e-05, + "loss": 0.9715, + "step": 2650 + }, + { + "epoch": 1.47, + "learning_rate": 2.5825213187058045e-05, + "loss": 0.8911, + "step": 2655 + }, + { + "epoch": 1.47, + "learning_rate": 2.5752849007625986e-05, + "loss": 0.9446, + "step": 2660 + }, + { + "epoch": 1.47, + "learning_rate": 2.568047851414459e-05, + "loss": 0.9453, + "step": 2665 + }, + { + "epoch": 1.48, + "learning_rate": 2.5608102313576027e-05, + "loss": 0.9244, + "step": 2670 + }, + { + "epoch": 1.48, + "learning_rate": 2.553572101293033e-05, + "loss": 0.9478, + "step": 2675 + }, + { + "epoch": 1.48, + "learning_rate": 2.546333521926031e-05, + "loss": 0.9708, + "step": 2680 + }, + { + "epoch": 1.48, + "learning_rate": 2.5390945539656445e-05, + "loss": 0.9266, + "step": 2685 + }, + { + "epoch": 1.49, + "learning_rate": 2.5318552581241822e-05, + "loss": 0.9199, + "step": 2690 + }, + { + "epoch": 1.49, + "learning_rate": 2.524615695116702e-05, + "loss": 0.9666, + "step": 2695 + }, + { + "epoch": 1.49, + "learning_rate": 2.5173759256605027e-05, + "loss": 0.9247, + "step": 2700 + }, + { + "epoch": 1.5, + "learning_rate": 2.510136010474614e-05, + "loss": 0.9534, + "step": 2705 + }, + { + "epoch": 1.5, + "learning_rate": 2.5028960102792887e-05, + "loss": 0.9502, + "step": 2710 + }, + { + "epoch": 1.5, + "learning_rate": 2.4971039897207112e-05, + "loss": 0.9331, + "step": 2715 + }, + { + "epoch": 1.5, + "learning_rate": 2.4898639895253865e-05, + "loss": 0.9806, + "step": 2720 + }, + { + "epoch": 1.51, + "learning_rate": 2.4826240743394982e-05, + "loss": 0.9525, + "step": 2725 + }, + { + "epoch": 1.51, + "learning_rate": 2.4753843048832985e-05, + "loss": 0.937, + "step": 2730 + }, + { + "epoch": 1.51, + "learning_rate": 2.4681447418758187e-05, + "loss": 0.935, + "step": 2735 + }, + { + "epoch": 1.52, + "learning_rate": 2.460905446034356e-05, + "loss": 0.9662, + "step": 2740 + }, + { + "epoch": 1.52, + "learning_rate": 2.45366647807397e-05, + "loss": 0.9249, + "step": 2745 + }, + { + "epoch": 1.52, + "learning_rate": 2.446427898706967e-05, + "loss": 0.9673, + "step": 2750 + }, + { + "epoch": 1.52, + "learning_rate": 2.439189768642398e-05, + "loss": 0.9316, + "step": 2755 + }, + { + "epoch": 1.53, + "learning_rate": 2.431952148585541e-05, + "loss": 0.9297, + "step": 2760 + }, + { + "epoch": 1.53, + "learning_rate": 2.424715099237402e-05, + "loss": 0.9278, + "step": 2765 + }, + { + "epoch": 1.53, + "learning_rate": 2.4174786812941968e-05, + "loss": 0.8954, + "step": 2770 + }, + { + "epoch": 1.53, + "learning_rate": 2.4102429554468456e-05, + "loss": 0.9586, + "step": 2775 + }, + { + "epoch": 1.54, + "learning_rate": 2.4030079823804673e-05, + "loss": 0.9119, + "step": 2780 + }, + { + "epoch": 1.54, + "learning_rate": 2.395773822773863e-05, + "loss": 0.8949, + "step": 2785 + }, + { + "epoch": 1.54, + "learning_rate": 2.3885405372990166e-05, + "loss": 0.9506, + "step": 2790 + }, + { + "epoch": 1.55, + "learning_rate": 2.3813081866205754e-05, + "loss": 0.9087, + "step": 2795 + }, + { + "epoch": 1.55, + "learning_rate": 2.3740768313953494e-05, + "loss": 0.923, + "step": 2800 + }, + { + "epoch": 1.55, + "learning_rate": 2.3668465322718004e-05, + "loss": 1.0212, + "step": 2805 + }, + { + "epoch": 1.55, + "learning_rate": 2.359617349889532e-05, + "loss": 0.9182, + "step": 2810 + }, + { + "epoch": 1.56, + "learning_rate": 2.3523893448787818e-05, + "loss": 0.9316, + "step": 2815 + }, + { + "epoch": 1.56, + "learning_rate": 2.3451625778599122e-05, + "loss": 0.94, + "step": 2820 + }, + { + "epoch": 1.56, + "learning_rate": 2.3379371094429038e-05, + "loss": 0.9181, + "step": 2825 + }, + { + "epoch": 1.56, + "learning_rate": 2.3307130002268457e-05, + "loss": 0.937, + "step": 2830 + }, + { + "epoch": 1.57, + "learning_rate": 2.3234903107994287e-05, + "loss": 0.9026, + "step": 2835 + }, + { + "epoch": 1.57, + "learning_rate": 2.3162691017364317e-05, + "loss": 0.954, + "step": 2840 + }, + { + "epoch": 1.57, + "learning_rate": 2.3090494336012253e-05, + "loss": 0.9661, + "step": 2845 + }, + { + "epoch": 1.58, + "learning_rate": 2.3018313669442483e-05, + "loss": 0.9127, + "step": 2850 + }, + { + "epoch": 1.58, + "learning_rate": 2.2946149623025158e-05, + "loss": 0.9317, + "step": 2855 + }, + { + "epoch": 1.58, + "learning_rate": 2.2874002801990978e-05, + "loss": 0.9856, + "step": 2860 + }, + { + "epoch": 1.58, + "learning_rate": 2.28018738114262e-05, + "loss": 1.0021, + "step": 2865 + }, + { + "epoch": 1.59, + "learning_rate": 2.272976325626755e-05, + "loss": 0.9655, + "step": 2870 + }, + { + "epoch": 1.59, + "learning_rate": 2.265767174129711e-05, + "loss": 0.9619, + "step": 2875 + }, + { + "epoch": 1.59, + "learning_rate": 2.2585599871137313e-05, + "loss": 0.9383, + "step": 2880 + }, + { + "epoch": 1.6, + "learning_rate": 2.251354825024579e-05, + "loss": 0.9332, + "step": 2885 + }, + { + "epoch": 1.6, + "learning_rate": 2.244151748291039e-05, + "loss": 0.9544, + "step": 2890 + }, + { + "epoch": 1.6, + "learning_rate": 2.236950817324401e-05, + "loss": 0.9343, + "step": 2895 + }, + { + "epoch": 1.6, + "learning_rate": 2.2297520925179647e-05, + "loss": 0.9189, + "step": 2900 + }, + { + "epoch": 1.61, + "learning_rate": 2.222555634246521e-05, + "loss": 0.9341, + "step": 2905 + }, + { + "epoch": 1.61, + "learning_rate": 2.215361502865858e-05, + "loss": 0.9567, + "step": 2910 + }, + { + "epoch": 1.61, + "learning_rate": 2.2081697587122423e-05, + "loss": 0.9047, + "step": 2915 + }, + { + "epoch": 1.61, + "learning_rate": 2.200980462101922e-05, + "loss": 0.9126, + "step": 2920 + }, + { + "epoch": 1.62, + "learning_rate": 2.1937936733306195e-05, + "loss": 0.9523, + "step": 2925 + }, + { + "epoch": 1.62, + "learning_rate": 2.18660945267302e-05, + "loss": 0.8802, + "step": 2930 + }, + { + "epoch": 1.62, + "learning_rate": 2.179427860382276e-05, + "loss": 0.9197, + "step": 2935 + }, + { + "epoch": 1.63, + "learning_rate": 2.1722489566894903e-05, + "loss": 0.9255, + "step": 2940 + }, + { + "epoch": 1.63, + "learning_rate": 2.1650728018032206e-05, + "loss": 0.8921, + "step": 2945 + }, + { + "epoch": 1.63, + "learning_rate": 2.15789945590897e-05, + "loss": 0.9607, + "step": 2950 + }, + { + "epoch": 1.63, + "learning_rate": 2.150728979168683e-05, + "loss": 0.9755, + "step": 2955 + }, + { + "epoch": 1.64, + "learning_rate": 2.1435614317202384e-05, + "loss": 0.9943, + "step": 2960 + }, + { + "epoch": 1.64, + "learning_rate": 2.1363968736769508e-05, + "loss": 0.9463, + "step": 2965 + }, + { + "epoch": 1.64, + "learning_rate": 2.1292353651270617e-05, + "loss": 0.9107, + "step": 2970 + }, + { + "epoch": 1.65, + "learning_rate": 2.1220769661332365e-05, + "loss": 0.9311, + "step": 2975 + }, + { + "epoch": 1.65, + "learning_rate": 2.1149217367320622e-05, + "loss": 0.9459, + "step": 2980 + }, + { + "epoch": 1.65, + "learning_rate": 2.107769736933541e-05, + "loss": 0.9439, + "step": 2985 + }, + { + "epoch": 1.65, + "learning_rate": 2.100621026720591e-05, + "loss": 0.9719, + "step": 2990 + }, + { + "epoch": 1.66, + "learning_rate": 2.093475666048539e-05, + "loss": 0.9569, + "step": 2995 + }, + { + "epoch": 1.66, + "learning_rate": 2.0863337148446222e-05, + "loss": 0.9308, + "step": 3000 + }, + { + "epoch": 1.66, + "learning_rate": 2.07919523300748e-05, + "loss": 0.9269, + "step": 3005 + }, + { + "epoch": 1.66, + "learning_rate": 2.0720602804066552e-05, + "loss": 0.9358, + "step": 3010 + }, + { + "epoch": 1.67, + "learning_rate": 2.0649289168820943e-05, + "loss": 0.9291, + "step": 3015 + }, + { + "epoch": 1.67, + "learning_rate": 2.0578012022436386e-05, + "loss": 0.969, + "step": 3020 + }, + { + "epoch": 1.67, + "learning_rate": 2.0506771962705304e-05, + "loss": 0.9319, + "step": 3025 + }, + { + "epoch": 1.68, + "learning_rate": 2.0435569587109042e-05, + "loss": 0.9574, + "step": 3030 + }, + { + "epoch": 1.68, + "learning_rate": 2.036440549281293e-05, + "loss": 0.9654, + "step": 3035 + }, + { + "epoch": 1.68, + "learning_rate": 2.0293280276661204e-05, + "loss": 0.9742, + "step": 3040 + }, + { + "epoch": 1.68, + "learning_rate": 2.0222194535172067e-05, + "loss": 0.9014, + "step": 3045 + }, + { + "epoch": 1.69, + "learning_rate": 2.0151148864532623e-05, + "loss": 0.9246, + "step": 3050 + }, + { + "epoch": 1.69, + "learning_rate": 2.0080143860593913e-05, + "loss": 0.9522, + "step": 3055 + }, + { + "epoch": 1.69, + "learning_rate": 2.0009180118865933e-05, + "loss": 0.9567, + "step": 3060 + }, + { + "epoch": 1.69, + "learning_rate": 1.9938258234512588e-05, + "loss": 0.983, + "step": 3065 + }, + { + "epoch": 1.7, + "learning_rate": 1.9867378802346764e-05, + "loss": 0.8722, + "step": 3070 + }, + { + "epoch": 1.7, + "learning_rate": 1.979654241682527e-05, + "loss": 0.9122, + "step": 3075 + }, + { + "epoch": 1.7, + "learning_rate": 1.972574967204391e-05, + "loss": 0.9362, + "step": 3080 + }, + { + "epoch": 1.71, + "learning_rate": 1.9655001161732478e-05, + "loss": 0.8944, + "step": 3085 + }, + { + "epoch": 1.71, + "learning_rate": 1.9584297479249774e-05, + "loss": 0.9329, + "step": 3090 + }, + { + "epoch": 1.71, + "learning_rate": 1.9513639217578636e-05, + "loss": 0.9707, + "step": 3095 + }, + { + "epoch": 1.71, + "learning_rate": 1.9443026969320955e-05, + "loss": 0.9367, + "step": 3100 + }, + { + "epoch": 1.72, + "learning_rate": 1.937246132669272e-05, + "loss": 0.896, + "step": 3105 + }, + { + "epoch": 1.72, + "learning_rate": 1.9301942881519047e-05, + "loss": 0.97, + "step": 3110 + }, + { + "epoch": 1.72, + "learning_rate": 1.9231472225229216e-05, + "loss": 0.9638, + "step": 3115 + }, + { + "epoch": 1.73, + "learning_rate": 1.9161049948851684e-05, + "loss": 0.9561, + "step": 3120 + }, + { + "epoch": 1.73, + "learning_rate": 1.9090676643009193e-05, + "loss": 0.9734, + "step": 3125 + }, + { + "epoch": 1.73, + "learning_rate": 1.902035289791373e-05, + "loss": 0.9651, + "step": 3130 + }, + { + "epoch": 1.73, + "learning_rate": 1.8950079303361658e-05, + "loss": 0.9489, + "step": 3135 + }, + { + "epoch": 1.74, + "learning_rate": 1.8879856448728723e-05, + "loss": 0.9893, + "step": 3140 + }, + { + "epoch": 1.74, + "learning_rate": 1.8809684922965097e-05, + "loss": 0.9549, + "step": 3145 + }, + { + "epoch": 1.74, + "learning_rate": 1.8739565314590507e-05, + "loss": 0.9196, + "step": 3150 + }, + { + "epoch": 1.74, + "learning_rate": 1.8669498211689216e-05, + "loss": 0.9568, + "step": 3155 + }, + { + "epoch": 1.75, + "learning_rate": 1.859948420190517e-05, + "loss": 0.904, + "step": 3160 + }, + { + "epoch": 1.75, + "learning_rate": 1.852952387243698e-05, + "loss": 0.9375, + "step": 3165 + }, + { + "epoch": 1.75, + "learning_rate": 1.8459617810033096e-05, + "loss": 0.9048, + "step": 3170 + }, + { + "epoch": 1.76, + "learning_rate": 1.83897666009868e-05, + "loss": 0.9514, + "step": 3175 + }, + { + "epoch": 1.76, + "learning_rate": 1.8319970831131363e-05, + "loss": 0.8855, + "step": 3180 + }, + { + "epoch": 1.76, + "learning_rate": 1.825023108583505e-05, + "loss": 0.9119, + "step": 3185 + }, + { + "epoch": 1.76, + "learning_rate": 1.818054794999628e-05, + "loss": 0.9733, + "step": 3190 + }, + { + "epoch": 1.77, + "learning_rate": 1.8110922008038705e-05, + "loss": 0.9597, + "step": 3195 + }, + { + "epoch": 1.77, + "learning_rate": 1.8041353843906275e-05, + "loss": 0.8952, + "step": 3200 + }, + { + "epoch": 1.77, + "learning_rate": 1.797184404105839e-05, + "loss": 0.8934, + "step": 3205 + }, + { + "epoch": 1.77, + "learning_rate": 1.7902393182464955e-05, + "loss": 0.9299, + "step": 3210 + }, + { + "epoch": 1.78, + "learning_rate": 1.7833001850601544e-05, + "loss": 0.9247, + "step": 3215 + }, + { + "epoch": 1.78, + "learning_rate": 1.7763670627444465e-05, + "loss": 0.9672, + "step": 3220 + }, + { + "epoch": 1.78, + "learning_rate": 1.7694400094465913e-05, + "loss": 0.9451, + "step": 3225 + }, + { + "epoch": 1.79, + "learning_rate": 1.7625190832629085e-05, + "loss": 0.9294, + "step": 3230 + }, + { + "epoch": 1.79, + "learning_rate": 1.7556043422383293e-05, + "loss": 0.9145, + "step": 3235 + }, + { + "epoch": 1.79, + "learning_rate": 1.7486958443659112e-05, + "loss": 0.9508, + "step": 3240 + }, + { + "epoch": 1.79, + "learning_rate": 1.7417936475863526e-05, + "loss": 0.8725, + "step": 3245 + }, + { + "epoch": 1.8, + "learning_rate": 1.7348978097875036e-05, + "loss": 0.9195, + "step": 3250 + }, + { + "epoch": 1.8, + "learning_rate": 1.728008388803883e-05, + "loss": 0.933, + "step": 3255 + }, + { + "epoch": 1.8, + "learning_rate": 1.7211254424161933e-05, + "loss": 0.9747, + "step": 3260 + }, + { + "epoch": 1.81, + "learning_rate": 1.7142490283508324e-05, + "loss": 0.9168, + "step": 3265 + }, + { + "epoch": 1.81, + "learning_rate": 1.707379204279418e-05, + "loss": 0.9844, + "step": 3270 + }, + { + "epoch": 1.81, + "learning_rate": 1.700516027818293e-05, + "loss": 0.9071, + "step": 3275 + }, + { + "epoch": 1.81, + "learning_rate": 1.6936595565280488e-05, + "loss": 0.9311, + "step": 3280 + }, + { + "epoch": 1.82, + "learning_rate": 1.686809847913045e-05, + "loss": 0.9419, + "step": 3285 + }, + { + "epoch": 1.82, + "learning_rate": 1.679966959420918e-05, + "loss": 0.9679, + "step": 3290 + }, + { + "epoch": 1.82, + "learning_rate": 1.67313094844211e-05, + "loss": 0.9601, + "step": 3295 + }, + { + "epoch": 1.82, + "learning_rate": 1.6663018723093774e-05, + "loss": 0.9022, + "step": 3300 + }, + { + "epoch": 1.83, + "learning_rate": 1.6594797882973196e-05, + "loss": 0.8925, + "step": 3305 + }, + { + "epoch": 1.83, + "learning_rate": 1.6526647536218894e-05, + "loss": 0.9238, + "step": 3310 + }, + { + "epoch": 1.83, + "learning_rate": 1.6458568254399225e-05, + "loss": 0.9632, + "step": 3315 + }, + { + "epoch": 1.84, + "learning_rate": 1.6390560608486496e-05, + "loss": 0.9164, + "step": 3320 + }, + { + "epoch": 1.84, + "learning_rate": 1.6322625168852217e-05, + "loss": 0.9505, + "step": 3325 + }, + { + "epoch": 1.84, + "learning_rate": 1.6254762505262338e-05, + "loss": 0.9622, + "step": 3330 + }, + { + "epoch": 1.84, + "learning_rate": 1.618697318687241e-05, + "loss": 0.9204, + "step": 3335 + }, + { + "epoch": 1.85, + "learning_rate": 1.6119257782222895e-05, + "loss": 0.9504, + "step": 3340 + }, + { + "epoch": 1.85, + "learning_rate": 1.6051616859234285e-05, + "loss": 0.9384, + "step": 3345 + }, + { + "epoch": 1.85, + "learning_rate": 1.5984050985202474e-05, + "loss": 0.9374, + "step": 3350 + }, + { + "epoch": 1.86, + "learning_rate": 1.591656072679387e-05, + "loss": 0.9937, + "step": 3355 + }, + { + "epoch": 1.86, + "learning_rate": 1.5849146650040737e-05, + "loss": 0.9587, + "step": 3360 + }, + { + "epoch": 1.86, + "learning_rate": 1.5781809320336412e-05, + "loss": 0.9312, + "step": 3365 + }, + { + "epoch": 1.86, + "learning_rate": 1.5714549302430536e-05, + "loss": 0.9343, + "step": 3370 + }, + { + "epoch": 1.87, + "learning_rate": 1.5647367160424393e-05, + "loss": 0.9419, + "step": 3375 + }, + { + "epoch": 1.87, + "learning_rate": 1.558026345776608e-05, + "loss": 0.9371, + "step": 3380 + }, + { + "epoch": 1.87, + "learning_rate": 1.551323875724587e-05, + "loss": 0.913, + "step": 3385 + }, + { + "epoch": 1.87, + "learning_rate": 1.5446293620991437e-05, + "loss": 0.9195, + "step": 3390 + }, + { + "epoch": 1.88, + "learning_rate": 1.5379428610463174e-05, + "loss": 0.9164, + "step": 3395 + }, + { + "epoch": 1.88, + "learning_rate": 1.531264428644945e-05, + "loss": 0.9256, + "step": 3400 + }, + { + "epoch": 1.88, + "learning_rate": 1.5245941209061953e-05, + "loss": 0.9658, + "step": 3405 + }, + { + "epoch": 1.89, + "learning_rate": 1.517931993773094e-05, + "loss": 0.9397, + "step": 3410 + }, + { + "epoch": 1.89, + "learning_rate": 1.5112781031200569e-05, + "loss": 0.915, + "step": 3415 + }, + { + "epoch": 1.89, + "learning_rate": 1.5046325047524251e-05, + "loss": 0.9116, + "step": 3420 + }, + { + "epoch": 1.89, + "learning_rate": 1.4979952544059888e-05, + "loss": 0.9192, + "step": 3425 + }, + { + "epoch": 1.9, + "learning_rate": 1.4913664077465289e-05, + "loss": 0.924, + "step": 3430 + }, + { + "epoch": 1.9, + "learning_rate": 1.4847460203693408e-05, + "loss": 0.9579, + "step": 3435 + }, + { + "epoch": 1.9, + "learning_rate": 1.4781341477987776e-05, + "loss": 0.9816, + "step": 3440 + }, + { + "epoch": 1.9, + "learning_rate": 1.4715308454877758e-05, + "loss": 0.9412, + "step": 3445 + }, + { + "epoch": 1.91, + "learning_rate": 1.4649361688173979e-05, + "loss": 0.9413, + "step": 3450 + }, + { + "epoch": 1.91, + "learning_rate": 1.458350173096361e-05, + "loss": 0.9425, + "step": 3455 + }, + { + "epoch": 1.91, + "learning_rate": 1.4517729135605795e-05, + "loss": 0.9132, + "step": 3460 + }, + { + "epoch": 1.92, + "learning_rate": 1.4452044453726942e-05, + "loss": 0.9455, + "step": 3465 + }, + { + "epoch": 1.92, + "learning_rate": 1.4386448236216174e-05, + "loss": 0.9543, + "step": 3470 + }, + { + "epoch": 1.92, + "learning_rate": 1.4320941033220667e-05, + "loss": 0.9118, + "step": 3475 + }, + { + "epoch": 1.92, + "learning_rate": 1.4255523394141041e-05, + "loss": 0.9425, + "step": 3480 + }, + { + "epoch": 1.93, + "learning_rate": 1.4190195867626749e-05, + "loss": 0.8958, + "step": 3485 + }, + { + "epoch": 1.93, + "learning_rate": 1.4124959001571497e-05, + "loss": 0.944, + "step": 3490 + }, + { + "epoch": 1.93, + "learning_rate": 1.4059813343108616e-05, + "loss": 0.9611, + "step": 3495 + }, + { + "epoch": 1.94, + "learning_rate": 1.3994759438606501e-05, + "loss": 0.9448, + "step": 3500 + }, + { + "epoch": 1.94, + "learning_rate": 1.3929797833664013e-05, + "loss": 0.9659, + "step": 3505 + }, + { + "epoch": 1.94, + "learning_rate": 1.3864929073105922e-05, + "loss": 0.9178, + "step": 3510 + }, + { + "epoch": 1.94, + "learning_rate": 1.3800153700978282e-05, + "loss": 0.8965, + "step": 3515 + }, + { + "epoch": 1.95, + "learning_rate": 1.373547226054398e-05, + "loss": 0.9198, + "step": 3520 + }, + { + "epoch": 1.95, + "learning_rate": 1.367088529427803e-05, + "loss": 0.9398, + "step": 3525 + }, + { + "epoch": 1.95, + "learning_rate": 1.3606393343863182e-05, + "loss": 0.9423, + "step": 3530 + }, + { + "epoch": 1.95, + "learning_rate": 1.3541996950185227e-05, + "loss": 0.9592, + "step": 3535 + }, + { + "epoch": 1.96, + "learning_rate": 1.3477696653328598e-05, + "loss": 0.9489, + "step": 3540 + }, + { + "epoch": 1.96, + "learning_rate": 1.3413492992571713e-05, + "loss": 0.963, + "step": 3545 + }, + { + "epoch": 1.96, + "learning_rate": 1.3349386506382586e-05, + "loss": 0.9449, + "step": 3550 + }, + { + "epoch": 1.97, + "learning_rate": 1.3285377732414172e-05, + "loss": 0.9043, + "step": 3555 + }, + { + "epoch": 1.97, + "learning_rate": 1.3221467207499972e-05, + "loss": 0.9362, + "step": 3560 + }, + { + "epoch": 1.97, + "learning_rate": 1.3157655467649463e-05, + "loss": 0.945, + "step": 3565 + }, + { + "epoch": 1.97, + "learning_rate": 1.3093943048043634e-05, + "loss": 0.8693, + "step": 3570 + }, + { + "epoch": 1.98, + "learning_rate": 1.3030330483030479e-05, + "loss": 0.9659, + "step": 3575 + }, + { + "epoch": 1.98, + "learning_rate": 1.2966818306120535e-05, + "loss": 0.9036, + "step": 3580 + }, + { + "epoch": 1.98, + "learning_rate": 1.2903407049982386e-05, + "loss": 0.9282, + "step": 3585 + }, + { + "epoch": 1.99, + "learning_rate": 1.2840097246438215e-05, + "loss": 0.8747, + "step": 3590 + }, + { + "epoch": 1.99, + "learning_rate": 1.277688942645934e-05, + "loss": 0.936, + "step": 3595 + }, + { + "epoch": 1.99, + "learning_rate": 1.2713784120161725e-05, + "loss": 0.8945, + "step": 3600 + }, + { + "epoch": 1.99, + "learning_rate": 1.2650781856801598e-05, + "loss": 0.9273, + "step": 3605 + }, + { + "epoch": 2.0, + "learning_rate": 1.258788316477097e-05, + "loss": 0.9632, + "step": 3610 + }, + { + "epoch": 2.0, + "learning_rate": 1.2525088571593202e-05, + "loss": 0.8641, + "step": 3615 + }, + { + "epoch": 2.0, + "learning_rate": 1.2462398603918607e-05, + "loss": 0.9314, + "step": 3620 + }, + { + "epoch": 2.0, + "learning_rate": 1.2399813787520006e-05, + "loss": 0.9169, + "step": 3625 + }, + { + "epoch": 2.01, + "learning_rate": 1.2337334647288334e-05, + "loss": 0.9089, + "step": 3630 + }, + { + "epoch": 2.01, + "learning_rate": 1.2274961707228228e-05, + "loss": 0.9039, + "step": 3635 + }, + { + "epoch": 2.01, + "learning_rate": 1.2212695490453646e-05, + "loss": 0.9378, + "step": 3640 + }, + { + "epoch": 2.02, + "learning_rate": 1.2150536519183475e-05, + "loss": 0.9297, + "step": 3645 + }, + { + "epoch": 2.02, + "learning_rate": 1.2088485314737108e-05, + "loss": 0.9488, + "step": 3650 + }, + { + "epoch": 2.02, + "learning_rate": 1.2026542397530186e-05, + "loss": 0.9625, + "step": 3655 + }, + { + "epoch": 2.02, + "learning_rate": 1.1964708287070073e-05, + "loss": 0.8874, + "step": 3660 + }, + { + "epoch": 2.03, + "learning_rate": 1.1902983501951666e-05, + "loss": 0.9224, + "step": 3665 + }, + { + "epoch": 2.03, + "learning_rate": 1.1841368559852892e-05, + "loss": 0.9442, + "step": 3670 + }, + { + "epoch": 2.03, + "learning_rate": 1.17798639775305e-05, + "loss": 0.8688, + "step": 3675 + }, + { + "epoch": 2.03, + "learning_rate": 1.1718470270815608e-05, + "loss": 0.8912, + "step": 3680 + }, + { + "epoch": 2.04, + "learning_rate": 1.1657187954609496e-05, + "loss": 0.9131, + "step": 3685 + }, + { + "epoch": 2.04, + "learning_rate": 1.1596017542879168e-05, + "loss": 0.9827, + "step": 3690 + }, + { + "epoch": 2.04, + "learning_rate": 1.1534959548653132e-05, + "loss": 0.8588, + "step": 3695 + }, + { + "epoch": 2.05, + "learning_rate": 1.147401448401706e-05, + "loss": 0.9359, + "step": 3700 + }, + { + "epoch": 2.05, + "learning_rate": 1.1413182860109491e-05, + "loss": 0.9375, + "step": 3705 + }, + { + "epoch": 2.05, + "learning_rate": 1.1352465187117562e-05, + "loss": 0.9242, + "step": 3710 + }, + { + "epoch": 2.05, + "learning_rate": 1.1291861974272703e-05, + "loss": 0.9132, + "step": 3715 + }, + { + "epoch": 2.06, + "learning_rate": 1.1231373729846393e-05, + "loss": 0.9142, + "step": 3720 + }, + { + "epoch": 2.06, + "learning_rate": 1.1171000961145883e-05, + "loss": 0.9132, + "step": 3725 + }, + { + "epoch": 2.06, + "learning_rate": 1.1110744174509952e-05, + "loss": 0.9603, + "step": 3730 + }, + { + "epoch": 2.07, + "learning_rate": 1.1050603875304622e-05, + "loss": 0.9442, + "step": 3735 + }, + { + "epoch": 2.07, + "learning_rate": 1.0990580567918979e-05, + "loss": 0.9392, + "step": 3740 + }, + { + "epoch": 2.07, + "learning_rate": 1.0930674755760908e-05, + "loss": 0.9203, + "step": 3745 + }, + { + "epoch": 2.07, + "learning_rate": 1.0870886941252872e-05, + "loss": 0.9468, + "step": 3750 + }, + { + "epoch": 2.08, + "learning_rate": 1.0811217625827705e-05, + "loss": 0.9605, + "step": 3755 + }, + { + "epoch": 2.08, + "learning_rate": 1.0751667309924399e-05, + "loss": 0.9166, + "step": 3760 + }, + { + "epoch": 2.08, + "learning_rate": 1.0692236492983918e-05, + "loss": 0.939, + "step": 3765 + }, + { + "epoch": 2.08, + "learning_rate": 1.0632925673445001e-05, + "loss": 0.9409, + "step": 3770 + }, + { + "epoch": 2.09, + "learning_rate": 1.0573735348739987e-05, + "loss": 0.9523, + "step": 3775 + }, + { + "epoch": 2.09, + "learning_rate": 1.0514666015290645e-05, + "loss": 0.8867, + "step": 3780 + }, + { + "epoch": 2.09, + "learning_rate": 1.0455718168503967e-05, + "loss": 0.9816, + "step": 3785 + }, + { + "epoch": 2.1, + "learning_rate": 1.0396892302768127e-05, + "loss": 0.9465, + "step": 3790 + }, + { + "epoch": 2.1, + "learning_rate": 1.033818891144817e-05, + "loss": 0.9352, + "step": 3795 + }, + { + "epoch": 2.1, + "learning_rate": 1.0279608486882054e-05, + "loss": 0.8877, + "step": 3800 + }, + { + "epoch": 2.1, + "learning_rate": 1.0221151520376343e-05, + "loss": 0.9098, + "step": 3805 + }, + { + "epoch": 2.11, + "learning_rate": 1.0162818502202251e-05, + "loss": 0.8687, + "step": 3810 + }, + { + "epoch": 2.11, + "learning_rate": 1.0104609921591387e-05, + "loss": 0.9256, + "step": 3815 + }, + { + "epoch": 2.11, + "learning_rate": 1.0046526266731782e-05, + "loss": 0.92, + "step": 3820 + }, + { + "epoch": 2.12, + "learning_rate": 9.988568024763673e-06, + "loss": 0.8813, + "step": 3825 + }, + { + "epoch": 2.12, + "learning_rate": 9.930735681775505e-06, + "loss": 0.891, + "step": 3830 + }, + { + "epoch": 2.12, + "learning_rate": 9.87302972279982e-06, + "loss": 0.9479, + "step": 3835 + }, + { + "epoch": 2.12, + "learning_rate": 9.815450631809191e-06, + "loss": 0.8963, + "step": 3840 + }, + { + "epoch": 2.13, + "learning_rate": 9.757998891712172e-06, + "loss": 0.9178, + "step": 3845 + }, + { + "epoch": 2.13, + "learning_rate": 9.700674984349228e-06, + "loss": 0.9072, + "step": 3850 + }, + { + "epoch": 2.13, + "learning_rate": 9.643479390488717e-06, + "loss": 0.8879, + "step": 3855 + }, + { + "epoch": 2.13, + "learning_rate": 9.586412589822846e-06, + "loss": 0.9484, + "step": 3860 + }, + { + "epoch": 2.14, + "learning_rate": 9.529475060963649e-06, + "loss": 0.9173, + "step": 3865 + }, + { + "epoch": 2.14, + "learning_rate": 9.472667281438982e-06, + "loss": 0.9318, + "step": 3870 + }, + { + "epoch": 2.14, + "learning_rate": 9.415989727688484e-06, + "loss": 0.9242, + "step": 3875 + }, + { + "epoch": 2.15, + "learning_rate": 9.359442875059631e-06, + "loss": 0.9314, + "step": 3880 + }, + { + "epoch": 2.15, + "learning_rate": 9.303027197803726e-06, + "loss": 0.9227, + "step": 3885 + }, + { + "epoch": 2.15, + "learning_rate": 9.246743169071906e-06, + "loss": 0.9253, + "step": 3890 + }, + { + "epoch": 2.15, + "learning_rate": 9.190591260911201e-06, + "loss": 0.9457, + "step": 3895 + }, + { + "epoch": 2.16, + "learning_rate": 9.134571944260554e-06, + "loss": 0.9214, + "step": 3900 + }, + { + "epoch": 2.16, + "learning_rate": 9.078685688946884e-06, + "loss": 0.9534, + "step": 3905 + }, + { + "epoch": 2.16, + "learning_rate": 9.022932963681141e-06, + "loss": 0.9236, + "step": 3910 + }, + { + "epoch": 2.16, + "learning_rate": 8.967314236054384e-06, + "loss": 0.8905, + "step": 3915 + }, + { + "epoch": 2.17, + "learning_rate": 8.911829972533817e-06, + "loss": 0.9319, + "step": 3920 + }, + { + "epoch": 2.17, + "learning_rate": 8.856480638458966e-06, + "loss": 0.8989, + "step": 3925 + }, + { + "epoch": 2.17, + "learning_rate": 8.80126669803766e-06, + "loss": 0.9234, + "step": 3930 + }, + { + "epoch": 2.18, + "learning_rate": 8.746188614342263e-06, + "loss": 0.9619, + "step": 3935 + }, + { + "epoch": 2.18, + "learning_rate": 8.691246849305653e-06, + "loss": 0.9537, + "step": 3940 + }, + { + "epoch": 2.18, + "learning_rate": 8.636441863717499e-06, + "loss": 0.9809, + "step": 3945 + }, + { + "epoch": 2.18, + "learning_rate": 8.581774117220238e-06, + "loss": 0.9219, + "step": 3950 + }, + { + "epoch": 2.19, + "learning_rate": 8.52724406830538e-06, + "loss": 0.9502, + "step": 3955 + }, + { + "epoch": 2.19, + "learning_rate": 8.472852174309514e-06, + "loss": 0.9277, + "step": 3960 + }, + { + "epoch": 2.19, + "learning_rate": 8.418598891410581e-06, + "loss": 0.8978, + "step": 3965 + }, + { + "epoch": 2.2, + "learning_rate": 8.364484674624e-06, + "loss": 0.9026, + "step": 3970 + }, + { + "epoch": 2.2, + "learning_rate": 8.31050997779885e-06, + "loss": 0.9658, + "step": 3975 + }, + { + "epoch": 2.2, + "learning_rate": 8.25667525361409e-06, + "loss": 0.9288, + "step": 3980 + }, + { + "epoch": 2.2, + "learning_rate": 8.202980953574735e-06, + "loss": 0.9009, + "step": 3985 + }, + { + "epoch": 2.21, + "learning_rate": 8.14942752800808e-06, + "loss": 0.934, + "step": 3990 + }, + { + "epoch": 2.21, + "learning_rate": 8.09601542605993e-06, + "loss": 0.9261, + "step": 3995 + }, + { + "epoch": 2.21, + "learning_rate": 8.042745095690826e-06, + "loss": 0.9086, + "step": 4000 + }, + { + "epoch": 2.21, + "learning_rate": 7.989616983672288e-06, + "loss": 0.9397, + "step": 4005 + }, + { + "epoch": 2.22, + "learning_rate": 7.936631535583055e-06, + "loss": 0.8661, + "step": 4010 + }, + { + "epoch": 2.22, + "learning_rate": 7.88378919580538e-06, + "loss": 0.9433, + "step": 4015 + }, + { + "epoch": 2.22, + "learning_rate": 7.83109040752128e-06, + "loss": 0.9473, + "step": 4020 + }, + { + "epoch": 2.23, + "learning_rate": 7.778535612708824e-06, + "loss": 0.9177, + "step": 4025 + }, + { + "epoch": 2.23, + "learning_rate": 7.726125252138417e-06, + "loss": 0.8932, + "step": 4030 + }, + { + "epoch": 2.23, + "learning_rate": 7.673859765369126e-06, + "loss": 0.9187, + "step": 4035 + }, + { + "epoch": 2.23, + "learning_rate": 7.62173959074497e-06, + "loss": 0.9009, + "step": 4040 + }, + { + "epoch": 2.24, + "learning_rate": 7.569765165391257e-06, + "loss": 0.9348, + "step": 4045 + }, + { + "epoch": 2.24, + "learning_rate": 7.517936925210917e-06, + "loss": 0.8827, + "step": 4050 + }, + { + "epoch": 2.24, + "learning_rate": 7.466255304880834e-06, + "loss": 0.9668, + "step": 4055 + }, + { + "epoch": 2.24, + "learning_rate": 7.4147207378482295e-06, + "loss": 0.934, + "step": 4060 + }, + { + "epoch": 2.25, + "learning_rate": 7.36333365632697e-06, + "loss": 0.9251, + "step": 4065 + }, + { + "epoch": 2.25, + "learning_rate": 7.312094491294033e-06, + "loss": 0.9225, + "step": 4070 + }, + { + "epoch": 2.25, + "learning_rate": 7.261003672485783e-06, + "loss": 0.9198, + "step": 4075 + }, + { + "epoch": 2.26, + "learning_rate": 7.210061628394477e-06, + "loss": 0.9023, + "step": 4080 + }, + { + "epoch": 2.26, + "learning_rate": 7.159268786264564e-06, + "loss": 0.9378, + "step": 4085 + }, + { + "epoch": 2.26, + "learning_rate": 7.108625572089209e-06, + "loss": 0.9148, + "step": 4090 + }, + { + "epoch": 2.26, + "learning_rate": 7.05813241060661e-06, + "loss": 0.9308, + "step": 4095 + }, + { + "epoch": 2.27, + "learning_rate": 7.007789725296557e-06, + "loss": 0.8979, + "step": 4100 + }, + { + "epoch": 2.27, + "learning_rate": 6.957597938376748e-06, + "loss": 0.9329, + "step": 4105 + }, + { + "epoch": 2.27, + "learning_rate": 6.907557470799358e-06, + "loss": 0.8849, + "step": 4110 + }, + { + "epoch": 2.28, + "learning_rate": 6.857668742247458e-06, + "loss": 0.8879, + "step": 4115 + }, + { + "epoch": 2.28, + "learning_rate": 6.807932171131498e-06, + "loss": 0.9132, + "step": 4120 + }, + { + "epoch": 2.28, + "learning_rate": 6.758348174585804e-06, + "loss": 0.9273, + "step": 4125 + }, + { + "epoch": 2.28, + "learning_rate": 6.7089171684650785e-06, + "loss": 0.9115, + "step": 4130 + }, + { + "epoch": 2.29, + "learning_rate": 6.659639567340914e-06, + "loss": 0.9431, + "step": 4135 + }, + { + "epoch": 2.29, + "learning_rate": 6.610515784498314e-06, + "loss": 0.9515, + "step": 4140 + }, + { + "epoch": 2.29, + "learning_rate": 6.561546231932228e-06, + "loss": 0.9069, + "step": 4145 + }, + { + "epoch": 2.29, + "learning_rate": 6.512731320344101e-06, + "loss": 0.9178, + "step": 4150 + }, + { + "epoch": 2.3, + "learning_rate": 6.464071459138405e-06, + "loss": 0.9156, + "step": 4155 + }, + { + "epoch": 2.3, + "learning_rate": 6.415567056419244e-06, + "loss": 0.9637, + "step": 4160 + }, + { + "epoch": 2.3, + "learning_rate": 6.3672185189869e-06, + "loss": 0.9446, + "step": 4165 + }, + { + "epoch": 2.31, + "learning_rate": 6.319026252334445e-06, + "loss": 0.8726, + "step": 4170 + }, + { + "epoch": 2.31, + "learning_rate": 6.270990660644313e-06, + "loss": 0.9324, + "step": 4175 + }, + { + "epoch": 2.31, + "learning_rate": 6.223112146784935e-06, + "loss": 0.9252, + "step": 4180 + }, + { + "epoch": 2.31, + "learning_rate": 6.1753911123073435e-06, + "loss": 0.8924, + "step": 4185 + }, + { + "epoch": 2.32, + "learning_rate": 6.127827957441817e-06, + "loss": 0.9344, + "step": 4190 + }, + { + "epoch": 2.32, + "learning_rate": 6.080423081094522e-06, + "loss": 0.9341, + "step": 4195 + }, + { + "epoch": 2.32, + "learning_rate": 6.033176880844133e-06, + "loss": 0.905, + "step": 4200 + }, + { + "epoch": 2.33, + "learning_rate": 5.986089752938584e-06, + "loss": 0.9231, + "step": 4205 + }, + { + "epoch": 2.33, + "learning_rate": 5.939162092291622e-06, + "loss": 0.906, + "step": 4210 + }, + { + "epoch": 2.33, + "learning_rate": 5.892394292479633e-06, + "loss": 0.8957, + "step": 4215 + }, + { + "epoch": 2.33, + "learning_rate": 5.8457867457382024e-06, + "loss": 0.867, + "step": 4220 + }, + { + "epoch": 2.34, + "learning_rate": 5.7993398429589506e-06, + "loss": 0.9022, + "step": 4225 + }, + { + "epoch": 2.34, + "learning_rate": 5.753053973686148e-06, + "loss": 0.9064, + "step": 4230 + }, + { + "epoch": 2.34, + "learning_rate": 5.7069295261135525e-06, + "loss": 0.9019, + "step": 4235 + }, + { + "epoch": 2.34, + "learning_rate": 5.66096688708104e-06, + "loss": 0.9397, + "step": 4240 + }, + { + "epoch": 2.35, + "learning_rate": 5.615166442071457e-06, + "loss": 0.8971, + "step": 4245 + }, + { + "epoch": 2.35, + "learning_rate": 5.569528575207339e-06, + "loss": 0.8902, + "step": 4250 + }, + { + "epoch": 2.35, + "learning_rate": 5.5240536692477e-06, + "loss": 0.8982, + "step": 4255 + }, + { + "epoch": 2.36, + "learning_rate": 5.4787421055848164e-06, + "loss": 0.9135, + "step": 4260 + }, + { + "epoch": 2.36, + "learning_rate": 5.433594264241043e-06, + "loss": 0.8969, + "step": 4265 + }, + { + "epoch": 2.36, + "learning_rate": 5.3886105238656055e-06, + "loss": 0.9036, + "step": 4270 + }, + { + "epoch": 2.36, + "learning_rate": 5.3437912617314425e-06, + "loss": 0.9313, + "step": 4275 + }, + { + "epoch": 2.37, + "learning_rate": 5.299136853732034e-06, + "loss": 0.9034, + "step": 4280 + }, + { + "epoch": 2.37, + "learning_rate": 5.254647674378252e-06, + "loss": 0.9673, + "step": 4285 + }, + { + "epoch": 2.37, + "learning_rate": 5.210324096795202e-06, + "loss": 0.9044, + "step": 4290 + }, + { + "epoch": 2.37, + "learning_rate": 5.166166492719124e-06, + "loss": 0.937, + "step": 4295 + }, + { + "epoch": 2.38, + "learning_rate": 5.122175232494255e-06, + "loss": 0.9095, + "step": 4300 + }, + { + "epoch": 2.38, + "learning_rate": 5.078350685069727e-06, + "loss": 0.8787, + "step": 4305 + }, + { + "epoch": 2.38, + "learning_rate": 5.03469321799647e-06, + "loss": 0.8945, + "step": 4310 + }, + { + "epoch": 2.39, + "learning_rate": 4.9912031974241376e-06, + "loss": 0.9285, + "step": 4315 + }, + { + "epoch": 2.39, + "learning_rate": 4.947880988098025e-06, + "loss": 0.9318, + "step": 4320 + }, + { + "epoch": 2.39, + "learning_rate": 4.90472695335602e-06, + "loss": 0.9171, + "step": 4325 + }, + { + "epoch": 2.39, + "learning_rate": 4.8617414551255545e-06, + "loss": 0.923, + "step": 4330 + }, + { + "epoch": 2.4, + "learning_rate": 4.818924853920545e-06, + "loss": 0.9153, + "step": 4335 + }, + { + "epoch": 2.4, + "learning_rate": 4.776277508838428e-06, + "loss": 0.907, + "step": 4340 + }, + { + "epoch": 2.4, + "learning_rate": 4.733799777557069e-06, + "loss": 0.8869, + "step": 4345 + }, + { + "epoch": 2.41, + "learning_rate": 4.691492016331842e-06, + "loss": 0.95, + "step": 4350 + }, + { + "epoch": 2.41, + "learning_rate": 4.6493545799925655e-06, + "loss": 0.9145, + "step": 4355 + }, + { + "epoch": 2.41, + "learning_rate": 4.607387821940609e-06, + "loss": 0.9175, + "step": 4360 + }, + { + "epoch": 2.41, + "learning_rate": 4.565592094145835e-06, + "loss": 0.9125, + "step": 4365 + }, + { + "epoch": 2.42, + "learning_rate": 4.523967747143745e-06, + "loss": 0.8972, + "step": 4370 + }, + { + "epoch": 2.42, + "learning_rate": 4.482515130032453e-06, + "loss": 0.9113, + "step": 4375 + }, + { + "epoch": 2.42, + "learning_rate": 4.441234590469817e-06, + "loss": 0.9447, + "step": 4380 + }, + { + "epoch": 2.42, + "learning_rate": 4.400126474670499e-06, + "loss": 0.9472, + "step": 4385 + }, + { + "epoch": 2.43, + "learning_rate": 4.359191127403059e-06, + "loss": 0.9391, + "step": 4390 + }, + { + "epoch": 2.43, + "learning_rate": 4.318428891987078e-06, + "loss": 0.9171, + "step": 4395 + }, + { + "epoch": 2.43, + "learning_rate": 4.2778401102902595e-06, + "loss": 0.9468, + "step": 4400 + }, + { + "epoch": 2.44, + "learning_rate": 4.237425122725586e-06, + "loss": 0.9034, + "step": 4405 + }, + { + "epoch": 2.44, + "learning_rate": 4.197184268248436e-06, + "loss": 0.8667, + "step": 4410 + }, + { + "epoch": 2.44, + "learning_rate": 4.157117884353765e-06, + "loss": 0.9553, + "step": 4415 + }, + { + "epoch": 2.44, + "learning_rate": 4.117226307073268e-06, + "loss": 0.931, + "step": 4420 + }, + { + "epoch": 2.45, + "learning_rate": 4.07750987097254e-06, + "loss": 0.9287, + "step": 4425 + }, + { + "epoch": 2.45, + "learning_rate": 4.037968909148326e-06, + "loss": 0.8737, + "step": 4430 + }, + { + "epoch": 2.45, + "learning_rate": 3.998603753225647e-06, + "loss": 0.9677, + "step": 4435 + }, + { + "epoch": 2.46, + "learning_rate": 3.959414733355094e-06, + "loss": 0.9194, + "step": 4440 + }, + { + "epoch": 2.46, + "learning_rate": 3.9204021782100115e-06, + "loss": 0.9424, + "step": 4445 + }, + { + "epoch": 2.46, + "learning_rate": 3.8815664149837675e-06, + "loss": 0.9402, + "step": 4450 + }, + { + "epoch": 2.46, + "learning_rate": 3.8429077693869854e-06, + "loss": 0.9196, + "step": 4455 + }, + { + "epoch": 2.47, + "learning_rate": 3.804426565644839e-06, + "loss": 0.9357, + "step": 4460 + }, + { + "epoch": 2.47, + "learning_rate": 3.7661231264943086e-06, + "loss": 0.9234, + "step": 4465 + }, + { + "epoch": 2.47, + "learning_rate": 3.7279977731814963e-06, + "loss": 0.9135, + "step": 4470 + }, + { + "epoch": 2.47, + "learning_rate": 3.690050825458913e-06, + "loss": 0.9074, + "step": 4475 + }, + { + "epoch": 2.48, + "learning_rate": 3.652282601582793e-06, + "loss": 0.8658, + "step": 4480 + }, + { + "epoch": 2.48, + "learning_rate": 3.6146934183104748e-06, + "loss": 0.9522, + "step": 4485 + }, + { + "epoch": 2.48, + "learning_rate": 3.5772835908976538e-06, + "loss": 0.937, + "step": 4490 + }, + { + "epoch": 2.49, + "learning_rate": 3.540053433095841e-06, + "loss": 0.9336, + "step": 4495 + }, + { + "epoch": 2.49, + "learning_rate": 3.503003257149637e-06, + "loss": 0.9437, + "step": 4500 + } + ], + "max_steps": 5424, + "num_train_epochs": 3, + "total_flos": 1.4623257820082995e+18, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-4500/training_args.bin b/checkpoint-4500/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..77433459daca6603950afc463ff989e817f81ac3 --- /dev/null +++ b/checkpoint-4500/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0f6a8853599a52b1f15984d75ce4b6434c335fdc67d4509d0b2ca0d8c613590a +size 3362 diff --git a/checkpoint-4600/README.md b/checkpoint-4600/README.md new file mode 100644 index 0000000000000000000000000000000000000000..53c0fe6b436905ad7a2c060e98fe1ebdeac1a2fe --- /dev/null +++ b/checkpoint-4600/README.md @@ -0,0 +1,15 @@ +--- +library_name: peft +--- +## Training procedure + + +The following `bitsandbytes` quantization config was used during training: +- load_in_8bit: True +- llm_int8_threshold: 6.0 +- llm_int8_skip_modules: None +- llm_int8_enable_fp32_cpu_offload: False +### Framework versions + + +- PEFT 0.5.0 diff --git a/checkpoint-4600/adapter_config.json b/checkpoint-4600/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..514ae99ac7471cc26016f7575e44dd8304f29b07 --- /dev/null +++ b/checkpoint-4600/adapter_config.json @@ -0,0 +1,21 @@ +{ + "auto_mapping": null, + "base_model_name_or_path": "meta-llama/Llama-2-7b-chat-hf", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "q_proj", + "v_proj" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-4600/adapter_model.bin b/checkpoint-4600/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..84f15a8066564774e19df1d48e69c8aeffdddc91 --- /dev/null +++ b/checkpoint-4600/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b84309c1a8b9b981134d5694bafcffcd9277a9fb09cd7a8c03ead2ca9e3d0bb4 +size 16821197 diff --git a/checkpoint-4600/finetuning_args.json b/checkpoint-4600/finetuning_args.json new file mode 100644 index 0000000000000000000000000000000000000000..d01efc206b59c6f88548e8f3940579f2ed2af33b --- /dev/null +++ b/checkpoint-4600/finetuning_args.json @@ -0,0 +1,16 @@ +{ + "dpo_beta": 0.1, + "finetuning_type": "lora", + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "lora_rank": 8, + "lora_target": [ + "q_proj", + "v_proj" + ], + "name_module_trainable": "mlp", + "num_hidden_layers": 32, + "num_layer_trainable": 3, + "ppo_score_norm": false, + "resume_lora_training": true +} diff --git a/checkpoint-4600/optimizer.pt b/checkpoint-4600/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..23b1ae41cf25286119cb293819846164bdf3094b --- /dev/null +++ b/checkpoint-4600/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:92a3f056b24dfd120970d98286fba1188c9a8f8a5c6cba53ffc9a1899cce7a49 +size 33629893 diff --git a/checkpoint-4600/rng_state.pth b/checkpoint-4600/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..5d088cd5c24f8aa357ac940821fab3c896a86f74 --- /dev/null +++ b/checkpoint-4600/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:04b7f5551f2aba8cd5b61749f5760d2efd08e5f4eeba428d0e4ac356bb04bd54 +size 14575 diff --git a/checkpoint-4600/scaler.pt b/checkpoint-4600/scaler.pt new file mode 100644 index 0000000000000000000000000000000000000000..5c3dd5b01d0eec5fb56a8be85b16e153eea47b09 --- /dev/null +++ b/checkpoint-4600/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c236f4052ba42a610bd9ada0c5c5892e35fbda0ef4a00f2da2be78658a54d88d +size 557 diff --git a/checkpoint-4600/scheduler.pt b/checkpoint-4600/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..bcc702b72e9f09a723c2b48bb1bf2e1876e79ed5 --- /dev/null +++ b/checkpoint-4600/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9306e9387db421e3dfe8b2b52a320788ce4c48f5350a94a8a4ce7f9f19595f70 +size 627 diff --git a/checkpoint-4600/trainer_state.json b/checkpoint-4600/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..f2353c74635c1289b46d3148e1eb7fca8b80b0c8 --- /dev/null +++ b/checkpoint-4600/trainer_state.json @@ -0,0 +1,5536 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 2.543544373790434, + "global_step": 4600, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.9999895164082156e-05, + "loss": 1.6349, + "step": 5 + }, + { + "epoch": 0.01, + "learning_rate": 4.999958065720787e-05, + "loss": 1.6199, + "step": 10 + }, + { + "epoch": 0.01, + "learning_rate": 4.999905648201487e-05, + "loss": 1.4834, + "step": 15 + }, + { + "epoch": 0.01, + "learning_rate": 4.999832264289934e-05, + "loss": 1.3882, + "step": 20 + }, + { + "epoch": 0.01, + "learning_rate": 4.999737914601591e-05, + "loss": 1.3679, + "step": 25 + }, + { + "epoch": 0.02, + "learning_rate": 4.999622599927756e-05, + "loss": 1.2396, + "step": 30 + }, + { + "epoch": 0.02, + "learning_rate": 4.999486321235559e-05, + "loss": 1.321, + "step": 35 + }, + { + "epoch": 0.02, + "learning_rate": 4.9993290796679516e-05, + "loss": 1.2874, + "step": 40 + }, + { + "epoch": 0.02, + "learning_rate": 4.999150876543699e-05, + "loss": 1.2607, + "step": 45 + }, + { + "epoch": 0.03, + "learning_rate": 4.9989517133573694e-05, + "loss": 1.2454, + "step": 50 + }, + { + "epoch": 0.03, + "learning_rate": 4.9987315917793174e-05, + "loss": 1.2799, + "step": 55 + }, + { + "epoch": 0.03, + "learning_rate": 4.998490513655676e-05, + "loss": 1.2575, + "step": 60 + }, + { + "epoch": 0.04, + "learning_rate": 4.998228481008337e-05, + "loss": 1.2404, + "step": 65 + }, + { + "epoch": 0.04, + "learning_rate": 4.997945496034934e-05, + "loss": 1.2219, + "step": 70 + }, + { + "epoch": 0.04, + "learning_rate": 4.9976415611088267e-05, + "loss": 1.2241, + "step": 75 + }, + { + "epoch": 0.04, + "learning_rate": 4.997316678779079e-05, + "loss": 1.1716, + "step": 80 + }, + { + "epoch": 0.05, + "learning_rate": 4.996970851770438e-05, + "loss": 1.1883, + "step": 85 + }, + { + "epoch": 0.05, + "learning_rate": 4.9966040829833115e-05, + "loss": 1.205, + "step": 90 + }, + { + "epoch": 0.05, + "learning_rate": 4.9962163754937426e-05, + "loss": 1.1246, + "step": 95 + }, + { + "epoch": 0.06, + "learning_rate": 4.995807732553384e-05, + "loss": 1.1636, + "step": 100 + }, + { + "epoch": 0.06, + "learning_rate": 4.9953781575894723e-05, + "loss": 1.158, + "step": 105 + }, + { + "epoch": 0.06, + "learning_rate": 4.9949276542048e-05, + "loss": 1.1477, + "step": 110 + }, + { + "epoch": 0.06, + "learning_rate": 4.9944562261776805e-05, + "loss": 1.1678, + "step": 115 + }, + { + "epoch": 0.07, + "learning_rate": 4.9939638774619216e-05, + "loss": 1.1501, + "step": 120 + }, + { + "epoch": 0.07, + "learning_rate": 4.99345061218679e-05, + "loss": 1.1955, + "step": 125 + }, + { + "epoch": 0.07, + "learning_rate": 4.9929164346569756e-05, + "loss": 1.1724, + "step": 130 + }, + { + "epoch": 0.07, + "learning_rate": 4.9923613493525576e-05, + "loss": 1.177, + "step": 135 + }, + { + "epoch": 0.08, + "learning_rate": 4.991785360928968e-05, + "loss": 1.1418, + "step": 140 + }, + { + "epoch": 0.08, + "learning_rate": 4.991188474216947e-05, + "loss": 1.1898, + "step": 145 + }, + { + "epoch": 0.08, + "learning_rate": 4.9905706942225094e-05, + "loss": 1.1479, + "step": 150 + }, + { + "epoch": 0.09, + "learning_rate": 4.9899320261268966e-05, + "loss": 1.1356, + "step": 155 + }, + { + "epoch": 0.09, + "learning_rate": 4.989272475286537e-05, + "loss": 1.1397, + "step": 160 + }, + { + "epoch": 0.09, + "learning_rate": 4.9885920472330004e-05, + "loss": 1.1215, + "step": 165 + }, + { + "epoch": 0.09, + "learning_rate": 4.9878907476729516e-05, + "loss": 1.167, + "step": 170 + }, + { + "epoch": 0.1, + "learning_rate": 4.9871685824881e-05, + "loss": 1.1219, + "step": 175 + }, + { + "epoch": 0.1, + "learning_rate": 4.9864255577351534e-05, + "loss": 1.0835, + "step": 180 + }, + { + "epoch": 0.1, + "learning_rate": 4.985661679645769e-05, + "loss": 1.0721, + "step": 185 + }, + { + "epoch": 0.11, + "learning_rate": 4.9848769546264915e-05, + "loss": 1.0692, + "step": 190 + }, + { + "epoch": 0.11, + "learning_rate": 4.9840713892587146e-05, + "loss": 1.0488, + "step": 195 + }, + { + "epoch": 0.11, + "learning_rate": 4.983244990298609e-05, + "loss": 1.1285, + "step": 200 + }, + { + "epoch": 0.11, + "learning_rate": 4.982397764677081e-05, + "loss": 1.0832, + "step": 205 + }, + { + "epoch": 0.12, + "learning_rate": 4.981529719499704e-05, + "loss": 1.0652, + "step": 210 + }, + { + "epoch": 0.12, + "learning_rate": 4.980640862046663e-05, + "loss": 1.1043, + "step": 215 + }, + { + "epoch": 0.12, + "learning_rate": 4.979731199772693e-05, + "loss": 1.112, + "step": 220 + }, + { + "epoch": 0.12, + "learning_rate": 4.9788007403070146e-05, + "loss": 1.1029, + "step": 225 + }, + { + "epoch": 0.13, + "learning_rate": 4.977849491453277e-05, + "loss": 1.0869, + "step": 230 + }, + { + "epoch": 0.13, + "learning_rate": 4.976877461189481e-05, + "loss": 1.0843, + "step": 235 + }, + { + "epoch": 0.13, + "learning_rate": 4.975884657667922e-05, + "loss": 1.0789, + "step": 240 + }, + { + "epoch": 0.14, + "learning_rate": 4.974871089215118e-05, + "loss": 1.0449, + "step": 245 + }, + { + "epoch": 0.14, + "learning_rate": 4.9738367643317405e-05, + "loss": 1.1053, + "step": 250 + }, + { + "epoch": 0.14, + "learning_rate": 4.9727816916925395e-05, + "loss": 1.0651, + "step": 255 + }, + { + "epoch": 0.14, + "learning_rate": 4.971705880146276e-05, + "loss": 1.0828, + "step": 260 + }, + { + "epoch": 0.15, + "learning_rate": 4.970609338715646e-05, + "loss": 1.0932, + "step": 265 + }, + { + "epoch": 0.15, + "learning_rate": 4.969492076597203e-05, + "loss": 1.0648, + "step": 270 + }, + { + "epoch": 0.15, + "learning_rate": 4.968354103161283e-05, + "loss": 1.0948, + "step": 275 + }, + { + "epoch": 0.15, + "learning_rate": 4.967195427951926e-05, + "loss": 1.0721, + "step": 280 + }, + { + "epoch": 0.16, + "learning_rate": 4.9660160606867936e-05, + "loss": 1.124, + "step": 285 + }, + { + "epoch": 0.16, + "learning_rate": 4.9648160112570896e-05, + "loss": 1.0963, + "step": 290 + }, + { + "epoch": 0.16, + "learning_rate": 4.9635952897274773e-05, + "loss": 1.1078, + "step": 295 + }, + { + "epoch": 0.17, + "learning_rate": 4.9623539063359925e-05, + "loss": 1.1059, + "step": 300 + }, + { + "epoch": 0.17, + "learning_rate": 4.961091871493962e-05, + "loss": 1.1032, + "step": 305 + }, + { + "epoch": 0.17, + "learning_rate": 4.959809195785912e-05, + "loss": 1.0595, + "step": 310 + }, + { + "epoch": 0.17, + "learning_rate": 4.958505889969481e-05, + "loss": 1.1096, + "step": 315 + }, + { + "epoch": 0.18, + "learning_rate": 4.957181964975329e-05, + "loss": 1.0589, + "step": 320 + }, + { + "epoch": 0.18, + "learning_rate": 4.955837431907049e-05, + "loss": 1.0608, + "step": 325 + }, + { + "epoch": 0.18, + "learning_rate": 4.954472302041069e-05, + "loss": 1.0819, + "step": 330 + }, + { + "epoch": 0.19, + "learning_rate": 4.9530865868265605e-05, + "loss": 1.0759, + "step": 335 + }, + { + "epoch": 0.19, + "learning_rate": 4.951680297885342e-05, + "loss": 1.0515, + "step": 340 + }, + { + "epoch": 0.19, + "learning_rate": 4.950253447011779e-05, + "loss": 1.0371, + "step": 345 + }, + { + "epoch": 0.19, + "learning_rate": 4.948806046172691e-05, + "loss": 1.0619, + "step": 350 + }, + { + "epoch": 0.2, + "learning_rate": 4.947338107507245e-05, + "loss": 1.0757, + "step": 355 + }, + { + "epoch": 0.2, + "learning_rate": 4.945849643326857e-05, + "loss": 1.0686, + "step": 360 + }, + { + "epoch": 0.2, + "learning_rate": 4.9443406661150874e-05, + "loss": 1.0809, + "step": 365 + }, + { + "epoch": 0.2, + "learning_rate": 4.942811188527537e-05, + "loss": 1.0704, + "step": 370 + }, + { + "epoch": 0.21, + "learning_rate": 4.941261223391742e-05, + "loss": 1.0655, + "step": 375 + }, + { + "epoch": 0.21, + "learning_rate": 4.939690783707063e-05, + "loss": 1.1182, + "step": 380 + }, + { + "epoch": 0.21, + "learning_rate": 4.938099882644578e-05, + "loss": 1.081, + "step": 385 + }, + { + "epoch": 0.22, + "learning_rate": 4.9364885335469734e-05, + "loss": 1.0792, + "step": 390 + }, + { + "epoch": 0.22, + "learning_rate": 4.93485674992843e-05, + "loss": 1.0244, + "step": 395 + }, + { + "epoch": 0.22, + "learning_rate": 4.933204545474511e-05, + "loss": 1.0531, + "step": 400 + }, + { + "epoch": 0.22, + "learning_rate": 4.9315319340420465e-05, + "loss": 1.048, + "step": 405 + }, + { + "epoch": 0.23, + "learning_rate": 4.929838929659015e-05, + "loss": 1.04, + "step": 410 + }, + { + "epoch": 0.23, + "learning_rate": 4.9281255465244314e-05, + "loss": 1.0686, + "step": 415 + }, + { + "epoch": 0.23, + "learning_rate": 4.926391799008223e-05, + "loss": 1.0933, + "step": 420 + }, + { + "epoch": 0.24, + "learning_rate": 4.924637701651111e-05, + "loss": 1.0313, + "step": 425 + }, + { + "epoch": 0.24, + "learning_rate": 4.9228632691644874e-05, + "loss": 1.0811, + "step": 430 + }, + { + "epoch": 0.24, + "learning_rate": 4.921068516430293e-05, + "loss": 1.0617, + "step": 435 + }, + { + "epoch": 0.24, + "learning_rate": 4.919253458500892e-05, + "loss": 1.0482, + "step": 440 + }, + { + "epoch": 0.25, + "learning_rate": 4.9174181105989445e-05, + "loss": 1.0681, + "step": 445 + }, + { + "epoch": 0.25, + "learning_rate": 4.9155624881172834e-05, + "loss": 1.0476, + "step": 450 + }, + { + "epoch": 0.25, + "learning_rate": 4.913686606618777e-05, + "loss": 1.0463, + "step": 455 + }, + { + "epoch": 0.25, + "learning_rate": 4.911790481836208e-05, + "loss": 1.0213, + "step": 460 + }, + { + "epoch": 0.26, + "learning_rate": 4.909874129672133e-05, + "loss": 0.9855, + "step": 465 + }, + { + "epoch": 0.26, + "learning_rate": 4.907937566198757e-05, + "loss": 1.045, + "step": 470 + }, + { + "epoch": 0.26, + "learning_rate": 4.9059808076577914e-05, + "loss": 1.0442, + "step": 475 + }, + { + "epoch": 0.27, + "learning_rate": 4.904003870460323e-05, + "loss": 1.0354, + "step": 480 + }, + { + "epoch": 0.27, + "learning_rate": 4.9020067711866735e-05, + "loss": 1.0331, + "step": 485 + }, + { + "epoch": 0.27, + "learning_rate": 4.899989526586261e-05, + "loss": 1.0585, + "step": 490 + }, + { + "epoch": 0.27, + "learning_rate": 4.8979521535774636e-05, + "loss": 1.0223, + "step": 495 + }, + { + "epoch": 0.28, + "learning_rate": 4.895894669247468e-05, + "loss": 1.0118, + "step": 500 + }, + { + "epoch": 0.28, + "learning_rate": 4.8938170908521356e-05, + "loss": 1.0508, + "step": 505 + }, + { + "epoch": 0.28, + "learning_rate": 4.8917194358158534e-05, + "loss": 1.0656, + "step": 510 + }, + { + "epoch": 0.28, + "learning_rate": 4.8896017217313886e-05, + "loss": 1.0655, + "step": 515 + }, + { + "epoch": 0.29, + "learning_rate": 4.887463966359741e-05, + "loss": 1.0833, + "step": 520 + }, + { + "epoch": 0.29, + "learning_rate": 4.8853061876299956e-05, + "loss": 1.068, + "step": 525 + }, + { + "epoch": 0.29, + "learning_rate": 4.8831284036391684e-05, + "loss": 1.0487, + "step": 530 + }, + { + "epoch": 0.3, + "learning_rate": 4.880930632652058e-05, + "loss": 1.059, + "step": 535 + }, + { + "epoch": 0.3, + "learning_rate": 4.878712893101092e-05, + "loss": 1.0408, + "step": 540 + }, + { + "epoch": 0.3, + "learning_rate": 4.876475203586171e-05, + "loss": 0.9976, + "step": 545 + }, + { + "epoch": 0.3, + "learning_rate": 4.874217582874514e-05, + "loss": 1.0358, + "step": 550 + }, + { + "epoch": 0.31, + "learning_rate": 4.8719400499005e-05, + "loss": 1.0073, + "step": 555 + }, + { + "epoch": 0.31, + "learning_rate": 4.869642623765509e-05, + "loss": 1.0742, + "step": 560 + }, + { + "epoch": 0.31, + "learning_rate": 4.867325323737765e-05, + "loss": 1.0463, + "step": 565 + }, + { + "epoch": 0.32, + "learning_rate": 4.864988169252168e-05, + "loss": 1.0242, + "step": 570 + }, + { + "epoch": 0.32, + "learning_rate": 4.8626311799101375e-05, + "loss": 1.0773, + "step": 575 + }, + { + "epoch": 0.32, + "learning_rate": 4.860254375479446e-05, + "loss": 1.0371, + "step": 580 + }, + { + "epoch": 0.32, + "learning_rate": 4.8578577758940504e-05, + "loss": 1.0399, + "step": 585 + }, + { + "epoch": 0.33, + "learning_rate": 4.855441401253928e-05, + "loss": 1.0411, + "step": 590 + }, + { + "epoch": 0.33, + "learning_rate": 4.8530052718249076e-05, + "loss": 1.0476, + "step": 595 + }, + { + "epoch": 0.33, + "learning_rate": 4.850549408038498e-05, + "loss": 1.0075, + "step": 600 + }, + { + "epoch": 0.33, + "learning_rate": 4.848073830491717e-05, + "loss": 1.0045, + "step": 605 + }, + { + "epoch": 0.34, + "learning_rate": 4.845578559946923e-05, + "loss": 1.0368, + "step": 610 + }, + { + "epoch": 0.34, + "learning_rate": 4.8430636173316306e-05, + "loss": 1.0156, + "step": 615 + }, + { + "epoch": 0.34, + "learning_rate": 4.840529023738348e-05, + "loss": 1.0334, + "step": 620 + }, + { + "epoch": 0.35, + "learning_rate": 4.837974800424389e-05, + "loss": 0.9994, + "step": 625 + }, + { + "epoch": 0.35, + "learning_rate": 4.8354009688117026e-05, + "loss": 1.0467, + "step": 630 + }, + { + "epoch": 0.35, + "learning_rate": 4.8328075504866874e-05, + "loss": 0.9968, + "step": 635 + }, + { + "epoch": 0.35, + "learning_rate": 4.8301945672000164e-05, + "loss": 1.041, + "step": 640 + }, + { + "epoch": 0.36, + "learning_rate": 4.8275620408664487e-05, + "loss": 0.9811, + "step": 645 + }, + { + "epoch": 0.36, + "learning_rate": 4.8249099935646494e-05, + "loss": 1.0235, + "step": 650 + }, + { + "epoch": 0.36, + "learning_rate": 4.822238447537003e-05, + "loss": 1.0152, + "step": 655 + }, + { + "epoch": 0.36, + "learning_rate": 4.819547425189429e-05, + "loss": 1.038, + "step": 660 + }, + { + "epoch": 0.37, + "learning_rate": 4.81683694909119e-05, + "loss": 1.0387, + "step": 665 + }, + { + "epoch": 0.37, + "learning_rate": 4.814107041974707e-05, + "loss": 1.019, + "step": 670 + }, + { + "epoch": 0.37, + "learning_rate": 4.811357726735366e-05, + "loss": 1.0403, + "step": 675 + }, + { + "epoch": 0.38, + "learning_rate": 4.808589026431324e-05, + "loss": 1.0828, + "step": 680 + }, + { + "epoch": 0.38, + "learning_rate": 4.805800964283322e-05, + "loss": 1.0128, + "step": 685 + }, + { + "epoch": 0.38, + "learning_rate": 4.802993563674483e-05, + "loss": 1.0502, + "step": 690 + }, + { + "epoch": 0.38, + "learning_rate": 4.80016684815012e-05, + "loss": 1.0169, + "step": 695 + }, + { + "epoch": 0.39, + "learning_rate": 4.7973208414175406e-05, + "loss": 1.0316, + "step": 700 + }, + { + "epoch": 0.39, + "learning_rate": 4.794455567345842e-05, + "loss": 1.0263, + "step": 705 + }, + { + "epoch": 0.39, + "learning_rate": 4.791571049965714e-05, + "loss": 1.0801, + "step": 710 + }, + { + "epoch": 0.4, + "learning_rate": 4.7886673134692404e-05, + "loss": 1.0575, + "step": 715 + }, + { + "epoch": 0.4, + "learning_rate": 4.7857443822096905e-05, + "loss": 0.9767, + "step": 720 + }, + { + "epoch": 0.4, + "learning_rate": 4.782802280701319e-05, + "loss": 0.9666, + "step": 725 + }, + { + "epoch": 0.4, + "learning_rate": 4.77984103361916e-05, + "loss": 0.9983, + "step": 730 + }, + { + "epoch": 0.41, + "learning_rate": 4.776860665798816e-05, + "loss": 1.0219, + "step": 735 + }, + { + "epoch": 0.41, + "learning_rate": 4.773861202236257e-05, + "loss": 0.9963, + "step": 740 + }, + { + "epoch": 0.41, + "learning_rate": 4.770842668087602e-05, + "loss": 1.0219, + "step": 745 + }, + { + "epoch": 0.41, + "learning_rate": 4.767805088668916e-05, + "loss": 1.0029, + "step": 750 + }, + { + "epoch": 0.42, + "learning_rate": 4.7647484894559936e-05, + "loss": 1.0077, + "step": 755 + }, + { + "epoch": 0.42, + "learning_rate": 4.7616728960841444e-05, + "loss": 0.9908, + "step": 760 + }, + { + "epoch": 0.42, + "learning_rate": 4.758578334347981e-05, + "loss": 1.0092, + "step": 765 + }, + { + "epoch": 0.43, + "learning_rate": 4.7554648302012015e-05, + "loss": 0.9874, + "step": 770 + }, + { + "epoch": 0.43, + "learning_rate": 4.7523324097563706e-05, + "loss": 1.0185, + "step": 775 + }, + { + "epoch": 0.43, + "learning_rate": 4.749181099284703e-05, + "loss": 1.0533, + "step": 780 + }, + { + "epoch": 0.43, + "learning_rate": 4.746010925215839e-05, + "loss": 1.0083, + "step": 785 + }, + { + "epoch": 0.44, + "learning_rate": 4.74282191413763e-05, + "loss": 1.0191, + "step": 790 + }, + { + "epoch": 0.44, + "learning_rate": 4.7396140927959045e-05, + "loss": 0.9786, + "step": 795 + }, + { + "epoch": 0.44, + "learning_rate": 4.7363874880942574e-05, + "loss": 1.0197, + "step": 800 + }, + { + "epoch": 0.45, + "learning_rate": 4.733142127093813e-05, + "loss": 1.0471, + "step": 805 + }, + { + "epoch": 0.45, + "learning_rate": 4.7298780370130014e-05, + "loss": 1.0134, + "step": 810 + }, + { + "epoch": 0.45, + "learning_rate": 4.726595245227336e-05, + "loss": 0.9801, + "step": 815 + }, + { + "epoch": 0.45, + "learning_rate": 4.723293779269173e-05, + "loss": 0.9944, + "step": 820 + }, + { + "epoch": 0.46, + "learning_rate": 4.7199736668274924e-05, + "loss": 1.0358, + "step": 825 + }, + { + "epoch": 0.46, + "learning_rate": 4.716634935747655e-05, + "loss": 0.9907, + "step": 830 + }, + { + "epoch": 0.46, + "learning_rate": 4.713277614031177e-05, + "loss": 1.0698, + "step": 835 + }, + { + "epoch": 0.46, + "learning_rate": 4.70990172983549e-05, + "loss": 1.0466, + "step": 840 + }, + { + "epoch": 0.47, + "learning_rate": 4.706507311473707e-05, + "loss": 0.9489, + "step": 845 + }, + { + "epoch": 0.47, + "learning_rate": 4.703094387414385e-05, + "loss": 0.9936, + "step": 850 + }, + { + "epoch": 0.47, + "learning_rate": 4.699662986281288e-05, + "loss": 0.9672, + "step": 855 + }, + { + "epoch": 0.48, + "learning_rate": 4.696213136853141e-05, + "loss": 1.0164, + "step": 860 + }, + { + "epoch": 0.48, + "learning_rate": 4.6927448680633954e-05, + "loss": 0.9817, + "step": 865 + }, + { + "epoch": 0.48, + "learning_rate": 4.689258208999983e-05, + "loss": 1.028, + "step": 870 + }, + { + "epoch": 0.48, + "learning_rate": 4.6857531889050716e-05, + "loss": 1.0239, + "step": 875 + }, + { + "epoch": 0.49, + "learning_rate": 4.682229837174821e-05, + "loss": 1.0378, + "step": 880 + }, + { + "epoch": 0.49, + "learning_rate": 4.678688183359135e-05, + "loss": 0.9908, + "step": 885 + }, + { + "epoch": 0.49, + "learning_rate": 4.675128257161418e-05, + "loss": 0.9384, + "step": 890 + }, + { + "epoch": 0.49, + "learning_rate": 4.671550088438319e-05, + "loss": 0.9481, + "step": 895 + }, + { + "epoch": 0.5, + "learning_rate": 4.6679537071994874e-05, + "loss": 0.9541, + "step": 900 + }, + { + "epoch": 0.5, + "learning_rate": 4.6643391436073165e-05, + "loss": 0.9787, + "step": 905 + }, + { + "epoch": 0.5, + "learning_rate": 4.660706427976693e-05, + "loss": 0.9897, + "step": 910 + }, + { + "epoch": 0.51, + "learning_rate": 4.657055590774745e-05, + "loss": 0.9947, + "step": 915 + }, + { + "epoch": 0.51, + "learning_rate": 4.6533866626205805e-05, + "loss": 0.9614, + "step": 920 + }, + { + "epoch": 0.51, + "learning_rate": 4.649699674285036e-05, + "loss": 1.0323, + "step": 925 + }, + { + "epoch": 0.51, + "learning_rate": 4.645994656690417e-05, + "loss": 1.0345, + "step": 930 + }, + { + "epoch": 0.52, + "learning_rate": 4.642271640910235e-05, + "loss": 1.0432, + "step": 935 + }, + { + "epoch": 0.52, + "learning_rate": 4.638530658168954e-05, + "loss": 1.0102, + "step": 940 + }, + { + "epoch": 0.52, + "learning_rate": 4.6347717398417203e-05, + "loss": 1.0361, + "step": 945 + }, + { + "epoch": 0.53, + "learning_rate": 4.6309949174541096e-05, + "loss": 0.9818, + "step": 950 + }, + { + "epoch": 0.53, + "learning_rate": 4.627200222681851e-05, + "loss": 1.0293, + "step": 955 + }, + { + "epoch": 0.53, + "learning_rate": 4.6233876873505694e-05, + "loss": 1.0125, + "step": 960 + }, + { + "epoch": 0.53, + "learning_rate": 4.619557343435516e-05, + "loss": 1.0039, + "step": 965 + }, + { + "epoch": 0.54, + "learning_rate": 4.615709223061302e-05, + "loss": 1.0146, + "step": 970 + }, + { + "epoch": 0.54, + "learning_rate": 4.611843358501624e-05, + "loss": 1.0352, + "step": 975 + }, + { + "epoch": 0.54, + "learning_rate": 4.6079597821789993e-05, + "loss": 1.0405, + "step": 980 + }, + { + "epoch": 0.54, + "learning_rate": 4.604058526664491e-05, + "loss": 0.9834, + "step": 985 + }, + { + "epoch": 0.55, + "learning_rate": 4.600139624677436e-05, + "loss": 1.0252, + "step": 990 + }, + { + "epoch": 0.55, + "learning_rate": 4.596203109085168e-05, + "loss": 1.0516, + "step": 995 + }, + { + "epoch": 0.55, + "learning_rate": 4.5922490129027464e-05, + "loss": 0.9762, + "step": 1000 + }, + { + "epoch": 0.56, + "learning_rate": 4.588277369292674e-05, + "loss": 0.9801, + "step": 1005 + }, + { + "epoch": 0.56, + "learning_rate": 4.5842882115646234e-05, + "loss": 1.0127, + "step": 1010 + }, + { + "epoch": 0.56, + "learning_rate": 4.580281573175157e-05, + "loss": 1.0415, + "step": 1015 + }, + { + "epoch": 0.56, + "learning_rate": 4.576257487727442e-05, + "loss": 0.9974, + "step": 1020 + }, + { + "epoch": 0.57, + "learning_rate": 4.572215988970974e-05, + "loss": 1.0097, + "step": 1025 + }, + { + "epoch": 0.57, + "learning_rate": 4.568157110801293e-05, + "loss": 1.0004, + "step": 1030 + }, + { + "epoch": 0.57, + "learning_rate": 4.5640808872596944e-05, + "loss": 0.9932, + "step": 1035 + }, + { + "epoch": 0.58, + "learning_rate": 4.5599873525329505e-05, + "loss": 0.9437, + "step": 1040 + }, + { + "epoch": 0.58, + "learning_rate": 4.555876540953019e-05, + "loss": 1.0004, + "step": 1045 + }, + { + "epoch": 0.58, + "learning_rate": 4.551748486996755e-05, + "loss": 0.9743, + "step": 1050 + }, + { + "epoch": 0.58, + "learning_rate": 4.547603225285626e-05, + "loss": 1.0303, + "step": 1055 + }, + { + "epoch": 0.59, + "learning_rate": 4.543440790585417e-05, + "loss": 1.0225, + "step": 1060 + }, + { + "epoch": 0.59, + "learning_rate": 4.539261217805939e-05, + "loss": 1.0102, + "step": 1065 + }, + { + "epoch": 0.59, + "learning_rate": 4.535064542000743e-05, + "loss": 1.0087, + "step": 1070 + }, + { + "epoch": 0.59, + "learning_rate": 4.5308507983668165e-05, + "loss": 0.952, + "step": 1075 + }, + { + "epoch": 0.6, + "learning_rate": 4.526620022244293e-05, + "loss": 0.9767, + "step": 1080 + }, + { + "epoch": 0.6, + "learning_rate": 4.522372249116158e-05, + "loss": 1.0049, + "step": 1085 + }, + { + "epoch": 0.6, + "learning_rate": 4.5181075146079456e-05, + "loss": 1.0397, + "step": 1090 + }, + { + "epoch": 0.61, + "learning_rate": 4.5138258544874455e-05, + "loss": 1.0167, + "step": 1095 + }, + { + "epoch": 0.61, + "learning_rate": 4.5095273046643985e-05, + "loss": 0.9966, + "step": 1100 + }, + { + "epoch": 0.61, + "learning_rate": 4.5052119011901986e-05, + "loss": 0.983, + "step": 1105 + }, + { + "epoch": 0.61, + "learning_rate": 4.500879680257587e-05, + "loss": 1.0301, + "step": 1110 + }, + { + "epoch": 0.62, + "learning_rate": 4.4965306782003535e-05, + "loss": 1.0371, + "step": 1115 + }, + { + "epoch": 0.62, + "learning_rate": 4.492164931493028e-05, + "loss": 1.0083, + "step": 1120 + }, + { + "epoch": 0.62, + "learning_rate": 4.487782476750575e-05, + "loss": 0.9728, + "step": 1125 + }, + { + "epoch": 0.62, + "learning_rate": 4.4833833507280884e-05, + "loss": 0.9892, + "step": 1130 + }, + { + "epoch": 0.63, + "learning_rate": 4.4789675903204805e-05, + "loss": 0.9847, + "step": 1135 + }, + { + "epoch": 0.63, + "learning_rate": 4.474535232562176e-05, + "loss": 1.0545, + "step": 1140 + }, + { + "epoch": 0.63, + "learning_rate": 4.470086314626797e-05, + "loss": 1.0204, + "step": 1145 + }, + { + "epoch": 0.64, + "learning_rate": 4.465620873826856e-05, + "loss": 0.9648, + "step": 1150 + }, + { + "epoch": 0.64, + "learning_rate": 4.46113894761344e-05, + "loss": 0.9545, + "step": 1155 + }, + { + "epoch": 0.64, + "learning_rate": 4.456640573575896e-05, + "loss": 1.0256, + "step": 1160 + }, + { + "epoch": 0.64, + "learning_rate": 4.4521257894415183e-05, + "loss": 0.99, + "step": 1165 + }, + { + "epoch": 0.65, + "learning_rate": 4.44759463307523e-05, + "loss": 0.9906, + "step": 1170 + }, + { + "epoch": 0.65, + "learning_rate": 4.443047142479266e-05, + "loss": 0.9876, + "step": 1175 + }, + { + "epoch": 0.65, + "learning_rate": 4.4384833557928553e-05, + "loss": 1.0495, + "step": 1180 + }, + { + "epoch": 0.66, + "learning_rate": 4.4339033112918966e-05, + "loss": 0.9869, + "step": 1185 + }, + { + "epoch": 0.66, + "learning_rate": 4.4293070473886456e-05, + "loss": 1.0299, + "step": 1190 + }, + { + "epoch": 0.66, + "learning_rate": 4.424694602631385e-05, + "loss": 1.0073, + "step": 1195 + }, + { + "epoch": 0.66, + "learning_rate": 4.420066015704105e-05, + "loss": 1.0023, + "step": 1200 + }, + { + "epoch": 0.67, + "learning_rate": 4.41542132542618e-05, + "loss": 0.9462, + "step": 1205 + }, + { + "epoch": 0.67, + "learning_rate": 4.410760570752037e-05, + "loss": 1.0116, + "step": 1210 + }, + { + "epoch": 0.67, + "learning_rate": 4.4060837907708375e-05, + "loss": 0.9652, + "step": 1215 + }, + { + "epoch": 0.67, + "learning_rate": 4.401391024706142e-05, + "loss": 1.0411, + "step": 1220 + }, + { + "epoch": 0.68, + "learning_rate": 4.396682311915586e-05, + "loss": 0.9691, + "step": 1225 + }, + { + "epoch": 0.68, + "learning_rate": 4.3919576918905495e-05, + "loss": 0.961, + "step": 1230 + }, + { + "epoch": 0.68, + "learning_rate": 4.387217204255819e-05, + "loss": 0.9602, + "step": 1235 + }, + { + "epoch": 0.69, + "learning_rate": 4.3824608887692666e-05, + "loss": 1.0197, + "step": 1240 + }, + { + "epoch": 0.69, + "learning_rate": 4.377688785321507e-05, + "loss": 0.9982, + "step": 1245 + }, + { + "epoch": 0.69, + "learning_rate": 4.372900933935569e-05, + "loss": 0.986, + "step": 1250 + }, + { + "epoch": 0.69, + "learning_rate": 4.368097374766556e-05, + "loss": 0.9744, + "step": 1255 + }, + { + "epoch": 0.7, + "learning_rate": 4.3632781481013105e-05, + "loss": 1.0078, + "step": 1260 + }, + { + "epoch": 0.7, + "learning_rate": 4.358443294358077e-05, + "loss": 0.9865, + "step": 1265 + }, + { + "epoch": 0.7, + "learning_rate": 4.35359285408616e-05, + "loss": 0.976, + "step": 1270 + }, + { + "epoch": 0.71, + "learning_rate": 4.348726867965591e-05, + "loss": 1.0141, + "step": 1275 + }, + { + "epoch": 0.71, + "learning_rate": 4.343845376806777e-05, + "loss": 0.952, + "step": 1280 + }, + { + "epoch": 0.71, + "learning_rate": 4.338948421550169e-05, + "loss": 1.0239, + "step": 1285 + }, + { + "epoch": 0.71, + "learning_rate": 4.334036043265909e-05, + "loss": 0.994, + "step": 1290 + }, + { + "epoch": 0.72, + "learning_rate": 4.329108283153492e-05, + "loss": 1.0505, + "step": 1295 + }, + { + "epoch": 0.72, + "learning_rate": 4.3241651825414195e-05, + "loss": 0.9919, + "step": 1300 + }, + { + "epoch": 0.72, + "learning_rate": 4.31920678288685e-05, + "loss": 1.0169, + "step": 1305 + }, + { + "epoch": 0.72, + "learning_rate": 4.3142331257752546e-05, + "loss": 0.9938, + "step": 1310 + }, + { + "epoch": 0.73, + "learning_rate": 4.309244252920064e-05, + "loss": 0.92, + "step": 1315 + }, + { + "epoch": 0.73, + "learning_rate": 4.304240206162326e-05, + "loss": 0.9537, + "step": 1320 + }, + { + "epoch": 0.73, + "learning_rate": 4.299221027470345e-05, + "loss": 0.996, + "step": 1325 + }, + { + "epoch": 0.74, + "learning_rate": 4.294186758939339e-05, + "loss": 0.9652, + "step": 1330 + }, + { + "epoch": 0.74, + "learning_rate": 4.2891374427910795e-05, + "loss": 1.0138, + "step": 1335 + }, + { + "epoch": 0.74, + "learning_rate": 4.284073121373544e-05, + "loss": 0.9267, + "step": 1340 + }, + { + "epoch": 0.74, + "learning_rate": 4.278993837160553e-05, + "loss": 0.9899, + "step": 1345 + }, + { + "epoch": 0.75, + "learning_rate": 4.273899632751422e-05, + "loss": 0.963, + "step": 1350 + }, + { + "epoch": 0.75, + "learning_rate": 4.2687905508705974e-05, + "loss": 1.0066, + "step": 1355 + }, + { + "epoch": 0.75, + "learning_rate": 4.263666634367303e-05, + "loss": 0.9939, + "step": 1360 + }, + { + "epoch": 0.75, + "learning_rate": 4.258527926215178e-05, + "loss": 0.9667, + "step": 1365 + }, + { + "epoch": 0.76, + "learning_rate": 4.253374469511917e-05, + "loss": 0.9999, + "step": 1370 + }, + { + "epoch": 0.76, + "learning_rate": 4.248206307478909e-05, + "loss": 0.9738, + "step": 1375 + }, + { + "epoch": 0.76, + "learning_rate": 4.243023483460875e-05, + "loss": 0.9829, + "step": 1380 + }, + { + "epoch": 0.77, + "learning_rate": 4.237826040925503e-05, + "loss": 1.0107, + "step": 1385 + }, + { + "epoch": 0.77, + "learning_rate": 4.232614023463088e-05, + "loss": 0.9992, + "step": 1390 + }, + { + "epoch": 0.77, + "learning_rate": 4.227387474786159e-05, + "loss": 0.9566, + "step": 1395 + }, + { + "epoch": 0.77, + "learning_rate": 4.222146438729119e-05, + "loss": 0.9722, + "step": 1400 + }, + { + "epoch": 0.78, + "learning_rate": 4.216890959247873e-05, + "loss": 0.988, + "step": 1405 + }, + { + "epoch": 0.78, + "learning_rate": 4.211621080419463e-05, + "loss": 1.0103, + "step": 1410 + }, + { + "epoch": 0.78, + "learning_rate": 4.206336846441695e-05, + "loss": 0.9805, + "step": 1415 + }, + { + "epoch": 0.79, + "learning_rate": 4.201038301632772e-05, + "loss": 1.0177, + "step": 1420 + }, + { + "epoch": 0.79, + "learning_rate": 4.195725490430917e-05, + "loss": 0.9938, + "step": 1425 + }, + { + "epoch": 0.79, + "learning_rate": 4.190398457394007e-05, + "loss": 1.0276, + "step": 1430 + }, + { + "epoch": 0.79, + "learning_rate": 4.1850572471991924e-05, + "loss": 0.9909, + "step": 1435 + }, + { + "epoch": 0.8, + "learning_rate": 4.1797019046425264e-05, + "loss": 0.9794, + "step": 1440 + }, + { + "epoch": 0.8, + "learning_rate": 4.1743324746385914e-05, + "loss": 1.0073, + "step": 1445 + }, + { + "epoch": 0.8, + "learning_rate": 4.1689490022201154e-05, + "loss": 1.0106, + "step": 1450 + }, + { + "epoch": 0.8, + "learning_rate": 4.163551532537601e-05, + "loss": 1.0241, + "step": 1455 + }, + { + "epoch": 0.81, + "learning_rate": 4.1581401108589425e-05, + "loss": 1.0178, + "step": 1460 + }, + { + "epoch": 0.81, + "learning_rate": 4.1527147825690495e-05, + "loss": 0.9426, + "step": 1465 + }, + { + "epoch": 0.81, + "learning_rate": 4.1472755931694626e-05, + "loss": 0.9642, + "step": 1470 + }, + { + "epoch": 0.82, + "learning_rate": 4.141822588277976e-05, + "loss": 0.9678, + "step": 1475 + }, + { + "epoch": 0.82, + "learning_rate": 4.136355813628251e-05, + "loss": 0.9654, + "step": 1480 + }, + { + "epoch": 0.82, + "learning_rate": 4.130875315069435e-05, + "loss": 0.9748, + "step": 1485 + }, + { + "epoch": 0.82, + "learning_rate": 4.125381138565775e-05, + "loss": 0.9321, + "step": 1490 + }, + { + "epoch": 0.83, + "learning_rate": 4.1198733301962346e-05, + "loss": 0.9885, + "step": 1495 + }, + { + "epoch": 0.83, + "learning_rate": 4.114351936154105e-05, + "loss": 0.9385, + "step": 1500 + }, + { + "epoch": 0.83, + "learning_rate": 4.108817002746619e-05, + "loss": 0.9362, + "step": 1505 + }, + { + "epoch": 0.83, + "learning_rate": 4.1032685763945625e-05, + "loss": 0.9764, + "step": 1510 + }, + { + "epoch": 0.84, + "learning_rate": 4.097706703631886e-05, + "loss": 0.961, + "step": 1515 + }, + { + "epoch": 0.84, + "learning_rate": 4.092131431105312e-05, + "loss": 0.9818, + "step": 1520 + }, + { + "epoch": 0.84, + "learning_rate": 4.086542805573945e-05, + "loss": 0.9855, + "step": 1525 + }, + { + "epoch": 0.85, + "learning_rate": 4.080940873908881e-05, + "loss": 0.9618, + "step": 1530 + }, + { + "epoch": 0.85, + "learning_rate": 4.07532568309281e-05, + "loss": 1.0102, + "step": 1535 + }, + { + "epoch": 0.85, + "learning_rate": 4.069697280219628e-05, + "loss": 0.9773, + "step": 1540 + }, + { + "epoch": 0.85, + "learning_rate": 4.0640557124940376e-05, + "loss": 0.9687, + "step": 1545 + }, + { + "epoch": 0.86, + "learning_rate": 4.058401027231152e-05, + "loss": 0.9818, + "step": 1550 + }, + { + "epoch": 0.86, + "learning_rate": 4.052733271856103e-05, + "loss": 0.9911, + "step": 1555 + }, + { + "epoch": 0.86, + "learning_rate": 4.0470524939036355e-05, + "loss": 0.9694, + "step": 1560 + }, + { + "epoch": 0.87, + "learning_rate": 4.0413587410177155e-05, + "loss": 0.9374, + "step": 1565 + }, + { + "epoch": 0.87, + "learning_rate": 4.035652060951128e-05, + "loss": 0.956, + "step": 1570 + }, + { + "epoch": 0.87, + "learning_rate": 4.0299325015650774e-05, + "loss": 0.9491, + "step": 1575 + }, + { + "epoch": 0.87, + "learning_rate": 4.024200110828783e-05, + "loss": 0.9569, + "step": 1580 + }, + { + "epoch": 0.88, + "learning_rate": 4.018454936819082e-05, + "loss": 0.9627, + "step": 1585 + }, + { + "epoch": 0.88, + "learning_rate": 4.012697027720018e-05, + "loss": 0.9922, + "step": 1590 + }, + { + "epoch": 0.88, + "learning_rate": 4.0069264318224506e-05, + "loss": 0.9703, + "step": 1595 + }, + { + "epoch": 0.88, + "learning_rate": 4.0011431975236337e-05, + "loss": 0.9436, + "step": 1600 + }, + { + "epoch": 0.89, + "learning_rate": 3.995347373326822e-05, + "loss": 0.961, + "step": 1605 + }, + { + "epoch": 0.89, + "learning_rate": 3.989539007840861e-05, + "loss": 0.9247, + "step": 1610 + }, + { + "epoch": 0.89, + "learning_rate": 3.983718149779775e-05, + "loss": 0.9537, + "step": 1615 + }, + { + "epoch": 0.9, + "learning_rate": 3.9778848479623656e-05, + "loss": 0.9415, + "step": 1620 + }, + { + "epoch": 0.9, + "learning_rate": 3.972039151311795e-05, + "loss": 0.9954, + "step": 1625 + }, + { + "epoch": 0.9, + "learning_rate": 3.966181108855183e-05, + "loss": 0.9451, + "step": 1630 + }, + { + "epoch": 0.9, + "learning_rate": 3.960310769723189e-05, + "loss": 0.9977, + "step": 1635 + }, + { + "epoch": 0.91, + "learning_rate": 3.9544281831496034e-05, + "loss": 0.9806, + "step": 1640 + }, + { + "epoch": 0.91, + "learning_rate": 3.9485333984709374e-05, + "loss": 0.9851, + "step": 1645 + }, + { + "epoch": 0.91, + "learning_rate": 3.942626465126001e-05, + "loss": 1.0089, + "step": 1650 + }, + { + "epoch": 0.92, + "learning_rate": 3.9367074326555e-05, + "loss": 0.9562, + "step": 1655 + }, + { + "epoch": 0.92, + "learning_rate": 3.930776350701609e-05, + "loss": 0.9892, + "step": 1660 + }, + { + "epoch": 0.92, + "learning_rate": 3.92483326900756e-05, + "loss": 0.9875, + "step": 1665 + }, + { + "epoch": 0.92, + "learning_rate": 3.91887823741723e-05, + "loss": 1.0012, + "step": 1670 + }, + { + "epoch": 0.93, + "learning_rate": 3.9129113058747136e-05, + "loss": 0.97, + "step": 1675 + }, + { + "epoch": 0.93, + "learning_rate": 3.9069325244239095e-05, + "loss": 0.9793, + "step": 1680 + }, + { + "epoch": 0.93, + "learning_rate": 3.900941943208103e-05, + "loss": 0.9864, + "step": 1685 + }, + { + "epoch": 0.93, + "learning_rate": 3.894939612469539e-05, + "loss": 0.9596, + "step": 1690 + }, + { + "epoch": 0.94, + "learning_rate": 3.888925582549006e-05, + "loss": 0.9679, + "step": 1695 + }, + { + "epoch": 0.94, + "learning_rate": 3.882899903885412e-05, + "loss": 0.9395, + "step": 1700 + }, + { + "epoch": 0.94, + "learning_rate": 3.876862627015361e-05, + "loss": 0.9774, + "step": 1705 + }, + { + "epoch": 0.95, + "learning_rate": 3.87081380257273e-05, + "loss": 0.9686, + "step": 1710 + }, + { + "epoch": 0.95, + "learning_rate": 3.864753481288244e-05, + "loss": 0.953, + "step": 1715 + }, + { + "epoch": 0.95, + "learning_rate": 3.8586817139890515e-05, + "loss": 0.9719, + "step": 1720 + }, + { + "epoch": 0.95, + "learning_rate": 3.852598551598294e-05, + "loss": 0.9748, + "step": 1725 + }, + { + "epoch": 0.96, + "learning_rate": 3.8465040451346874e-05, + "loss": 0.9741, + "step": 1730 + }, + { + "epoch": 0.96, + "learning_rate": 3.8403982457120836e-05, + "loss": 0.9747, + "step": 1735 + }, + { + "epoch": 0.96, + "learning_rate": 3.834281204539051e-05, + "loss": 0.9791, + "step": 1740 + }, + { + "epoch": 0.96, + "learning_rate": 3.828152972918438e-05, + "loss": 0.9704, + "step": 1745 + }, + { + "epoch": 0.97, + "learning_rate": 3.82201360224695e-05, + "loss": 0.9556, + "step": 1750 + }, + { + "epoch": 0.97, + "learning_rate": 3.815863144014711e-05, + "loss": 0.9552, + "step": 1755 + }, + { + "epoch": 0.97, + "learning_rate": 3.809701649804834e-05, + "loss": 0.9594, + "step": 1760 + }, + { + "epoch": 0.98, + "learning_rate": 3.8035291712929926e-05, + "loss": 0.959, + "step": 1765 + }, + { + "epoch": 0.98, + "learning_rate": 3.797345760246982e-05, + "loss": 1.0254, + "step": 1770 + }, + { + "epoch": 0.98, + "learning_rate": 3.791151468526289e-05, + "loss": 0.9925, + "step": 1775 + }, + { + "epoch": 0.98, + "learning_rate": 3.784946348081654e-05, + "loss": 0.9516, + "step": 1780 + }, + { + "epoch": 0.99, + "learning_rate": 3.7787304509546365e-05, + "loss": 0.954, + "step": 1785 + }, + { + "epoch": 0.99, + "learning_rate": 3.7725038292771774e-05, + "loss": 0.9465, + "step": 1790 + }, + { + "epoch": 0.99, + "learning_rate": 3.766266535271167e-05, + "loss": 0.9792, + "step": 1795 + }, + { + "epoch": 1.0, + "learning_rate": 3.760018621248e-05, + "loss": 0.964, + "step": 1800 + }, + { + "epoch": 1.0, + "learning_rate": 3.75376013960814e-05, + "loss": 0.9419, + "step": 1805 + }, + { + "epoch": 1.0, + "learning_rate": 3.747491142840681e-05, + "loss": 0.9818, + "step": 1810 + }, + { + "epoch": 1.0, + "learning_rate": 3.741211683522904e-05, + "loss": 0.9153, + "step": 1815 + }, + { + "epoch": 1.01, + "learning_rate": 3.734921814319841e-05, + "loss": 0.955, + "step": 1820 + }, + { + "epoch": 1.01, + "learning_rate": 3.728621587983828e-05, + "loss": 0.9467, + "step": 1825 + }, + { + "epoch": 1.01, + "learning_rate": 3.722311057354067e-05, + "loss": 0.9816, + "step": 1830 + }, + { + "epoch": 1.01, + "learning_rate": 3.715990275356178e-05, + "loss": 0.9727, + "step": 1835 + }, + { + "epoch": 1.02, + "learning_rate": 3.7096592950017617e-05, + "loss": 0.9822, + "step": 1840 + }, + { + "epoch": 1.02, + "learning_rate": 3.703318169387947e-05, + "loss": 0.942, + "step": 1845 + }, + { + "epoch": 1.02, + "learning_rate": 3.696966951696952e-05, + "loss": 0.9306, + "step": 1850 + }, + { + "epoch": 1.03, + "learning_rate": 3.690605695195637e-05, + "loss": 0.9654, + "step": 1855 + }, + { + "epoch": 1.03, + "learning_rate": 3.684234453235054e-05, + "loss": 0.9592, + "step": 1860 + }, + { + "epoch": 1.03, + "learning_rate": 3.677853279250003e-05, + "loss": 0.988, + "step": 1865 + }, + { + "epoch": 1.03, + "learning_rate": 3.671462226758583e-05, + "loss": 0.9462, + "step": 1870 + }, + { + "epoch": 1.04, + "learning_rate": 3.665061349361742e-05, + "loss": 0.9685, + "step": 1875 + }, + { + "epoch": 1.04, + "learning_rate": 3.658650700742828e-05, + "loss": 0.9772, + "step": 1880 + }, + { + "epoch": 1.04, + "learning_rate": 3.6522303346671404e-05, + "loss": 0.9482, + "step": 1885 + }, + { + "epoch": 1.05, + "learning_rate": 3.645800304981477e-05, + "loss": 1.0069, + "step": 1890 + }, + { + "epoch": 1.05, + "learning_rate": 3.639360665613683e-05, + "loss": 0.9003, + "step": 1895 + }, + { + "epoch": 1.05, + "learning_rate": 3.632911470572197e-05, + "loss": 0.9279, + "step": 1900 + }, + { + "epoch": 1.05, + "learning_rate": 3.626452773945603e-05, + "loss": 0.9237, + "step": 1905 + }, + { + "epoch": 1.06, + "learning_rate": 3.619984629902172e-05, + "loss": 1.0086, + "step": 1910 + }, + { + "epoch": 1.06, + "learning_rate": 3.613507092689409e-05, + "loss": 0.9625, + "step": 1915 + }, + { + "epoch": 1.06, + "learning_rate": 3.607020216633599e-05, + "loss": 0.9297, + "step": 1920 + }, + { + "epoch": 1.06, + "learning_rate": 3.60052405613935e-05, + "loss": 0.9718, + "step": 1925 + }, + { + "epoch": 1.07, + "learning_rate": 3.594018665689139e-05, + "loss": 0.9512, + "step": 1930 + }, + { + "epoch": 1.07, + "learning_rate": 3.5875040998428513e-05, + "loss": 0.9923, + "step": 1935 + }, + { + "epoch": 1.07, + "learning_rate": 3.5809804132373253e-05, + "loss": 0.9518, + "step": 1940 + }, + { + "epoch": 1.08, + "learning_rate": 3.574447660585897e-05, + "loss": 0.9324, + "step": 1945 + }, + { + "epoch": 1.08, + "learning_rate": 3.5679058966779344e-05, + "loss": 0.9724, + "step": 1950 + }, + { + "epoch": 1.08, + "learning_rate": 3.561355176378384e-05, + "loss": 0.978, + "step": 1955 + }, + { + "epoch": 1.08, + "learning_rate": 3.554795554627307e-05, + "loss": 0.9893, + "step": 1960 + }, + { + "epoch": 1.09, + "learning_rate": 3.548227086439422e-05, + "loss": 0.967, + "step": 1965 + }, + { + "epoch": 1.09, + "learning_rate": 3.541649826903639e-05, + "loss": 0.9529, + "step": 1970 + }, + { + "epoch": 1.09, + "learning_rate": 3.535063831182602e-05, + "loss": 0.9506, + "step": 1975 + }, + { + "epoch": 1.09, + "learning_rate": 3.528469154512224e-05, + "loss": 0.9525, + "step": 1980 + }, + { + "epoch": 1.1, + "learning_rate": 3.521865852201223e-05, + "loss": 0.9258, + "step": 1985 + }, + { + "epoch": 1.1, + "learning_rate": 3.5152539796306596e-05, + "loss": 0.9417, + "step": 1990 + }, + { + "epoch": 1.1, + "learning_rate": 3.508633592253472e-05, + "loss": 0.9465, + "step": 1995 + }, + { + "epoch": 1.11, + "learning_rate": 3.502004745594011e-05, + "loss": 0.9494, + "step": 2000 + }, + { + "epoch": 1.11, + "learning_rate": 3.4953674952475755e-05, + "loss": 0.9709, + "step": 2005 + }, + { + "epoch": 1.11, + "learning_rate": 3.488721896879943e-05, + "loss": 0.9581, + "step": 2010 + }, + { + "epoch": 1.11, + "learning_rate": 3.4820680062269074e-05, + "loss": 0.974, + "step": 2015 + }, + { + "epoch": 1.12, + "learning_rate": 3.4754058790938046e-05, + "loss": 0.9768, + "step": 2020 + }, + { + "epoch": 1.12, + "learning_rate": 3.468735571355055e-05, + "loss": 0.982, + "step": 2025 + }, + { + "epoch": 1.12, + "learning_rate": 3.4620571389536825e-05, + "loss": 0.984, + "step": 2030 + }, + { + "epoch": 1.13, + "learning_rate": 3.455370637900856e-05, + "loss": 0.9604, + "step": 2035 + }, + { + "epoch": 1.13, + "learning_rate": 3.448676124275414e-05, + "loss": 0.897, + "step": 2040 + }, + { + "epoch": 1.13, + "learning_rate": 3.4419736542233925e-05, + "loss": 0.9968, + "step": 2045 + }, + { + "epoch": 1.13, + "learning_rate": 3.4352632839575616e-05, + "loss": 0.9479, + "step": 2050 + }, + { + "epoch": 1.14, + "learning_rate": 3.428545069756946e-05, + "loss": 0.9724, + "step": 2055 + }, + { + "epoch": 1.14, + "learning_rate": 3.42181906796636e-05, + "loss": 0.9493, + "step": 2060 + }, + { + "epoch": 1.14, + "learning_rate": 3.415085334995927e-05, + "loss": 0.9348, + "step": 2065 + }, + { + "epoch": 1.14, + "learning_rate": 3.408343927320613e-05, + "loss": 0.9702, + "step": 2070 + }, + { + "epoch": 1.15, + "learning_rate": 3.401594901479753e-05, + "loss": 0.9089, + "step": 2075 + }, + { + "epoch": 1.15, + "learning_rate": 3.394838314076572e-05, + "loss": 0.9606, + "step": 2080 + }, + { + "epoch": 1.15, + "learning_rate": 3.3880742217777115e-05, + "loss": 0.9743, + "step": 2085 + }, + { + "epoch": 1.16, + "learning_rate": 3.381302681312759e-05, + "loss": 0.9469, + "step": 2090 + }, + { + "epoch": 1.16, + "learning_rate": 3.374523749473767e-05, + "loss": 0.949, + "step": 2095 + }, + { + "epoch": 1.16, + "learning_rate": 3.367737483114779e-05, + "loss": 0.9567, + "step": 2100 + }, + { + "epoch": 1.16, + "learning_rate": 3.360943939151351e-05, + "loss": 0.9718, + "step": 2105 + }, + { + "epoch": 1.17, + "learning_rate": 3.354143174560078e-05, + "loss": 0.9626, + "step": 2110 + }, + { + "epoch": 1.17, + "learning_rate": 3.3473352463781105e-05, + "loss": 0.9346, + "step": 2115 + }, + { + "epoch": 1.17, + "learning_rate": 3.340520211702681e-05, + "loss": 0.9208, + "step": 2120 + }, + { + "epoch": 1.18, + "learning_rate": 3.333698127690623e-05, + "loss": 0.9856, + "step": 2125 + }, + { + "epoch": 1.18, + "learning_rate": 3.326869051557891e-05, + "loss": 0.9049, + "step": 2130 + }, + { + "epoch": 1.18, + "learning_rate": 3.320033040579082e-05, + "loss": 0.9222, + "step": 2135 + }, + { + "epoch": 1.18, + "learning_rate": 3.3131901520869565e-05, + "loss": 0.9648, + "step": 2140 + }, + { + "epoch": 1.19, + "learning_rate": 3.306340443471951e-05, + "loss": 0.9538, + "step": 2145 + }, + { + "epoch": 1.19, + "learning_rate": 3.299483972181708e-05, + "loss": 0.9314, + "step": 2150 + }, + { + "epoch": 1.19, + "learning_rate": 3.292620795720583e-05, + "loss": 0.9576, + "step": 2155 + }, + { + "epoch": 1.19, + "learning_rate": 3.285750971649167e-05, + "loss": 0.9499, + "step": 2160 + }, + { + "epoch": 1.2, + "learning_rate": 3.278874557583807e-05, + "loss": 0.9568, + "step": 2165 + }, + { + "epoch": 1.2, + "learning_rate": 3.271991611196117e-05, + "loss": 0.9642, + "step": 2170 + }, + { + "epoch": 1.2, + "learning_rate": 3.265102190212497e-05, + "loss": 0.9526, + "step": 2175 + }, + { + "epoch": 1.21, + "learning_rate": 3.258206352413648e-05, + "loss": 0.933, + "step": 2180 + }, + { + "epoch": 1.21, + "learning_rate": 3.2513041556340887e-05, + "loss": 0.9683, + "step": 2185 + }, + { + "epoch": 1.21, + "learning_rate": 3.244395657761671e-05, + "loss": 0.9155, + "step": 2190 + }, + { + "epoch": 1.21, + "learning_rate": 3.2374809167370924e-05, + "loss": 0.9262, + "step": 2195 + }, + { + "epoch": 1.22, + "learning_rate": 3.230559990553409e-05, + "loss": 0.9778, + "step": 2200 + }, + { + "epoch": 1.22, + "learning_rate": 3.2236329372555544e-05, + "loss": 0.9577, + "step": 2205 + }, + { + "epoch": 1.22, + "learning_rate": 3.2166998149398465e-05, + "loss": 0.9286, + "step": 2210 + }, + { + "epoch": 1.22, + "learning_rate": 3.209760681753505e-05, + "loss": 0.9634, + "step": 2215 + }, + { + "epoch": 1.23, + "learning_rate": 3.2028155958941615e-05, + "loss": 0.9467, + "step": 2220 + }, + { + "epoch": 1.23, + "learning_rate": 3.195864615609373e-05, + "loss": 0.9543, + "step": 2225 + }, + { + "epoch": 1.23, + "learning_rate": 3.1889077991961304e-05, + "loss": 0.9914, + "step": 2230 + }, + { + "epoch": 1.24, + "learning_rate": 3.181945205000373e-05, + "loss": 0.9309, + "step": 2235 + }, + { + "epoch": 1.24, + "learning_rate": 3.1749768914164955e-05, + "loss": 0.9299, + "step": 2240 + }, + { + "epoch": 1.24, + "learning_rate": 3.168002916886864e-05, + "loss": 0.9462, + "step": 2245 + }, + { + "epoch": 1.24, + "learning_rate": 3.1610233399013194e-05, + "loss": 0.948, + "step": 2250 + }, + { + "epoch": 1.25, + "learning_rate": 3.15403821899669e-05, + "loss": 0.9335, + "step": 2255 + }, + { + "epoch": 1.25, + "learning_rate": 3.147047612756302e-05, + "loss": 0.952, + "step": 2260 + }, + { + "epoch": 1.25, + "learning_rate": 3.140051579809484e-05, + "loss": 0.9532, + "step": 2265 + }, + { + "epoch": 1.26, + "learning_rate": 3.133050178831079e-05, + "loss": 0.9853, + "step": 2270 + }, + { + "epoch": 1.26, + "learning_rate": 3.12604346854095e-05, + "loss": 0.8925, + "step": 2275 + }, + { + "epoch": 1.26, + "learning_rate": 3.119031507703491e-05, + "loss": 0.942, + "step": 2280 + }, + { + "epoch": 1.26, + "learning_rate": 3.112014355127129e-05, + "loss": 0.9132, + "step": 2285 + }, + { + "epoch": 1.27, + "learning_rate": 3.104992069663835e-05, + "loss": 0.9335, + "step": 2290 + }, + { + "epoch": 1.27, + "learning_rate": 3.0979647102086273e-05, + "loss": 0.9403, + "step": 2295 + }, + { + "epoch": 1.27, + "learning_rate": 3.090932335699081e-05, + "loss": 0.9246, + "step": 2300 + }, + { + "epoch": 1.27, + "learning_rate": 3.083895005114831e-05, + "loss": 0.912, + "step": 2305 + }, + { + "epoch": 1.28, + "learning_rate": 3.076852777477079e-05, + "loss": 0.9334, + "step": 2310 + }, + { + "epoch": 1.28, + "learning_rate": 3.069805711848096e-05, + "loss": 0.933, + "step": 2315 + }, + { + "epoch": 1.28, + "learning_rate": 3.062753867330729e-05, + "loss": 0.9348, + "step": 2320 + }, + { + "epoch": 1.29, + "learning_rate": 3.055697303067905e-05, + "loss": 0.9997, + "step": 2325 + }, + { + "epoch": 1.29, + "learning_rate": 3.048636078242137e-05, + "loss": 0.9196, + "step": 2330 + }, + { + "epoch": 1.29, + "learning_rate": 3.0415702520750235e-05, + "loss": 0.9735, + "step": 2335 + }, + { + "epoch": 1.29, + "learning_rate": 3.0344998838267525e-05, + "loss": 0.9395, + "step": 2340 + }, + { + "epoch": 1.3, + "learning_rate": 3.0274250327956093e-05, + "loss": 0.9455, + "step": 2345 + }, + { + "epoch": 1.3, + "learning_rate": 3.020345758317474e-05, + "loss": 0.968, + "step": 2350 + }, + { + "epoch": 1.3, + "learning_rate": 3.0132621197653245e-05, + "loss": 0.9403, + "step": 2355 + }, + { + "epoch": 1.3, + "learning_rate": 3.0061741765487418e-05, + "loss": 0.9267, + "step": 2360 + }, + { + "epoch": 1.31, + "learning_rate": 2.9990819881134073e-05, + "loss": 0.9734, + "step": 2365 + }, + { + "epoch": 1.31, + "learning_rate": 2.9919856139406093e-05, + "loss": 0.949, + "step": 2370 + }, + { + "epoch": 1.31, + "learning_rate": 2.9848851135467386e-05, + "loss": 0.9464, + "step": 2375 + }, + { + "epoch": 1.32, + "learning_rate": 2.977780546482794e-05, + "loss": 0.9709, + "step": 2380 + }, + { + "epoch": 1.32, + "learning_rate": 2.9706719723338795e-05, + "loss": 0.9202, + "step": 2385 + }, + { + "epoch": 1.32, + "learning_rate": 2.9635594507187074e-05, + "loss": 0.9625, + "step": 2390 + }, + { + "epoch": 1.32, + "learning_rate": 2.956443041289096e-05, + "loss": 0.9307, + "step": 2395 + }, + { + "epoch": 1.33, + "learning_rate": 2.9493228037294702e-05, + "loss": 0.907, + "step": 2400 + }, + { + "epoch": 1.33, + "learning_rate": 2.9421987977563613e-05, + "loss": 0.9487, + "step": 2405 + }, + { + "epoch": 1.33, + "learning_rate": 2.935071083117907e-05, + "loss": 0.9259, + "step": 2410 + }, + { + "epoch": 1.34, + "learning_rate": 2.9279397195933457e-05, + "loss": 0.9357, + "step": 2415 + }, + { + "epoch": 1.34, + "learning_rate": 2.920804766992521e-05, + "loss": 0.9149, + "step": 2420 + }, + { + "epoch": 1.34, + "learning_rate": 2.9136662851553787e-05, + "loss": 0.9664, + "step": 2425 + }, + { + "epoch": 1.34, + "learning_rate": 2.906524333951461e-05, + "loss": 0.973, + "step": 2430 + }, + { + "epoch": 1.35, + "learning_rate": 2.899378973279409e-05, + "loss": 0.9797, + "step": 2435 + }, + { + "epoch": 1.35, + "learning_rate": 2.892230263066459e-05, + "loss": 0.9604, + "step": 2440 + }, + { + "epoch": 1.35, + "learning_rate": 2.885078263267938e-05, + "loss": 0.9147, + "step": 2445 + }, + { + "epoch": 1.35, + "learning_rate": 2.8779230338667634e-05, + "loss": 0.9827, + "step": 2450 + }, + { + "epoch": 1.36, + "learning_rate": 2.870764634872939e-05, + "loss": 0.9362, + "step": 2455 + }, + { + "epoch": 1.36, + "learning_rate": 2.86360312632305e-05, + "loss": 0.9373, + "step": 2460 + }, + { + "epoch": 1.36, + "learning_rate": 2.8564385682797622e-05, + "loss": 0.955, + "step": 2465 + }, + { + "epoch": 1.37, + "learning_rate": 2.8492710208313177e-05, + "loss": 0.9108, + "step": 2470 + }, + { + "epoch": 1.37, + "learning_rate": 2.8421005440910303e-05, + "loss": 0.9259, + "step": 2475 + }, + { + "epoch": 1.37, + "learning_rate": 2.8349271981967797e-05, + "loss": 0.9333, + "step": 2480 + }, + { + "epoch": 1.37, + "learning_rate": 2.8277510433105102e-05, + "loss": 0.9437, + "step": 2485 + }, + { + "epoch": 1.38, + "learning_rate": 2.820572139617725e-05, + "loss": 0.9189, + "step": 2490 + }, + { + "epoch": 1.38, + "learning_rate": 2.8133905473269802e-05, + "loss": 0.9522, + "step": 2495 + }, + { + "epoch": 1.38, + "learning_rate": 2.8062063266693818e-05, + "loss": 0.9352, + "step": 2500 + }, + { + "epoch": 1.39, + "learning_rate": 2.7990195378980784e-05, + "loss": 0.9232, + "step": 2505 + }, + { + "epoch": 1.39, + "learning_rate": 2.7918302412877583e-05, + "loss": 0.8807, + "step": 2510 + }, + { + "epoch": 1.39, + "learning_rate": 2.7846384971341427e-05, + "loss": 0.9231, + "step": 2515 + }, + { + "epoch": 1.39, + "learning_rate": 2.7774443657534788e-05, + "loss": 0.9253, + "step": 2520 + }, + { + "epoch": 1.4, + "learning_rate": 2.770247907482036e-05, + "loss": 0.9646, + "step": 2525 + }, + { + "epoch": 1.4, + "learning_rate": 2.763049182675599e-05, + "loss": 0.9575, + "step": 2530 + }, + { + "epoch": 1.4, + "learning_rate": 2.7558482517089617e-05, + "loss": 0.9234, + "step": 2535 + }, + { + "epoch": 1.4, + "learning_rate": 2.748645174975421e-05, + "loss": 0.9215, + "step": 2540 + }, + { + "epoch": 1.41, + "learning_rate": 2.74144001288627e-05, + "loss": 0.959, + "step": 2545 + }, + { + "epoch": 1.41, + "learning_rate": 2.7342328258702894e-05, + "loss": 0.9509, + "step": 2550 + }, + { + "epoch": 1.41, + "learning_rate": 2.727023674373246e-05, + "loss": 0.8989, + "step": 2555 + }, + { + "epoch": 1.42, + "learning_rate": 2.7198126188573807e-05, + "loss": 0.9653, + "step": 2560 + }, + { + "epoch": 1.42, + "learning_rate": 2.7125997198009028e-05, + "loss": 0.9046, + "step": 2565 + }, + { + "epoch": 1.42, + "learning_rate": 2.7053850376974848e-05, + "loss": 0.9318, + "step": 2570 + }, + { + "epoch": 1.42, + "learning_rate": 2.6981686330557516e-05, + "loss": 0.9292, + "step": 2575 + }, + { + "epoch": 1.43, + "learning_rate": 2.6909505663987756e-05, + "loss": 0.99, + "step": 2580 + }, + { + "epoch": 1.43, + "learning_rate": 2.6837308982635678e-05, + "loss": 0.9737, + "step": 2585 + }, + { + "epoch": 1.43, + "learning_rate": 2.6765096892005726e-05, + "loss": 0.9649, + "step": 2590 + }, + { + "epoch": 1.43, + "learning_rate": 2.6692869997731545e-05, + "loss": 0.9687, + "step": 2595 + }, + { + "epoch": 1.44, + "learning_rate": 2.6620628905570964e-05, + "loss": 0.9708, + "step": 2600 + }, + { + "epoch": 1.44, + "learning_rate": 2.6548374221400884e-05, + "loss": 0.9498, + "step": 2605 + }, + { + "epoch": 1.44, + "learning_rate": 2.6476106551212188e-05, + "loss": 0.9512, + "step": 2610 + }, + { + "epoch": 1.45, + "learning_rate": 2.6403826501104682e-05, + "loss": 0.964, + "step": 2615 + }, + { + "epoch": 1.45, + "learning_rate": 2.6331534677281998e-05, + "loss": 0.9321, + "step": 2620 + }, + { + "epoch": 1.45, + "learning_rate": 2.6259231686046508e-05, + "loss": 0.9032, + "step": 2625 + }, + { + "epoch": 1.45, + "learning_rate": 2.6186918133794252e-05, + "loss": 0.9543, + "step": 2630 + }, + { + "epoch": 1.46, + "learning_rate": 2.6114594627009847e-05, + "loss": 0.9355, + "step": 2635 + }, + { + "epoch": 1.46, + "learning_rate": 2.604226177226137e-05, + "loss": 0.9111, + "step": 2640 + }, + { + "epoch": 1.46, + "learning_rate": 2.596992017619534e-05, + "loss": 0.9896, + "step": 2645 + }, + { + "epoch": 1.47, + "learning_rate": 2.589757044553155e-05, + "loss": 0.9715, + "step": 2650 + }, + { + "epoch": 1.47, + "learning_rate": 2.5825213187058045e-05, + "loss": 0.8911, + "step": 2655 + }, + { + "epoch": 1.47, + "learning_rate": 2.5752849007625986e-05, + "loss": 0.9446, + "step": 2660 + }, + { + "epoch": 1.47, + "learning_rate": 2.568047851414459e-05, + "loss": 0.9453, + "step": 2665 + }, + { + "epoch": 1.48, + "learning_rate": 2.5608102313576027e-05, + "loss": 0.9244, + "step": 2670 + }, + { + "epoch": 1.48, + "learning_rate": 2.553572101293033e-05, + "loss": 0.9478, + "step": 2675 + }, + { + "epoch": 1.48, + "learning_rate": 2.546333521926031e-05, + "loss": 0.9708, + "step": 2680 + }, + { + "epoch": 1.48, + "learning_rate": 2.5390945539656445e-05, + "loss": 0.9266, + "step": 2685 + }, + { + "epoch": 1.49, + "learning_rate": 2.5318552581241822e-05, + "loss": 0.9199, + "step": 2690 + }, + { + "epoch": 1.49, + "learning_rate": 2.524615695116702e-05, + "loss": 0.9666, + "step": 2695 + }, + { + "epoch": 1.49, + "learning_rate": 2.5173759256605027e-05, + "loss": 0.9247, + "step": 2700 + }, + { + "epoch": 1.5, + "learning_rate": 2.510136010474614e-05, + "loss": 0.9534, + "step": 2705 + }, + { + "epoch": 1.5, + "learning_rate": 2.5028960102792887e-05, + "loss": 0.9502, + "step": 2710 + }, + { + "epoch": 1.5, + "learning_rate": 2.4971039897207112e-05, + "loss": 0.9331, + "step": 2715 + }, + { + "epoch": 1.5, + "learning_rate": 2.4898639895253865e-05, + "loss": 0.9806, + "step": 2720 + }, + { + "epoch": 1.51, + "learning_rate": 2.4826240743394982e-05, + "loss": 0.9525, + "step": 2725 + }, + { + "epoch": 1.51, + "learning_rate": 2.4753843048832985e-05, + "loss": 0.937, + "step": 2730 + }, + { + "epoch": 1.51, + "learning_rate": 2.4681447418758187e-05, + "loss": 0.935, + "step": 2735 + }, + { + "epoch": 1.52, + "learning_rate": 2.460905446034356e-05, + "loss": 0.9662, + "step": 2740 + }, + { + "epoch": 1.52, + "learning_rate": 2.45366647807397e-05, + "loss": 0.9249, + "step": 2745 + }, + { + "epoch": 1.52, + "learning_rate": 2.446427898706967e-05, + "loss": 0.9673, + "step": 2750 + }, + { + "epoch": 1.52, + "learning_rate": 2.439189768642398e-05, + "loss": 0.9316, + "step": 2755 + }, + { + "epoch": 1.53, + "learning_rate": 2.431952148585541e-05, + "loss": 0.9297, + "step": 2760 + }, + { + "epoch": 1.53, + "learning_rate": 2.424715099237402e-05, + "loss": 0.9278, + "step": 2765 + }, + { + "epoch": 1.53, + "learning_rate": 2.4174786812941968e-05, + "loss": 0.8954, + "step": 2770 + }, + { + "epoch": 1.53, + "learning_rate": 2.4102429554468456e-05, + "loss": 0.9586, + "step": 2775 + }, + { + "epoch": 1.54, + "learning_rate": 2.4030079823804673e-05, + "loss": 0.9119, + "step": 2780 + }, + { + "epoch": 1.54, + "learning_rate": 2.395773822773863e-05, + "loss": 0.8949, + "step": 2785 + }, + { + "epoch": 1.54, + "learning_rate": 2.3885405372990166e-05, + "loss": 0.9506, + "step": 2790 + }, + { + "epoch": 1.55, + "learning_rate": 2.3813081866205754e-05, + "loss": 0.9087, + "step": 2795 + }, + { + "epoch": 1.55, + "learning_rate": 2.3740768313953494e-05, + "loss": 0.923, + "step": 2800 + }, + { + "epoch": 1.55, + "learning_rate": 2.3668465322718004e-05, + "loss": 1.0212, + "step": 2805 + }, + { + "epoch": 1.55, + "learning_rate": 2.359617349889532e-05, + "loss": 0.9182, + "step": 2810 + }, + { + "epoch": 1.56, + "learning_rate": 2.3523893448787818e-05, + "loss": 0.9316, + "step": 2815 + }, + { + "epoch": 1.56, + "learning_rate": 2.3451625778599122e-05, + "loss": 0.94, + "step": 2820 + }, + { + "epoch": 1.56, + "learning_rate": 2.3379371094429038e-05, + "loss": 0.9181, + "step": 2825 + }, + { + "epoch": 1.56, + "learning_rate": 2.3307130002268457e-05, + "loss": 0.937, + "step": 2830 + }, + { + "epoch": 1.57, + "learning_rate": 2.3234903107994287e-05, + "loss": 0.9026, + "step": 2835 + }, + { + "epoch": 1.57, + "learning_rate": 2.3162691017364317e-05, + "loss": 0.954, + "step": 2840 + }, + { + "epoch": 1.57, + "learning_rate": 2.3090494336012253e-05, + "loss": 0.9661, + "step": 2845 + }, + { + "epoch": 1.58, + "learning_rate": 2.3018313669442483e-05, + "loss": 0.9127, + "step": 2850 + }, + { + "epoch": 1.58, + "learning_rate": 2.2946149623025158e-05, + "loss": 0.9317, + "step": 2855 + }, + { + "epoch": 1.58, + "learning_rate": 2.2874002801990978e-05, + "loss": 0.9856, + "step": 2860 + }, + { + "epoch": 1.58, + "learning_rate": 2.28018738114262e-05, + "loss": 1.0021, + "step": 2865 + }, + { + "epoch": 1.59, + "learning_rate": 2.272976325626755e-05, + "loss": 0.9655, + "step": 2870 + }, + { + "epoch": 1.59, + "learning_rate": 2.265767174129711e-05, + "loss": 0.9619, + "step": 2875 + }, + { + "epoch": 1.59, + "learning_rate": 2.2585599871137313e-05, + "loss": 0.9383, + "step": 2880 + }, + { + "epoch": 1.6, + "learning_rate": 2.251354825024579e-05, + "loss": 0.9332, + "step": 2885 + }, + { + "epoch": 1.6, + "learning_rate": 2.244151748291039e-05, + "loss": 0.9544, + "step": 2890 + }, + { + "epoch": 1.6, + "learning_rate": 2.236950817324401e-05, + "loss": 0.9343, + "step": 2895 + }, + { + "epoch": 1.6, + "learning_rate": 2.2297520925179647e-05, + "loss": 0.9189, + "step": 2900 + }, + { + "epoch": 1.61, + "learning_rate": 2.222555634246521e-05, + "loss": 0.9341, + "step": 2905 + }, + { + "epoch": 1.61, + "learning_rate": 2.215361502865858e-05, + "loss": 0.9567, + "step": 2910 + }, + { + "epoch": 1.61, + "learning_rate": 2.2081697587122423e-05, + "loss": 0.9047, + "step": 2915 + }, + { + "epoch": 1.61, + "learning_rate": 2.200980462101922e-05, + "loss": 0.9126, + "step": 2920 + }, + { + "epoch": 1.62, + "learning_rate": 2.1937936733306195e-05, + "loss": 0.9523, + "step": 2925 + }, + { + "epoch": 1.62, + "learning_rate": 2.18660945267302e-05, + "loss": 0.8802, + "step": 2930 + }, + { + "epoch": 1.62, + "learning_rate": 2.179427860382276e-05, + "loss": 0.9197, + "step": 2935 + }, + { + "epoch": 1.63, + "learning_rate": 2.1722489566894903e-05, + "loss": 0.9255, + "step": 2940 + }, + { + "epoch": 1.63, + "learning_rate": 2.1650728018032206e-05, + "loss": 0.8921, + "step": 2945 + }, + { + "epoch": 1.63, + "learning_rate": 2.15789945590897e-05, + "loss": 0.9607, + "step": 2950 + }, + { + "epoch": 1.63, + "learning_rate": 2.150728979168683e-05, + "loss": 0.9755, + "step": 2955 + }, + { + "epoch": 1.64, + "learning_rate": 2.1435614317202384e-05, + "loss": 0.9943, + "step": 2960 + }, + { + "epoch": 1.64, + "learning_rate": 2.1363968736769508e-05, + "loss": 0.9463, + "step": 2965 + }, + { + "epoch": 1.64, + "learning_rate": 2.1292353651270617e-05, + "loss": 0.9107, + "step": 2970 + }, + { + "epoch": 1.65, + "learning_rate": 2.1220769661332365e-05, + "loss": 0.9311, + "step": 2975 + }, + { + "epoch": 1.65, + "learning_rate": 2.1149217367320622e-05, + "loss": 0.9459, + "step": 2980 + }, + { + "epoch": 1.65, + "learning_rate": 2.107769736933541e-05, + "loss": 0.9439, + "step": 2985 + }, + { + "epoch": 1.65, + "learning_rate": 2.100621026720591e-05, + "loss": 0.9719, + "step": 2990 + }, + { + "epoch": 1.66, + "learning_rate": 2.093475666048539e-05, + "loss": 0.9569, + "step": 2995 + }, + { + "epoch": 1.66, + "learning_rate": 2.0863337148446222e-05, + "loss": 0.9308, + "step": 3000 + }, + { + "epoch": 1.66, + "learning_rate": 2.07919523300748e-05, + "loss": 0.9269, + "step": 3005 + }, + { + "epoch": 1.66, + "learning_rate": 2.0720602804066552e-05, + "loss": 0.9358, + "step": 3010 + }, + { + "epoch": 1.67, + "learning_rate": 2.0649289168820943e-05, + "loss": 0.9291, + "step": 3015 + }, + { + "epoch": 1.67, + "learning_rate": 2.0578012022436386e-05, + "loss": 0.969, + "step": 3020 + }, + { + "epoch": 1.67, + "learning_rate": 2.0506771962705304e-05, + "loss": 0.9319, + "step": 3025 + }, + { + "epoch": 1.68, + "learning_rate": 2.0435569587109042e-05, + "loss": 0.9574, + "step": 3030 + }, + { + "epoch": 1.68, + "learning_rate": 2.036440549281293e-05, + "loss": 0.9654, + "step": 3035 + }, + { + "epoch": 1.68, + "learning_rate": 2.0293280276661204e-05, + "loss": 0.9742, + "step": 3040 + }, + { + "epoch": 1.68, + "learning_rate": 2.0222194535172067e-05, + "loss": 0.9014, + "step": 3045 + }, + { + "epoch": 1.69, + "learning_rate": 2.0151148864532623e-05, + "loss": 0.9246, + "step": 3050 + }, + { + "epoch": 1.69, + "learning_rate": 2.0080143860593913e-05, + "loss": 0.9522, + "step": 3055 + }, + { + "epoch": 1.69, + "learning_rate": 2.0009180118865933e-05, + "loss": 0.9567, + "step": 3060 + }, + { + "epoch": 1.69, + "learning_rate": 1.9938258234512588e-05, + "loss": 0.983, + "step": 3065 + }, + { + "epoch": 1.7, + "learning_rate": 1.9867378802346764e-05, + "loss": 0.8722, + "step": 3070 + }, + { + "epoch": 1.7, + "learning_rate": 1.979654241682527e-05, + "loss": 0.9122, + "step": 3075 + }, + { + "epoch": 1.7, + "learning_rate": 1.972574967204391e-05, + "loss": 0.9362, + "step": 3080 + }, + { + "epoch": 1.71, + "learning_rate": 1.9655001161732478e-05, + "loss": 0.8944, + "step": 3085 + }, + { + "epoch": 1.71, + "learning_rate": 1.9584297479249774e-05, + "loss": 0.9329, + "step": 3090 + }, + { + "epoch": 1.71, + "learning_rate": 1.9513639217578636e-05, + "loss": 0.9707, + "step": 3095 + }, + { + "epoch": 1.71, + "learning_rate": 1.9443026969320955e-05, + "loss": 0.9367, + "step": 3100 + }, + { + "epoch": 1.72, + "learning_rate": 1.937246132669272e-05, + "loss": 0.896, + "step": 3105 + }, + { + "epoch": 1.72, + "learning_rate": 1.9301942881519047e-05, + "loss": 0.97, + "step": 3110 + }, + { + "epoch": 1.72, + "learning_rate": 1.9231472225229216e-05, + "loss": 0.9638, + "step": 3115 + }, + { + "epoch": 1.73, + "learning_rate": 1.9161049948851684e-05, + "loss": 0.9561, + "step": 3120 + }, + { + "epoch": 1.73, + "learning_rate": 1.9090676643009193e-05, + "loss": 0.9734, + "step": 3125 + }, + { + "epoch": 1.73, + "learning_rate": 1.902035289791373e-05, + "loss": 0.9651, + "step": 3130 + }, + { + "epoch": 1.73, + "learning_rate": 1.8950079303361658e-05, + "loss": 0.9489, + "step": 3135 + }, + { + "epoch": 1.74, + "learning_rate": 1.8879856448728723e-05, + "loss": 0.9893, + "step": 3140 + }, + { + "epoch": 1.74, + "learning_rate": 1.8809684922965097e-05, + "loss": 0.9549, + "step": 3145 + }, + { + "epoch": 1.74, + "learning_rate": 1.8739565314590507e-05, + "loss": 0.9196, + "step": 3150 + }, + { + "epoch": 1.74, + "learning_rate": 1.8669498211689216e-05, + "loss": 0.9568, + "step": 3155 + }, + { + "epoch": 1.75, + "learning_rate": 1.859948420190517e-05, + "loss": 0.904, + "step": 3160 + }, + { + "epoch": 1.75, + "learning_rate": 1.852952387243698e-05, + "loss": 0.9375, + "step": 3165 + }, + { + "epoch": 1.75, + "learning_rate": 1.8459617810033096e-05, + "loss": 0.9048, + "step": 3170 + }, + { + "epoch": 1.76, + "learning_rate": 1.83897666009868e-05, + "loss": 0.9514, + "step": 3175 + }, + { + "epoch": 1.76, + "learning_rate": 1.8319970831131363e-05, + "loss": 0.8855, + "step": 3180 + }, + { + "epoch": 1.76, + "learning_rate": 1.825023108583505e-05, + "loss": 0.9119, + "step": 3185 + }, + { + "epoch": 1.76, + "learning_rate": 1.818054794999628e-05, + "loss": 0.9733, + "step": 3190 + }, + { + "epoch": 1.77, + "learning_rate": 1.8110922008038705e-05, + "loss": 0.9597, + "step": 3195 + }, + { + "epoch": 1.77, + "learning_rate": 1.8041353843906275e-05, + "loss": 0.8952, + "step": 3200 + }, + { + "epoch": 1.77, + "learning_rate": 1.797184404105839e-05, + "loss": 0.8934, + "step": 3205 + }, + { + "epoch": 1.77, + "learning_rate": 1.7902393182464955e-05, + "loss": 0.9299, + "step": 3210 + }, + { + "epoch": 1.78, + "learning_rate": 1.7833001850601544e-05, + "loss": 0.9247, + "step": 3215 + }, + { + "epoch": 1.78, + "learning_rate": 1.7763670627444465e-05, + "loss": 0.9672, + "step": 3220 + }, + { + "epoch": 1.78, + "learning_rate": 1.7694400094465913e-05, + "loss": 0.9451, + "step": 3225 + }, + { + "epoch": 1.79, + "learning_rate": 1.7625190832629085e-05, + "loss": 0.9294, + "step": 3230 + }, + { + "epoch": 1.79, + "learning_rate": 1.7556043422383293e-05, + "loss": 0.9145, + "step": 3235 + }, + { + "epoch": 1.79, + "learning_rate": 1.7486958443659112e-05, + "loss": 0.9508, + "step": 3240 + }, + { + "epoch": 1.79, + "learning_rate": 1.7417936475863526e-05, + "loss": 0.8725, + "step": 3245 + }, + { + "epoch": 1.8, + "learning_rate": 1.7348978097875036e-05, + "loss": 0.9195, + "step": 3250 + }, + { + "epoch": 1.8, + "learning_rate": 1.728008388803883e-05, + "loss": 0.933, + "step": 3255 + }, + { + "epoch": 1.8, + "learning_rate": 1.7211254424161933e-05, + "loss": 0.9747, + "step": 3260 + }, + { + "epoch": 1.81, + "learning_rate": 1.7142490283508324e-05, + "loss": 0.9168, + "step": 3265 + }, + { + "epoch": 1.81, + "learning_rate": 1.707379204279418e-05, + "loss": 0.9844, + "step": 3270 + }, + { + "epoch": 1.81, + "learning_rate": 1.700516027818293e-05, + "loss": 0.9071, + "step": 3275 + }, + { + "epoch": 1.81, + "learning_rate": 1.6936595565280488e-05, + "loss": 0.9311, + "step": 3280 + }, + { + "epoch": 1.82, + "learning_rate": 1.686809847913045e-05, + "loss": 0.9419, + "step": 3285 + }, + { + "epoch": 1.82, + "learning_rate": 1.679966959420918e-05, + "loss": 0.9679, + "step": 3290 + }, + { + "epoch": 1.82, + "learning_rate": 1.67313094844211e-05, + "loss": 0.9601, + "step": 3295 + }, + { + "epoch": 1.82, + "learning_rate": 1.6663018723093774e-05, + "loss": 0.9022, + "step": 3300 + }, + { + "epoch": 1.83, + "learning_rate": 1.6594797882973196e-05, + "loss": 0.8925, + "step": 3305 + }, + { + "epoch": 1.83, + "learning_rate": 1.6526647536218894e-05, + "loss": 0.9238, + "step": 3310 + }, + { + "epoch": 1.83, + "learning_rate": 1.6458568254399225e-05, + "loss": 0.9632, + "step": 3315 + }, + { + "epoch": 1.84, + "learning_rate": 1.6390560608486496e-05, + "loss": 0.9164, + "step": 3320 + }, + { + "epoch": 1.84, + "learning_rate": 1.6322625168852217e-05, + "loss": 0.9505, + "step": 3325 + }, + { + "epoch": 1.84, + "learning_rate": 1.6254762505262338e-05, + "loss": 0.9622, + "step": 3330 + }, + { + "epoch": 1.84, + "learning_rate": 1.618697318687241e-05, + "loss": 0.9204, + "step": 3335 + }, + { + "epoch": 1.85, + "learning_rate": 1.6119257782222895e-05, + "loss": 0.9504, + "step": 3340 + }, + { + "epoch": 1.85, + "learning_rate": 1.6051616859234285e-05, + "loss": 0.9384, + "step": 3345 + }, + { + "epoch": 1.85, + "learning_rate": 1.5984050985202474e-05, + "loss": 0.9374, + "step": 3350 + }, + { + "epoch": 1.86, + "learning_rate": 1.591656072679387e-05, + "loss": 0.9937, + "step": 3355 + }, + { + "epoch": 1.86, + "learning_rate": 1.5849146650040737e-05, + "loss": 0.9587, + "step": 3360 + }, + { + "epoch": 1.86, + "learning_rate": 1.5781809320336412e-05, + "loss": 0.9312, + "step": 3365 + }, + { + "epoch": 1.86, + "learning_rate": 1.5714549302430536e-05, + "loss": 0.9343, + "step": 3370 + }, + { + "epoch": 1.87, + "learning_rate": 1.5647367160424393e-05, + "loss": 0.9419, + "step": 3375 + }, + { + "epoch": 1.87, + "learning_rate": 1.558026345776608e-05, + "loss": 0.9371, + "step": 3380 + }, + { + "epoch": 1.87, + "learning_rate": 1.551323875724587e-05, + "loss": 0.913, + "step": 3385 + }, + { + "epoch": 1.87, + "learning_rate": 1.5446293620991437e-05, + "loss": 0.9195, + "step": 3390 + }, + { + "epoch": 1.88, + "learning_rate": 1.5379428610463174e-05, + "loss": 0.9164, + "step": 3395 + }, + { + "epoch": 1.88, + "learning_rate": 1.531264428644945e-05, + "loss": 0.9256, + "step": 3400 + }, + { + "epoch": 1.88, + "learning_rate": 1.5245941209061953e-05, + "loss": 0.9658, + "step": 3405 + }, + { + "epoch": 1.89, + "learning_rate": 1.517931993773094e-05, + "loss": 0.9397, + "step": 3410 + }, + { + "epoch": 1.89, + "learning_rate": 1.5112781031200569e-05, + "loss": 0.915, + "step": 3415 + }, + { + "epoch": 1.89, + "learning_rate": 1.5046325047524251e-05, + "loss": 0.9116, + "step": 3420 + }, + { + "epoch": 1.89, + "learning_rate": 1.4979952544059888e-05, + "loss": 0.9192, + "step": 3425 + }, + { + "epoch": 1.9, + "learning_rate": 1.4913664077465289e-05, + "loss": 0.924, + "step": 3430 + }, + { + "epoch": 1.9, + "learning_rate": 1.4847460203693408e-05, + "loss": 0.9579, + "step": 3435 + }, + { + "epoch": 1.9, + "learning_rate": 1.4781341477987776e-05, + "loss": 0.9816, + "step": 3440 + }, + { + "epoch": 1.9, + "learning_rate": 1.4715308454877758e-05, + "loss": 0.9412, + "step": 3445 + }, + { + "epoch": 1.91, + "learning_rate": 1.4649361688173979e-05, + "loss": 0.9413, + "step": 3450 + }, + { + "epoch": 1.91, + "learning_rate": 1.458350173096361e-05, + "loss": 0.9425, + "step": 3455 + }, + { + "epoch": 1.91, + "learning_rate": 1.4517729135605795e-05, + "loss": 0.9132, + "step": 3460 + }, + { + "epoch": 1.92, + "learning_rate": 1.4452044453726942e-05, + "loss": 0.9455, + "step": 3465 + }, + { + "epoch": 1.92, + "learning_rate": 1.4386448236216174e-05, + "loss": 0.9543, + "step": 3470 + }, + { + "epoch": 1.92, + "learning_rate": 1.4320941033220667e-05, + "loss": 0.9118, + "step": 3475 + }, + { + "epoch": 1.92, + "learning_rate": 1.4255523394141041e-05, + "loss": 0.9425, + "step": 3480 + }, + { + "epoch": 1.93, + "learning_rate": 1.4190195867626749e-05, + "loss": 0.8958, + "step": 3485 + }, + { + "epoch": 1.93, + "learning_rate": 1.4124959001571497e-05, + "loss": 0.944, + "step": 3490 + }, + { + "epoch": 1.93, + "learning_rate": 1.4059813343108616e-05, + "loss": 0.9611, + "step": 3495 + }, + { + "epoch": 1.94, + "learning_rate": 1.3994759438606501e-05, + "loss": 0.9448, + "step": 3500 + }, + { + "epoch": 1.94, + "learning_rate": 1.3929797833664013e-05, + "loss": 0.9659, + "step": 3505 + }, + { + "epoch": 1.94, + "learning_rate": 1.3864929073105922e-05, + "loss": 0.9178, + "step": 3510 + }, + { + "epoch": 1.94, + "learning_rate": 1.3800153700978282e-05, + "loss": 0.8965, + "step": 3515 + }, + { + "epoch": 1.95, + "learning_rate": 1.373547226054398e-05, + "loss": 0.9198, + "step": 3520 + }, + { + "epoch": 1.95, + "learning_rate": 1.367088529427803e-05, + "loss": 0.9398, + "step": 3525 + }, + { + "epoch": 1.95, + "learning_rate": 1.3606393343863182e-05, + "loss": 0.9423, + "step": 3530 + }, + { + "epoch": 1.95, + "learning_rate": 1.3541996950185227e-05, + "loss": 0.9592, + "step": 3535 + }, + { + "epoch": 1.96, + "learning_rate": 1.3477696653328598e-05, + "loss": 0.9489, + "step": 3540 + }, + { + "epoch": 1.96, + "learning_rate": 1.3413492992571713e-05, + "loss": 0.963, + "step": 3545 + }, + { + "epoch": 1.96, + "learning_rate": 1.3349386506382586e-05, + "loss": 0.9449, + "step": 3550 + }, + { + "epoch": 1.97, + "learning_rate": 1.3285377732414172e-05, + "loss": 0.9043, + "step": 3555 + }, + { + "epoch": 1.97, + "learning_rate": 1.3221467207499972e-05, + "loss": 0.9362, + "step": 3560 + }, + { + "epoch": 1.97, + "learning_rate": 1.3157655467649463e-05, + "loss": 0.945, + "step": 3565 + }, + { + "epoch": 1.97, + "learning_rate": 1.3093943048043634e-05, + "loss": 0.8693, + "step": 3570 + }, + { + "epoch": 1.98, + "learning_rate": 1.3030330483030479e-05, + "loss": 0.9659, + "step": 3575 + }, + { + "epoch": 1.98, + "learning_rate": 1.2966818306120535e-05, + "loss": 0.9036, + "step": 3580 + }, + { + "epoch": 1.98, + "learning_rate": 1.2903407049982386e-05, + "loss": 0.9282, + "step": 3585 + }, + { + "epoch": 1.99, + "learning_rate": 1.2840097246438215e-05, + "loss": 0.8747, + "step": 3590 + }, + { + "epoch": 1.99, + "learning_rate": 1.277688942645934e-05, + "loss": 0.936, + "step": 3595 + }, + { + "epoch": 1.99, + "learning_rate": 1.2713784120161725e-05, + "loss": 0.8945, + "step": 3600 + }, + { + "epoch": 1.99, + "learning_rate": 1.2650781856801598e-05, + "loss": 0.9273, + "step": 3605 + }, + { + "epoch": 2.0, + "learning_rate": 1.258788316477097e-05, + "loss": 0.9632, + "step": 3610 + }, + { + "epoch": 2.0, + "learning_rate": 1.2525088571593202e-05, + "loss": 0.8641, + "step": 3615 + }, + { + "epoch": 2.0, + "learning_rate": 1.2462398603918607e-05, + "loss": 0.9314, + "step": 3620 + }, + { + "epoch": 2.0, + "learning_rate": 1.2399813787520006e-05, + "loss": 0.9169, + "step": 3625 + }, + { + "epoch": 2.01, + "learning_rate": 1.2337334647288334e-05, + "loss": 0.9089, + "step": 3630 + }, + { + "epoch": 2.01, + "learning_rate": 1.2274961707228228e-05, + "loss": 0.9039, + "step": 3635 + }, + { + "epoch": 2.01, + "learning_rate": 1.2212695490453646e-05, + "loss": 0.9378, + "step": 3640 + }, + { + "epoch": 2.02, + "learning_rate": 1.2150536519183475e-05, + "loss": 0.9297, + "step": 3645 + }, + { + "epoch": 2.02, + "learning_rate": 1.2088485314737108e-05, + "loss": 0.9488, + "step": 3650 + }, + { + "epoch": 2.02, + "learning_rate": 1.2026542397530186e-05, + "loss": 0.9625, + "step": 3655 + }, + { + "epoch": 2.02, + "learning_rate": 1.1964708287070073e-05, + "loss": 0.8874, + "step": 3660 + }, + { + "epoch": 2.03, + "learning_rate": 1.1902983501951666e-05, + "loss": 0.9224, + "step": 3665 + }, + { + "epoch": 2.03, + "learning_rate": 1.1841368559852892e-05, + "loss": 0.9442, + "step": 3670 + }, + { + "epoch": 2.03, + "learning_rate": 1.17798639775305e-05, + "loss": 0.8688, + "step": 3675 + }, + { + "epoch": 2.03, + "learning_rate": 1.1718470270815608e-05, + "loss": 0.8912, + "step": 3680 + }, + { + "epoch": 2.04, + "learning_rate": 1.1657187954609496e-05, + "loss": 0.9131, + "step": 3685 + }, + { + "epoch": 2.04, + "learning_rate": 1.1596017542879168e-05, + "loss": 0.9827, + "step": 3690 + }, + { + "epoch": 2.04, + "learning_rate": 1.1534959548653132e-05, + "loss": 0.8588, + "step": 3695 + }, + { + "epoch": 2.05, + "learning_rate": 1.147401448401706e-05, + "loss": 0.9359, + "step": 3700 + }, + { + "epoch": 2.05, + "learning_rate": 1.1413182860109491e-05, + "loss": 0.9375, + "step": 3705 + }, + { + "epoch": 2.05, + "learning_rate": 1.1352465187117562e-05, + "loss": 0.9242, + "step": 3710 + }, + { + "epoch": 2.05, + "learning_rate": 1.1291861974272703e-05, + "loss": 0.9132, + "step": 3715 + }, + { + "epoch": 2.06, + "learning_rate": 1.1231373729846393e-05, + "loss": 0.9142, + "step": 3720 + }, + { + "epoch": 2.06, + "learning_rate": 1.1171000961145883e-05, + "loss": 0.9132, + "step": 3725 + }, + { + "epoch": 2.06, + "learning_rate": 1.1110744174509952e-05, + "loss": 0.9603, + "step": 3730 + }, + { + "epoch": 2.07, + "learning_rate": 1.1050603875304622e-05, + "loss": 0.9442, + "step": 3735 + }, + { + "epoch": 2.07, + "learning_rate": 1.0990580567918979e-05, + "loss": 0.9392, + "step": 3740 + }, + { + "epoch": 2.07, + "learning_rate": 1.0930674755760908e-05, + "loss": 0.9203, + "step": 3745 + }, + { + "epoch": 2.07, + "learning_rate": 1.0870886941252872e-05, + "loss": 0.9468, + "step": 3750 + }, + { + "epoch": 2.08, + "learning_rate": 1.0811217625827705e-05, + "loss": 0.9605, + "step": 3755 + }, + { + "epoch": 2.08, + "learning_rate": 1.0751667309924399e-05, + "loss": 0.9166, + "step": 3760 + }, + { + "epoch": 2.08, + "learning_rate": 1.0692236492983918e-05, + "loss": 0.939, + "step": 3765 + }, + { + "epoch": 2.08, + "learning_rate": 1.0632925673445001e-05, + "loss": 0.9409, + "step": 3770 + }, + { + "epoch": 2.09, + "learning_rate": 1.0573735348739987e-05, + "loss": 0.9523, + "step": 3775 + }, + { + "epoch": 2.09, + "learning_rate": 1.0514666015290645e-05, + "loss": 0.8867, + "step": 3780 + }, + { + "epoch": 2.09, + "learning_rate": 1.0455718168503967e-05, + "loss": 0.9816, + "step": 3785 + }, + { + "epoch": 2.1, + "learning_rate": 1.0396892302768127e-05, + "loss": 0.9465, + "step": 3790 + }, + { + "epoch": 2.1, + "learning_rate": 1.033818891144817e-05, + "loss": 0.9352, + "step": 3795 + }, + { + "epoch": 2.1, + "learning_rate": 1.0279608486882054e-05, + "loss": 0.8877, + "step": 3800 + }, + { + "epoch": 2.1, + "learning_rate": 1.0221151520376343e-05, + "loss": 0.9098, + "step": 3805 + }, + { + "epoch": 2.11, + "learning_rate": 1.0162818502202251e-05, + "loss": 0.8687, + "step": 3810 + }, + { + "epoch": 2.11, + "learning_rate": 1.0104609921591387e-05, + "loss": 0.9256, + "step": 3815 + }, + { + "epoch": 2.11, + "learning_rate": 1.0046526266731782e-05, + "loss": 0.92, + "step": 3820 + }, + { + "epoch": 2.12, + "learning_rate": 9.988568024763673e-06, + "loss": 0.8813, + "step": 3825 + }, + { + "epoch": 2.12, + "learning_rate": 9.930735681775505e-06, + "loss": 0.891, + "step": 3830 + }, + { + "epoch": 2.12, + "learning_rate": 9.87302972279982e-06, + "loss": 0.9479, + "step": 3835 + }, + { + "epoch": 2.12, + "learning_rate": 9.815450631809191e-06, + "loss": 0.8963, + "step": 3840 + }, + { + "epoch": 2.13, + "learning_rate": 9.757998891712172e-06, + "loss": 0.9178, + "step": 3845 + }, + { + "epoch": 2.13, + "learning_rate": 9.700674984349228e-06, + "loss": 0.9072, + "step": 3850 + }, + { + "epoch": 2.13, + "learning_rate": 9.643479390488717e-06, + "loss": 0.8879, + "step": 3855 + }, + { + "epoch": 2.13, + "learning_rate": 9.586412589822846e-06, + "loss": 0.9484, + "step": 3860 + }, + { + "epoch": 2.14, + "learning_rate": 9.529475060963649e-06, + "loss": 0.9173, + "step": 3865 + }, + { + "epoch": 2.14, + "learning_rate": 9.472667281438982e-06, + "loss": 0.9318, + "step": 3870 + }, + { + "epoch": 2.14, + "learning_rate": 9.415989727688484e-06, + "loss": 0.9242, + "step": 3875 + }, + { + "epoch": 2.15, + "learning_rate": 9.359442875059631e-06, + "loss": 0.9314, + "step": 3880 + }, + { + "epoch": 2.15, + "learning_rate": 9.303027197803726e-06, + "loss": 0.9227, + "step": 3885 + }, + { + "epoch": 2.15, + "learning_rate": 9.246743169071906e-06, + "loss": 0.9253, + "step": 3890 + }, + { + "epoch": 2.15, + "learning_rate": 9.190591260911201e-06, + "loss": 0.9457, + "step": 3895 + }, + { + "epoch": 2.16, + "learning_rate": 9.134571944260554e-06, + "loss": 0.9214, + "step": 3900 + }, + { + "epoch": 2.16, + "learning_rate": 9.078685688946884e-06, + "loss": 0.9534, + "step": 3905 + }, + { + "epoch": 2.16, + "learning_rate": 9.022932963681141e-06, + "loss": 0.9236, + "step": 3910 + }, + { + "epoch": 2.16, + "learning_rate": 8.967314236054384e-06, + "loss": 0.8905, + "step": 3915 + }, + { + "epoch": 2.17, + "learning_rate": 8.911829972533817e-06, + "loss": 0.9319, + "step": 3920 + }, + { + "epoch": 2.17, + "learning_rate": 8.856480638458966e-06, + "loss": 0.8989, + "step": 3925 + }, + { + "epoch": 2.17, + "learning_rate": 8.80126669803766e-06, + "loss": 0.9234, + "step": 3930 + }, + { + "epoch": 2.18, + "learning_rate": 8.746188614342263e-06, + "loss": 0.9619, + "step": 3935 + }, + { + "epoch": 2.18, + "learning_rate": 8.691246849305653e-06, + "loss": 0.9537, + "step": 3940 + }, + { + "epoch": 2.18, + "learning_rate": 8.636441863717499e-06, + "loss": 0.9809, + "step": 3945 + }, + { + "epoch": 2.18, + "learning_rate": 8.581774117220238e-06, + "loss": 0.9219, + "step": 3950 + }, + { + "epoch": 2.19, + "learning_rate": 8.52724406830538e-06, + "loss": 0.9502, + "step": 3955 + }, + { + "epoch": 2.19, + "learning_rate": 8.472852174309514e-06, + "loss": 0.9277, + "step": 3960 + }, + { + "epoch": 2.19, + "learning_rate": 8.418598891410581e-06, + "loss": 0.8978, + "step": 3965 + }, + { + "epoch": 2.2, + "learning_rate": 8.364484674624e-06, + "loss": 0.9026, + "step": 3970 + }, + { + "epoch": 2.2, + "learning_rate": 8.31050997779885e-06, + "loss": 0.9658, + "step": 3975 + }, + { + "epoch": 2.2, + "learning_rate": 8.25667525361409e-06, + "loss": 0.9288, + "step": 3980 + }, + { + "epoch": 2.2, + "learning_rate": 8.202980953574735e-06, + "loss": 0.9009, + "step": 3985 + }, + { + "epoch": 2.21, + "learning_rate": 8.14942752800808e-06, + "loss": 0.934, + "step": 3990 + }, + { + "epoch": 2.21, + "learning_rate": 8.09601542605993e-06, + "loss": 0.9261, + "step": 3995 + }, + { + "epoch": 2.21, + "learning_rate": 8.042745095690826e-06, + "loss": 0.9086, + "step": 4000 + }, + { + "epoch": 2.21, + "learning_rate": 7.989616983672288e-06, + "loss": 0.9397, + "step": 4005 + }, + { + "epoch": 2.22, + "learning_rate": 7.936631535583055e-06, + "loss": 0.8661, + "step": 4010 + }, + { + "epoch": 2.22, + "learning_rate": 7.88378919580538e-06, + "loss": 0.9433, + "step": 4015 + }, + { + "epoch": 2.22, + "learning_rate": 7.83109040752128e-06, + "loss": 0.9473, + "step": 4020 + }, + { + "epoch": 2.23, + "learning_rate": 7.778535612708824e-06, + "loss": 0.9177, + "step": 4025 + }, + { + "epoch": 2.23, + "learning_rate": 7.726125252138417e-06, + "loss": 0.8932, + "step": 4030 + }, + { + "epoch": 2.23, + "learning_rate": 7.673859765369126e-06, + "loss": 0.9187, + "step": 4035 + }, + { + "epoch": 2.23, + "learning_rate": 7.62173959074497e-06, + "loss": 0.9009, + "step": 4040 + }, + { + "epoch": 2.24, + "learning_rate": 7.569765165391257e-06, + "loss": 0.9348, + "step": 4045 + }, + { + "epoch": 2.24, + "learning_rate": 7.517936925210917e-06, + "loss": 0.8827, + "step": 4050 + }, + { + "epoch": 2.24, + "learning_rate": 7.466255304880834e-06, + "loss": 0.9668, + "step": 4055 + }, + { + "epoch": 2.24, + "learning_rate": 7.4147207378482295e-06, + "loss": 0.934, + "step": 4060 + }, + { + "epoch": 2.25, + "learning_rate": 7.36333365632697e-06, + "loss": 0.9251, + "step": 4065 + }, + { + "epoch": 2.25, + "learning_rate": 7.312094491294033e-06, + "loss": 0.9225, + "step": 4070 + }, + { + "epoch": 2.25, + "learning_rate": 7.261003672485783e-06, + "loss": 0.9198, + "step": 4075 + }, + { + "epoch": 2.26, + "learning_rate": 7.210061628394477e-06, + "loss": 0.9023, + "step": 4080 + }, + { + "epoch": 2.26, + "learning_rate": 7.159268786264564e-06, + "loss": 0.9378, + "step": 4085 + }, + { + "epoch": 2.26, + "learning_rate": 7.108625572089209e-06, + "loss": 0.9148, + "step": 4090 + }, + { + "epoch": 2.26, + "learning_rate": 7.05813241060661e-06, + "loss": 0.9308, + "step": 4095 + }, + { + "epoch": 2.27, + "learning_rate": 7.007789725296557e-06, + "loss": 0.8979, + "step": 4100 + }, + { + "epoch": 2.27, + "learning_rate": 6.957597938376748e-06, + "loss": 0.9329, + "step": 4105 + }, + { + "epoch": 2.27, + "learning_rate": 6.907557470799358e-06, + "loss": 0.8849, + "step": 4110 + }, + { + "epoch": 2.28, + "learning_rate": 6.857668742247458e-06, + "loss": 0.8879, + "step": 4115 + }, + { + "epoch": 2.28, + "learning_rate": 6.807932171131498e-06, + "loss": 0.9132, + "step": 4120 + }, + { + "epoch": 2.28, + "learning_rate": 6.758348174585804e-06, + "loss": 0.9273, + "step": 4125 + }, + { + "epoch": 2.28, + "learning_rate": 6.7089171684650785e-06, + "loss": 0.9115, + "step": 4130 + }, + { + "epoch": 2.29, + "learning_rate": 6.659639567340914e-06, + "loss": 0.9431, + "step": 4135 + }, + { + "epoch": 2.29, + "learning_rate": 6.610515784498314e-06, + "loss": 0.9515, + "step": 4140 + }, + { + "epoch": 2.29, + "learning_rate": 6.561546231932228e-06, + "loss": 0.9069, + "step": 4145 + }, + { + "epoch": 2.29, + "learning_rate": 6.512731320344101e-06, + "loss": 0.9178, + "step": 4150 + }, + { + "epoch": 2.3, + "learning_rate": 6.464071459138405e-06, + "loss": 0.9156, + "step": 4155 + }, + { + "epoch": 2.3, + "learning_rate": 6.415567056419244e-06, + "loss": 0.9637, + "step": 4160 + }, + { + "epoch": 2.3, + "learning_rate": 6.3672185189869e-06, + "loss": 0.9446, + "step": 4165 + }, + { + "epoch": 2.31, + "learning_rate": 6.319026252334445e-06, + "loss": 0.8726, + "step": 4170 + }, + { + "epoch": 2.31, + "learning_rate": 6.270990660644313e-06, + "loss": 0.9324, + "step": 4175 + }, + { + "epoch": 2.31, + "learning_rate": 6.223112146784935e-06, + "loss": 0.9252, + "step": 4180 + }, + { + "epoch": 2.31, + "learning_rate": 6.1753911123073435e-06, + "loss": 0.8924, + "step": 4185 + }, + { + "epoch": 2.32, + "learning_rate": 6.127827957441817e-06, + "loss": 0.9344, + "step": 4190 + }, + { + "epoch": 2.32, + "learning_rate": 6.080423081094522e-06, + "loss": 0.9341, + "step": 4195 + }, + { + "epoch": 2.32, + "learning_rate": 6.033176880844133e-06, + "loss": 0.905, + "step": 4200 + }, + { + "epoch": 2.33, + "learning_rate": 5.986089752938584e-06, + "loss": 0.9231, + "step": 4205 + }, + { + "epoch": 2.33, + "learning_rate": 5.939162092291622e-06, + "loss": 0.906, + "step": 4210 + }, + { + "epoch": 2.33, + "learning_rate": 5.892394292479633e-06, + "loss": 0.8957, + "step": 4215 + }, + { + "epoch": 2.33, + "learning_rate": 5.8457867457382024e-06, + "loss": 0.867, + "step": 4220 + }, + { + "epoch": 2.34, + "learning_rate": 5.7993398429589506e-06, + "loss": 0.9022, + "step": 4225 + }, + { + "epoch": 2.34, + "learning_rate": 5.753053973686148e-06, + "loss": 0.9064, + "step": 4230 + }, + { + "epoch": 2.34, + "learning_rate": 5.7069295261135525e-06, + "loss": 0.9019, + "step": 4235 + }, + { + "epoch": 2.34, + "learning_rate": 5.66096688708104e-06, + "loss": 0.9397, + "step": 4240 + }, + { + "epoch": 2.35, + "learning_rate": 5.615166442071457e-06, + "loss": 0.8971, + "step": 4245 + }, + { + "epoch": 2.35, + "learning_rate": 5.569528575207339e-06, + "loss": 0.8902, + "step": 4250 + }, + { + "epoch": 2.35, + "learning_rate": 5.5240536692477e-06, + "loss": 0.8982, + "step": 4255 + }, + { + "epoch": 2.36, + "learning_rate": 5.4787421055848164e-06, + "loss": 0.9135, + "step": 4260 + }, + { + "epoch": 2.36, + "learning_rate": 5.433594264241043e-06, + "loss": 0.8969, + "step": 4265 + }, + { + "epoch": 2.36, + "learning_rate": 5.3886105238656055e-06, + "loss": 0.9036, + "step": 4270 + }, + { + "epoch": 2.36, + "learning_rate": 5.3437912617314425e-06, + "loss": 0.9313, + "step": 4275 + }, + { + "epoch": 2.37, + "learning_rate": 5.299136853732034e-06, + "loss": 0.9034, + "step": 4280 + }, + { + "epoch": 2.37, + "learning_rate": 5.254647674378252e-06, + "loss": 0.9673, + "step": 4285 + }, + { + "epoch": 2.37, + "learning_rate": 5.210324096795202e-06, + "loss": 0.9044, + "step": 4290 + }, + { + "epoch": 2.37, + "learning_rate": 5.166166492719124e-06, + "loss": 0.937, + "step": 4295 + }, + { + "epoch": 2.38, + "learning_rate": 5.122175232494255e-06, + "loss": 0.9095, + "step": 4300 + }, + { + "epoch": 2.38, + "learning_rate": 5.078350685069727e-06, + "loss": 0.8787, + "step": 4305 + }, + { + "epoch": 2.38, + "learning_rate": 5.03469321799647e-06, + "loss": 0.8945, + "step": 4310 + }, + { + "epoch": 2.39, + "learning_rate": 4.9912031974241376e-06, + "loss": 0.9285, + "step": 4315 + }, + { + "epoch": 2.39, + "learning_rate": 4.947880988098025e-06, + "loss": 0.9318, + "step": 4320 + }, + { + "epoch": 2.39, + "learning_rate": 4.90472695335602e-06, + "loss": 0.9171, + "step": 4325 + }, + { + "epoch": 2.39, + "learning_rate": 4.8617414551255545e-06, + "loss": 0.923, + "step": 4330 + }, + { + "epoch": 2.4, + "learning_rate": 4.818924853920545e-06, + "loss": 0.9153, + "step": 4335 + }, + { + "epoch": 2.4, + "learning_rate": 4.776277508838428e-06, + "loss": 0.907, + "step": 4340 + }, + { + "epoch": 2.4, + "learning_rate": 4.733799777557069e-06, + "loss": 0.8869, + "step": 4345 + }, + { + "epoch": 2.41, + "learning_rate": 4.691492016331842e-06, + "loss": 0.95, + "step": 4350 + }, + { + "epoch": 2.41, + "learning_rate": 4.6493545799925655e-06, + "loss": 0.9145, + "step": 4355 + }, + { + "epoch": 2.41, + "learning_rate": 4.607387821940609e-06, + "loss": 0.9175, + "step": 4360 + }, + { + "epoch": 2.41, + "learning_rate": 4.565592094145835e-06, + "loss": 0.9125, + "step": 4365 + }, + { + "epoch": 2.42, + "learning_rate": 4.523967747143745e-06, + "loss": 0.8972, + "step": 4370 + }, + { + "epoch": 2.42, + "learning_rate": 4.482515130032453e-06, + "loss": 0.9113, + "step": 4375 + }, + { + "epoch": 2.42, + "learning_rate": 4.441234590469817e-06, + "loss": 0.9447, + "step": 4380 + }, + { + "epoch": 2.42, + "learning_rate": 4.400126474670499e-06, + "loss": 0.9472, + "step": 4385 + }, + { + "epoch": 2.43, + "learning_rate": 4.359191127403059e-06, + "loss": 0.9391, + "step": 4390 + }, + { + "epoch": 2.43, + "learning_rate": 4.318428891987078e-06, + "loss": 0.9171, + "step": 4395 + }, + { + "epoch": 2.43, + "learning_rate": 4.2778401102902595e-06, + "loss": 0.9468, + "step": 4400 + }, + { + "epoch": 2.44, + "learning_rate": 4.237425122725586e-06, + "loss": 0.9034, + "step": 4405 + }, + { + "epoch": 2.44, + "learning_rate": 4.197184268248436e-06, + "loss": 0.8667, + "step": 4410 + }, + { + "epoch": 2.44, + "learning_rate": 4.157117884353765e-06, + "loss": 0.9553, + "step": 4415 + }, + { + "epoch": 2.44, + "learning_rate": 4.117226307073268e-06, + "loss": 0.931, + "step": 4420 + }, + { + "epoch": 2.45, + "learning_rate": 4.07750987097254e-06, + "loss": 0.9287, + "step": 4425 + }, + { + "epoch": 2.45, + "learning_rate": 4.037968909148326e-06, + "loss": 0.8737, + "step": 4430 + }, + { + "epoch": 2.45, + "learning_rate": 3.998603753225647e-06, + "loss": 0.9677, + "step": 4435 + }, + { + "epoch": 2.46, + "learning_rate": 3.959414733355094e-06, + "loss": 0.9194, + "step": 4440 + }, + { + "epoch": 2.46, + "learning_rate": 3.9204021782100115e-06, + "loss": 0.9424, + "step": 4445 + }, + { + "epoch": 2.46, + "learning_rate": 3.8815664149837675e-06, + "loss": 0.9402, + "step": 4450 + }, + { + "epoch": 2.46, + "learning_rate": 3.8429077693869854e-06, + "loss": 0.9196, + "step": 4455 + }, + { + "epoch": 2.47, + "learning_rate": 3.804426565644839e-06, + "loss": 0.9357, + "step": 4460 + }, + { + "epoch": 2.47, + "learning_rate": 3.7661231264943086e-06, + "loss": 0.9234, + "step": 4465 + }, + { + "epoch": 2.47, + "learning_rate": 3.7279977731814963e-06, + "loss": 0.9135, + "step": 4470 + }, + { + "epoch": 2.47, + "learning_rate": 3.690050825458913e-06, + "loss": 0.9074, + "step": 4475 + }, + { + "epoch": 2.48, + "learning_rate": 3.652282601582793e-06, + "loss": 0.8658, + "step": 4480 + }, + { + "epoch": 2.48, + "learning_rate": 3.6146934183104748e-06, + "loss": 0.9522, + "step": 4485 + }, + { + "epoch": 2.48, + "learning_rate": 3.5772835908976538e-06, + "loss": 0.937, + "step": 4490 + }, + { + "epoch": 2.49, + "learning_rate": 3.540053433095841e-06, + "loss": 0.9336, + "step": 4495 + }, + { + "epoch": 2.49, + "learning_rate": 3.503003257149637e-06, + "loss": 0.9437, + "step": 4500 + }, + { + "epoch": 2.49, + "learning_rate": 3.4661333737941976e-06, + "loss": 0.9084, + "step": 4505 + }, + { + "epoch": 2.49, + "learning_rate": 3.429444092252554e-06, + "loss": 0.9378, + "step": 4510 + }, + { + "epoch": 2.5, + "learning_rate": 3.39293572023307e-06, + "loss": 0.8999, + "step": 4515 + }, + { + "epoch": 2.5, + "learning_rate": 3.3566085639268413e-06, + "loss": 0.9417, + "step": 4520 + }, + { + "epoch": 2.5, + "learning_rate": 3.32046292800513e-06, + "loss": 0.9288, + "step": 4525 + }, + { + "epoch": 2.5, + "learning_rate": 3.2844991156168097e-06, + "loss": 0.8957, + "step": 4530 + }, + { + "epoch": 2.51, + "learning_rate": 3.2487174283858223e-06, + "loss": 0.9002, + "step": 4535 + }, + { + "epoch": 2.51, + "learning_rate": 3.2131181664086517e-06, + "loss": 0.9315, + "step": 4540 + }, + { + "epoch": 2.51, + "learning_rate": 3.1777016282517975e-06, + "loss": 0.8948, + "step": 4545 + }, + { + "epoch": 2.52, + "learning_rate": 3.142468110949287e-06, + "loss": 0.9015, + "step": 4550 + }, + { + "epoch": 2.52, + "learning_rate": 3.1074179100001737e-06, + "loss": 0.9273, + "step": 4555 + }, + { + "epoch": 2.52, + "learning_rate": 3.0725513193660404e-06, + "loss": 0.9307, + "step": 4560 + }, + { + "epoch": 2.52, + "learning_rate": 3.0378686314685934e-06, + "loss": 0.9075, + "step": 4565 + }, + { + "epoch": 2.53, + "learning_rate": 3.003370137187128e-06, + "loss": 0.8821, + "step": 4570 + }, + { + "epoch": 2.53, + "learning_rate": 2.969056125856154e-06, + "loss": 0.9245, + "step": 4575 + }, + { + "epoch": 2.53, + "learning_rate": 2.93492688526294e-06, + "loss": 0.9346, + "step": 4580 + }, + { + "epoch": 2.54, + "learning_rate": 2.900982701645111e-06, + "loss": 0.9226, + "step": 4585 + }, + { + "epoch": 2.54, + "learning_rate": 2.867223859688237e-06, + "loss": 0.9215, + "step": 4590 + }, + { + "epoch": 2.54, + "learning_rate": 2.83365064252345e-06, + "loss": 0.878, + "step": 4595 + }, + { + "epoch": 2.54, + "learning_rate": 2.800263331725078e-06, + "loss": 0.9207, + "step": 4600 + } + ], + "max_steps": 5424, + "num_train_epochs": 3, + "total_flos": 1.4948228131926835e+18, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-4600/training_args.bin b/checkpoint-4600/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..77433459daca6603950afc463ff989e817f81ac3 --- /dev/null +++ b/checkpoint-4600/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0f6a8853599a52b1f15984d75ce4b6434c335fdc67d4509d0b2ca0d8c613590a +size 3362 diff --git a/checkpoint-4700/README.md b/checkpoint-4700/README.md new file mode 100644 index 0000000000000000000000000000000000000000..53c0fe6b436905ad7a2c060e98fe1ebdeac1a2fe --- /dev/null +++ b/checkpoint-4700/README.md @@ -0,0 +1,15 @@ +--- +library_name: peft +--- +## Training procedure + + +The following `bitsandbytes` quantization config was used during training: +- load_in_8bit: True +- llm_int8_threshold: 6.0 +- llm_int8_skip_modules: None +- llm_int8_enable_fp32_cpu_offload: False +### Framework versions + + +- PEFT 0.5.0 diff --git a/checkpoint-4700/adapter_config.json b/checkpoint-4700/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..514ae99ac7471cc26016f7575e44dd8304f29b07 --- /dev/null +++ b/checkpoint-4700/adapter_config.json @@ -0,0 +1,21 @@ +{ + "auto_mapping": null, + "base_model_name_or_path": "meta-llama/Llama-2-7b-chat-hf", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "q_proj", + "v_proj" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-4700/adapter_model.bin b/checkpoint-4700/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..1e363ded02493f6288815c7f8af45d2fd9655164 --- /dev/null +++ b/checkpoint-4700/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9fc890b37ceb95e0d70e7a259c90ae0393dac2e9d7a8157dcd21659927561066 +size 16821197 diff --git a/checkpoint-4700/finetuning_args.json b/checkpoint-4700/finetuning_args.json new file mode 100644 index 0000000000000000000000000000000000000000..d01efc206b59c6f88548e8f3940579f2ed2af33b --- /dev/null +++ b/checkpoint-4700/finetuning_args.json @@ -0,0 +1,16 @@ +{ + "dpo_beta": 0.1, + "finetuning_type": "lora", + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "lora_rank": 8, + "lora_target": [ + "q_proj", + "v_proj" + ], + "name_module_trainable": "mlp", + "num_hidden_layers": 32, + "num_layer_trainable": 3, + "ppo_score_norm": false, + "resume_lora_training": true +} diff --git a/checkpoint-4700/optimizer.pt b/checkpoint-4700/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..7cb88bc6afd9ca6e08c95febd64745a08046717d --- /dev/null +++ b/checkpoint-4700/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:33f67957933cf75132b3374ab4307a313b6d741afc710cf7f77b7b9b0d0fd59c +size 33629893 diff --git a/checkpoint-4700/rng_state.pth b/checkpoint-4700/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..8e5ca6b2b247c86440813845dce19be482bd4882 --- /dev/null +++ b/checkpoint-4700/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c1b2cfef89ef7bfa8f32a2deddd066b110571c9c12d81a1fdffdbefa723203f4 +size 14575 diff --git a/checkpoint-4700/scaler.pt b/checkpoint-4700/scaler.pt new file mode 100644 index 0000000000000000000000000000000000000000..a161f3dff1a140638d5e0be70d7efbb21b653120 --- /dev/null +++ b/checkpoint-4700/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:79907272265d3e8fe077c76e45ef92f91446af2ec05f1ac097079ba4d717362e +size 557 diff --git a/checkpoint-4700/scheduler.pt b/checkpoint-4700/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..0252448509431f0e18b2c6182722577270c5420e --- /dev/null +++ b/checkpoint-4700/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9d569604dccce2b78ccf122fc16375973b4f2f7f2bd134f79184bb6f0158c805 +size 627 diff --git a/checkpoint-4700/trainer_state.json b/checkpoint-4700/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..97fb693bdf7cc767962b6fae82fe9bbf0e1cde3c --- /dev/null +++ b/checkpoint-4700/trainer_state.json @@ -0,0 +1,5656 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 2.5988388166989216, + "global_step": 4700, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.9999895164082156e-05, + "loss": 1.6349, + "step": 5 + }, + { + "epoch": 0.01, + "learning_rate": 4.999958065720787e-05, + "loss": 1.6199, + "step": 10 + }, + { + "epoch": 0.01, + "learning_rate": 4.999905648201487e-05, + "loss": 1.4834, + "step": 15 + }, + { + "epoch": 0.01, + "learning_rate": 4.999832264289934e-05, + "loss": 1.3882, + "step": 20 + }, + { + "epoch": 0.01, + "learning_rate": 4.999737914601591e-05, + "loss": 1.3679, + "step": 25 + }, + { + "epoch": 0.02, + "learning_rate": 4.999622599927756e-05, + "loss": 1.2396, + "step": 30 + }, + { + "epoch": 0.02, + "learning_rate": 4.999486321235559e-05, + "loss": 1.321, + "step": 35 + }, + { + "epoch": 0.02, + "learning_rate": 4.9993290796679516e-05, + "loss": 1.2874, + "step": 40 + }, + { + "epoch": 0.02, + "learning_rate": 4.999150876543699e-05, + "loss": 1.2607, + "step": 45 + }, + { + "epoch": 0.03, + "learning_rate": 4.9989517133573694e-05, + "loss": 1.2454, + "step": 50 + }, + { + "epoch": 0.03, + "learning_rate": 4.9987315917793174e-05, + "loss": 1.2799, + "step": 55 + }, + { + "epoch": 0.03, + "learning_rate": 4.998490513655676e-05, + "loss": 1.2575, + "step": 60 + }, + { + "epoch": 0.04, + "learning_rate": 4.998228481008337e-05, + "loss": 1.2404, + "step": 65 + }, + { + "epoch": 0.04, + "learning_rate": 4.997945496034934e-05, + "loss": 1.2219, + "step": 70 + }, + { + "epoch": 0.04, + "learning_rate": 4.9976415611088267e-05, + "loss": 1.2241, + "step": 75 + }, + { + "epoch": 0.04, + "learning_rate": 4.997316678779079e-05, + "loss": 1.1716, + "step": 80 + }, + { + "epoch": 0.05, + "learning_rate": 4.996970851770438e-05, + "loss": 1.1883, + "step": 85 + }, + { + "epoch": 0.05, + "learning_rate": 4.9966040829833115e-05, + "loss": 1.205, + "step": 90 + }, + { + "epoch": 0.05, + "learning_rate": 4.9962163754937426e-05, + "loss": 1.1246, + "step": 95 + }, + { + "epoch": 0.06, + "learning_rate": 4.995807732553384e-05, + "loss": 1.1636, + "step": 100 + }, + { + "epoch": 0.06, + "learning_rate": 4.9953781575894723e-05, + "loss": 1.158, + "step": 105 + }, + { + "epoch": 0.06, + "learning_rate": 4.9949276542048e-05, + "loss": 1.1477, + "step": 110 + }, + { + "epoch": 0.06, + "learning_rate": 4.9944562261776805e-05, + "loss": 1.1678, + "step": 115 + }, + { + "epoch": 0.07, + "learning_rate": 4.9939638774619216e-05, + "loss": 1.1501, + "step": 120 + }, + { + "epoch": 0.07, + "learning_rate": 4.99345061218679e-05, + "loss": 1.1955, + "step": 125 + }, + { + "epoch": 0.07, + "learning_rate": 4.9929164346569756e-05, + "loss": 1.1724, + "step": 130 + }, + { + "epoch": 0.07, + "learning_rate": 4.9923613493525576e-05, + "loss": 1.177, + "step": 135 + }, + { + "epoch": 0.08, + "learning_rate": 4.991785360928968e-05, + "loss": 1.1418, + "step": 140 + }, + { + "epoch": 0.08, + "learning_rate": 4.991188474216947e-05, + "loss": 1.1898, + "step": 145 + }, + { + "epoch": 0.08, + "learning_rate": 4.9905706942225094e-05, + "loss": 1.1479, + "step": 150 + }, + { + "epoch": 0.09, + "learning_rate": 4.9899320261268966e-05, + "loss": 1.1356, + "step": 155 + }, + { + "epoch": 0.09, + "learning_rate": 4.989272475286537e-05, + "loss": 1.1397, + "step": 160 + }, + { + "epoch": 0.09, + "learning_rate": 4.9885920472330004e-05, + "loss": 1.1215, + "step": 165 + }, + { + "epoch": 0.09, + "learning_rate": 4.9878907476729516e-05, + "loss": 1.167, + "step": 170 + }, + { + "epoch": 0.1, + "learning_rate": 4.9871685824881e-05, + "loss": 1.1219, + "step": 175 + }, + { + "epoch": 0.1, + "learning_rate": 4.9864255577351534e-05, + "loss": 1.0835, + "step": 180 + }, + { + "epoch": 0.1, + "learning_rate": 4.985661679645769e-05, + "loss": 1.0721, + "step": 185 + }, + { + "epoch": 0.11, + "learning_rate": 4.9848769546264915e-05, + "loss": 1.0692, + "step": 190 + }, + { + "epoch": 0.11, + "learning_rate": 4.9840713892587146e-05, + "loss": 1.0488, + "step": 195 + }, + { + "epoch": 0.11, + "learning_rate": 4.983244990298609e-05, + "loss": 1.1285, + "step": 200 + }, + { + "epoch": 0.11, + "learning_rate": 4.982397764677081e-05, + "loss": 1.0832, + "step": 205 + }, + { + "epoch": 0.12, + "learning_rate": 4.981529719499704e-05, + "loss": 1.0652, + "step": 210 + }, + { + "epoch": 0.12, + "learning_rate": 4.980640862046663e-05, + "loss": 1.1043, + "step": 215 + }, + { + "epoch": 0.12, + "learning_rate": 4.979731199772693e-05, + "loss": 1.112, + "step": 220 + }, + { + "epoch": 0.12, + "learning_rate": 4.9788007403070146e-05, + "loss": 1.1029, + "step": 225 + }, + { + "epoch": 0.13, + "learning_rate": 4.977849491453277e-05, + "loss": 1.0869, + "step": 230 + }, + { + "epoch": 0.13, + "learning_rate": 4.976877461189481e-05, + "loss": 1.0843, + "step": 235 + }, + { + "epoch": 0.13, + "learning_rate": 4.975884657667922e-05, + "loss": 1.0789, + "step": 240 + }, + { + "epoch": 0.14, + "learning_rate": 4.974871089215118e-05, + "loss": 1.0449, + "step": 245 + }, + { + "epoch": 0.14, + "learning_rate": 4.9738367643317405e-05, + "loss": 1.1053, + "step": 250 + }, + { + "epoch": 0.14, + "learning_rate": 4.9727816916925395e-05, + "loss": 1.0651, + "step": 255 + }, + { + "epoch": 0.14, + "learning_rate": 4.971705880146276e-05, + "loss": 1.0828, + "step": 260 + }, + { + "epoch": 0.15, + "learning_rate": 4.970609338715646e-05, + "loss": 1.0932, + "step": 265 + }, + { + "epoch": 0.15, + "learning_rate": 4.969492076597203e-05, + "loss": 1.0648, + "step": 270 + }, + { + "epoch": 0.15, + "learning_rate": 4.968354103161283e-05, + "loss": 1.0948, + "step": 275 + }, + { + "epoch": 0.15, + "learning_rate": 4.967195427951926e-05, + "loss": 1.0721, + "step": 280 + }, + { + "epoch": 0.16, + "learning_rate": 4.9660160606867936e-05, + "loss": 1.124, + "step": 285 + }, + { + "epoch": 0.16, + "learning_rate": 4.9648160112570896e-05, + "loss": 1.0963, + "step": 290 + }, + { + "epoch": 0.16, + "learning_rate": 4.9635952897274773e-05, + "loss": 1.1078, + "step": 295 + }, + { + "epoch": 0.17, + "learning_rate": 4.9623539063359925e-05, + "loss": 1.1059, + "step": 300 + }, + { + "epoch": 0.17, + "learning_rate": 4.961091871493962e-05, + "loss": 1.1032, + "step": 305 + }, + { + "epoch": 0.17, + "learning_rate": 4.959809195785912e-05, + "loss": 1.0595, + "step": 310 + }, + { + "epoch": 0.17, + "learning_rate": 4.958505889969481e-05, + "loss": 1.1096, + "step": 315 + }, + { + "epoch": 0.18, + "learning_rate": 4.957181964975329e-05, + "loss": 1.0589, + "step": 320 + }, + { + "epoch": 0.18, + "learning_rate": 4.955837431907049e-05, + "loss": 1.0608, + "step": 325 + }, + { + "epoch": 0.18, + "learning_rate": 4.954472302041069e-05, + "loss": 1.0819, + "step": 330 + }, + { + "epoch": 0.19, + "learning_rate": 4.9530865868265605e-05, + "loss": 1.0759, + "step": 335 + }, + { + "epoch": 0.19, + "learning_rate": 4.951680297885342e-05, + "loss": 1.0515, + "step": 340 + }, + { + "epoch": 0.19, + "learning_rate": 4.950253447011779e-05, + "loss": 1.0371, + "step": 345 + }, + { + "epoch": 0.19, + "learning_rate": 4.948806046172691e-05, + "loss": 1.0619, + "step": 350 + }, + { + "epoch": 0.2, + "learning_rate": 4.947338107507245e-05, + "loss": 1.0757, + "step": 355 + }, + { + "epoch": 0.2, + "learning_rate": 4.945849643326857e-05, + "loss": 1.0686, + "step": 360 + }, + { + "epoch": 0.2, + "learning_rate": 4.9443406661150874e-05, + "loss": 1.0809, + "step": 365 + }, + { + "epoch": 0.2, + "learning_rate": 4.942811188527537e-05, + "loss": 1.0704, + "step": 370 + }, + { + "epoch": 0.21, + "learning_rate": 4.941261223391742e-05, + "loss": 1.0655, + "step": 375 + }, + { + "epoch": 0.21, + "learning_rate": 4.939690783707063e-05, + "loss": 1.1182, + "step": 380 + }, + { + "epoch": 0.21, + "learning_rate": 4.938099882644578e-05, + "loss": 1.081, + "step": 385 + }, + { + "epoch": 0.22, + "learning_rate": 4.9364885335469734e-05, + "loss": 1.0792, + "step": 390 + }, + { + "epoch": 0.22, + "learning_rate": 4.93485674992843e-05, + "loss": 1.0244, + "step": 395 + }, + { + "epoch": 0.22, + "learning_rate": 4.933204545474511e-05, + "loss": 1.0531, + "step": 400 + }, + { + "epoch": 0.22, + "learning_rate": 4.9315319340420465e-05, + "loss": 1.048, + "step": 405 + }, + { + "epoch": 0.23, + "learning_rate": 4.929838929659015e-05, + "loss": 1.04, + "step": 410 + }, + { + "epoch": 0.23, + "learning_rate": 4.9281255465244314e-05, + "loss": 1.0686, + "step": 415 + }, + { + "epoch": 0.23, + "learning_rate": 4.926391799008223e-05, + "loss": 1.0933, + "step": 420 + }, + { + "epoch": 0.24, + "learning_rate": 4.924637701651111e-05, + "loss": 1.0313, + "step": 425 + }, + { + "epoch": 0.24, + "learning_rate": 4.9228632691644874e-05, + "loss": 1.0811, + "step": 430 + }, + { + "epoch": 0.24, + "learning_rate": 4.921068516430293e-05, + "loss": 1.0617, + "step": 435 + }, + { + "epoch": 0.24, + "learning_rate": 4.919253458500892e-05, + "loss": 1.0482, + "step": 440 + }, + { + "epoch": 0.25, + "learning_rate": 4.9174181105989445e-05, + "loss": 1.0681, + "step": 445 + }, + { + "epoch": 0.25, + "learning_rate": 4.9155624881172834e-05, + "loss": 1.0476, + "step": 450 + }, + { + "epoch": 0.25, + "learning_rate": 4.913686606618777e-05, + "loss": 1.0463, + "step": 455 + }, + { + "epoch": 0.25, + "learning_rate": 4.911790481836208e-05, + "loss": 1.0213, + "step": 460 + }, + { + "epoch": 0.26, + "learning_rate": 4.909874129672133e-05, + "loss": 0.9855, + "step": 465 + }, + { + "epoch": 0.26, + "learning_rate": 4.907937566198757e-05, + "loss": 1.045, + "step": 470 + }, + { + "epoch": 0.26, + "learning_rate": 4.9059808076577914e-05, + "loss": 1.0442, + "step": 475 + }, + { + "epoch": 0.27, + "learning_rate": 4.904003870460323e-05, + "loss": 1.0354, + "step": 480 + }, + { + "epoch": 0.27, + "learning_rate": 4.9020067711866735e-05, + "loss": 1.0331, + "step": 485 + }, + { + "epoch": 0.27, + "learning_rate": 4.899989526586261e-05, + "loss": 1.0585, + "step": 490 + }, + { + "epoch": 0.27, + "learning_rate": 4.8979521535774636e-05, + "loss": 1.0223, + "step": 495 + }, + { + "epoch": 0.28, + "learning_rate": 4.895894669247468e-05, + "loss": 1.0118, + "step": 500 + }, + { + "epoch": 0.28, + "learning_rate": 4.8938170908521356e-05, + "loss": 1.0508, + "step": 505 + }, + { + "epoch": 0.28, + "learning_rate": 4.8917194358158534e-05, + "loss": 1.0656, + "step": 510 + }, + { + "epoch": 0.28, + "learning_rate": 4.8896017217313886e-05, + "loss": 1.0655, + "step": 515 + }, + { + "epoch": 0.29, + "learning_rate": 4.887463966359741e-05, + "loss": 1.0833, + "step": 520 + }, + { + "epoch": 0.29, + "learning_rate": 4.8853061876299956e-05, + "loss": 1.068, + "step": 525 + }, + { + "epoch": 0.29, + "learning_rate": 4.8831284036391684e-05, + "loss": 1.0487, + "step": 530 + }, + { + "epoch": 0.3, + "learning_rate": 4.880930632652058e-05, + "loss": 1.059, + "step": 535 + }, + { + "epoch": 0.3, + "learning_rate": 4.878712893101092e-05, + "loss": 1.0408, + "step": 540 + }, + { + "epoch": 0.3, + "learning_rate": 4.876475203586171e-05, + "loss": 0.9976, + "step": 545 + }, + { + "epoch": 0.3, + "learning_rate": 4.874217582874514e-05, + "loss": 1.0358, + "step": 550 + }, + { + "epoch": 0.31, + "learning_rate": 4.8719400499005e-05, + "loss": 1.0073, + "step": 555 + }, + { + "epoch": 0.31, + "learning_rate": 4.869642623765509e-05, + "loss": 1.0742, + "step": 560 + }, + { + "epoch": 0.31, + "learning_rate": 4.867325323737765e-05, + "loss": 1.0463, + "step": 565 + }, + { + "epoch": 0.32, + "learning_rate": 4.864988169252168e-05, + "loss": 1.0242, + "step": 570 + }, + { + "epoch": 0.32, + "learning_rate": 4.8626311799101375e-05, + "loss": 1.0773, + "step": 575 + }, + { + "epoch": 0.32, + "learning_rate": 4.860254375479446e-05, + "loss": 1.0371, + "step": 580 + }, + { + "epoch": 0.32, + "learning_rate": 4.8578577758940504e-05, + "loss": 1.0399, + "step": 585 + }, + { + "epoch": 0.33, + "learning_rate": 4.855441401253928e-05, + "loss": 1.0411, + "step": 590 + }, + { + "epoch": 0.33, + "learning_rate": 4.8530052718249076e-05, + "loss": 1.0476, + "step": 595 + }, + { + "epoch": 0.33, + "learning_rate": 4.850549408038498e-05, + "loss": 1.0075, + "step": 600 + }, + { + "epoch": 0.33, + "learning_rate": 4.848073830491717e-05, + "loss": 1.0045, + "step": 605 + }, + { + "epoch": 0.34, + "learning_rate": 4.845578559946923e-05, + "loss": 1.0368, + "step": 610 + }, + { + "epoch": 0.34, + "learning_rate": 4.8430636173316306e-05, + "loss": 1.0156, + "step": 615 + }, + { + "epoch": 0.34, + "learning_rate": 4.840529023738348e-05, + "loss": 1.0334, + "step": 620 + }, + { + "epoch": 0.35, + "learning_rate": 4.837974800424389e-05, + "loss": 0.9994, + "step": 625 + }, + { + "epoch": 0.35, + "learning_rate": 4.8354009688117026e-05, + "loss": 1.0467, + "step": 630 + }, + { + "epoch": 0.35, + "learning_rate": 4.8328075504866874e-05, + "loss": 0.9968, + "step": 635 + }, + { + "epoch": 0.35, + "learning_rate": 4.8301945672000164e-05, + "loss": 1.041, + "step": 640 + }, + { + "epoch": 0.36, + "learning_rate": 4.8275620408664487e-05, + "loss": 0.9811, + "step": 645 + }, + { + "epoch": 0.36, + "learning_rate": 4.8249099935646494e-05, + "loss": 1.0235, + "step": 650 + }, + { + "epoch": 0.36, + "learning_rate": 4.822238447537003e-05, + "loss": 1.0152, + "step": 655 + }, + { + "epoch": 0.36, + "learning_rate": 4.819547425189429e-05, + "loss": 1.038, + "step": 660 + }, + { + "epoch": 0.37, + "learning_rate": 4.81683694909119e-05, + "loss": 1.0387, + "step": 665 + }, + { + "epoch": 0.37, + "learning_rate": 4.814107041974707e-05, + "loss": 1.019, + "step": 670 + }, + { + "epoch": 0.37, + "learning_rate": 4.811357726735366e-05, + "loss": 1.0403, + "step": 675 + }, + { + "epoch": 0.38, + "learning_rate": 4.808589026431324e-05, + "loss": 1.0828, + "step": 680 + }, + { + "epoch": 0.38, + "learning_rate": 4.805800964283322e-05, + "loss": 1.0128, + "step": 685 + }, + { + "epoch": 0.38, + "learning_rate": 4.802993563674483e-05, + "loss": 1.0502, + "step": 690 + }, + { + "epoch": 0.38, + "learning_rate": 4.80016684815012e-05, + "loss": 1.0169, + "step": 695 + }, + { + "epoch": 0.39, + "learning_rate": 4.7973208414175406e-05, + "loss": 1.0316, + "step": 700 + }, + { + "epoch": 0.39, + "learning_rate": 4.794455567345842e-05, + "loss": 1.0263, + "step": 705 + }, + { + "epoch": 0.39, + "learning_rate": 4.791571049965714e-05, + "loss": 1.0801, + "step": 710 + }, + { + "epoch": 0.4, + "learning_rate": 4.7886673134692404e-05, + "loss": 1.0575, + "step": 715 + }, + { + "epoch": 0.4, + "learning_rate": 4.7857443822096905e-05, + "loss": 0.9767, + "step": 720 + }, + { + "epoch": 0.4, + "learning_rate": 4.782802280701319e-05, + "loss": 0.9666, + "step": 725 + }, + { + "epoch": 0.4, + "learning_rate": 4.77984103361916e-05, + "loss": 0.9983, + "step": 730 + }, + { + "epoch": 0.41, + "learning_rate": 4.776860665798816e-05, + "loss": 1.0219, + "step": 735 + }, + { + "epoch": 0.41, + "learning_rate": 4.773861202236257e-05, + "loss": 0.9963, + "step": 740 + }, + { + "epoch": 0.41, + "learning_rate": 4.770842668087602e-05, + "loss": 1.0219, + "step": 745 + }, + { + "epoch": 0.41, + "learning_rate": 4.767805088668916e-05, + "loss": 1.0029, + "step": 750 + }, + { + "epoch": 0.42, + "learning_rate": 4.7647484894559936e-05, + "loss": 1.0077, + "step": 755 + }, + { + "epoch": 0.42, + "learning_rate": 4.7616728960841444e-05, + "loss": 0.9908, + "step": 760 + }, + { + "epoch": 0.42, + "learning_rate": 4.758578334347981e-05, + "loss": 1.0092, + "step": 765 + }, + { + "epoch": 0.43, + "learning_rate": 4.7554648302012015e-05, + "loss": 0.9874, + "step": 770 + }, + { + "epoch": 0.43, + "learning_rate": 4.7523324097563706e-05, + "loss": 1.0185, + "step": 775 + }, + { + "epoch": 0.43, + "learning_rate": 4.749181099284703e-05, + "loss": 1.0533, + "step": 780 + }, + { + "epoch": 0.43, + "learning_rate": 4.746010925215839e-05, + "loss": 1.0083, + "step": 785 + }, + { + "epoch": 0.44, + "learning_rate": 4.74282191413763e-05, + "loss": 1.0191, + "step": 790 + }, + { + "epoch": 0.44, + "learning_rate": 4.7396140927959045e-05, + "loss": 0.9786, + "step": 795 + }, + { + "epoch": 0.44, + "learning_rate": 4.7363874880942574e-05, + "loss": 1.0197, + "step": 800 + }, + { + "epoch": 0.45, + "learning_rate": 4.733142127093813e-05, + "loss": 1.0471, + "step": 805 + }, + { + "epoch": 0.45, + "learning_rate": 4.7298780370130014e-05, + "loss": 1.0134, + "step": 810 + }, + { + "epoch": 0.45, + "learning_rate": 4.726595245227336e-05, + "loss": 0.9801, + "step": 815 + }, + { + "epoch": 0.45, + "learning_rate": 4.723293779269173e-05, + "loss": 0.9944, + "step": 820 + }, + { + "epoch": 0.46, + "learning_rate": 4.7199736668274924e-05, + "loss": 1.0358, + "step": 825 + }, + { + "epoch": 0.46, + "learning_rate": 4.716634935747655e-05, + "loss": 0.9907, + "step": 830 + }, + { + "epoch": 0.46, + "learning_rate": 4.713277614031177e-05, + "loss": 1.0698, + "step": 835 + }, + { + "epoch": 0.46, + "learning_rate": 4.70990172983549e-05, + "loss": 1.0466, + "step": 840 + }, + { + "epoch": 0.47, + "learning_rate": 4.706507311473707e-05, + "loss": 0.9489, + "step": 845 + }, + { + "epoch": 0.47, + "learning_rate": 4.703094387414385e-05, + "loss": 0.9936, + "step": 850 + }, + { + "epoch": 0.47, + "learning_rate": 4.699662986281288e-05, + "loss": 0.9672, + "step": 855 + }, + { + "epoch": 0.48, + "learning_rate": 4.696213136853141e-05, + "loss": 1.0164, + "step": 860 + }, + { + "epoch": 0.48, + "learning_rate": 4.6927448680633954e-05, + "loss": 0.9817, + "step": 865 + }, + { + "epoch": 0.48, + "learning_rate": 4.689258208999983e-05, + "loss": 1.028, + "step": 870 + }, + { + "epoch": 0.48, + "learning_rate": 4.6857531889050716e-05, + "loss": 1.0239, + "step": 875 + }, + { + "epoch": 0.49, + "learning_rate": 4.682229837174821e-05, + "loss": 1.0378, + "step": 880 + }, + { + "epoch": 0.49, + "learning_rate": 4.678688183359135e-05, + "loss": 0.9908, + "step": 885 + }, + { + "epoch": 0.49, + "learning_rate": 4.675128257161418e-05, + "loss": 0.9384, + "step": 890 + }, + { + "epoch": 0.49, + "learning_rate": 4.671550088438319e-05, + "loss": 0.9481, + "step": 895 + }, + { + "epoch": 0.5, + "learning_rate": 4.6679537071994874e-05, + "loss": 0.9541, + "step": 900 + }, + { + "epoch": 0.5, + "learning_rate": 4.6643391436073165e-05, + "loss": 0.9787, + "step": 905 + }, + { + "epoch": 0.5, + "learning_rate": 4.660706427976693e-05, + "loss": 0.9897, + "step": 910 + }, + { + "epoch": 0.51, + "learning_rate": 4.657055590774745e-05, + "loss": 0.9947, + "step": 915 + }, + { + "epoch": 0.51, + "learning_rate": 4.6533866626205805e-05, + "loss": 0.9614, + "step": 920 + }, + { + "epoch": 0.51, + "learning_rate": 4.649699674285036e-05, + "loss": 1.0323, + "step": 925 + }, + { + "epoch": 0.51, + "learning_rate": 4.645994656690417e-05, + "loss": 1.0345, + "step": 930 + }, + { + "epoch": 0.52, + "learning_rate": 4.642271640910235e-05, + "loss": 1.0432, + "step": 935 + }, + { + "epoch": 0.52, + "learning_rate": 4.638530658168954e-05, + "loss": 1.0102, + "step": 940 + }, + { + "epoch": 0.52, + "learning_rate": 4.6347717398417203e-05, + "loss": 1.0361, + "step": 945 + }, + { + "epoch": 0.53, + "learning_rate": 4.6309949174541096e-05, + "loss": 0.9818, + "step": 950 + }, + { + "epoch": 0.53, + "learning_rate": 4.627200222681851e-05, + "loss": 1.0293, + "step": 955 + }, + { + "epoch": 0.53, + "learning_rate": 4.6233876873505694e-05, + "loss": 1.0125, + "step": 960 + }, + { + "epoch": 0.53, + "learning_rate": 4.619557343435516e-05, + "loss": 1.0039, + "step": 965 + }, + { + "epoch": 0.54, + "learning_rate": 4.615709223061302e-05, + "loss": 1.0146, + "step": 970 + }, + { + "epoch": 0.54, + "learning_rate": 4.611843358501624e-05, + "loss": 1.0352, + "step": 975 + }, + { + "epoch": 0.54, + "learning_rate": 4.6079597821789993e-05, + "loss": 1.0405, + "step": 980 + }, + { + "epoch": 0.54, + "learning_rate": 4.604058526664491e-05, + "loss": 0.9834, + "step": 985 + }, + { + "epoch": 0.55, + "learning_rate": 4.600139624677436e-05, + "loss": 1.0252, + "step": 990 + }, + { + "epoch": 0.55, + "learning_rate": 4.596203109085168e-05, + "loss": 1.0516, + "step": 995 + }, + { + "epoch": 0.55, + "learning_rate": 4.5922490129027464e-05, + "loss": 0.9762, + "step": 1000 + }, + { + "epoch": 0.56, + "learning_rate": 4.588277369292674e-05, + "loss": 0.9801, + "step": 1005 + }, + { + "epoch": 0.56, + "learning_rate": 4.5842882115646234e-05, + "loss": 1.0127, + "step": 1010 + }, + { + "epoch": 0.56, + "learning_rate": 4.580281573175157e-05, + "loss": 1.0415, + "step": 1015 + }, + { + "epoch": 0.56, + "learning_rate": 4.576257487727442e-05, + "loss": 0.9974, + "step": 1020 + }, + { + "epoch": 0.57, + "learning_rate": 4.572215988970974e-05, + "loss": 1.0097, + "step": 1025 + }, + { + "epoch": 0.57, + "learning_rate": 4.568157110801293e-05, + "loss": 1.0004, + "step": 1030 + }, + { + "epoch": 0.57, + "learning_rate": 4.5640808872596944e-05, + "loss": 0.9932, + "step": 1035 + }, + { + "epoch": 0.58, + "learning_rate": 4.5599873525329505e-05, + "loss": 0.9437, + "step": 1040 + }, + { + "epoch": 0.58, + "learning_rate": 4.555876540953019e-05, + "loss": 1.0004, + "step": 1045 + }, + { + "epoch": 0.58, + "learning_rate": 4.551748486996755e-05, + "loss": 0.9743, + "step": 1050 + }, + { + "epoch": 0.58, + "learning_rate": 4.547603225285626e-05, + "loss": 1.0303, + "step": 1055 + }, + { + "epoch": 0.59, + "learning_rate": 4.543440790585417e-05, + "loss": 1.0225, + "step": 1060 + }, + { + "epoch": 0.59, + "learning_rate": 4.539261217805939e-05, + "loss": 1.0102, + "step": 1065 + }, + { + "epoch": 0.59, + "learning_rate": 4.535064542000743e-05, + "loss": 1.0087, + "step": 1070 + }, + { + "epoch": 0.59, + "learning_rate": 4.5308507983668165e-05, + "loss": 0.952, + "step": 1075 + }, + { + "epoch": 0.6, + "learning_rate": 4.526620022244293e-05, + "loss": 0.9767, + "step": 1080 + }, + { + "epoch": 0.6, + "learning_rate": 4.522372249116158e-05, + "loss": 1.0049, + "step": 1085 + }, + { + "epoch": 0.6, + "learning_rate": 4.5181075146079456e-05, + "loss": 1.0397, + "step": 1090 + }, + { + "epoch": 0.61, + "learning_rate": 4.5138258544874455e-05, + "loss": 1.0167, + "step": 1095 + }, + { + "epoch": 0.61, + "learning_rate": 4.5095273046643985e-05, + "loss": 0.9966, + "step": 1100 + }, + { + "epoch": 0.61, + "learning_rate": 4.5052119011901986e-05, + "loss": 0.983, + "step": 1105 + }, + { + "epoch": 0.61, + "learning_rate": 4.500879680257587e-05, + "loss": 1.0301, + "step": 1110 + }, + { + "epoch": 0.62, + "learning_rate": 4.4965306782003535e-05, + "loss": 1.0371, + "step": 1115 + }, + { + "epoch": 0.62, + "learning_rate": 4.492164931493028e-05, + "loss": 1.0083, + "step": 1120 + }, + { + "epoch": 0.62, + "learning_rate": 4.487782476750575e-05, + "loss": 0.9728, + "step": 1125 + }, + { + "epoch": 0.62, + "learning_rate": 4.4833833507280884e-05, + "loss": 0.9892, + "step": 1130 + }, + { + "epoch": 0.63, + "learning_rate": 4.4789675903204805e-05, + "loss": 0.9847, + "step": 1135 + }, + { + "epoch": 0.63, + "learning_rate": 4.474535232562176e-05, + "loss": 1.0545, + "step": 1140 + }, + { + "epoch": 0.63, + "learning_rate": 4.470086314626797e-05, + "loss": 1.0204, + "step": 1145 + }, + { + "epoch": 0.64, + "learning_rate": 4.465620873826856e-05, + "loss": 0.9648, + "step": 1150 + }, + { + "epoch": 0.64, + "learning_rate": 4.46113894761344e-05, + "loss": 0.9545, + "step": 1155 + }, + { + "epoch": 0.64, + "learning_rate": 4.456640573575896e-05, + "loss": 1.0256, + "step": 1160 + }, + { + "epoch": 0.64, + "learning_rate": 4.4521257894415183e-05, + "loss": 0.99, + "step": 1165 + }, + { + "epoch": 0.65, + "learning_rate": 4.44759463307523e-05, + "loss": 0.9906, + "step": 1170 + }, + { + "epoch": 0.65, + "learning_rate": 4.443047142479266e-05, + "loss": 0.9876, + "step": 1175 + }, + { + "epoch": 0.65, + "learning_rate": 4.4384833557928553e-05, + "loss": 1.0495, + "step": 1180 + }, + { + "epoch": 0.66, + "learning_rate": 4.4339033112918966e-05, + "loss": 0.9869, + "step": 1185 + }, + { + "epoch": 0.66, + "learning_rate": 4.4293070473886456e-05, + "loss": 1.0299, + "step": 1190 + }, + { + "epoch": 0.66, + "learning_rate": 4.424694602631385e-05, + "loss": 1.0073, + "step": 1195 + }, + { + "epoch": 0.66, + "learning_rate": 4.420066015704105e-05, + "loss": 1.0023, + "step": 1200 + }, + { + "epoch": 0.67, + "learning_rate": 4.41542132542618e-05, + "loss": 0.9462, + "step": 1205 + }, + { + "epoch": 0.67, + "learning_rate": 4.410760570752037e-05, + "loss": 1.0116, + "step": 1210 + }, + { + "epoch": 0.67, + "learning_rate": 4.4060837907708375e-05, + "loss": 0.9652, + "step": 1215 + }, + { + "epoch": 0.67, + "learning_rate": 4.401391024706142e-05, + "loss": 1.0411, + "step": 1220 + }, + { + "epoch": 0.68, + "learning_rate": 4.396682311915586e-05, + "loss": 0.9691, + "step": 1225 + }, + { + "epoch": 0.68, + "learning_rate": 4.3919576918905495e-05, + "loss": 0.961, + "step": 1230 + }, + { + "epoch": 0.68, + "learning_rate": 4.387217204255819e-05, + "loss": 0.9602, + "step": 1235 + }, + { + "epoch": 0.69, + "learning_rate": 4.3824608887692666e-05, + "loss": 1.0197, + "step": 1240 + }, + { + "epoch": 0.69, + "learning_rate": 4.377688785321507e-05, + "loss": 0.9982, + "step": 1245 + }, + { + "epoch": 0.69, + "learning_rate": 4.372900933935569e-05, + "loss": 0.986, + "step": 1250 + }, + { + "epoch": 0.69, + "learning_rate": 4.368097374766556e-05, + "loss": 0.9744, + "step": 1255 + }, + { + "epoch": 0.7, + "learning_rate": 4.3632781481013105e-05, + "loss": 1.0078, + "step": 1260 + }, + { + "epoch": 0.7, + "learning_rate": 4.358443294358077e-05, + "loss": 0.9865, + "step": 1265 + }, + { + "epoch": 0.7, + "learning_rate": 4.35359285408616e-05, + "loss": 0.976, + "step": 1270 + }, + { + "epoch": 0.71, + "learning_rate": 4.348726867965591e-05, + "loss": 1.0141, + "step": 1275 + }, + { + "epoch": 0.71, + "learning_rate": 4.343845376806777e-05, + "loss": 0.952, + "step": 1280 + }, + { + "epoch": 0.71, + "learning_rate": 4.338948421550169e-05, + "loss": 1.0239, + "step": 1285 + }, + { + "epoch": 0.71, + "learning_rate": 4.334036043265909e-05, + "loss": 0.994, + "step": 1290 + }, + { + "epoch": 0.72, + "learning_rate": 4.329108283153492e-05, + "loss": 1.0505, + "step": 1295 + }, + { + "epoch": 0.72, + "learning_rate": 4.3241651825414195e-05, + "loss": 0.9919, + "step": 1300 + }, + { + "epoch": 0.72, + "learning_rate": 4.31920678288685e-05, + "loss": 1.0169, + "step": 1305 + }, + { + "epoch": 0.72, + "learning_rate": 4.3142331257752546e-05, + "loss": 0.9938, + "step": 1310 + }, + { + "epoch": 0.73, + "learning_rate": 4.309244252920064e-05, + "loss": 0.92, + "step": 1315 + }, + { + "epoch": 0.73, + "learning_rate": 4.304240206162326e-05, + "loss": 0.9537, + "step": 1320 + }, + { + "epoch": 0.73, + "learning_rate": 4.299221027470345e-05, + "loss": 0.996, + "step": 1325 + }, + { + "epoch": 0.74, + "learning_rate": 4.294186758939339e-05, + "loss": 0.9652, + "step": 1330 + }, + { + "epoch": 0.74, + "learning_rate": 4.2891374427910795e-05, + "loss": 1.0138, + "step": 1335 + }, + { + "epoch": 0.74, + "learning_rate": 4.284073121373544e-05, + "loss": 0.9267, + "step": 1340 + }, + { + "epoch": 0.74, + "learning_rate": 4.278993837160553e-05, + "loss": 0.9899, + "step": 1345 + }, + { + "epoch": 0.75, + "learning_rate": 4.273899632751422e-05, + "loss": 0.963, + "step": 1350 + }, + { + "epoch": 0.75, + "learning_rate": 4.2687905508705974e-05, + "loss": 1.0066, + "step": 1355 + }, + { + "epoch": 0.75, + "learning_rate": 4.263666634367303e-05, + "loss": 0.9939, + "step": 1360 + }, + { + "epoch": 0.75, + "learning_rate": 4.258527926215178e-05, + "loss": 0.9667, + "step": 1365 + }, + { + "epoch": 0.76, + "learning_rate": 4.253374469511917e-05, + "loss": 0.9999, + "step": 1370 + }, + { + "epoch": 0.76, + "learning_rate": 4.248206307478909e-05, + "loss": 0.9738, + "step": 1375 + }, + { + "epoch": 0.76, + "learning_rate": 4.243023483460875e-05, + "loss": 0.9829, + "step": 1380 + }, + { + "epoch": 0.77, + "learning_rate": 4.237826040925503e-05, + "loss": 1.0107, + "step": 1385 + }, + { + "epoch": 0.77, + "learning_rate": 4.232614023463088e-05, + "loss": 0.9992, + "step": 1390 + }, + { + "epoch": 0.77, + "learning_rate": 4.227387474786159e-05, + "loss": 0.9566, + "step": 1395 + }, + { + "epoch": 0.77, + "learning_rate": 4.222146438729119e-05, + "loss": 0.9722, + "step": 1400 + }, + { + "epoch": 0.78, + "learning_rate": 4.216890959247873e-05, + "loss": 0.988, + "step": 1405 + }, + { + "epoch": 0.78, + "learning_rate": 4.211621080419463e-05, + "loss": 1.0103, + "step": 1410 + }, + { + "epoch": 0.78, + "learning_rate": 4.206336846441695e-05, + "loss": 0.9805, + "step": 1415 + }, + { + "epoch": 0.79, + "learning_rate": 4.201038301632772e-05, + "loss": 1.0177, + "step": 1420 + }, + { + "epoch": 0.79, + "learning_rate": 4.195725490430917e-05, + "loss": 0.9938, + "step": 1425 + }, + { + "epoch": 0.79, + "learning_rate": 4.190398457394007e-05, + "loss": 1.0276, + "step": 1430 + }, + { + "epoch": 0.79, + "learning_rate": 4.1850572471991924e-05, + "loss": 0.9909, + "step": 1435 + }, + { + "epoch": 0.8, + "learning_rate": 4.1797019046425264e-05, + "loss": 0.9794, + "step": 1440 + }, + { + "epoch": 0.8, + "learning_rate": 4.1743324746385914e-05, + "loss": 1.0073, + "step": 1445 + }, + { + "epoch": 0.8, + "learning_rate": 4.1689490022201154e-05, + "loss": 1.0106, + "step": 1450 + }, + { + "epoch": 0.8, + "learning_rate": 4.163551532537601e-05, + "loss": 1.0241, + "step": 1455 + }, + { + "epoch": 0.81, + "learning_rate": 4.1581401108589425e-05, + "loss": 1.0178, + "step": 1460 + }, + { + "epoch": 0.81, + "learning_rate": 4.1527147825690495e-05, + "loss": 0.9426, + "step": 1465 + }, + { + "epoch": 0.81, + "learning_rate": 4.1472755931694626e-05, + "loss": 0.9642, + "step": 1470 + }, + { + "epoch": 0.82, + "learning_rate": 4.141822588277976e-05, + "loss": 0.9678, + "step": 1475 + }, + { + "epoch": 0.82, + "learning_rate": 4.136355813628251e-05, + "loss": 0.9654, + "step": 1480 + }, + { + "epoch": 0.82, + "learning_rate": 4.130875315069435e-05, + "loss": 0.9748, + "step": 1485 + }, + { + "epoch": 0.82, + "learning_rate": 4.125381138565775e-05, + "loss": 0.9321, + "step": 1490 + }, + { + "epoch": 0.83, + "learning_rate": 4.1198733301962346e-05, + "loss": 0.9885, + "step": 1495 + }, + { + "epoch": 0.83, + "learning_rate": 4.114351936154105e-05, + "loss": 0.9385, + "step": 1500 + }, + { + "epoch": 0.83, + "learning_rate": 4.108817002746619e-05, + "loss": 0.9362, + "step": 1505 + }, + { + "epoch": 0.83, + "learning_rate": 4.1032685763945625e-05, + "loss": 0.9764, + "step": 1510 + }, + { + "epoch": 0.84, + "learning_rate": 4.097706703631886e-05, + "loss": 0.961, + "step": 1515 + }, + { + "epoch": 0.84, + "learning_rate": 4.092131431105312e-05, + "loss": 0.9818, + "step": 1520 + }, + { + "epoch": 0.84, + "learning_rate": 4.086542805573945e-05, + "loss": 0.9855, + "step": 1525 + }, + { + "epoch": 0.85, + "learning_rate": 4.080940873908881e-05, + "loss": 0.9618, + "step": 1530 + }, + { + "epoch": 0.85, + "learning_rate": 4.07532568309281e-05, + "loss": 1.0102, + "step": 1535 + }, + { + "epoch": 0.85, + "learning_rate": 4.069697280219628e-05, + "loss": 0.9773, + "step": 1540 + }, + { + "epoch": 0.85, + "learning_rate": 4.0640557124940376e-05, + "loss": 0.9687, + "step": 1545 + }, + { + "epoch": 0.86, + "learning_rate": 4.058401027231152e-05, + "loss": 0.9818, + "step": 1550 + }, + { + "epoch": 0.86, + "learning_rate": 4.052733271856103e-05, + "loss": 0.9911, + "step": 1555 + }, + { + "epoch": 0.86, + "learning_rate": 4.0470524939036355e-05, + "loss": 0.9694, + "step": 1560 + }, + { + "epoch": 0.87, + "learning_rate": 4.0413587410177155e-05, + "loss": 0.9374, + "step": 1565 + }, + { + "epoch": 0.87, + "learning_rate": 4.035652060951128e-05, + "loss": 0.956, + "step": 1570 + }, + { + "epoch": 0.87, + "learning_rate": 4.0299325015650774e-05, + "loss": 0.9491, + "step": 1575 + }, + { + "epoch": 0.87, + "learning_rate": 4.024200110828783e-05, + "loss": 0.9569, + "step": 1580 + }, + { + "epoch": 0.88, + "learning_rate": 4.018454936819082e-05, + "loss": 0.9627, + "step": 1585 + }, + { + "epoch": 0.88, + "learning_rate": 4.012697027720018e-05, + "loss": 0.9922, + "step": 1590 + }, + { + "epoch": 0.88, + "learning_rate": 4.0069264318224506e-05, + "loss": 0.9703, + "step": 1595 + }, + { + "epoch": 0.88, + "learning_rate": 4.0011431975236337e-05, + "loss": 0.9436, + "step": 1600 + }, + { + "epoch": 0.89, + "learning_rate": 3.995347373326822e-05, + "loss": 0.961, + "step": 1605 + }, + { + "epoch": 0.89, + "learning_rate": 3.989539007840861e-05, + "loss": 0.9247, + "step": 1610 + }, + { + "epoch": 0.89, + "learning_rate": 3.983718149779775e-05, + "loss": 0.9537, + "step": 1615 + }, + { + "epoch": 0.9, + "learning_rate": 3.9778848479623656e-05, + "loss": 0.9415, + "step": 1620 + }, + { + "epoch": 0.9, + "learning_rate": 3.972039151311795e-05, + "loss": 0.9954, + "step": 1625 + }, + { + "epoch": 0.9, + "learning_rate": 3.966181108855183e-05, + "loss": 0.9451, + "step": 1630 + }, + { + "epoch": 0.9, + "learning_rate": 3.960310769723189e-05, + "loss": 0.9977, + "step": 1635 + }, + { + "epoch": 0.91, + "learning_rate": 3.9544281831496034e-05, + "loss": 0.9806, + "step": 1640 + }, + { + "epoch": 0.91, + "learning_rate": 3.9485333984709374e-05, + "loss": 0.9851, + "step": 1645 + }, + { + "epoch": 0.91, + "learning_rate": 3.942626465126001e-05, + "loss": 1.0089, + "step": 1650 + }, + { + "epoch": 0.92, + "learning_rate": 3.9367074326555e-05, + "loss": 0.9562, + "step": 1655 + }, + { + "epoch": 0.92, + "learning_rate": 3.930776350701609e-05, + "loss": 0.9892, + "step": 1660 + }, + { + "epoch": 0.92, + "learning_rate": 3.92483326900756e-05, + "loss": 0.9875, + "step": 1665 + }, + { + "epoch": 0.92, + "learning_rate": 3.91887823741723e-05, + "loss": 1.0012, + "step": 1670 + }, + { + "epoch": 0.93, + "learning_rate": 3.9129113058747136e-05, + "loss": 0.97, + "step": 1675 + }, + { + "epoch": 0.93, + "learning_rate": 3.9069325244239095e-05, + "loss": 0.9793, + "step": 1680 + }, + { + "epoch": 0.93, + "learning_rate": 3.900941943208103e-05, + "loss": 0.9864, + "step": 1685 + }, + { + "epoch": 0.93, + "learning_rate": 3.894939612469539e-05, + "loss": 0.9596, + "step": 1690 + }, + { + "epoch": 0.94, + "learning_rate": 3.888925582549006e-05, + "loss": 0.9679, + "step": 1695 + }, + { + "epoch": 0.94, + "learning_rate": 3.882899903885412e-05, + "loss": 0.9395, + "step": 1700 + }, + { + "epoch": 0.94, + "learning_rate": 3.876862627015361e-05, + "loss": 0.9774, + "step": 1705 + }, + { + "epoch": 0.95, + "learning_rate": 3.87081380257273e-05, + "loss": 0.9686, + "step": 1710 + }, + { + "epoch": 0.95, + "learning_rate": 3.864753481288244e-05, + "loss": 0.953, + "step": 1715 + }, + { + "epoch": 0.95, + "learning_rate": 3.8586817139890515e-05, + "loss": 0.9719, + "step": 1720 + }, + { + "epoch": 0.95, + "learning_rate": 3.852598551598294e-05, + "loss": 0.9748, + "step": 1725 + }, + { + "epoch": 0.96, + "learning_rate": 3.8465040451346874e-05, + "loss": 0.9741, + "step": 1730 + }, + { + "epoch": 0.96, + "learning_rate": 3.8403982457120836e-05, + "loss": 0.9747, + "step": 1735 + }, + { + "epoch": 0.96, + "learning_rate": 3.834281204539051e-05, + "loss": 0.9791, + "step": 1740 + }, + { + "epoch": 0.96, + "learning_rate": 3.828152972918438e-05, + "loss": 0.9704, + "step": 1745 + }, + { + "epoch": 0.97, + "learning_rate": 3.82201360224695e-05, + "loss": 0.9556, + "step": 1750 + }, + { + "epoch": 0.97, + "learning_rate": 3.815863144014711e-05, + "loss": 0.9552, + "step": 1755 + }, + { + "epoch": 0.97, + "learning_rate": 3.809701649804834e-05, + "loss": 0.9594, + "step": 1760 + }, + { + "epoch": 0.98, + "learning_rate": 3.8035291712929926e-05, + "loss": 0.959, + "step": 1765 + }, + { + "epoch": 0.98, + "learning_rate": 3.797345760246982e-05, + "loss": 1.0254, + "step": 1770 + }, + { + "epoch": 0.98, + "learning_rate": 3.791151468526289e-05, + "loss": 0.9925, + "step": 1775 + }, + { + "epoch": 0.98, + "learning_rate": 3.784946348081654e-05, + "loss": 0.9516, + "step": 1780 + }, + { + "epoch": 0.99, + "learning_rate": 3.7787304509546365e-05, + "loss": 0.954, + "step": 1785 + }, + { + "epoch": 0.99, + "learning_rate": 3.7725038292771774e-05, + "loss": 0.9465, + "step": 1790 + }, + { + "epoch": 0.99, + "learning_rate": 3.766266535271167e-05, + "loss": 0.9792, + "step": 1795 + }, + { + "epoch": 1.0, + "learning_rate": 3.760018621248e-05, + "loss": 0.964, + "step": 1800 + }, + { + "epoch": 1.0, + "learning_rate": 3.75376013960814e-05, + "loss": 0.9419, + "step": 1805 + }, + { + "epoch": 1.0, + "learning_rate": 3.747491142840681e-05, + "loss": 0.9818, + "step": 1810 + }, + { + "epoch": 1.0, + "learning_rate": 3.741211683522904e-05, + "loss": 0.9153, + "step": 1815 + }, + { + "epoch": 1.01, + "learning_rate": 3.734921814319841e-05, + "loss": 0.955, + "step": 1820 + }, + { + "epoch": 1.01, + "learning_rate": 3.728621587983828e-05, + "loss": 0.9467, + "step": 1825 + }, + { + "epoch": 1.01, + "learning_rate": 3.722311057354067e-05, + "loss": 0.9816, + "step": 1830 + }, + { + "epoch": 1.01, + "learning_rate": 3.715990275356178e-05, + "loss": 0.9727, + "step": 1835 + }, + { + "epoch": 1.02, + "learning_rate": 3.7096592950017617e-05, + "loss": 0.9822, + "step": 1840 + }, + { + "epoch": 1.02, + "learning_rate": 3.703318169387947e-05, + "loss": 0.942, + "step": 1845 + }, + { + "epoch": 1.02, + "learning_rate": 3.696966951696952e-05, + "loss": 0.9306, + "step": 1850 + }, + { + "epoch": 1.03, + "learning_rate": 3.690605695195637e-05, + "loss": 0.9654, + "step": 1855 + }, + { + "epoch": 1.03, + "learning_rate": 3.684234453235054e-05, + "loss": 0.9592, + "step": 1860 + }, + { + "epoch": 1.03, + "learning_rate": 3.677853279250003e-05, + "loss": 0.988, + "step": 1865 + }, + { + "epoch": 1.03, + "learning_rate": 3.671462226758583e-05, + "loss": 0.9462, + "step": 1870 + }, + { + "epoch": 1.04, + "learning_rate": 3.665061349361742e-05, + "loss": 0.9685, + "step": 1875 + }, + { + "epoch": 1.04, + "learning_rate": 3.658650700742828e-05, + "loss": 0.9772, + "step": 1880 + }, + { + "epoch": 1.04, + "learning_rate": 3.6522303346671404e-05, + "loss": 0.9482, + "step": 1885 + }, + { + "epoch": 1.05, + "learning_rate": 3.645800304981477e-05, + "loss": 1.0069, + "step": 1890 + }, + { + "epoch": 1.05, + "learning_rate": 3.639360665613683e-05, + "loss": 0.9003, + "step": 1895 + }, + { + "epoch": 1.05, + "learning_rate": 3.632911470572197e-05, + "loss": 0.9279, + "step": 1900 + }, + { + "epoch": 1.05, + "learning_rate": 3.626452773945603e-05, + "loss": 0.9237, + "step": 1905 + }, + { + "epoch": 1.06, + "learning_rate": 3.619984629902172e-05, + "loss": 1.0086, + "step": 1910 + }, + { + "epoch": 1.06, + "learning_rate": 3.613507092689409e-05, + "loss": 0.9625, + "step": 1915 + }, + { + "epoch": 1.06, + "learning_rate": 3.607020216633599e-05, + "loss": 0.9297, + "step": 1920 + }, + { + "epoch": 1.06, + "learning_rate": 3.60052405613935e-05, + "loss": 0.9718, + "step": 1925 + }, + { + "epoch": 1.07, + "learning_rate": 3.594018665689139e-05, + "loss": 0.9512, + "step": 1930 + }, + { + "epoch": 1.07, + "learning_rate": 3.5875040998428513e-05, + "loss": 0.9923, + "step": 1935 + }, + { + "epoch": 1.07, + "learning_rate": 3.5809804132373253e-05, + "loss": 0.9518, + "step": 1940 + }, + { + "epoch": 1.08, + "learning_rate": 3.574447660585897e-05, + "loss": 0.9324, + "step": 1945 + }, + { + "epoch": 1.08, + "learning_rate": 3.5679058966779344e-05, + "loss": 0.9724, + "step": 1950 + }, + { + "epoch": 1.08, + "learning_rate": 3.561355176378384e-05, + "loss": 0.978, + "step": 1955 + }, + { + "epoch": 1.08, + "learning_rate": 3.554795554627307e-05, + "loss": 0.9893, + "step": 1960 + }, + { + "epoch": 1.09, + "learning_rate": 3.548227086439422e-05, + "loss": 0.967, + "step": 1965 + }, + { + "epoch": 1.09, + "learning_rate": 3.541649826903639e-05, + "loss": 0.9529, + "step": 1970 + }, + { + "epoch": 1.09, + "learning_rate": 3.535063831182602e-05, + "loss": 0.9506, + "step": 1975 + }, + { + "epoch": 1.09, + "learning_rate": 3.528469154512224e-05, + "loss": 0.9525, + "step": 1980 + }, + { + "epoch": 1.1, + "learning_rate": 3.521865852201223e-05, + "loss": 0.9258, + "step": 1985 + }, + { + "epoch": 1.1, + "learning_rate": 3.5152539796306596e-05, + "loss": 0.9417, + "step": 1990 + }, + { + "epoch": 1.1, + "learning_rate": 3.508633592253472e-05, + "loss": 0.9465, + "step": 1995 + }, + { + "epoch": 1.11, + "learning_rate": 3.502004745594011e-05, + "loss": 0.9494, + "step": 2000 + }, + { + "epoch": 1.11, + "learning_rate": 3.4953674952475755e-05, + "loss": 0.9709, + "step": 2005 + }, + { + "epoch": 1.11, + "learning_rate": 3.488721896879943e-05, + "loss": 0.9581, + "step": 2010 + }, + { + "epoch": 1.11, + "learning_rate": 3.4820680062269074e-05, + "loss": 0.974, + "step": 2015 + }, + { + "epoch": 1.12, + "learning_rate": 3.4754058790938046e-05, + "loss": 0.9768, + "step": 2020 + }, + { + "epoch": 1.12, + "learning_rate": 3.468735571355055e-05, + "loss": 0.982, + "step": 2025 + }, + { + "epoch": 1.12, + "learning_rate": 3.4620571389536825e-05, + "loss": 0.984, + "step": 2030 + }, + { + "epoch": 1.13, + "learning_rate": 3.455370637900856e-05, + "loss": 0.9604, + "step": 2035 + }, + { + "epoch": 1.13, + "learning_rate": 3.448676124275414e-05, + "loss": 0.897, + "step": 2040 + }, + { + "epoch": 1.13, + "learning_rate": 3.4419736542233925e-05, + "loss": 0.9968, + "step": 2045 + }, + { + "epoch": 1.13, + "learning_rate": 3.4352632839575616e-05, + "loss": 0.9479, + "step": 2050 + }, + { + "epoch": 1.14, + "learning_rate": 3.428545069756946e-05, + "loss": 0.9724, + "step": 2055 + }, + { + "epoch": 1.14, + "learning_rate": 3.42181906796636e-05, + "loss": 0.9493, + "step": 2060 + }, + { + "epoch": 1.14, + "learning_rate": 3.415085334995927e-05, + "loss": 0.9348, + "step": 2065 + }, + { + "epoch": 1.14, + "learning_rate": 3.408343927320613e-05, + "loss": 0.9702, + "step": 2070 + }, + { + "epoch": 1.15, + "learning_rate": 3.401594901479753e-05, + "loss": 0.9089, + "step": 2075 + }, + { + "epoch": 1.15, + "learning_rate": 3.394838314076572e-05, + "loss": 0.9606, + "step": 2080 + }, + { + "epoch": 1.15, + "learning_rate": 3.3880742217777115e-05, + "loss": 0.9743, + "step": 2085 + }, + { + "epoch": 1.16, + "learning_rate": 3.381302681312759e-05, + "loss": 0.9469, + "step": 2090 + }, + { + "epoch": 1.16, + "learning_rate": 3.374523749473767e-05, + "loss": 0.949, + "step": 2095 + }, + { + "epoch": 1.16, + "learning_rate": 3.367737483114779e-05, + "loss": 0.9567, + "step": 2100 + }, + { + "epoch": 1.16, + "learning_rate": 3.360943939151351e-05, + "loss": 0.9718, + "step": 2105 + }, + { + "epoch": 1.17, + "learning_rate": 3.354143174560078e-05, + "loss": 0.9626, + "step": 2110 + }, + { + "epoch": 1.17, + "learning_rate": 3.3473352463781105e-05, + "loss": 0.9346, + "step": 2115 + }, + { + "epoch": 1.17, + "learning_rate": 3.340520211702681e-05, + "loss": 0.9208, + "step": 2120 + }, + { + "epoch": 1.18, + "learning_rate": 3.333698127690623e-05, + "loss": 0.9856, + "step": 2125 + }, + { + "epoch": 1.18, + "learning_rate": 3.326869051557891e-05, + "loss": 0.9049, + "step": 2130 + }, + { + "epoch": 1.18, + "learning_rate": 3.320033040579082e-05, + "loss": 0.9222, + "step": 2135 + }, + { + "epoch": 1.18, + "learning_rate": 3.3131901520869565e-05, + "loss": 0.9648, + "step": 2140 + }, + { + "epoch": 1.19, + "learning_rate": 3.306340443471951e-05, + "loss": 0.9538, + "step": 2145 + }, + { + "epoch": 1.19, + "learning_rate": 3.299483972181708e-05, + "loss": 0.9314, + "step": 2150 + }, + { + "epoch": 1.19, + "learning_rate": 3.292620795720583e-05, + "loss": 0.9576, + "step": 2155 + }, + { + "epoch": 1.19, + "learning_rate": 3.285750971649167e-05, + "loss": 0.9499, + "step": 2160 + }, + { + "epoch": 1.2, + "learning_rate": 3.278874557583807e-05, + "loss": 0.9568, + "step": 2165 + }, + { + "epoch": 1.2, + "learning_rate": 3.271991611196117e-05, + "loss": 0.9642, + "step": 2170 + }, + { + "epoch": 1.2, + "learning_rate": 3.265102190212497e-05, + "loss": 0.9526, + "step": 2175 + }, + { + "epoch": 1.21, + "learning_rate": 3.258206352413648e-05, + "loss": 0.933, + "step": 2180 + }, + { + "epoch": 1.21, + "learning_rate": 3.2513041556340887e-05, + "loss": 0.9683, + "step": 2185 + }, + { + "epoch": 1.21, + "learning_rate": 3.244395657761671e-05, + "loss": 0.9155, + "step": 2190 + }, + { + "epoch": 1.21, + "learning_rate": 3.2374809167370924e-05, + "loss": 0.9262, + "step": 2195 + }, + { + "epoch": 1.22, + "learning_rate": 3.230559990553409e-05, + "loss": 0.9778, + "step": 2200 + }, + { + "epoch": 1.22, + "learning_rate": 3.2236329372555544e-05, + "loss": 0.9577, + "step": 2205 + }, + { + "epoch": 1.22, + "learning_rate": 3.2166998149398465e-05, + "loss": 0.9286, + "step": 2210 + }, + { + "epoch": 1.22, + "learning_rate": 3.209760681753505e-05, + "loss": 0.9634, + "step": 2215 + }, + { + "epoch": 1.23, + "learning_rate": 3.2028155958941615e-05, + "loss": 0.9467, + "step": 2220 + }, + { + "epoch": 1.23, + "learning_rate": 3.195864615609373e-05, + "loss": 0.9543, + "step": 2225 + }, + { + "epoch": 1.23, + "learning_rate": 3.1889077991961304e-05, + "loss": 0.9914, + "step": 2230 + }, + { + "epoch": 1.24, + "learning_rate": 3.181945205000373e-05, + "loss": 0.9309, + "step": 2235 + }, + { + "epoch": 1.24, + "learning_rate": 3.1749768914164955e-05, + "loss": 0.9299, + "step": 2240 + }, + { + "epoch": 1.24, + "learning_rate": 3.168002916886864e-05, + "loss": 0.9462, + "step": 2245 + }, + { + "epoch": 1.24, + "learning_rate": 3.1610233399013194e-05, + "loss": 0.948, + "step": 2250 + }, + { + "epoch": 1.25, + "learning_rate": 3.15403821899669e-05, + "loss": 0.9335, + "step": 2255 + }, + { + "epoch": 1.25, + "learning_rate": 3.147047612756302e-05, + "loss": 0.952, + "step": 2260 + }, + { + "epoch": 1.25, + "learning_rate": 3.140051579809484e-05, + "loss": 0.9532, + "step": 2265 + }, + { + "epoch": 1.26, + "learning_rate": 3.133050178831079e-05, + "loss": 0.9853, + "step": 2270 + }, + { + "epoch": 1.26, + "learning_rate": 3.12604346854095e-05, + "loss": 0.8925, + "step": 2275 + }, + { + "epoch": 1.26, + "learning_rate": 3.119031507703491e-05, + "loss": 0.942, + "step": 2280 + }, + { + "epoch": 1.26, + "learning_rate": 3.112014355127129e-05, + "loss": 0.9132, + "step": 2285 + }, + { + "epoch": 1.27, + "learning_rate": 3.104992069663835e-05, + "loss": 0.9335, + "step": 2290 + }, + { + "epoch": 1.27, + "learning_rate": 3.0979647102086273e-05, + "loss": 0.9403, + "step": 2295 + }, + { + "epoch": 1.27, + "learning_rate": 3.090932335699081e-05, + "loss": 0.9246, + "step": 2300 + }, + { + "epoch": 1.27, + "learning_rate": 3.083895005114831e-05, + "loss": 0.912, + "step": 2305 + }, + { + "epoch": 1.28, + "learning_rate": 3.076852777477079e-05, + "loss": 0.9334, + "step": 2310 + }, + { + "epoch": 1.28, + "learning_rate": 3.069805711848096e-05, + "loss": 0.933, + "step": 2315 + }, + { + "epoch": 1.28, + "learning_rate": 3.062753867330729e-05, + "loss": 0.9348, + "step": 2320 + }, + { + "epoch": 1.29, + "learning_rate": 3.055697303067905e-05, + "loss": 0.9997, + "step": 2325 + }, + { + "epoch": 1.29, + "learning_rate": 3.048636078242137e-05, + "loss": 0.9196, + "step": 2330 + }, + { + "epoch": 1.29, + "learning_rate": 3.0415702520750235e-05, + "loss": 0.9735, + "step": 2335 + }, + { + "epoch": 1.29, + "learning_rate": 3.0344998838267525e-05, + "loss": 0.9395, + "step": 2340 + }, + { + "epoch": 1.3, + "learning_rate": 3.0274250327956093e-05, + "loss": 0.9455, + "step": 2345 + }, + { + "epoch": 1.3, + "learning_rate": 3.020345758317474e-05, + "loss": 0.968, + "step": 2350 + }, + { + "epoch": 1.3, + "learning_rate": 3.0132621197653245e-05, + "loss": 0.9403, + "step": 2355 + }, + { + "epoch": 1.3, + "learning_rate": 3.0061741765487418e-05, + "loss": 0.9267, + "step": 2360 + }, + { + "epoch": 1.31, + "learning_rate": 2.9990819881134073e-05, + "loss": 0.9734, + "step": 2365 + }, + { + "epoch": 1.31, + "learning_rate": 2.9919856139406093e-05, + "loss": 0.949, + "step": 2370 + }, + { + "epoch": 1.31, + "learning_rate": 2.9848851135467386e-05, + "loss": 0.9464, + "step": 2375 + }, + { + "epoch": 1.32, + "learning_rate": 2.977780546482794e-05, + "loss": 0.9709, + "step": 2380 + }, + { + "epoch": 1.32, + "learning_rate": 2.9706719723338795e-05, + "loss": 0.9202, + "step": 2385 + }, + { + "epoch": 1.32, + "learning_rate": 2.9635594507187074e-05, + "loss": 0.9625, + "step": 2390 + }, + { + "epoch": 1.32, + "learning_rate": 2.956443041289096e-05, + "loss": 0.9307, + "step": 2395 + }, + { + "epoch": 1.33, + "learning_rate": 2.9493228037294702e-05, + "loss": 0.907, + "step": 2400 + }, + { + "epoch": 1.33, + "learning_rate": 2.9421987977563613e-05, + "loss": 0.9487, + "step": 2405 + }, + { + "epoch": 1.33, + "learning_rate": 2.935071083117907e-05, + "loss": 0.9259, + "step": 2410 + }, + { + "epoch": 1.34, + "learning_rate": 2.9279397195933457e-05, + "loss": 0.9357, + "step": 2415 + }, + { + "epoch": 1.34, + "learning_rate": 2.920804766992521e-05, + "loss": 0.9149, + "step": 2420 + }, + { + "epoch": 1.34, + "learning_rate": 2.9136662851553787e-05, + "loss": 0.9664, + "step": 2425 + }, + { + "epoch": 1.34, + "learning_rate": 2.906524333951461e-05, + "loss": 0.973, + "step": 2430 + }, + { + "epoch": 1.35, + "learning_rate": 2.899378973279409e-05, + "loss": 0.9797, + "step": 2435 + }, + { + "epoch": 1.35, + "learning_rate": 2.892230263066459e-05, + "loss": 0.9604, + "step": 2440 + }, + { + "epoch": 1.35, + "learning_rate": 2.885078263267938e-05, + "loss": 0.9147, + "step": 2445 + }, + { + "epoch": 1.35, + "learning_rate": 2.8779230338667634e-05, + "loss": 0.9827, + "step": 2450 + }, + { + "epoch": 1.36, + "learning_rate": 2.870764634872939e-05, + "loss": 0.9362, + "step": 2455 + }, + { + "epoch": 1.36, + "learning_rate": 2.86360312632305e-05, + "loss": 0.9373, + "step": 2460 + }, + { + "epoch": 1.36, + "learning_rate": 2.8564385682797622e-05, + "loss": 0.955, + "step": 2465 + }, + { + "epoch": 1.37, + "learning_rate": 2.8492710208313177e-05, + "loss": 0.9108, + "step": 2470 + }, + { + "epoch": 1.37, + "learning_rate": 2.8421005440910303e-05, + "loss": 0.9259, + "step": 2475 + }, + { + "epoch": 1.37, + "learning_rate": 2.8349271981967797e-05, + "loss": 0.9333, + "step": 2480 + }, + { + "epoch": 1.37, + "learning_rate": 2.8277510433105102e-05, + "loss": 0.9437, + "step": 2485 + }, + { + "epoch": 1.38, + "learning_rate": 2.820572139617725e-05, + "loss": 0.9189, + "step": 2490 + }, + { + "epoch": 1.38, + "learning_rate": 2.8133905473269802e-05, + "loss": 0.9522, + "step": 2495 + }, + { + "epoch": 1.38, + "learning_rate": 2.8062063266693818e-05, + "loss": 0.9352, + "step": 2500 + }, + { + "epoch": 1.39, + "learning_rate": 2.7990195378980784e-05, + "loss": 0.9232, + "step": 2505 + }, + { + "epoch": 1.39, + "learning_rate": 2.7918302412877583e-05, + "loss": 0.8807, + "step": 2510 + }, + { + "epoch": 1.39, + "learning_rate": 2.7846384971341427e-05, + "loss": 0.9231, + "step": 2515 + }, + { + "epoch": 1.39, + "learning_rate": 2.7774443657534788e-05, + "loss": 0.9253, + "step": 2520 + }, + { + "epoch": 1.4, + "learning_rate": 2.770247907482036e-05, + "loss": 0.9646, + "step": 2525 + }, + { + "epoch": 1.4, + "learning_rate": 2.763049182675599e-05, + "loss": 0.9575, + "step": 2530 + }, + { + "epoch": 1.4, + "learning_rate": 2.7558482517089617e-05, + "loss": 0.9234, + "step": 2535 + }, + { + "epoch": 1.4, + "learning_rate": 2.748645174975421e-05, + "loss": 0.9215, + "step": 2540 + }, + { + "epoch": 1.41, + "learning_rate": 2.74144001288627e-05, + "loss": 0.959, + "step": 2545 + }, + { + "epoch": 1.41, + "learning_rate": 2.7342328258702894e-05, + "loss": 0.9509, + "step": 2550 + }, + { + "epoch": 1.41, + "learning_rate": 2.727023674373246e-05, + "loss": 0.8989, + "step": 2555 + }, + { + "epoch": 1.42, + "learning_rate": 2.7198126188573807e-05, + "loss": 0.9653, + "step": 2560 + }, + { + "epoch": 1.42, + "learning_rate": 2.7125997198009028e-05, + "loss": 0.9046, + "step": 2565 + }, + { + "epoch": 1.42, + "learning_rate": 2.7053850376974848e-05, + "loss": 0.9318, + "step": 2570 + }, + { + "epoch": 1.42, + "learning_rate": 2.6981686330557516e-05, + "loss": 0.9292, + "step": 2575 + }, + { + "epoch": 1.43, + "learning_rate": 2.6909505663987756e-05, + "loss": 0.99, + "step": 2580 + }, + { + "epoch": 1.43, + "learning_rate": 2.6837308982635678e-05, + "loss": 0.9737, + "step": 2585 + }, + { + "epoch": 1.43, + "learning_rate": 2.6765096892005726e-05, + "loss": 0.9649, + "step": 2590 + }, + { + "epoch": 1.43, + "learning_rate": 2.6692869997731545e-05, + "loss": 0.9687, + "step": 2595 + }, + { + "epoch": 1.44, + "learning_rate": 2.6620628905570964e-05, + "loss": 0.9708, + "step": 2600 + }, + { + "epoch": 1.44, + "learning_rate": 2.6548374221400884e-05, + "loss": 0.9498, + "step": 2605 + }, + { + "epoch": 1.44, + "learning_rate": 2.6476106551212188e-05, + "loss": 0.9512, + "step": 2610 + }, + { + "epoch": 1.45, + "learning_rate": 2.6403826501104682e-05, + "loss": 0.964, + "step": 2615 + }, + { + "epoch": 1.45, + "learning_rate": 2.6331534677281998e-05, + "loss": 0.9321, + "step": 2620 + }, + { + "epoch": 1.45, + "learning_rate": 2.6259231686046508e-05, + "loss": 0.9032, + "step": 2625 + }, + { + "epoch": 1.45, + "learning_rate": 2.6186918133794252e-05, + "loss": 0.9543, + "step": 2630 + }, + { + "epoch": 1.46, + "learning_rate": 2.6114594627009847e-05, + "loss": 0.9355, + "step": 2635 + }, + { + "epoch": 1.46, + "learning_rate": 2.604226177226137e-05, + "loss": 0.9111, + "step": 2640 + }, + { + "epoch": 1.46, + "learning_rate": 2.596992017619534e-05, + "loss": 0.9896, + "step": 2645 + }, + { + "epoch": 1.47, + "learning_rate": 2.589757044553155e-05, + "loss": 0.9715, + "step": 2650 + }, + { + "epoch": 1.47, + "learning_rate": 2.5825213187058045e-05, + "loss": 0.8911, + "step": 2655 + }, + { + "epoch": 1.47, + "learning_rate": 2.5752849007625986e-05, + "loss": 0.9446, + "step": 2660 + }, + { + "epoch": 1.47, + "learning_rate": 2.568047851414459e-05, + "loss": 0.9453, + "step": 2665 + }, + { + "epoch": 1.48, + "learning_rate": 2.5608102313576027e-05, + "loss": 0.9244, + "step": 2670 + }, + { + "epoch": 1.48, + "learning_rate": 2.553572101293033e-05, + "loss": 0.9478, + "step": 2675 + }, + { + "epoch": 1.48, + "learning_rate": 2.546333521926031e-05, + "loss": 0.9708, + "step": 2680 + }, + { + "epoch": 1.48, + "learning_rate": 2.5390945539656445e-05, + "loss": 0.9266, + "step": 2685 + }, + { + "epoch": 1.49, + "learning_rate": 2.5318552581241822e-05, + "loss": 0.9199, + "step": 2690 + }, + { + "epoch": 1.49, + "learning_rate": 2.524615695116702e-05, + "loss": 0.9666, + "step": 2695 + }, + { + "epoch": 1.49, + "learning_rate": 2.5173759256605027e-05, + "loss": 0.9247, + "step": 2700 + }, + { + "epoch": 1.5, + "learning_rate": 2.510136010474614e-05, + "loss": 0.9534, + "step": 2705 + }, + { + "epoch": 1.5, + "learning_rate": 2.5028960102792887e-05, + "loss": 0.9502, + "step": 2710 + }, + { + "epoch": 1.5, + "learning_rate": 2.4971039897207112e-05, + "loss": 0.9331, + "step": 2715 + }, + { + "epoch": 1.5, + "learning_rate": 2.4898639895253865e-05, + "loss": 0.9806, + "step": 2720 + }, + { + "epoch": 1.51, + "learning_rate": 2.4826240743394982e-05, + "loss": 0.9525, + "step": 2725 + }, + { + "epoch": 1.51, + "learning_rate": 2.4753843048832985e-05, + "loss": 0.937, + "step": 2730 + }, + { + "epoch": 1.51, + "learning_rate": 2.4681447418758187e-05, + "loss": 0.935, + "step": 2735 + }, + { + "epoch": 1.52, + "learning_rate": 2.460905446034356e-05, + "loss": 0.9662, + "step": 2740 + }, + { + "epoch": 1.52, + "learning_rate": 2.45366647807397e-05, + "loss": 0.9249, + "step": 2745 + }, + { + "epoch": 1.52, + "learning_rate": 2.446427898706967e-05, + "loss": 0.9673, + "step": 2750 + }, + { + "epoch": 1.52, + "learning_rate": 2.439189768642398e-05, + "loss": 0.9316, + "step": 2755 + }, + { + "epoch": 1.53, + "learning_rate": 2.431952148585541e-05, + "loss": 0.9297, + "step": 2760 + }, + { + "epoch": 1.53, + "learning_rate": 2.424715099237402e-05, + "loss": 0.9278, + "step": 2765 + }, + { + "epoch": 1.53, + "learning_rate": 2.4174786812941968e-05, + "loss": 0.8954, + "step": 2770 + }, + { + "epoch": 1.53, + "learning_rate": 2.4102429554468456e-05, + "loss": 0.9586, + "step": 2775 + }, + { + "epoch": 1.54, + "learning_rate": 2.4030079823804673e-05, + "loss": 0.9119, + "step": 2780 + }, + { + "epoch": 1.54, + "learning_rate": 2.395773822773863e-05, + "loss": 0.8949, + "step": 2785 + }, + { + "epoch": 1.54, + "learning_rate": 2.3885405372990166e-05, + "loss": 0.9506, + "step": 2790 + }, + { + "epoch": 1.55, + "learning_rate": 2.3813081866205754e-05, + "loss": 0.9087, + "step": 2795 + }, + { + "epoch": 1.55, + "learning_rate": 2.3740768313953494e-05, + "loss": 0.923, + "step": 2800 + }, + { + "epoch": 1.55, + "learning_rate": 2.3668465322718004e-05, + "loss": 1.0212, + "step": 2805 + }, + { + "epoch": 1.55, + "learning_rate": 2.359617349889532e-05, + "loss": 0.9182, + "step": 2810 + }, + { + "epoch": 1.56, + "learning_rate": 2.3523893448787818e-05, + "loss": 0.9316, + "step": 2815 + }, + { + "epoch": 1.56, + "learning_rate": 2.3451625778599122e-05, + "loss": 0.94, + "step": 2820 + }, + { + "epoch": 1.56, + "learning_rate": 2.3379371094429038e-05, + "loss": 0.9181, + "step": 2825 + }, + { + "epoch": 1.56, + "learning_rate": 2.3307130002268457e-05, + "loss": 0.937, + "step": 2830 + }, + { + "epoch": 1.57, + "learning_rate": 2.3234903107994287e-05, + "loss": 0.9026, + "step": 2835 + }, + { + "epoch": 1.57, + "learning_rate": 2.3162691017364317e-05, + "loss": 0.954, + "step": 2840 + }, + { + "epoch": 1.57, + "learning_rate": 2.3090494336012253e-05, + "loss": 0.9661, + "step": 2845 + }, + { + "epoch": 1.58, + "learning_rate": 2.3018313669442483e-05, + "loss": 0.9127, + "step": 2850 + }, + { + "epoch": 1.58, + "learning_rate": 2.2946149623025158e-05, + "loss": 0.9317, + "step": 2855 + }, + { + "epoch": 1.58, + "learning_rate": 2.2874002801990978e-05, + "loss": 0.9856, + "step": 2860 + }, + { + "epoch": 1.58, + "learning_rate": 2.28018738114262e-05, + "loss": 1.0021, + "step": 2865 + }, + { + "epoch": 1.59, + "learning_rate": 2.272976325626755e-05, + "loss": 0.9655, + "step": 2870 + }, + { + "epoch": 1.59, + "learning_rate": 2.265767174129711e-05, + "loss": 0.9619, + "step": 2875 + }, + { + "epoch": 1.59, + "learning_rate": 2.2585599871137313e-05, + "loss": 0.9383, + "step": 2880 + }, + { + "epoch": 1.6, + "learning_rate": 2.251354825024579e-05, + "loss": 0.9332, + "step": 2885 + }, + { + "epoch": 1.6, + "learning_rate": 2.244151748291039e-05, + "loss": 0.9544, + "step": 2890 + }, + { + "epoch": 1.6, + "learning_rate": 2.236950817324401e-05, + "loss": 0.9343, + "step": 2895 + }, + { + "epoch": 1.6, + "learning_rate": 2.2297520925179647e-05, + "loss": 0.9189, + "step": 2900 + }, + { + "epoch": 1.61, + "learning_rate": 2.222555634246521e-05, + "loss": 0.9341, + "step": 2905 + }, + { + "epoch": 1.61, + "learning_rate": 2.215361502865858e-05, + "loss": 0.9567, + "step": 2910 + }, + { + "epoch": 1.61, + "learning_rate": 2.2081697587122423e-05, + "loss": 0.9047, + "step": 2915 + }, + { + "epoch": 1.61, + "learning_rate": 2.200980462101922e-05, + "loss": 0.9126, + "step": 2920 + }, + { + "epoch": 1.62, + "learning_rate": 2.1937936733306195e-05, + "loss": 0.9523, + "step": 2925 + }, + { + "epoch": 1.62, + "learning_rate": 2.18660945267302e-05, + "loss": 0.8802, + "step": 2930 + }, + { + "epoch": 1.62, + "learning_rate": 2.179427860382276e-05, + "loss": 0.9197, + "step": 2935 + }, + { + "epoch": 1.63, + "learning_rate": 2.1722489566894903e-05, + "loss": 0.9255, + "step": 2940 + }, + { + "epoch": 1.63, + "learning_rate": 2.1650728018032206e-05, + "loss": 0.8921, + "step": 2945 + }, + { + "epoch": 1.63, + "learning_rate": 2.15789945590897e-05, + "loss": 0.9607, + "step": 2950 + }, + { + "epoch": 1.63, + "learning_rate": 2.150728979168683e-05, + "loss": 0.9755, + "step": 2955 + }, + { + "epoch": 1.64, + "learning_rate": 2.1435614317202384e-05, + "loss": 0.9943, + "step": 2960 + }, + { + "epoch": 1.64, + "learning_rate": 2.1363968736769508e-05, + "loss": 0.9463, + "step": 2965 + }, + { + "epoch": 1.64, + "learning_rate": 2.1292353651270617e-05, + "loss": 0.9107, + "step": 2970 + }, + { + "epoch": 1.65, + "learning_rate": 2.1220769661332365e-05, + "loss": 0.9311, + "step": 2975 + }, + { + "epoch": 1.65, + "learning_rate": 2.1149217367320622e-05, + "loss": 0.9459, + "step": 2980 + }, + { + "epoch": 1.65, + "learning_rate": 2.107769736933541e-05, + "loss": 0.9439, + "step": 2985 + }, + { + "epoch": 1.65, + "learning_rate": 2.100621026720591e-05, + "loss": 0.9719, + "step": 2990 + }, + { + "epoch": 1.66, + "learning_rate": 2.093475666048539e-05, + "loss": 0.9569, + "step": 2995 + }, + { + "epoch": 1.66, + "learning_rate": 2.0863337148446222e-05, + "loss": 0.9308, + "step": 3000 + }, + { + "epoch": 1.66, + "learning_rate": 2.07919523300748e-05, + "loss": 0.9269, + "step": 3005 + }, + { + "epoch": 1.66, + "learning_rate": 2.0720602804066552e-05, + "loss": 0.9358, + "step": 3010 + }, + { + "epoch": 1.67, + "learning_rate": 2.0649289168820943e-05, + "loss": 0.9291, + "step": 3015 + }, + { + "epoch": 1.67, + "learning_rate": 2.0578012022436386e-05, + "loss": 0.969, + "step": 3020 + }, + { + "epoch": 1.67, + "learning_rate": 2.0506771962705304e-05, + "loss": 0.9319, + "step": 3025 + }, + { + "epoch": 1.68, + "learning_rate": 2.0435569587109042e-05, + "loss": 0.9574, + "step": 3030 + }, + { + "epoch": 1.68, + "learning_rate": 2.036440549281293e-05, + "loss": 0.9654, + "step": 3035 + }, + { + "epoch": 1.68, + "learning_rate": 2.0293280276661204e-05, + "loss": 0.9742, + "step": 3040 + }, + { + "epoch": 1.68, + "learning_rate": 2.0222194535172067e-05, + "loss": 0.9014, + "step": 3045 + }, + { + "epoch": 1.69, + "learning_rate": 2.0151148864532623e-05, + "loss": 0.9246, + "step": 3050 + }, + { + "epoch": 1.69, + "learning_rate": 2.0080143860593913e-05, + "loss": 0.9522, + "step": 3055 + }, + { + "epoch": 1.69, + "learning_rate": 2.0009180118865933e-05, + "loss": 0.9567, + "step": 3060 + }, + { + "epoch": 1.69, + "learning_rate": 1.9938258234512588e-05, + "loss": 0.983, + "step": 3065 + }, + { + "epoch": 1.7, + "learning_rate": 1.9867378802346764e-05, + "loss": 0.8722, + "step": 3070 + }, + { + "epoch": 1.7, + "learning_rate": 1.979654241682527e-05, + "loss": 0.9122, + "step": 3075 + }, + { + "epoch": 1.7, + "learning_rate": 1.972574967204391e-05, + "loss": 0.9362, + "step": 3080 + }, + { + "epoch": 1.71, + "learning_rate": 1.9655001161732478e-05, + "loss": 0.8944, + "step": 3085 + }, + { + "epoch": 1.71, + "learning_rate": 1.9584297479249774e-05, + "loss": 0.9329, + "step": 3090 + }, + { + "epoch": 1.71, + "learning_rate": 1.9513639217578636e-05, + "loss": 0.9707, + "step": 3095 + }, + { + "epoch": 1.71, + "learning_rate": 1.9443026969320955e-05, + "loss": 0.9367, + "step": 3100 + }, + { + "epoch": 1.72, + "learning_rate": 1.937246132669272e-05, + "loss": 0.896, + "step": 3105 + }, + { + "epoch": 1.72, + "learning_rate": 1.9301942881519047e-05, + "loss": 0.97, + "step": 3110 + }, + { + "epoch": 1.72, + "learning_rate": 1.9231472225229216e-05, + "loss": 0.9638, + "step": 3115 + }, + { + "epoch": 1.73, + "learning_rate": 1.9161049948851684e-05, + "loss": 0.9561, + "step": 3120 + }, + { + "epoch": 1.73, + "learning_rate": 1.9090676643009193e-05, + "loss": 0.9734, + "step": 3125 + }, + { + "epoch": 1.73, + "learning_rate": 1.902035289791373e-05, + "loss": 0.9651, + "step": 3130 + }, + { + "epoch": 1.73, + "learning_rate": 1.8950079303361658e-05, + "loss": 0.9489, + "step": 3135 + }, + { + "epoch": 1.74, + "learning_rate": 1.8879856448728723e-05, + "loss": 0.9893, + "step": 3140 + }, + { + "epoch": 1.74, + "learning_rate": 1.8809684922965097e-05, + "loss": 0.9549, + "step": 3145 + }, + { + "epoch": 1.74, + "learning_rate": 1.8739565314590507e-05, + "loss": 0.9196, + "step": 3150 + }, + { + "epoch": 1.74, + "learning_rate": 1.8669498211689216e-05, + "loss": 0.9568, + "step": 3155 + }, + { + "epoch": 1.75, + "learning_rate": 1.859948420190517e-05, + "loss": 0.904, + "step": 3160 + }, + { + "epoch": 1.75, + "learning_rate": 1.852952387243698e-05, + "loss": 0.9375, + "step": 3165 + }, + { + "epoch": 1.75, + "learning_rate": 1.8459617810033096e-05, + "loss": 0.9048, + "step": 3170 + }, + { + "epoch": 1.76, + "learning_rate": 1.83897666009868e-05, + "loss": 0.9514, + "step": 3175 + }, + { + "epoch": 1.76, + "learning_rate": 1.8319970831131363e-05, + "loss": 0.8855, + "step": 3180 + }, + { + "epoch": 1.76, + "learning_rate": 1.825023108583505e-05, + "loss": 0.9119, + "step": 3185 + }, + { + "epoch": 1.76, + "learning_rate": 1.818054794999628e-05, + "loss": 0.9733, + "step": 3190 + }, + { + "epoch": 1.77, + "learning_rate": 1.8110922008038705e-05, + "loss": 0.9597, + "step": 3195 + }, + { + "epoch": 1.77, + "learning_rate": 1.8041353843906275e-05, + "loss": 0.8952, + "step": 3200 + }, + { + "epoch": 1.77, + "learning_rate": 1.797184404105839e-05, + "loss": 0.8934, + "step": 3205 + }, + { + "epoch": 1.77, + "learning_rate": 1.7902393182464955e-05, + "loss": 0.9299, + "step": 3210 + }, + { + "epoch": 1.78, + "learning_rate": 1.7833001850601544e-05, + "loss": 0.9247, + "step": 3215 + }, + { + "epoch": 1.78, + "learning_rate": 1.7763670627444465e-05, + "loss": 0.9672, + "step": 3220 + }, + { + "epoch": 1.78, + "learning_rate": 1.7694400094465913e-05, + "loss": 0.9451, + "step": 3225 + }, + { + "epoch": 1.79, + "learning_rate": 1.7625190832629085e-05, + "loss": 0.9294, + "step": 3230 + }, + { + "epoch": 1.79, + "learning_rate": 1.7556043422383293e-05, + "loss": 0.9145, + "step": 3235 + }, + { + "epoch": 1.79, + "learning_rate": 1.7486958443659112e-05, + "loss": 0.9508, + "step": 3240 + }, + { + "epoch": 1.79, + "learning_rate": 1.7417936475863526e-05, + "loss": 0.8725, + "step": 3245 + }, + { + "epoch": 1.8, + "learning_rate": 1.7348978097875036e-05, + "loss": 0.9195, + "step": 3250 + }, + { + "epoch": 1.8, + "learning_rate": 1.728008388803883e-05, + "loss": 0.933, + "step": 3255 + }, + { + "epoch": 1.8, + "learning_rate": 1.7211254424161933e-05, + "loss": 0.9747, + "step": 3260 + }, + { + "epoch": 1.81, + "learning_rate": 1.7142490283508324e-05, + "loss": 0.9168, + "step": 3265 + }, + { + "epoch": 1.81, + "learning_rate": 1.707379204279418e-05, + "loss": 0.9844, + "step": 3270 + }, + { + "epoch": 1.81, + "learning_rate": 1.700516027818293e-05, + "loss": 0.9071, + "step": 3275 + }, + { + "epoch": 1.81, + "learning_rate": 1.6936595565280488e-05, + "loss": 0.9311, + "step": 3280 + }, + { + "epoch": 1.82, + "learning_rate": 1.686809847913045e-05, + "loss": 0.9419, + "step": 3285 + }, + { + "epoch": 1.82, + "learning_rate": 1.679966959420918e-05, + "loss": 0.9679, + "step": 3290 + }, + { + "epoch": 1.82, + "learning_rate": 1.67313094844211e-05, + "loss": 0.9601, + "step": 3295 + }, + { + "epoch": 1.82, + "learning_rate": 1.6663018723093774e-05, + "loss": 0.9022, + "step": 3300 + }, + { + "epoch": 1.83, + "learning_rate": 1.6594797882973196e-05, + "loss": 0.8925, + "step": 3305 + }, + { + "epoch": 1.83, + "learning_rate": 1.6526647536218894e-05, + "loss": 0.9238, + "step": 3310 + }, + { + "epoch": 1.83, + "learning_rate": 1.6458568254399225e-05, + "loss": 0.9632, + "step": 3315 + }, + { + "epoch": 1.84, + "learning_rate": 1.6390560608486496e-05, + "loss": 0.9164, + "step": 3320 + }, + { + "epoch": 1.84, + "learning_rate": 1.6322625168852217e-05, + "loss": 0.9505, + "step": 3325 + }, + { + "epoch": 1.84, + "learning_rate": 1.6254762505262338e-05, + "loss": 0.9622, + "step": 3330 + }, + { + "epoch": 1.84, + "learning_rate": 1.618697318687241e-05, + "loss": 0.9204, + "step": 3335 + }, + { + "epoch": 1.85, + "learning_rate": 1.6119257782222895e-05, + "loss": 0.9504, + "step": 3340 + }, + { + "epoch": 1.85, + "learning_rate": 1.6051616859234285e-05, + "loss": 0.9384, + "step": 3345 + }, + { + "epoch": 1.85, + "learning_rate": 1.5984050985202474e-05, + "loss": 0.9374, + "step": 3350 + }, + { + "epoch": 1.86, + "learning_rate": 1.591656072679387e-05, + "loss": 0.9937, + "step": 3355 + }, + { + "epoch": 1.86, + "learning_rate": 1.5849146650040737e-05, + "loss": 0.9587, + "step": 3360 + }, + { + "epoch": 1.86, + "learning_rate": 1.5781809320336412e-05, + "loss": 0.9312, + "step": 3365 + }, + { + "epoch": 1.86, + "learning_rate": 1.5714549302430536e-05, + "loss": 0.9343, + "step": 3370 + }, + { + "epoch": 1.87, + "learning_rate": 1.5647367160424393e-05, + "loss": 0.9419, + "step": 3375 + }, + { + "epoch": 1.87, + "learning_rate": 1.558026345776608e-05, + "loss": 0.9371, + "step": 3380 + }, + { + "epoch": 1.87, + "learning_rate": 1.551323875724587e-05, + "loss": 0.913, + "step": 3385 + }, + { + "epoch": 1.87, + "learning_rate": 1.5446293620991437e-05, + "loss": 0.9195, + "step": 3390 + }, + { + "epoch": 1.88, + "learning_rate": 1.5379428610463174e-05, + "loss": 0.9164, + "step": 3395 + }, + { + "epoch": 1.88, + "learning_rate": 1.531264428644945e-05, + "loss": 0.9256, + "step": 3400 + }, + { + "epoch": 1.88, + "learning_rate": 1.5245941209061953e-05, + "loss": 0.9658, + "step": 3405 + }, + { + "epoch": 1.89, + "learning_rate": 1.517931993773094e-05, + "loss": 0.9397, + "step": 3410 + }, + { + "epoch": 1.89, + "learning_rate": 1.5112781031200569e-05, + "loss": 0.915, + "step": 3415 + }, + { + "epoch": 1.89, + "learning_rate": 1.5046325047524251e-05, + "loss": 0.9116, + "step": 3420 + }, + { + "epoch": 1.89, + "learning_rate": 1.4979952544059888e-05, + "loss": 0.9192, + "step": 3425 + }, + { + "epoch": 1.9, + "learning_rate": 1.4913664077465289e-05, + "loss": 0.924, + "step": 3430 + }, + { + "epoch": 1.9, + "learning_rate": 1.4847460203693408e-05, + "loss": 0.9579, + "step": 3435 + }, + { + "epoch": 1.9, + "learning_rate": 1.4781341477987776e-05, + "loss": 0.9816, + "step": 3440 + }, + { + "epoch": 1.9, + "learning_rate": 1.4715308454877758e-05, + "loss": 0.9412, + "step": 3445 + }, + { + "epoch": 1.91, + "learning_rate": 1.4649361688173979e-05, + "loss": 0.9413, + "step": 3450 + }, + { + "epoch": 1.91, + "learning_rate": 1.458350173096361e-05, + "loss": 0.9425, + "step": 3455 + }, + { + "epoch": 1.91, + "learning_rate": 1.4517729135605795e-05, + "loss": 0.9132, + "step": 3460 + }, + { + "epoch": 1.92, + "learning_rate": 1.4452044453726942e-05, + "loss": 0.9455, + "step": 3465 + }, + { + "epoch": 1.92, + "learning_rate": 1.4386448236216174e-05, + "loss": 0.9543, + "step": 3470 + }, + { + "epoch": 1.92, + "learning_rate": 1.4320941033220667e-05, + "loss": 0.9118, + "step": 3475 + }, + { + "epoch": 1.92, + "learning_rate": 1.4255523394141041e-05, + "loss": 0.9425, + "step": 3480 + }, + { + "epoch": 1.93, + "learning_rate": 1.4190195867626749e-05, + "loss": 0.8958, + "step": 3485 + }, + { + "epoch": 1.93, + "learning_rate": 1.4124959001571497e-05, + "loss": 0.944, + "step": 3490 + }, + { + "epoch": 1.93, + "learning_rate": 1.4059813343108616e-05, + "loss": 0.9611, + "step": 3495 + }, + { + "epoch": 1.94, + "learning_rate": 1.3994759438606501e-05, + "loss": 0.9448, + "step": 3500 + }, + { + "epoch": 1.94, + "learning_rate": 1.3929797833664013e-05, + "loss": 0.9659, + "step": 3505 + }, + { + "epoch": 1.94, + "learning_rate": 1.3864929073105922e-05, + "loss": 0.9178, + "step": 3510 + }, + { + "epoch": 1.94, + "learning_rate": 1.3800153700978282e-05, + "loss": 0.8965, + "step": 3515 + }, + { + "epoch": 1.95, + "learning_rate": 1.373547226054398e-05, + "loss": 0.9198, + "step": 3520 + }, + { + "epoch": 1.95, + "learning_rate": 1.367088529427803e-05, + "loss": 0.9398, + "step": 3525 + }, + { + "epoch": 1.95, + "learning_rate": 1.3606393343863182e-05, + "loss": 0.9423, + "step": 3530 + }, + { + "epoch": 1.95, + "learning_rate": 1.3541996950185227e-05, + "loss": 0.9592, + "step": 3535 + }, + { + "epoch": 1.96, + "learning_rate": 1.3477696653328598e-05, + "loss": 0.9489, + "step": 3540 + }, + { + "epoch": 1.96, + "learning_rate": 1.3413492992571713e-05, + "loss": 0.963, + "step": 3545 + }, + { + "epoch": 1.96, + "learning_rate": 1.3349386506382586e-05, + "loss": 0.9449, + "step": 3550 + }, + { + "epoch": 1.97, + "learning_rate": 1.3285377732414172e-05, + "loss": 0.9043, + "step": 3555 + }, + { + "epoch": 1.97, + "learning_rate": 1.3221467207499972e-05, + "loss": 0.9362, + "step": 3560 + }, + { + "epoch": 1.97, + "learning_rate": 1.3157655467649463e-05, + "loss": 0.945, + "step": 3565 + }, + { + "epoch": 1.97, + "learning_rate": 1.3093943048043634e-05, + "loss": 0.8693, + "step": 3570 + }, + { + "epoch": 1.98, + "learning_rate": 1.3030330483030479e-05, + "loss": 0.9659, + "step": 3575 + }, + { + "epoch": 1.98, + "learning_rate": 1.2966818306120535e-05, + "loss": 0.9036, + "step": 3580 + }, + { + "epoch": 1.98, + "learning_rate": 1.2903407049982386e-05, + "loss": 0.9282, + "step": 3585 + }, + { + "epoch": 1.99, + "learning_rate": 1.2840097246438215e-05, + "loss": 0.8747, + "step": 3590 + }, + { + "epoch": 1.99, + "learning_rate": 1.277688942645934e-05, + "loss": 0.936, + "step": 3595 + }, + { + "epoch": 1.99, + "learning_rate": 1.2713784120161725e-05, + "loss": 0.8945, + "step": 3600 + }, + { + "epoch": 1.99, + "learning_rate": 1.2650781856801598e-05, + "loss": 0.9273, + "step": 3605 + }, + { + "epoch": 2.0, + "learning_rate": 1.258788316477097e-05, + "loss": 0.9632, + "step": 3610 + }, + { + "epoch": 2.0, + "learning_rate": 1.2525088571593202e-05, + "loss": 0.8641, + "step": 3615 + }, + { + "epoch": 2.0, + "learning_rate": 1.2462398603918607e-05, + "loss": 0.9314, + "step": 3620 + }, + { + "epoch": 2.0, + "learning_rate": 1.2399813787520006e-05, + "loss": 0.9169, + "step": 3625 + }, + { + "epoch": 2.01, + "learning_rate": 1.2337334647288334e-05, + "loss": 0.9089, + "step": 3630 + }, + { + "epoch": 2.01, + "learning_rate": 1.2274961707228228e-05, + "loss": 0.9039, + "step": 3635 + }, + { + "epoch": 2.01, + "learning_rate": 1.2212695490453646e-05, + "loss": 0.9378, + "step": 3640 + }, + { + "epoch": 2.02, + "learning_rate": 1.2150536519183475e-05, + "loss": 0.9297, + "step": 3645 + }, + { + "epoch": 2.02, + "learning_rate": 1.2088485314737108e-05, + "loss": 0.9488, + "step": 3650 + }, + { + "epoch": 2.02, + "learning_rate": 1.2026542397530186e-05, + "loss": 0.9625, + "step": 3655 + }, + { + "epoch": 2.02, + "learning_rate": 1.1964708287070073e-05, + "loss": 0.8874, + "step": 3660 + }, + { + "epoch": 2.03, + "learning_rate": 1.1902983501951666e-05, + "loss": 0.9224, + "step": 3665 + }, + { + "epoch": 2.03, + "learning_rate": 1.1841368559852892e-05, + "loss": 0.9442, + "step": 3670 + }, + { + "epoch": 2.03, + "learning_rate": 1.17798639775305e-05, + "loss": 0.8688, + "step": 3675 + }, + { + "epoch": 2.03, + "learning_rate": 1.1718470270815608e-05, + "loss": 0.8912, + "step": 3680 + }, + { + "epoch": 2.04, + "learning_rate": 1.1657187954609496e-05, + "loss": 0.9131, + "step": 3685 + }, + { + "epoch": 2.04, + "learning_rate": 1.1596017542879168e-05, + "loss": 0.9827, + "step": 3690 + }, + { + "epoch": 2.04, + "learning_rate": 1.1534959548653132e-05, + "loss": 0.8588, + "step": 3695 + }, + { + "epoch": 2.05, + "learning_rate": 1.147401448401706e-05, + "loss": 0.9359, + "step": 3700 + }, + { + "epoch": 2.05, + "learning_rate": 1.1413182860109491e-05, + "loss": 0.9375, + "step": 3705 + }, + { + "epoch": 2.05, + "learning_rate": 1.1352465187117562e-05, + "loss": 0.9242, + "step": 3710 + }, + { + "epoch": 2.05, + "learning_rate": 1.1291861974272703e-05, + "loss": 0.9132, + "step": 3715 + }, + { + "epoch": 2.06, + "learning_rate": 1.1231373729846393e-05, + "loss": 0.9142, + "step": 3720 + }, + { + "epoch": 2.06, + "learning_rate": 1.1171000961145883e-05, + "loss": 0.9132, + "step": 3725 + }, + { + "epoch": 2.06, + "learning_rate": 1.1110744174509952e-05, + "loss": 0.9603, + "step": 3730 + }, + { + "epoch": 2.07, + "learning_rate": 1.1050603875304622e-05, + "loss": 0.9442, + "step": 3735 + }, + { + "epoch": 2.07, + "learning_rate": 1.0990580567918979e-05, + "loss": 0.9392, + "step": 3740 + }, + { + "epoch": 2.07, + "learning_rate": 1.0930674755760908e-05, + "loss": 0.9203, + "step": 3745 + }, + { + "epoch": 2.07, + "learning_rate": 1.0870886941252872e-05, + "loss": 0.9468, + "step": 3750 + }, + { + "epoch": 2.08, + "learning_rate": 1.0811217625827705e-05, + "loss": 0.9605, + "step": 3755 + }, + { + "epoch": 2.08, + "learning_rate": 1.0751667309924399e-05, + "loss": 0.9166, + "step": 3760 + }, + { + "epoch": 2.08, + "learning_rate": 1.0692236492983918e-05, + "loss": 0.939, + "step": 3765 + }, + { + "epoch": 2.08, + "learning_rate": 1.0632925673445001e-05, + "loss": 0.9409, + "step": 3770 + }, + { + "epoch": 2.09, + "learning_rate": 1.0573735348739987e-05, + "loss": 0.9523, + "step": 3775 + }, + { + "epoch": 2.09, + "learning_rate": 1.0514666015290645e-05, + "loss": 0.8867, + "step": 3780 + }, + { + "epoch": 2.09, + "learning_rate": 1.0455718168503967e-05, + "loss": 0.9816, + "step": 3785 + }, + { + "epoch": 2.1, + "learning_rate": 1.0396892302768127e-05, + "loss": 0.9465, + "step": 3790 + }, + { + "epoch": 2.1, + "learning_rate": 1.033818891144817e-05, + "loss": 0.9352, + "step": 3795 + }, + { + "epoch": 2.1, + "learning_rate": 1.0279608486882054e-05, + "loss": 0.8877, + "step": 3800 + }, + { + "epoch": 2.1, + "learning_rate": 1.0221151520376343e-05, + "loss": 0.9098, + "step": 3805 + }, + { + "epoch": 2.11, + "learning_rate": 1.0162818502202251e-05, + "loss": 0.8687, + "step": 3810 + }, + { + "epoch": 2.11, + "learning_rate": 1.0104609921591387e-05, + "loss": 0.9256, + "step": 3815 + }, + { + "epoch": 2.11, + "learning_rate": 1.0046526266731782e-05, + "loss": 0.92, + "step": 3820 + }, + { + "epoch": 2.12, + "learning_rate": 9.988568024763673e-06, + "loss": 0.8813, + "step": 3825 + }, + { + "epoch": 2.12, + "learning_rate": 9.930735681775505e-06, + "loss": 0.891, + "step": 3830 + }, + { + "epoch": 2.12, + "learning_rate": 9.87302972279982e-06, + "loss": 0.9479, + "step": 3835 + }, + { + "epoch": 2.12, + "learning_rate": 9.815450631809191e-06, + "loss": 0.8963, + "step": 3840 + }, + { + "epoch": 2.13, + "learning_rate": 9.757998891712172e-06, + "loss": 0.9178, + "step": 3845 + }, + { + "epoch": 2.13, + "learning_rate": 9.700674984349228e-06, + "loss": 0.9072, + "step": 3850 + }, + { + "epoch": 2.13, + "learning_rate": 9.643479390488717e-06, + "loss": 0.8879, + "step": 3855 + }, + { + "epoch": 2.13, + "learning_rate": 9.586412589822846e-06, + "loss": 0.9484, + "step": 3860 + }, + { + "epoch": 2.14, + "learning_rate": 9.529475060963649e-06, + "loss": 0.9173, + "step": 3865 + }, + { + "epoch": 2.14, + "learning_rate": 9.472667281438982e-06, + "loss": 0.9318, + "step": 3870 + }, + { + "epoch": 2.14, + "learning_rate": 9.415989727688484e-06, + "loss": 0.9242, + "step": 3875 + }, + { + "epoch": 2.15, + "learning_rate": 9.359442875059631e-06, + "loss": 0.9314, + "step": 3880 + }, + { + "epoch": 2.15, + "learning_rate": 9.303027197803726e-06, + "loss": 0.9227, + "step": 3885 + }, + { + "epoch": 2.15, + "learning_rate": 9.246743169071906e-06, + "loss": 0.9253, + "step": 3890 + }, + { + "epoch": 2.15, + "learning_rate": 9.190591260911201e-06, + "loss": 0.9457, + "step": 3895 + }, + { + "epoch": 2.16, + "learning_rate": 9.134571944260554e-06, + "loss": 0.9214, + "step": 3900 + }, + { + "epoch": 2.16, + "learning_rate": 9.078685688946884e-06, + "loss": 0.9534, + "step": 3905 + }, + { + "epoch": 2.16, + "learning_rate": 9.022932963681141e-06, + "loss": 0.9236, + "step": 3910 + }, + { + "epoch": 2.16, + "learning_rate": 8.967314236054384e-06, + "loss": 0.8905, + "step": 3915 + }, + { + "epoch": 2.17, + "learning_rate": 8.911829972533817e-06, + "loss": 0.9319, + "step": 3920 + }, + { + "epoch": 2.17, + "learning_rate": 8.856480638458966e-06, + "loss": 0.8989, + "step": 3925 + }, + { + "epoch": 2.17, + "learning_rate": 8.80126669803766e-06, + "loss": 0.9234, + "step": 3930 + }, + { + "epoch": 2.18, + "learning_rate": 8.746188614342263e-06, + "loss": 0.9619, + "step": 3935 + }, + { + "epoch": 2.18, + "learning_rate": 8.691246849305653e-06, + "loss": 0.9537, + "step": 3940 + }, + { + "epoch": 2.18, + "learning_rate": 8.636441863717499e-06, + "loss": 0.9809, + "step": 3945 + }, + { + "epoch": 2.18, + "learning_rate": 8.581774117220238e-06, + "loss": 0.9219, + "step": 3950 + }, + { + "epoch": 2.19, + "learning_rate": 8.52724406830538e-06, + "loss": 0.9502, + "step": 3955 + }, + { + "epoch": 2.19, + "learning_rate": 8.472852174309514e-06, + "loss": 0.9277, + "step": 3960 + }, + { + "epoch": 2.19, + "learning_rate": 8.418598891410581e-06, + "loss": 0.8978, + "step": 3965 + }, + { + "epoch": 2.2, + "learning_rate": 8.364484674624e-06, + "loss": 0.9026, + "step": 3970 + }, + { + "epoch": 2.2, + "learning_rate": 8.31050997779885e-06, + "loss": 0.9658, + "step": 3975 + }, + { + "epoch": 2.2, + "learning_rate": 8.25667525361409e-06, + "loss": 0.9288, + "step": 3980 + }, + { + "epoch": 2.2, + "learning_rate": 8.202980953574735e-06, + "loss": 0.9009, + "step": 3985 + }, + { + "epoch": 2.21, + "learning_rate": 8.14942752800808e-06, + "loss": 0.934, + "step": 3990 + }, + { + "epoch": 2.21, + "learning_rate": 8.09601542605993e-06, + "loss": 0.9261, + "step": 3995 + }, + { + "epoch": 2.21, + "learning_rate": 8.042745095690826e-06, + "loss": 0.9086, + "step": 4000 + }, + { + "epoch": 2.21, + "learning_rate": 7.989616983672288e-06, + "loss": 0.9397, + "step": 4005 + }, + { + "epoch": 2.22, + "learning_rate": 7.936631535583055e-06, + "loss": 0.8661, + "step": 4010 + }, + { + "epoch": 2.22, + "learning_rate": 7.88378919580538e-06, + "loss": 0.9433, + "step": 4015 + }, + { + "epoch": 2.22, + "learning_rate": 7.83109040752128e-06, + "loss": 0.9473, + "step": 4020 + }, + { + "epoch": 2.23, + "learning_rate": 7.778535612708824e-06, + "loss": 0.9177, + "step": 4025 + }, + { + "epoch": 2.23, + "learning_rate": 7.726125252138417e-06, + "loss": 0.8932, + "step": 4030 + }, + { + "epoch": 2.23, + "learning_rate": 7.673859765369126e-06, + "loss": 0.9187, + "step": 4035 + }, + { + "epoch": 2.23, + "learning_rate": 7.62173959074497e-06, + "loss": 0.9009, + "step": 4040 + }, + { + "epoch": 2.24, + "learning_rate": 7.569765165391257e-06, + "loss": 0.9348, + "step": 4045 + }, + { + "epoch": 2.24, + "learning_rate": 7.517936925210917e-06, + "loss": 0.8827, + "step": 4050 + }, + { + "epoch": 2.24, + "learning_rate": 7.466255304880834e-06, + "loss": 0.9668, + "step": 4055 + }, + { + "epoch": 2.24, + "learning_rate": 7.4147207378482295e-06, + "loss": 0.934, + "step": 4060 + }, + { + "epoch": 2.25, + "learning_rate": 7.36333365632697e-06, + "loss": 0.9251, + "step": 4065 + }, + { + "epoch": 2.25, + "learning_rate": 7.312094491294033e-06, + "loss": 0.9225, + "step": 4070 + }, + { + "epoch": 2.25, + "learning_rate": 7.261003672485783e-06, + "loss": 0.9198, + "step": 4075 + }, + { + "epoch": 2.26, + "learning_rate": 7.210061628394477e-06, + "loss": 0.9023, + "step": 4080 + }, + { + "epoch": 2.26, + "learning_rate": 7.159268786264564e-06, + "loss": 0.9378, + "step": 4085 + }, + { + "epoch": 2.26, + "learning_rate": 7.108625572089209e-06, + "loss": 0.9148, + "step": 4090 + }, + { + "epoch": 2.26, + "learning_rate": 7.05813241060661e-06, + "loss": 0.9308, + "step": 4095 + }, + { + "epoch": 2.27, + "learning_rate": 7.007789725296557e-06, + "loss": 0.8979, + "step": 4100 + }, + { + "epoch": 2.27, + "learning_rate": 6.957597938376748e-06, + "loss": 0.9329, + "step": 4105 + }, + { + "epoch": 2.27, + "learning_rate": 6.907557470799358e-06, + "loss": 0.8849, + "step": 4110 + }, + { + "epoch": 2.28, + "learning_rate": 6.857668742247458e-06, + "loss": 0.8879, + "step": 4115 + }, + { + "epoch": 2.28, + "learning_rate": 6.807932171131498e-06, + "loss": 0.9132, + "step": 4120 + }, + { + "epoch": 2.28, + "learning_rate": 6.758348174585804e-06, + "loss": 0.9273, + "step": 4125 + }, + { + "epoch": 2.28, + "learning_rate": 6.7089171684650785e-06, + "loss": 0.9115, + "step": 4130 + }, + { + "epoch": 2.29, + "learning_rate": 6.659639567340914e-06, + "loss": 0.9431, + "step": 4135 + }, + { + "epoch": 2.29, + "learning_rate": 6.610515784498314e-06, + "loss": 0.9515, + "step": 4140 + }, + { + "epoch": 2.29, + "learning_rate": 6.561546231932228e-06, + "loss": 0.9069, + "step": 4145 + }, + { + "epoch": 2.29, + "learning_rate": 6.512731320344101e-06, + "loss": 0.9178, + "step": 4150 + }, + { + "epoch": 2.3, + "learning_rate": 6.464071459138405e-06, + "loss": 0.9156, + "step": 4155 + }, + { + "epoch": 2.3, + "learning_rate": 6.415567056419244e-06, + "loss": 0.9637, + "step": 4160 + }, + { + "epoch": 2.3, + "learning_rate": 6.3672185189869e-06, + "loss": 0.9446, + "step": 4165 + }, + { + "epoch": 2.31, + "learning_rate": 6.319026252334445e-06, + "loss": 0.8726, + "step": 4170 + }, + { + "epoch": 2.31, + "learning_rate": 6.270990660644313e-06, + "loss": 0.9324, + "step": 4175 + }, + { + "epoch": 2.31, + "learning_rate": 6.223112146784935e-06, + "loss": 0.9252, + "step": 4180 + }, + { + "epoch": 2.31, + "learning_rate": 6.1753911123073435e-06, + "loss": 0.8924, + "step": 4185 + }, + { + "epoch": 2.32, + "learning_rate": 6.127827957441817e-06, + "loss": 0.9344, + "step": 4190 + }, + { + "epoch": 2.32, + "learning_rate": 6.080423081094522e-06, + "loss": 0.9341, + "step": 4195 + }, + { + "epoch": 2.32, + "learning_rate": 6.033176880844133e-06, + "loss": 0.905, + "step": 4200 + }, + { + "epoch": 2.33, + "learning_rate": 5.986089752938584e-06, + "loss": 0.9231, + "step": 4205 + }, + { + "epoch": 2.33, + "learning_rate": 5.939162092291622e-06, + "loss": 0.906, + "step": 4210 + }, + { + "epoch": 2.33, + "learning_rate": 5.892394292479633e-06, + "loss": 0.8957, + "step": 4215 + }, + { + "epoch": 2.33, + "learning_rate": 5.8457867457382024e-06, + "loss": 0.867, + "step": 4220 + }, + { + "epoch": 2.34, + "learning_rate": 5.7993398429589506e-06, + "loss": 0.9022, + "step": 4225 + }, + { + "epoch": 2.34, + "learning_rate": 5.753053973686148e-06, + "loss": 0.9064, + "step": 4230 + }, + { + "epoch": 2.34, + "learning_rate": 5.7069295261135525e-06, + "loss": 0.9019, + "step": 4235 + }, + { + "epoch": 2.34, + "learning_rate": 5.66096688708104e-06, + "loss": 0.9397, + "step": 4240 + }, + { + "epoch": 2.35, + "learning_rate": 5.615166442071457e-06, + "loss": 0.8971, + "step": 4245 + }, + { + "epoch": 2.35, + "learning_rate": 5.569528575207339e-06, + "loss": 0.8902, + "step": 4250 + }, + { + "epoch": 2.35, + "learning_rate": 5.5240536692477e-06, + "loss": 0.8982, + "step": 4255 + }, + { + "epoch": 2.36, + "learning_rate": 5.4787421055848164e-06, + "loss": 0.9135, + "step": 4260 + }, + { + "epoch": 2.36, + "learning_rate": 5.433594264241043e-06, + "loss": 0.8969, + "step": 4265 + }, + { + "epoch": 2.36, + "learning_rate": 5.3886105238656055e-06, + "loss": 0.9036, + "step": 4270 + }, + { + "epoch": 2.36, + "learning_rate": 5.3437912617314425e-06, + "loss": 0.9313, + "step": 4275 + }, + { + "epoch": 2.37, + "learning_rate": 5.299136853732034e-06, + "loss": 0.9034, + "step": 4280 + }, + { + "epoch": 2.37, + "learning_rate": 5.254647674378252e-06, + "loss": 0.9673, + "step": 4285 + }, + { + "epoch": 2.37, + "learning_rate": 5.210324096795202e-06, + "loss": 0.9044, + "step": 4290 + }, + { + "epoch": 2.37, + "learning_rate": 5.166166492719124e-06, + "loss": 0.937, + "step": 4295 + }, + { + "epoch": 2.38, + "learning_rate": 5.122175232494255e-06, + "loss": 0.9095, + "step": 4300 + }, + { + "epoch": 2.38, + "learning_rate": 5.078350685069727e-06, + "loss": 0.8787, + "step": 4305 + }, + { + "epoch": 2.38, + "learning_rate": 5.03469321799647e-06, + "loss": 0.8945, + "step": 4310 + }, + { + "epoch": 2.39, + "learning_rate": 4.9912031974241376e-06, + "loss": 0.9285, + "step": 4315 + }, + { + "epoch": 2.39, + "learning_rate": 4.947880988098025e-06, + "loss": 0.9318, + "step": 4320 + }, + { + "epoch": 2.39, + "learning_rate": 4.90472695335602e-06, + "loss": 0.9171, + "step": 4325 + }, + { + "epoch": 2.39, + "learning_rate": 4.8617414551255545e-06, + "loss": 0.923, + "step": 4330 + }, + { + "epoch": 2.4, + "learning_rate": 4.818924853920545e-06, + "loss": 0.9153, + "step": 4335 + }, + { + "epoch": 2.4, + "learning_rate": 4.776277508838428e-06, + "loss": 0.907, + "step": 4340 + }, + { + "epoch": 2.4, + "learning_rate": 4.733799777557069e-06, + "loss": 0.8869, + "step": 4345 + }, + { + "epoch": 2.41, + "learning_rate": 4.691492016331842e-06, + "loss": 0.95, + "step": 4350 + }, + { + "epoch": 2.41, + "learning_rate": 4.6493545799925655e-06, + "loss": 0.9145, + "step": 4355 + }, + { + "epoch": 2.41, + "learning_rate": 4.607387821940609e-06, + "loss": 0.9175, + "step": 4360 + }, + { + "epoch": 2.41, + "learning_rate": 4.565592094145835e-06, + "loss": 0.9125, + "step": 4365 + }, + { + "epoch": 2.42, + "learning_rate": 4.523967747143745e-06, + "loss": 0.8972, + "step": 4370 + }, + { + "epoch": 2.42, + "learning_rate": 4.482515130032453e-06, + "loss": 0.9113, + "step": 4375 + }, + { + "epoch": 2.42, + "learning_rate": 4.441234590469817e-06, + "loss": 0.9447, + "step": 4380 + }, + { + "epoch": 2.42, + "learning_rate": 4.400126474670499e-06, + "loss": 0.9472, + "step": 4385 + }, + { + "epoch": 2.43, + "learning_rate": 4.359191127403059e-06, + "loss": 0.9391, + "step": 4390 + }, + { + "epoch": 2.43, + "learning_rate": 4.318428891987078e-06, + "loss": 0.9171, + "step": 4395 + }, + { + "epoch": 2.43, + "learning_rate": 4.2778401102902595e-06, + "loss": 0.9468, + "step": 4400 + }, + { + "epoch": 2.44, + "learning_rate": 4.237425122725586e-06, + "loss": 0.9034, + "step": 4405 + }, + { + "epoch": 2.44, + "learning_rate": 4.197184268248436e-06, + "loss": 0.8667, + "step": 4410 + }, + { + "epoch": 2.44, + "learning_rate": 4.157117884353765e-06, + "loss": 0.9553, + "step": 4415 + }, + { + "epoch": 2.44, + "learning_rate": 4.117226307073268e-06, + "loss": 0.931, + "step": 4420 + }, + { + "epoch": 2.45, + "learning_rate": 4.07750987097254e-06, + "loss": 0.9287, + "step": 4425 + }, + { + "epoch": 2.45, + "learning_rate": 4.037968909148326e-06, + "loss": 0.8737, + "step": 4430 + }, + { + "epoch": 2.45, + "learning_rate": 3.998603753225647e-06, + "loss": 0.9677, + "step": 4435 + }, + { + "epoch": 2.46, + "learning_rate": 3.959414733355094e-06, + "loss": 0.9194, + "step": 4440 + }, + { + "epoch": 2.46, + "learning_rate": 3.9204021782100115e-06, + "loss": 0.9424, + "step": 4445 + }, + { + "epoch": 2.46, + "learning_rate": 3.8815664149837675e-06, + "loss": 0.9402, + "step": 4450 + }, + { + "epoch": 2.46, + "learning_rate": 3.8429077693869854e-06, + "loss": 0.9196, + "step": 4455 + }, + { + "epoch": 2.47, + "learning_rate": 3.804426565644839e-06, + "loss": 0.9357, + "step": 4460 + }, + { + "epoch": 2.47, + "learning_rate": 3.7661231264943086e-06, + "loss": 0.9234, + "step": 4465 + }, + { + "epoch": 2.47, + "learning_rate": 3.7279977731814963e-06, + "loss": 0.9135, + "step": 4470 + }, + { + "epoch": 2.47, + "learning_rate": 3.690050825458913e-06, + "loss": 0.9074, + "step": 4475 + }, + { + "epoch": 2.48, + "learning_rate": 3.652282601582793e-06, + "loss": 0.8658, + "step": 4480 + }, + { + "epoch": 2.48, + "learning_rate": 3.6146934183104748e-06, + "loss": 0.9522, + "step": 4485 + }, + { + "epoch": 2.48, + "learning_rate": 3.5772835908976538e-06, + "loss": 0.937, + "step": 4490 + }, + { + "epoch": 2.49, + "learning_rate": 3.540053433095841e-06, + "loss": 0.9336, + "step": 4495 + }, + { + "epoch": 2.49, + "learning_rate": 3.503003257149637e-06, + "loss": 0.9437, + "step": 4500 + }, + { + "epoch": 2.49, + "learning_rate": 3.4661333737941976e-06, + "loss": 0.9084, + "step": 4505 + }, + { + "epoch": 2.49, + "learning_rate": 3.429444092252554e-06, + "loss": 0.9378, + "step": 4510 + }, + { + "epoch": 2.5, + "learning_rate": 3.39293572023307e-06, + "loss": 0.8999, + "step": 4515 + }, + { + "epoch": 2.5, + "learning_rate": 3.3566085639268413e-06, + "loss": 0.9417, + "step": 4520 + }, + { + "epoch": 2.5, + "learning_rate": 3.32046292800513e-06, + "loss": 0.9288, + "step": 4525 + }, + { + "epoch": 2.5, + "learning_rate": 3.2844991156168097e-06, + "loss": 0.8957, + "step": 4530 + }, + { + "epoch": 2.51, + "learning_rate": 3.2487174283858223e-06, + "loss": 0.9002, + "step": 4535 + }, + { + "epoch": 2.51, + "learning_rate": 3.2131181664086517e-06, + "loss": 0.9315, + "step": 4540 + }, + { + "epoch": 2.51, + "learning_rate": 3.1777016282517975e-06, + "loss": 0.8948, + "step": 4545 + }, + { + "epoch": 2.52, + "learning_rate": 3.142468110949287e-06, + "loss": 0.9015, + "step": 4550 + }, + { + "epoch": 2.52, + "learning_rate": 3.1074179100001737e-06, + "loss": 0.9273, + "step": 4555 + }, + { + "epoch": 2.52, + "learning_rate": 3.0725513193660404e-06, + "loss": 0.9307, + "step": 4560 + }, + { + "epoch": 2.52, + "learning_rate": 3.0378686314685934e-06, + "loss": 0.9075, + "step": 4565 + }, + { + "epoch": 2.53, + "learning_rate": 3.003370137187128e-06, + "loss": 0.8821, + "step": 4570 + }, + { + "epoch": 2.53, + "learning_rate": 2.969056125856154e-06, + "loss": 0.9245, + "step": 4575 + }, + { + "epoch": 2.53, + "learning_rate": 2.93492688526294e-06, + "loss": 0.9346, + "step": 4580 + }, + { + "epoch": 2.54, + "learning_rate": 2.900982701645111e-06, + "loss": 0.9226, + "step": 4585 + }, + { + "epoch": 2.54, + "learning_rate": 2.867223859688237e-06, + "loss": 0.9215, + "step": 4590 + }, + { + "epoch": 2.54, + "learning_rate": 2.83365064252345e-06, + "loss": 0.878, + "step": 4595 + }, + { + "epoch": 2.54, + "learning_rate": 2.800263331725078e-06, + "loss": 0.9207, + "step": 4600 + }, + { + "epoch": 2.55, + "learning_rate": 2.7670622073082657e-06, + "loss": 0.9156, + "step": 4605 + }, + { + "epoch": 2.55, + "learning_rate": 2.7340475477266507e-06, + "loss": 0.9296, + "step": 4610 + }, + { + "epoch": 2.55, + "learning_rate": 2.701219629869986e-06, + "loss": 0.9346, + "step": 4615 + }, + { + "epoch": 2.55, + "learning_rate": 2.6685787290618825e-06, + "loss": 0.9237, + "step": 4620 + }, + { + "epoch": 2.56, + "learning_rate": 2.636125119057428e-06, + "loss": 0.9162, + "step": 4625 + }, + { + "epoch": 2.56, + "learning_rate": 2.6038590720409565e-06, + "loss": 0.9145, + "step": 4630 + }, + { + "epoch": 2.56, + "learning_rate": 2.5717808586237067e-06, + "loss": 0.9776, + "step": 4635 + }, + { + "epoch": 2.57, + "learning_rate": 2.539890747841611e-06, + "loss": 0.9519, + "step": 4640 + }, + { + "epoch": 2.57, + "learning_rate": 2.5081890071529695e-06, + "loss": 0.9116, + "step": 4645 + }, + { + "epoch": 2.57, + "learning_rate": 2.4766759024362927e-06, + "loss": 0.9217, + "step": 4650 + }, + { + "epoch": 2.57, + "learning_rate": 2.445351697987988e-06, + "loss": 0.9113, + "step": 4655 + }, + { + "epoch": 2.58, + "learning_rate": 2.414216656520191e-06, + "loss": 0.9089, + "step": 4660 + }, + { + "epoch": 2.58, + "learning_rate": 2.3832710391585605e-06, + "loss": 0.9367, + "step": 4665 + }, + { + "epoch": 2.58, + "learning_rate": 2.3525151054400675e-06, + "loss": 0.9453, + "step": 4670 + }, + { + "epoch": 2.59, + "learning_rate": 2.3219491133108394e-06, + "loss": 0.9187, + "step": 4675 + }, + { + "epoch": 2.59, + "learning_rate": 2.2915733191239824e-06, + "loss": 0.911, + "step": 4680 + }, + { + "epoch": 2.59, + "learning_rate": 2.261387977637436e-06, + "loss": 0.9309, + "step": 4685 + }, + { + "epoch": 2.59, + "learning_rate": 2.2313933420118395e-06, + "loss": 0.8921, + "step": 4690 + }, + { + "epoch": 2.6, + "learning_rate": 2.2015896638084037e-06, + "loss": 0.9332, + "step": 4695 + }, + { + "epoch": 2.6, + "learning_rate": 2.171977192986813e-06, + "loss": 0.9316, + "step": 4700 + } + ], + "max_steps": 5424, + "num_train_epochs": 3, + "total_flos": 1.5273198443770675e+18, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-4700/training_args.bin b/checkpoint-4700/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..77433459daca6603950afc463ff989e817f81ac3 --- /dev/null +++ b/checkpoint-4700/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0f6a8853599a52b1f15984d75ce4b6434c335fdc67d4509d0b2ca0d8c613590a +size 3362 diff --git a/checkpoint-4800/README.md b/checkpoint-4800/README.md new file mode 100644 index 0000000000000000000000000000000000000000..53c0fe6b436905ad7a2c060e98fe1ebdeac1a2fe --- /dev/null +++ b/checkpoint-4800/README.md @@ -0,0 +1,15 @@ +--- +library_name: peft +--- +## Training procedure + + +The following `bitsandbytes` quantization config was used during training: +- load_in_8bit: True +- llm_int8_threshold: 6.0 +- llm_int8_skip_modules: None +- llm_int8_enable_fp32_cpu_offload: False +### Framework versions + + +- PEFT 0.5.0 diff --git a/checkpoint-4800/adapter_config.json b/checkpoint-4800/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..514ae99ac7471cc26016f7575e44dd8304f29b07 --- /dev/null +++ b/checkpoint-4800/adapter_config.json @@ -0,0 +1,21 @@ +{ + "auto_mapping": null, + "base_model_name_or_path": "meta-llama/Llama-2-7b-chat-hf", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "q_proj", + "v_proj" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-4800/adapter_model.bin b/checkpoint-4800/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..4deba6cc1dd55c0419ab4d3ad4282dbb949e6919 --- /dev/null +++ b/checkpoint-4800/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6017227a66d783ba7a034d59cb1f1773eff252b85885fab0c9ee02601c38546c +size 16821197 diff --git a/checkpoint-4800/finetuning_args.json b/checkpoint-4800/finetuning_args.json new file mode 100644 index 0000000000000000000000000000000000000000..d01efc206b59c6f88548e8f3940579f2ed2af33b --- /dev/null +++ b/checkpoint-4800/finetuning_args.json @@ -0,0 +1,16 @@ +{ + "dpo_beta": 0.1, + "finetuning_type": "lora", + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "lora_rank": 8, + "lora_target": [ + "q_proj", + "v_proj" + ], + "name_module_trainable": "mlp", + "num_hidden_layers": 32, + "num_layer_trainable": 3, + "ppo_score_norm": false, + "resume_lora_training": true +} diff --git a/checkpoint-4800/optimizer.pt b/checkpoint-4800/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..7a8bbc8dfc6cd558ac376910894e06618c392e2f --- /dev/null +++ b/checkpoint-4800/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3b1fde6b50024cf91fafdbee23ee208e78063d17123d2c5302f600c3474759f0 +size 33629893 diff --git a/checkpoint-4800/rng_state.pth b/checkpoint-4800/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..c53eb7ba6600e637130f751b1a3d21b315217827 --- /dev/null +++ b/checkpoint-4800/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b7775e429eb420f572318e51f6150ec688bcb1cfe83afcb51a7f4b0491393fe6 +size 14575 diff --git a/checkpoint-4800/scaler.pt b/checkpoint-4800/scaler.pt new file mode 100644 index 0000000000000000000000000000000000000000..b8b73aad133433dd7c5c161a0cb51ad204ab430f --- /dev/null +++ b/checkpoint-4800/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:420c3442145823e0a01550a34733b059817d4d6441672a50c99fa8b32e3dd0d0 +size 557 diff --git a/checkpoint-4800/scheduler.pt b/checkpoint-4800/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..6d693264aad00302d5f37e900547d73f34fe8efb --- /dev/null +++ b/checkpoint-4800/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:11359e143b89cf9e29e3084c6c3b78fea6484747851f4fb17dd1a50f89352ad2 +size 627 diff --git a/checkpoint-4800/trainer_state.json b/checkpoint-4800/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..5ff85654bc7a9674bd9e5613fe5213bca397ce49 --- /dev/null +++ b/checkpoint-4800/trainer_state.json @@ -0,0 +1,5776 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 2.654133259607409, + "global_step": 4800, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.9999895164082156e-05, + "loss": 1.6349, + "step": 5 + }, + { + "epoch": 0.01, + "learning_rate": 4.999958065720787e-05, + "loss": 1.6199, + "step": 10 + }, + { + "epoch": 0.01, + "learning_rate": 4.999905648201487e-05, + "loss": 1.4834, + "step": 15 + }, + { + "epoch": 0.01, + "learning_rate": 4.999832264289934e-05, + "loss": 1.3882, + "step": 20 + }, + { + "epoch": 0.01, + "learning_rate": 4.999737914601591e-05, + "loss": 1.3679, + "step": 25 + }, + { + "epoch": 0.02, + "learning_rate": 4.999622599927756e-05, + "loss": 1.2396, + "step": 30 + }, + { + "epoch": 0.02, + "learning_rate": 4.999486321235559e-05, + "loss": 1.321, + "step": 35 + }, + { + "epoch": 0.02, + "learning_rate": 4.9993290796679516e-05, + "loss": 1.2874, + "step": 40 + }, + { + "epoch": 0.02, + "learning_rate": 4.999150876543699e-05, + "loss": 1.2607, + "step": 45 + }, + { + "epoch": 0.03, + "learning_rate": 4.9989517133573694e-05, + "loss": 1.2454, + "step": 50 + }, + { + "epoch": 0.03, + "learning_rate": 4.9987315917793174e-05, + "loss": 1.2799, + "step": 55 + }, + { + "epoch": 0.03, + "learning_rate": 4.998490513655676e-05, + "loss": 1.2575, + "step": 60 + }, + { + "epoch": 0.04, + "learning_rate": 4.998228481008337e-05, + "loss": 1.2404, + "step": 65 + }, + { + "epoch": 0.04, + "learning_rate": 4.997945496034934e-05, + "loss": 1.2219, + "step": 70 + }, + { + "epoch": 0.04, + "learning_rate": 4.9976415611088267e-05, + "loss": 1.2241, + "step": 75 + }, + { + "epoch": 0.04, + "learning_rate": 4.997316678779079e-05, + "loss": 1.1716, + "step": 80 + }, + { + "epoch": 0.05, + "learning_rate": 4.996970851770438e-05, + "loss": 1.1883, + "step": 85 + }, + { + "epoch": 0.05, + "learning_rate": 4.9966040829833115e-05, + "loss": 1.205, + "step": 90 + }, + { + "epoch": 0.05, + "learning_rate": 4.9962163754937426e-05, + "loss": 1.1246, + "step": 95 + }, + { + "epoch": 0.06, + "learning_rate": 4.995807732553384e-05, + "loss": 1.1636, + "step": 100 + }, + { + "epoch": 0.06, + "learning_rate": 4.9953781575894723e-05, + "loss": 1.158, + "step": 105 + }, + { + "epoch": 0.06, + "learning_rate": 4.9949276542048e-05, + "loss": 1.1477, + "step": 110 + }, + { + "epoch": 0.06, + "learning_rate": 4.9944562261776805e-05, + "loss": 1.1678, + "step": 115 + }, + { + "epoch": 0.07, + "learning_rate": 4.9939638774619216e-05, + "loss": 1.1501, + "step": 120 + }, + { + "epoch": 0.07, + "learning_rate": 4.99345061218679e-05, + "loss": 1.1955, + "step": 125 + }, + { + "epoch": 0.07, + "learning_rate": 4.9929164346569756e-05, + "loss": 1.1724, + "step": 130 + }, + { + "epoch": 0.07, + "learning_rate": 4.9923613493525576e-05, + "loss": 1.177, + "step": 135 + }, + { + "epoch": 0.08, + "learning_rate": 4.991785360928968e-05, + "loss": 1.1418, + "step": 140 + }, + { + "epoch": 0.08, + "learning_rate": 4.991188474216947e-05, + "loss": 1.1898, + "step": 145 + }, + { + "epoch": 0.08, + "learning_rate": 4.9905706942225094e-05, + "loss": 1.1479, + "step": 150 + }, + { + "epoch": 0.09, + "learning_rate": 4.9899320261268966e-05, + "loss": 1.1356, + "step": 155 + }, + { + "epoch": 0.09, + "learning_rate": 4.989272475286537e-05, + "loss": 1.1397, + "step": 160 + }, + { + "epoch": 0.09, + "learning_rate": 4.9885920472330004e-05, + "loss": 1.1215, + "step": 165 + }, + { + "epoch": 0.09, + "learning_rate": 4.9878907476729516e-05, + "loss": 1.167, + "step": 170 + }, + { + "epoch": 0.1, + "learning_rate": 4.9871685824881e-05, + "loss": 1.1219, + "step": 175 + }, + { + "epoch": 0.1, + "learning_rate": 4.9864255577351534e-05, + "loss": 1.0835, + "step": 180 + }, + { + "epoch": 0.1, + "learning_rate": 4.985661679645769e-05, + "loss": 1.0721, + "step": 185 + }, + { + "epoch": 0.11, + "learning_rate": 4.9848769546264915e-05, + "loss": 1.0692, + "step": 190 + }, + { + "epoch": 0.11, + "learning_rate": 4.9840713892587146e-05, + "loss": 1.0488, + "step": 195 + }, + { + "epoch": 0.11, + "learning_rate": 4.983244990298609e-05, + "loss": 1.1285, + "step": 200 + }, + { + "epoch": 0.11, + "learning_rate": 4.982397764677081e-05, + "loss": 1.0832, + "step": 205 + }, + { + "epoch": 0.12, + "learning_rate": 4.981529719499704e-05, + "loss": 1.0652, + "step": 210 + }, + { + "epoch": 0.12, + "learning_rate": 4.980640862046663e-05, + "loss": 1.1043, + "step": 215 + }, + { + "epoch": 0.12, + "learning_rate": 4.979731199772693e-05, + "loss": 1.112, + "step": 220 + }, + { + "epoch": 0.12, + "learning_rate": 4.9788007403070146e-05, + "loss": 1.1029, + "step": 225 + }, + { + "epoch": 0.13, + "learning_rate": 4.977849491453277e-05, + "loss": 1.0869, + "step": 230 + }, + { + "epoch": 0.13, + "learning_rate": 4.976877461189481e-05, + "loss": 1.0843, + "step": 235 + }, + { + "epoch": 0.13, + "learning_rate": 4.975884657667922e-05, + "loss": 1.0789, + "step": 240 + }, + { + "epoch": 0.14, + "learning_rate": 4.974871089215118e-05, + "loss": 1.0449, + "step": 245 + }, + { + "epoch": 0.14, + "learning_rate": 4.9738367643317405e-05, + "loss": 1.1053, + "step": 250 + }, + { + "epoch": 0.14, + "learning_rate": 4.9727816916925395e-05, + "loss": 1.0651, + "step": 255 + }, + { + "epoch": 0.14, + "learning_rate": 4.971705880146276e-05, + "loss": 1.0828, + "step": 260 + }, + { + "epoch": 0.15, + "learning_rate": 4.970609338715646e-05, + "loss": 1.0932, + "step": 265 + }, + { + "epoch": 0.15, + "learning_rate": 4.969492076597203e-05, + "loss": 1.0648, + "step": 270 + }, + { + "epoch": 0.15, + "learning_rate": 4.968354103161283e-05, + "loss": 1.0948, + "step": 275 + }, + { + "epoch": 0.15, + "learning_rate": 4.967195427951926e-05, + "loss": 1.0721, + "step": 280 + }, + { + "epoch": 0.16, + "learning_rate": 4.9660160606867936e-05, + "loss": 1.124, + "step": 285 + }, + { + "epoch": 0.16, + "learning_rate": 4.9648160112570896e-05, + "loss": 1.0963, + "step": 290 + }, + { + "epoch": 0.16, + "learning_rate": 4.9635952897274773e-05, + "loss": 1.1078, + "step": 295 + }, + { + "epoch": 0.17, + "learning_rate": 4.9623539063359925e-05, + "loss": 1.1059, + "step": 300 + }, + { + "epoch": 0.17, + "learning_rate": 4.961091871493962e-05, + "loss": 1.1032, + "step": 305 + }, + { + "epoch": 0.17, + "learning_rate": 4.959809195785912e-05, + "loss": 1.0595, + "step": 310 + }, + { + "epoch": 0.17, + "learning_rate": 4.958505889969481e-05, + "loss": 1.1096, + "step": 315 + }, + { + "epoch": 0.18, + "learning_rate": 4.957181964975329e-05, + "loss": 1.0589, + "step": 320 + }, + { + "epoch": 0.18, + "learning_rate": 4.955837431907049e-05, + "loss": 1.0608, + "step": 325 + }, + { + "epoch": 0.18, + "learning_rate": 4.954472302041069e-05, + "loss": 1.0819, + "step": 330 + }, + { + "epoch": 0.19, + "learning_rate": 4.9530865868265605e-05, + "loss": 1.0759, + "step": 335 + }, + { + "epoch": 0.19, + "learning_rate": 4.951680297885342e-05, + "loss": 1.0515, + "step": 340 + }, + { + "epoch": 0.19, + "learning_rate": 4.950253447011779e-05, + "loss": 1.0371, + "step": 345 + }, + { + "epoch": 0.19, + "learning_rate": 4.948806046172691e-05, + "loss": 1.0619, + "step": 350 + }, + { + "epoch": 0.2, + "learning_rate": 4.947338107507245e-05, + "loss": 1.0757, + "step": 355 + }, + { + "epoch": 0.2, + "learning_rate": 4.945849643326857e-05, + "loss": 1.0686, + "step": 360 + }, + { + "epoch": 0.2, + "learning_rate": 4.9443406661150874e-05, + "loss": 1.0809, + "step": 365 + }, + { + "epoch": 0.2, + "learning_rate": 4.942811188527537e-05, + "loss": 1.0704, + "step": 370 + }, + { + "epoch": 0.21, + "learning_rate": 4.941261223391742e-05, + "loss": 1.0655, + "step": 375 + }, + { + "epoch": 0.21, + "learning_rate": 4.939690783707063e-05, + "loss": 1.1182, + "step": 380 + }, + { + "epoch": 0.21, + "learning_rate": 4.938099882644578e-05, + "loss": 1.081, + "step": 385 + }, + { + "epoch": 0.22, + "learning_rate": 4.9364885335469734e-05, + "loss": 1.0792, + "step": 390 + }, + { + "epoch": 0.22, + "learning_rate": 4.93485674992843e-05, + "loss": 1.0244, + "step": 395 + }, + { + "epoch": 0.22, + "learning_rate": 4.933204545474511e-05, + "loss": 1.0531, + "step": 400 + }, + { + "epoch": 0.22, + "learning_rate": 4.9315319340420465e-05, + "loss": 1.048, + "step": 405 + }, + { + "epoch": 0.23, + "learning_rate": 4.929838929659015e-05, + "loss": 1.04, + "step": 410 + }, + { + "epoch": 0.23, + "learning_rate": 4.9281255465244314e-05, + "loss": 1.0686, + "step": 415 + }, + { + "epoch": 0.23, + "learning_rate": 4.926391799008223e-05, + "loss": 1.0933, + "step": 420 + }, + { + "epoch": 0.24, + "learning_rate": 4.924637701651111e-05, + "loss": 1.0313, + "step": 425 + }, + { + "epoch": 0.24, + "learning_rate": 4.9228632691644874e-05, + "loss": 1.0811, + "step": 430 + }, + { + "epoch": 0.24, + "learning_rate": 4.921068516430293e-05, + "loss": 1.0617, + "step": 435 + }, + { + "epoch": 0.24, + "learning_rate": 4.919253458500892e-05, + "loss": 1.0482, + "step": 440 + }, + { + "epoch": 0.25, + "learning_rate": 4.9174181105989445e-05, + "loss": 1.0681, + "step": 445 + }, + { + "epoch": 0.25, + "learning_rate": 4.9155624881172834e-05, + "loss": 1.0476, + "step": 450 + }, + { + "epoch": 0.25, + "learning_rate": 4.913686606618777e-05, + "loss": 1.0463, + "step": 455 + }, + { + "epoch": 0.25, + "learning_rate": 4.911790481836208e-05, + "loss": 1.0213, + "step": 460 + }, + { + "epoch": 0.26, + "learning_rate": 4.909874129672133e-05, + "loss": 0.9855, + "step": 465 + }, + { + "epoch": 0.26, + "learning_rate": 4.907937566198757e-05, + "loss": 1.045, + "step": 470 + }, + { + "epoch": 0.26, + "learning_rate": 4.9059808076577914e-05, + "loss": 1.0442, + "step": 475 + }, + { + "epoch": 0.27, + "learning_rate": 4.904003870460323e-05, + "loss": 1.0354, + "step": 480 + }, + { + "epoch": 0.27, + "learning_rate": 4.9020067711866735e-05, + "loss": 1.0331, + "step": 485 + }, + { + "epoch": 0.27, + "learning_rate": 4.899989526586261e-05, + "loss": 1.0585, + "step": 490 + }, + { + "epoch": 0.27, + "learning_rate": 4.8979521535774636e-05, + "loss": 1.0223, + "step": 495 + }, + { + "epoch": 0.28, + "learning_rate": 4.895894669247468e-05, + "loss": 1.0118, + "step": 500 + }, + { + "epoch": 0.28, + "learning_rate": 4.8938170908521356e-05, + "loss": 1.0508, + "step": 505 + }, + { + "epoch": 0.28, + "learning_rate": 4.8917194358158534e-05, + "loss": 1.0656, + "step": 510 + }, + { + "epoch": 0.28, + "learning_rate": 4.8896017217313886e-05, + "loss": 1.0655, + "step": 515 + }, + { + "epoch": 0.29, + "learning_rate": 4.887463966359741e-05, + "loss": 1.0833, + "step": 520 + }, + { + "epoch": 0.29, + "learning_rate": 4.8853061876299956e-05, + "loss": 1.068, + "step": 525 + }, + { + "epoch": 0.29, + "learning_rate": 4.8831284036391684e-05, + "loss": 1.0487, + "step": 530 + }, + { + "epoch": 0.3, + "learning_rate": 4.880930632652058e-05, + "loss": 1.059, + "step": 535 + }, + { + "epoch": 0.3, + "learning_rate": 4.878712893101092e-05, + "loss": 1.0408, + "step": 540 + }, + { + "epoch": 0.3, + "learning_rate": 4.876475203586171e-05, + "loss": 0.9976, + "step": 545 + }, + { + "epoch": 0.3, + "learning_rate": 4.874217582874514e-05, + "loss": 1.0358, + "step": 550 + }, + { + "epoch": 0.31, + "learning_rate": 4.8719400499005e-05, + "loss": 1.0073, + "step": 555 + }, + { + "epoch": 0.31, + "learning_rate": 4.869642623765509e-05, + "loss": 1.0742, + "step": 560 + }, + { + "epoch": 0.31, + "learning_rate": 4.867325323737765e-05, + "loss": 1.0463, + "step": 565 + }, + { + "epoch": 0.32, + "learning_rate": 4.864988169252168e-05, + "loss": 1.0242, + "step": 570 + }, + { + "epoch": 0.32, + "learning_rate": 4.8626311799101375e-05, + "loss": 1.0773, + "step": 575 + }, + { + "epoch": 0.32, + "learning_rate": 4.860254375479446e-05, + "loss": 1.0371, + "step": 580 + }, + { + "epoch": 0.32, + "learning_rate": 4.8578577758940504e-05, + "loss": 1.0399, + "step": 585 + }, + { + "epoch": 0.33, + "learning_rate": 4.855441401253928e-05, + "loss": 1.0411, + "step": 590 + }, + { + "epoch": 0.33, + "learning_rate": 4.8530052718249076e-05, + "loss": 1.0476, + "step": 595 + }, + { + "epoch": 0.33, + "learning_rate": 4.850549408038498e-05, + "loss": 1.0075, + "step": 600 + }, + { + "epoch": 0.33, + "learning_rate": 4.848073830491717e-05, + "loss": 1.0045, + "step": 605 + }, + { + "epoch": 0.34, + "learning_rate": 4.845578559946923e-05, + "loss": 1.0368, + "step": 610 + }, + { + "epoch": 0.34, + "learning_rate": 4.8430636173316306e-05, + "loss": 1.0156, + "step": 615 + }, + { + "epoch": 0.34, + "learning_rate": 4.840529023738348e-05, + "loss": 1.0334, + "step": 620 + }, + { + "epoch": 0.35, + "learning_rate": 4.837974800424389e-05, + "loss": 0.9994, + "step": 625 + }, + { + "epoch": 0.35, + "learning_rate": 4.8354009688117026e-05, + "loss": 1.0467, + "step": 630 + }, + { + "epoch": 0.35, + "learning_rate": 4.8328075504866874e-05, + "loss": 0.9968, + "step": 635 + }, + { + "epoch": 0.35, + "learning_rate": 4.8301945672000164e-05, + "loss": 1.041, + "step": 640 + }, + { + "epoch": 0.36, + "learning_rate": 4.8275620408664487e-05, + "loss": 0.9811, + "step": 645 + }, + { + "epoch": 0.36, + "learning_rate": 4.8249099935646494e-05, + "loss": 1.0235, + "step": 650 + }, + { + "epoch": 0.36, + "learning_rate": 4.822238447537003e-05, + "loss": 1.0152, + "step": 655 + }, + { + "epoch": 0.36, + "learning_rate": 4.819547425189429e-05, + "loss": 1.038, + "step": 660 + }, + { + "epoch": 0.37, + "learning_rate": 4.81683694909119e-05, + "loss": 1.0387, + "step": 665 + }, + { + "epoch": 0.37, + "learning_rate": 4.814107041974707e-05, + "loss": 1.019, + "step": 670 + }, + { + "epoch": 0.37, + "learning_rate": 4.811357726735366e-05, + "loss": 1.0403, + "step": 675 + }, + { + "epoch": 0.38, + "learning_rate": 4.808589026431324e-05, + "loss": 1.0828, + "step": 680 + }, + { + "epoch": 0.38, + "learning_rate": 4.805800964283322e-05, + "loss": 1.0128, + "step": 685 + }, + { + "epoch": 0.38, + "learning_rate": 4.802993563674483e-05, + "loss": 1.0502, + "step": 690 + }, + { + "epoch": 0.38, + "learning_rate": 4.80016684815012e-05, + "loss": 1.0169, + "step": 695 + }, + { + "epoch": 0.39, + "learning_rate": 4.7973208414175406e-05, + "loss": 1.0316, + "step": 700 + }, + { + "epoch": 0.39, + "learning_rate": 4.794455567345842e-05, + "loss": 1.0263, + "step": 705 + }, + { + "epoch": 0.39, + "learning_rate": 4.791571049965714e-05, + "loss": 1.0801, + "step": 710 + }, + { + "epoch": 0.4, + "learning_rate": 4.7886673134692404e-05, + "loss": 1.0575, + "step": 715 + }, + { + "epoch": 0.4, + "learning_rate": 4.7857443822096905e-05, + "loss": 0.9767, + "step": 720 + }, + { + "epoch": 0.4, + "learning_rate": 4.782802280701319e-05, + "loss": 0.9666, + "step": 725 + }, + { + "epoch": 0.4, + "learning_rate": 4.77984103361916e-05, + "loss": 0.9983, + "step": 730 + }, + { + "epoch": 0.41, + "learning_rate": 4.776860665798816e-05, + "loss": 1.0219, + "step": 735 + }, + { + "epoch": 0.41, + "learning_rate": 4.773861202236257e-05, + "loss": 0.9963, + "step": 740 + }, + { + "epoch": 0.41, + "learning_rate": 4.770842668087602e-05, + "loss": 1.0219, + "step": 745 + }, + { + "epoch": 0.41, + "learning_rate": 4.767805088668916e-05, + "loss": 1.0029, + "step": 750 + }, + { + "epoch": 0.42, + "learning_rate": 4.7647484894559936e-05, + "loss": 1.0077, + "step": 755 + }, + { + "epoch": 0.42, + "learning_rate": 4.7616728960841444e-05, + "loss": 0.9908, + "step": 760 + }, + { + "epoch": 0.42, + "learning_rate": 4.758578334347981e-05, + "loss": 1.0092, + "step": 765 + }, + { + "epoch": 0.43, + "learning_rate": 4.7554648302012015e-05, + "loss": 0.9874, + "step": 770 + }, + { + "epoch": 0.43, + "learning_rate": 4.7523324097563706e-05, + "loss": 1.0185, + "step": 775 + }, + { + "epoch": 0.43, + "learning_rate": 4.749181099284703e-05, + "loss": 1.0533, + "step": 780 + }, + { + "epoch": 0.43, + "learning_rate": 4.746010925215839e-05, + "loss": 1.0083, + "step": 785 + }, + { + "epoch": 0.44, + "learning_rate": 4.74282191413763e-05, + "loss": 1.0191, + "step": 790 + }, + { + "epoch": 0.44, + "learning_rate": 4.7396140927959045e-05, + "loss": 0.9786, + "step": 795 + }, + { + "epoch": 0.44, + "learning_rate": 4.7363874880942574e-05, + "loss": 1.0197, + "step": 800 + }, + { + "epoch": 0.45, + "learning_rate": 4.733142127093813e-05, + "loss": 1.0471, + "step": 805 + }, + { + "epoch": 0.45, + "learning_rate": 4.7298780370130014e-05, + "loss": 1.0134, + "step": 810 + }, + { + "epoch": 0.45, + "learning_rate": 4.726595245227336e-05, + "loss": 0.9801, + "step": 815 + }, + { + "epoch": 0.45, + "learning_rate": 4.723293779269173e-05, + "loss": 0.9944, + "step": 820 + }, + { + "epoch": 0.46, + "learning_rate": 4.7199736668274924e-05, + "loss": 1.0358, + "step": 825 + }, + { + "epoch": 0.46, + "learning_rate": 4.716634935747655e-05, + "loss": 0.9907, + "step": 830 + }, + { + "epoch": 0.46, + "learning_rate": 4.713277614031177e-05, + "loss": 1.0698, + "step": 835 + }, + { + "epoch": 0.46, + "learning_rate": 4.70990172983549e-05, + "loss": 1.0466, + "step": 840 + }, + { + "epoch": 0.47, + "learning_rate": 4.706507311473707e-05, + "loss": 0.9489, + "step": 845 + }, + { + "epoch": 0.47, + "learning_rate": 4.703094387414385e-05, + "loss": 0.9936, + "step": 850 + }, + { + "epoch": 0.47, + "learning_rate": 4.699662986281288e-05, + "loss": 0.9672, + "step": 855 + }, + { + "epoch": 0.48, + "learning_rate": 4.696213136853141e-05, + "loss": 1.0164, + "step": 860 + }, + { + "epoch": 0.48, + "learning_rate": 4.6927448680633954e-05, + "loss": 0.9817, + "step": 865 + }, + { + "epoch": 0.48, + "learning_rate": 4.689258208999983e-05, + "loss": 1.028, + "step": 870 + }, + { + "epoch": 0.48, + "learning_rate": 4.6857531889050716e-05, + "loss": 1.0239, + "step": 875 + }, + { + "epoch": 0.49, + "learning_rate": 4.682229837174821e-05, + "loss": 1.0378, + "step": 880 + }, + { + "epoch": 0.49, + "learning_rate": 4.678688183359135e-05, + "loss": 0.9908, + "step": 885 + }, + { + "epoch": 0.49, + "learning_rate": 4.675128257161418e-05, + "loss": 0.9384, + "step": 890 + }, + { + "epoch": 0.49, + "learning_rate": 4.671550088438319e-05, + "loss": 0.9481, + "step": 895 + }, + { + "epoch": 0.5, + "learning_rate": 4.6679537071994874e-05, + "loss": 0.9541, + "step": 900 + }, + { + "epoch": 0.5, + "learning_rate": 4.6643391436073165e-05, + "loss": 0.9787, + "step": 905 + }, + { + "epoch": 0.5, + "learning_rate": 4.660706427976693e-05, + "loss": 0.9897, + "step": 910 + }, + { + "epoch": 0.51, + "learning_rate": 4.657055590774745e-05, + "loss": 0.9947, + "step": 915 + }, + { + "epoch": 0.51, + "learning_rate": 4.6533866626205805e-05, + "loss": 0.9614, + "step": 920 + }, + { + "epoch": 0.51, + "learning_rate": 4.649699674285036e-05, + "loss": 1.0323, + "step": 925 + }, + { + "epoch": 0.51, + "learning_rate": 4.645994656690417e-05, + "loss": 1.0345, + "step": 930 + }, + { + "epoch": 0.52, + "learning_rate": 4.642271640910235e-05, + "loss": 1.0432, + "step": 935 + }, + { + "epoch": 0.52, + "learning_rate": 4.638530658168954e-05, + "loss": 1.0102, + "step": 940 + }, + { + "epoch": 0.52, + "learning_rate": 4.6347717398417203e-05, + "loss": 1.0361, + "step": 945 + }, + { + "epoch": 0.53, + "learning_rate": 4.6309949174541096e-05, + "loss": 0.9818, + "step": 950 + }, + { + "epoch": 0.53, + "learning_rate": 4.627200222681851e-05, + "loss": 1.0293, + "step": 955 + }, + { + "epoch": 0.53, + "learning_rate": 4.6233876873505694e-05, + "loss": 1.0125, + "step": 960 + }, + { + "epoch": 0.53, + "learning_rate": 4.619557343435516e-05, + "loss": 1.0039, + "step": 965 + }, + { + "epoch": 0.54, + "learning_rate": 4.615709223061302e-05, + "loss": 1.0146, + "step": 970 + }, + { + "epoch": 0.54, + "learning_rate": 4.611843358501624e-05, + "loss": 1.0352, + "step": 975 + }, + { + "epoch": 0.54, + "learning_rate": 4.6079597821789993e-05, + "loss": 1.0405, + "step": 980 + }, + { + "epoch": 0.54, + "learning_rate": 4.604058526664491e-05, + "loss": 0.9834, + "step": 985 + }, + { + "epoch": 0.55, + "learning_rate": 4.600139624677436e-05, + "loss": 1.0252, + "step": 990 + }, + { + "epoch": 0.55, + "learning_rate": 4.596203109085168e-05, + "loss": 1.0516, + "step": 995 + }, + { + "epoch": 0.55, + "learning_rate": 4.5922490129027464e-05, + "loss": 0.9762, + "step": 1000 + }, + { + "epoch": 0.56, + "learning_rate": 4.588277369292674e-05, + "loss": 0.9801, + "step": 1005 + }, + { + "epoch": 0.56, + "learning_rate": 4.5842882115646234e-05, + "loss": 1.0127, + "step": 1010 + }, + { + "epoch": 0.56, + "learning_rate": 4.580281573175157e-05, + "loss": 1.0415, + "step": 1015 + }, + { + "epoch": 0.56, + "learning_rate": 4.576257487727442e-05, + "loss": 0.9974, + "step": 1020 + }, + { + "epoch": 0.57, + "learning_rate": 4.572215988970974e-05, + "loss": 1.0097, + "step": 1025 + }, + { + "epoch": 0.57, + "learning_rate": 4.568157110801293e-05, + "loss": 1.0004, + "step": 1030 + }, + { + "epoch": 0.57, + "learning_rate": 4.5640808872596944e-05, + "loss": 0.9932, + "step": 1035 + }, + { + "epoch": 0.58, + "learning_rate": 4.5599873525329505e-05, + "loss": 0.9437, + "step": 1040 + }, + { + "epoch": 0.58, + "learning_rate": 4.555876540953019e-05, + "loss": 1.0004, + "step": 1045 + }, + { + "epoch": 0.58, + "learning_rate": 4.551748486996755e-05, + "loss": 0.9743, + "step": 1050 + }, + { + "epoch": 0.58, + "learning_rate": 4.547603225285626e-05, + "loss": 1.0303, + "step": 1055 + }, + { + "epoch": 0.59, + "learning_rate": 4.543440790585417e-05, + "loss": 1.0225, + "step": 1060 + }, + { + "epoch": 0.59, + "learning_rate": 4.539261217805939e-05, + "loss": 1.0102, + "step": 1065 + }, + { + "epoch": 0.59, + "learning_rate": 4.535064542000743e-05, + "loss": 1.0087, + "step": 1070 + }, + { + "epoch": 0.59, + "learning_rate": 4.5308507983668165e-05, + "loss": 0.952, + "step": 1075 + }, + { + "epoch": 0.6, + "learning_rate": 4.526620022244293e-05, + "loss": 0.9767, + "step": 1080 + }, + { + "epoch": 0.6, + "learning_rate": 4.522372249116158e-05, + "loss": 1.0049, + "step": 1085 + }, + { + "epoch": 0.6, + "learning_rate": 4.5181075146079456e-05, + "loss": 1.0397, + "step": 1090 + }, + { + "epoch": 0.61, + "learning_rate": 4.5138258544874455e-05, + "loss": 1.0167, + "step": 1095 + }, + { + "epoch": 0.61, + "learning_rate": 4.5095273046643985e-05, + "loss": 0.9966, + "step": 1100 + }, + { + "epoch": 0.61, + "learning_rate": 4.5052119011901986e-05, + "loss": 0.983, + "step": 1105 + }, + { + "epoch": 0.61, + "learning_rate": 4.500879680257587e-05, + "loss": 1.0301, + "step": 1110 + }, + { + "epoch": 0.62, + "learning_rate": 4.4965306782003535e-05, + "loss": 1.0371, + "step": 1115 + }, + { + "epoch": 0.62, + "learning_rate": 4.492164931493028e-05, + "loss": 1.0083, + "step": 1120 + }, + { + "epoch": 0.62, + "learning_rate": 4.487782476750575e-05, + "loss": 0.9728, + "step": 1125 + }, + { + "epoch": 0.62, + "learning_rate": 4.4833833507280884e-05, + "loss": 0.9892, + "step": 1130 + }, + { + "epoch": 0.63, + "learning_rate": 4.4789675903204805e-05, + "loss": 0.9847, + "step": 1135 + }, + { + "epoch": 0.63, + "learning_rate": 4.474535232562176e-05, + "loss": 1.0545, + "step": 1140 + }, + { + "epoch": 0.63, + "learning_rate": 4.470086314626797e-05, + "loss": 1.0204, + "step": 1145 + }, + { + "epoch": 0.64, + "learning_rate": 4.465620873826856e-05, + "loss": 0.9648, + "step": 1150 + }, + { + "epoch": 0.64, + "learning_rate": 4.46113894761344e-05, + "loss": 0.9545, + "step": 1155 + }, + { + "epoch": 0.64, + "learning_rate": 4.456640573575896e-05, + "loss": 1.0256, + "step": 1160 + }, + { + "epoch": 0.64, + "learning_rate": 4.4521257894415183e-05, + "loss": 0.99, + "step": 1165 + }, + { + "epoch": 0.65, + "learning_rate": 4.44759463307523e-05, + "loss": 0.9906, + "step": 1170 + }, + { + "epoch": 0.65, + "learning_rate": 4.443047142479266e-05, + "loss": 0.9876, + "step": 1175 + }, + { + "epoch": 0.65, + "learning_rate": 4.4384833557928553e-05, + "loss": 1.0495, + "step": 1180 + }, + { + "epoch": 0.66, + "learning_rate": 4.4339033112918966e-05, + "loss": 0.9869, + "step": 1185 + }, + { + "epoch": 0.66, + "learning_rate": 4.4293070473886456e-05, + "loss": 1.0299, + "step": 1190 + }, + { + "epoch": 0.66, + "learning_rate": 4.424694602631385e-05, + "loss": 1.0073, + "step": 1195 + }, + { + "epoch": 0.66, + "learning_rate": 4.420066015704105e-05, + "loss": 1.0023, + "step": 1200 + }, + { + "epoch": 0.67, + "learning_rate": 4.41542132542618e-05, + "loss": 0.9462, + "step": 1205 + }, + { + "epoch": 0.67, + "learning_rate": 4.410760570752037e-05, + "loss": 1.0116, + "step": 1210 + }, + { + "epoch": 0.67, + "learning_rate": 4.4060837907708375e-05, + "loss": 0.9652, + "step": 1215 + }, + { + "epoch": 0.67, + "learning_rate": 4.401391024706142e-05, + "loss": 1.0411, + "step": 1220 + }, + { + "epoch": 0.68, + "learning_rate": 4.396682311915586e-05, + "loss": 0.9691, + "step": 1225 + }, + { + "epoch": 0.68, + "learning_rate": 4.3919576918905495e-05, + "loss": 0.961, + "step": 1230 + }, + { + "epoch": 0.68, + "learning_rate": 4.387217204255819e-05, + "loss": 0.9602, + "step": 1235 + }, + { + "epoch": 0.69, + "learning_rate": 4.3824608887692666e-05, + "loss": 1.0197, + "step": 1240 + }, + { + "epoch": 0.69, + "learning_rate": 4.377688785321507e-05, + "loss": 0.9982, + "step": 1245 + }, + { + "epoch": 0.69, + "learning_rate": 4.372900933935569e-05, + "loss": 0.986, + "step": 1250 + }, + { + "epoch": 0.69, + "learning_rate": 4.368097374766556e-05, + "loss": 0.9744, + "step": 1255 + }, + { + "epoch": 0.7, + "learning_rate": 4.3632781481013105e-05, + "loss": 1.0078, + "step": 1260 + }, + { + "epoch": 0.7, + "learning_rate": 4.358443294358077e-05, + "loss": 0.9865, + "step": 1265 + }, + { + "epoch": 0.7, + "learning_rate": 4.35359285408616e-05, + "loss": 0.976, + "step": 1270 + }, + { + "epoch": 0.71, + "learning_rate": 4.348726867965591e-05, + "loss": 1.0141, + "step": 1275 + }, + { + "epoch": 0.71, + "learning_rate": 4.343845376806777e-05, + "loss": 0.952, + "step": 1280 + }, + { + "epoch": 0.71, + "learning_rate": 4.338948421550169e-05, + "loss": 1.0239, + "step": 1285 + }, + { + "epoch": 0.71, + "learning_rate": 4.334036043265909e-05, + "loss": 0.994, + "step": 1290 + }, + { + "epoch": 0.72, + "learning_rate": 4.329108283153492e-05, + "loss": 1.0505, + "step": 1295 + }, + { + "epoch": 0.72, + "learning_rate": 4.3241651825414195e-05, + "loss": 0.9919, + "step": 1300 + }, + { + "epoch": 0.72, + "learning_rate": 4.31920678288685e-05, + "loss": 1.0169, + "step": 1305 + }, + { + "epoch": 0.72, + "learning_rate": 4.3142331257752546e-05, + "loss": 0.9938, + "step": 1310 + }, + { + "epoch": 0.73, + "learning_rate": 4.309244252920064e-05, + "loss": 0.92, + "step": 1315 + }, + { + "epoch": 0.73, + "learning_rate": 4.304240206162326e-05, + "loss": 0.9537, + "step": 1320 + }, + { + "epoch": 0.73, + "learning_rate": 4.299221027470345e-05, + "loss": 0.996, + "step": 1325 + }, + { + "epoch": 0.74, + "learning_rate": 4.294186758939339e-05, + "loss": 0.9652, + "step": 1330 + }, + { + "epoch": 0.74, + "learning_rate": 4.2891374427910795e-05, + "loss": 1.0138, + "step": 1335 + }, + { + "epoch": 0.74, + "learning_rate": 4.284073121373544e-05, + "loss": 0.9267, + "step": 1340 + }, + { + "epoch": 0.74, + "learning_rate": 4.278993837160553e-05, + "loss": 0.9899, + "step": 1345 + }, + { + "epoch": 0.75, + "learning_rate": 4.273899632751422e-05, + "loss": 0.963, + "step": 1350 + }, + { + "epoch": 0.75, + "learning_rate": 4.2687905508705974e-05, + "loss": 1.0066, + "step": 1355 + }, + { + "epoch": 0.75, + "learning_rate": 4.263666634367303e-05, + "loss": 0.9939, + "step": 1360 + }, + { + "epoch": 0.75, + "learning_rate": 4.258527926215178e-05, + "loss": 0.9667, + "step": 1365 + }, + { + "epoch": 0.76, + "learning_rate": 4.253374469511917e-05, + "loss": 0.9999, + "step": 1370 + }, + { + "epoch": 0.76, + "learning_rate": 4.248206307478909e-05, + "loss": 0.9738, + "step": 1375 + }, + { + "epoch": 0.76, + "learning_rate": 4.243023483460875e-05, + "loss": 0.9829, + "step": 1380 + }, + { + "epoch": 0.77, + "learning_rate": 4.237826040925503e-05, + "loss": 1.0107, + "step": 1385 + }, + { + "epoch": 0.77, + "learning_rate": 4.232614023463088e-05, + "loss": 0.9992, + "step": 1390 + }, + { + "epoch": 0.77, + "learning_rate": 4.227387474786159e-05, + "loss": 0.9566, + "step": 1395 + }, + { + "epoch": 0.77, + "learning_rate": 4.222146438729119e-05, + "loss": 0.9722, + "step": 1400 + }, + { + "epoch": 0.78, + "learning_rate": 4.216890959247873e-05, + "loss": 0.988, + "step": 1405 + }, + { + "epoch": 0.78, + "learning_rate": 4.211621080419463e-05, + "loss": 1.0103, + "step": 1410 + }, + { + "epoch": 0.78, + "learning_rate": 4.206336846441695e-05, + "loss": 0.9805, + "step": 1415 + }, + { + "epoch": 0.79, + "learning_rate": 4.201038301632772e-05, + "loss": 1.0177, + "step": 1420 + }, + { + "epoch": 0.79, + "learning_rate": 4.195725490430917e-05, + "loss": 0.9938, + "step": 1425 + }, + { + "epoch": 0.79, + "learning_rate": 4.190398457394007e-05, + "loss": 1.0276, + "step": 1430 + }, + { + "epoch": 0.79, + "learning_rate": 4.1850572471991924e-05, + "loss": 0.9909, + "step": 1435 + }, + { + "epoch": 0.8, + "learning_rate": 4.1797019046425264e-05, + "loss": 0.9794, + "step": 1440 + }, + { + "epoch": 0.8, + "learning_rate": 4.1743324746385914e-05, + "loss": 1.0073, + "step": 1445 + }, + { + "epoch": 0.8, + "learning_rate": 4.1689490022201154e-05, + "loss": 1.0106, + "step": 1450 + }, + { + "epoch": 0.8, + "learning_rate": 4.163551532537601e-05, + "loss": 1.0241, + "step": 1455 + }, + { + "epoch": 0.81, + "learning_rate": 4.1581401108589425e-05, + "loss": 1.0178, + "step": 1460 + }, + { + "epoch": 0.81, + "learning_rate": 4.1527147825690495e-05, + "loss": 0.9426, + "step": 1465 + }, + { + "epoch": 0.81, + "learning_rate": 4.1472755931694626e-05, + "loss": 0.9642, + "step": 1470 + }, + { + "epoch": 0.82, + "learning_rate": 4.141822588277976e-05, + "loss": 0.9678, + "step": 1475 + }, + { + "epoch": 0.82, + "learning_rate": 4.136355813628251e-05, + "loss": 0.9654, + "step": 1480 + }, + { + "epoch": 0.82, + "learning_rate": 4.130875315069435e-05, + "loss": 0.9748, + "step": 1485 + }, + { + "epoch": 0.82, + "learning_rate": 4.125381138565775e-05, + "loss": 0.9321, + "step": 1490 + }, + { + "epoch": 0.83, + "learning_rate": 4.1198733301962346e-05, + "loss": 0.9885, + "step": 1495 + }, + { + "epoch": 0.83, + "learning_rate": 4.114351936154105e-05, + "loss": 0.9385, + "step": 1500 + }, + { + "epoch": 0.83, + "learning_rate": 4.108817002746619e-05, + "loss": 0.9362, + "step": 1505 + }, + { + "epoch": 0.83, + "learning_rate": 4.1032685763945625e-05, + "loss": 0.9764, + "step": 1510 + }, + { + "epoch": 0.84, + "learning_rate": 4.097706703631886e-05, + "loss": 0.961, + "step": 1515 + }, + { + "epoch": 0.84, + "learning_rate": 4.092131431105312e-05, + "loss": 0.9818, + "step": 1520 + }, + { + "epoch": 0.84, + "learning_rate": 4.086542805573945e-05, + "loss": 0.9855, + "step": 1525 + }, + { + "epoch": 0.85, + "learning_rate": 4.080940873908881e-05, + "loss": 0.9618, + "step": 1530 + }, + { + "epoch": 0.85, + "learning_rate": 4.07532568309281e-05, + "loss": 1.0102, + "step": 1535 + }, + { + "epoch": 0.85, + "learning_rate": 4.069697280219628e-05, + "loss": 0.9773, + "step": 1540 + }, + { + "epoch": 0.85, + "learning_rate": 4.0640557124940376e-05, + "loss": 0.9687, + "step": 1545 + }, + { + "epoch": 0.86, + "learning_rate": 4.058401027231152e-05, + "loss": 0.9818, + "step": 1550 + }, + { + "epoch": 0.86, + "learning_rate": 4.052733271856103e-05, + "loss": 0.9911, + "step": 1555 + }, + { + "epoch": 0.86, + "learning_rate": 4.0470524939036355e-05, + "loss": 0.9694, + "step": 1560 + }, + { + "epoch": 0.87, + "learning_rate": 4.0413587410177155e-05, + "loss": 0.9374, + "step": 1565 + }, + { + "epoch": 0.87, + "learning_rate": 4.035652060951128e-05, + "loss": 0.956, + "step": 1570 + }, + { + "epoch": 0.87, + "learning_rate": 4.0299325015650774e-05, + "loss": 0.9491, + "step": 1575 + }, + { + "epoch": 0.87, + "learning_rate": 4.024200110828783e-05, + "loss": 0.9569, + "step": 1580 + }, + { + "epoch": 0.88, + "learning_rate": 4.018454936819082e-05, + "loss": 0.9627, + "step": 1585 + }, + { + "epoch": 0.88, + "learning_rate": 4.012697027720018e-05, + "loss": 0.9922, + "step": 1590 + }, + { + "epoch": 0.88, + "learning_rate": 4.0069264318224506e-05, + "loss": 0.9703, + "step": 1595 + }, + { + "epoch": 0.88, + "learning_rate": 4.0011431975236337e-05, + "loss": 0.9436, + "step": 1600 + }, + { + "epoch": 0.89, + "learning_rate": 3.995347373326822e-05, + "loss": 0.961, + "step": 1605 + }, + { + "epoch": 0.89, + "learning_rate": 3.989539007840861e-05, + "loss": 0.9247, + "step": 1610 + }, + { + "epoch": 0.89, + "learning_rate": 3.983718149779775e-05, + "loss": 0.9537, + "step": 1615 + }, + { + "epoch": 0.9, + "learning_rate": 3.9778848479623656e-05, + "loss": 0.9415, + "step": 1620 + }, + { + "epoch": 0.9, + "learning_rate": 3.972039151311795e-05, + "loss": 0.9954, + "step": 1625 + }, + { + "epoch": 0.9, + "learning_rate": 3.966181108855183e-05, + "loss": 0.9451, + "step": 1630 + }, + { + "epoch": 0.9, + "learning_rate": 3.960310769723189e-05, + "loss": 0.9977, + "step": 1635 + }, + { + "epoch": 0.91, + "learning_rate": 3.9544281831496034e-05, + "loss": 0.9806, + "step": 1640 + }, + { + "epoch": 0.91, + "learning_rate": 3.9485333984709374e-05, + "loss": 0.9851, + "step": 1645 + }, + { + "epoch": 0.91, + "learning_rate": 3.942626465126001e-05, + "loss": 1.0089, + "step": 1650 + }, + { + "epoch": 0.92, + "learning_rate": 3.9367074326555e-05, + "loss": 0.9562, + "step": 1655 + }, + { + "epoch": 0.92, + "learning_rate": 3.930776350701609e-05, + "loss": 0.9892, + "step": 1660 + }, + { + "epoch": 0.92, + "learning_rate": 3.92483326900756e-05, + "loss": 0.9875, + "step": 1665 + }, + { + "epoch": 0.92, + "learning_rate": 3.91887823741723e-05, + "loss": 1.0012, + "step": 1670 + }, + { + "epoch": 0.93, + "learning_rate": 3.9129113058747136e-05, + "loss": 0.97, + "step": 1675 + }, + { + "epoch": 0.93, + "learning_rate": 3.9069325244239095e-05, + "loss": 0.9793, + "step": 1680 + }, + { + "epoch": 0.93, + "learning_rate": 3.900941943208103e-05, + "loss": 0.9864, + "step": 1685 + }, + { + "epoch": 0.93, + "learning_rate": 3.894939612469539e-05, + "loss": 0.9596, + "step": 1690 + }, + { + "epoch": 0.94, + "learning_rate": 3.888925582549006e-05, + "loss": 0.9679, + "step": 1695 + }, + { + "epoch": 0.94, + "learning_rate": 3.882899903885412e-05, + "loss": 0.9395, + "step": 1700 + }, + { + "epoch": 0.94, + "learning_rate": 3.876862627015361e-05, + "loss": 0.9774, + "step": 1705 + }, + { + "epoch": 0.95, + "learning_rate": 3.87081380257273e-05, + "loss": 0.9686, + "step": 1710 + }, + { + "epoch": 0.95, + "learning_rate": 3.864753481288244e-05, + "loss": 0.953, + "step": 1715 + }, + { + "epoch": 0.95, + "learning_rate": 3.8586817139890515e-05, + "loss": 0.9719, + "step": 1720 + }, + { + "epoch": 0.95, + "learning_rate": 3.852598551598294e-05, + "loss": 0.9748, + "step": 1725 + }, + { + "epoch": 0.96, + "learning_rate": 3.8465040451346874e-05, + "loss": 0.9741, + "step": 1730 + }, + { + "epoch": 0.96, + "learning_rate": 3.8403982457120836e-05, + "loss": 0.9747, + "step": 1735 + }, + { + "epoch": 0.96, + "learning_rate": 3.834281204539051e-05, + "loss": 0.9791, + "step": 1740 + }, + { + "epoch": 0.96, + "learning_rate": 3.828152972918438e-05, + "loss": 0.9704, + "step": 1745 + }, + { + "epoch": 0.97, + "learning_rate": 3.82201360224695e-05, + "loss": 0.9556, + "step": 1750 + }, + { + "epoch": 0.97, + "learning_rate": 3.815863144014711e-05, + "loss": 0.9552, + "step": 1755 + }, + { + "epoch": 0.97, + "learning_rate": 3.809701649804834e-05, + "loss": 0.9594, + "step": 1760 + }, + { + "epoch": 0.98, + "learning_rate": 3.8035291712929926e-05, + "loss": 0.959, + "step": 1765 + }, + { + "epoch": 0.98, + "learning_rate": 3.797345760246982e-05, + "loss": 1.0254, + "step": 1770 + }, + { + "epoch": 0.98, + "learning_rate": 3.791151468526289e-05, + "loss": 0.9925, + "step": 1775 + }, + { + "epoch": 0.98, + "learning_rate": 3.784946348081654e-05, + "loss": 0.9516, + "step": 1780 + }, + { + "epoch": 0.99, + "learning_rate": 3.7787304509546365e-05, + "loss": 0.954, + "step": 1785 + }, + { + "epoch": 0.99, + "learning_rate": 3.7725038292771774e-05, + "loss": 0.9465, + "step": 1790 + }, + { + "epoch": 0.99, + "learning_rate": 3.766266535271167e-05, + "loss": 0.9792, + "step": 1795 + }, + { + "epoch": 1.0, + "learning_rate": 3.760018621248e-05, + "loss": 0.964, + "step": 1800 + }, + { + "epoch": 1.0, + "learning_rate": 3.75376013960814e-05, + "loss": 0.9419, + "step": 1805 + }, + { + "epoch": 1.0, + "learning_rate": 3.747491142840681e-05, + "loss": 0.9818, + "step": 1810 + }, + { + "epoch": 1.0, + "learning_rate": 3.741211683522904e-05, + "loss": 0.9153, + "step": 1815 + }, + { + "epoch": 1.01, + "learning_rate": 3.734921814319841e-05, + "loss": 0.955, + "step": 1820 + }, + { + "epoch": 1.01, + "learning_rate": 3.728621587983828e-05, + "loss": 0.9467, + "step": 1825 + }, + { + "epoch": 1.01, + "learning_rate": 3.722311057354067e-05, + "loss": 0.9816, + "step": 1830 + }, + { + "epoch": 1.01, + "learning_rate": 3.715990275356178e-05, + "loss": 0.9727, + "step": 1835 + }, + { + "epoch": 1.02, + "learning_rate": 3.7096592950017617e-05, + "loss": 0.9822, + "step": 1840 + }, + { + "epoch": 1.02, + "learning_rate": 3.703318169387947e-05, + "loss": 0.942, + "step": 1845 + }, + { + "epoch": 1.02, + "learning_rate": 3.696966951696952e-05, + "loss": 0.9306, + "step": 1850 + }, + { + "epoch": 1.03, + "learning_rate": 3.690605695195637e-05, + "loss": 0.9654, + "step": 1855 + }, + { + "epoch": 1.03, + "learning_rate": 3.684234453235054e-05, + "loss": 0.9592, + "step": 1860 + }, + { + "epoch": 1.03, + "learning_rate": 3.677853279250003e-05, + "loss": 0.988, + "step": 1865 + }, + { + "epoch": 1.03, + "learning_rate": 3.671462226758583e-05, + "loss": 0.9462, + "step": 1870 + }, + { + "epoch": 1.04, + "learning_rate": 3.665061349361742e-05, + "loss": 0.9685, + "step": 1875 + }, + { + "epoch": 1.04, + "learning_rate": 3.658650700742828e-05, + "loss": 0.9772, + "step": 1880 + }, + { + "epoch": 1.04, + "learning_rate": 3.6522303346671404e-05, + "loss": 0.9482, + "step": 1885 + }, + { + "epoch": 1.05, + "learning_rate": 3.645800304981477e-05, + "loss": 1.0069, + "step": 1890 + }, + { + "epoch": 1.05, + "learning_rate": 3.639360665613683e-05, + "loss": 0.9003, + "step": 1895 + }, + { + "epoch": 1.05, + "learning_rate": 3.632911470572197e-05, + "loss": 0.9279, + "step": 1900 + }, + { + "epoch": 1.05, + "learning_rate": 3.626452773945603e-05, + "loss": 0.9237, + "step": 1905 + }, + { + "epoch": 1.06, + "learning_rate": 3.619984629902172e-05, + "loss": 1.0086, + "step": 1910 + }, + { + "epoch": 1.06, + "learning_rate": 3.613507092689409e-05, + "loss": 0.9625, + "step": 1915 + }, + { + "epoch": 1.06, + "learning_rate": 3.607020216633599e-05, + "loss": 0.9297, + "step": 1920 + }, + { + "epoch": 1.06, + "learning_rate": 3.60052405613935e-05, + "loss": 0.9718, + "step": 1925 + }, + { + "epoch": 1.07, + "learning_rate": 3.594018665689139e-05, + "loss": 0.9512, + "step": 1930 + }, + { + "epoch": 1.07, + "learning_rate": 3.5875040998428513e-05, + "loss": 0.9923, + "step": 1935 + }, + { + "epoch": 1.07, + "learning_rate": 3.5809804132373253e-05, + "loss": 0.9518, + "step": 1940 + }, + { + "epoch": 1.08, + "learning_rate": 3.574447660585897e-05, + "loss": 0.9324, + "step": 1945 + }, + { + "epoch": 1.08, + "learning_rate": 3.5679058966779344e-05, + "loss": 0.9724, + "step": 1950 + }, + { + "epoch": 1.08, + "learning_rate": 3.561355176378384e-05, + "loss": 0.978, + "step": 1955 + }, + { + "epoch": 1.08, + "learning_rate": 3.554795554627307e-05, + "loss": 0.9893, + "step": 1960 + }, + { + "epoch": 1.09, + "learning_rate": 3.548227086439422e-05, + "loss": 0.967, + "step": 1965 + }, + { + "epoch": 1.09, + "learning_rate": 3.541649826903639e-05, + "loss": 0.9529, + "step": 1970 + }, + { + "epoch": 1.09, + "learning_rate": 3.535063831182602e-05, + "loss": 0.9506, + "step": 1975 + }, + { + "epoch": 1.09, + "learning_rate": 3.528469154512224e-05, + "loss": 0.9525, + "step": 1980 + }, + { + "epoch": 1.1, + "learning_rate": 3.521865852201223e-05, + "loss": 0.9258, + "step": 1985 + }, + { + "epoch": 1.1, + "learning_rate": 3.5152539796306596e-05, + "loss": 0.9417, + "step": 1990 + }, + { + "epoch": 1.1, + "learning_rate": 3.508633592253472e-05, + "loss": 0.9465, + "step": 1995 + }, + { + "epoch": 1.11, + "learning_rate": 3.502004745594011e-05, + "loss": 0.9494, + "step": 2000 + }, + { + "epoch": 1.11, + "learning_rate": 3.4953674952475755e-05, + "loss": 0.9709, + "step": 2005 + }, + { + "epoch": 1.11, + "learning_rate": 3.488721896879943e-05, + "loss": 0.9581, + "step": 2010 + }, + { + "epoch": 1.11, + "learning_rate": 3.4820680062269074e-05, + "loss": 0.974, + "step": 2015 + }, + { + "epoch": 1.12, + "learning_rate": 3.4754058790938046e-05, + "loss": 0.9768, + "step": 2020 + }, + { + "epoch": 1.12, + "learning_rate": 3.468735571355055e-05, + "loss": 0.982, + "step": 2025 + }, + { + "epoch": 1.12, + "learning_rate": 3.4620571389536825e-05, + "loss": 0.984, + "step": 2030 + }, + { + "epoch": 1.13, + "learning_rate": 3.455370637900856e-05, + "loss": 0.9604, + "step": 2035 + }, + { + "epoch": 1.13, + "learning_rate": 3.448676124275414e-05, + "loss": 0.897, + "step": 2040 + }, + { + "epoch": 1.13, + "learning_rate": 3.4419736542233925e-05, + "loss": 0.9968, + "step": 2045 + }, + { + "epoch": 1.13, + "learning_rate": 3.4352632839575616e-05, + "loss": 0.9479, + "step": 2050 + }, + { + "epoch": 1.14, + "learning_rate": 3.428545069756946e-05, + "loss": 0.9724, + "step": 2055 + }, + { + "epoch": 1.14, + "learning_rate": 3.42181906796636e-05, + "loss": 0.9493, + "step": 2060 + }, + { + "epoch": 1.14, + "learning_rate": 3.415085334995927e-05, + "loss": 0.9348, + "step": 2065 + }, + { + "epoch": 1.14, + "learning_rate": 3.408343927320613e-05, + "loss": 0.9702, + "step": 2070 + }, + { + "epoch": 1.15, + "learning_rate": 3.401594901479753e-05, + "loss": 0.9089, + "step": 2075 + }, + { + "epoch": 1.15, + "learning_rate": 3.394838314076572e-05, + "loss": 0.9606, + "step": 2080 + }, + { + "epoch": 1.15, + "learning_rate": 3.3880742217777115e-05, + "loss": 0.9743, + "step": 2085 + }, + { + "epoch": 1.16, + "learning_rate": 3.381302681312759e-05, + "loss": 0.9469, + "step": 2090 + }, + { + "epoch": 1.16, + "learning_rate": 3.374523749473767e-05, + "loss": 0.949, + "step": 2095 + }, + { + "epoch": 1.16, + "learning_rate": 3.367737483114779e-05, + "loss": 0.9567, + "step": 2100 + }, + { + "epoch": 1.16, + "learning_rate": 3.360943939151351e-05, + "loss": 0.9718, + "step": 2105 + }, + { + "epoch": 1.17, + "learning_rate": 3.354143174560078e-05, + "loss": 0.9626, + "step": 2110 + }, + { + "epoch": 1.17, + "learning_rate": 3.3473352463781105e-05, + "loss": 0.9346, + "step": 2115 + }, + { + "epoch": 1.17, + "learning_rate": 3.340520211702681e-05, + "loss": 0.9208, + "step": 2120 + }, + { + "epoch": 1.18, + "learning_rate": 3.333698127690623e-05, + "loss": 0.9856, + "step": 2125 + }, + { + "epoch": 1.18, + "learning_rate": 3.326869051557891e-05, + "loss": 0.9049, + "step": 2130 + }, + { + "epoch": 1.18, + "learning_rate": 3.320033040579082e-05, + "loss": 0.9222, + "step": 2135 + }, + { + "epoch": 1.18, + "learning_rate": 3.3131901520869565e-05, + "loss": 0.9648, + "step": 2140 + }, + { + "epoch": 1.19, + "learning_rate": 3.306340443471951e-05, + "loss": 0.9538, + "step": 2145 + }, + { + "epoch": 1.19, + "learning_rate": 3.299483972181708e-05, + "loss": 0.9314, + "step": 2150 + }, + { + "epoch": 1.19, + "learning_rate": 3.292620795720583e-05, + "loss": 0.9576, + "step": 2155 + }, + { + "epoch": 1.19, + "learning_rate": 3.285750971649167e-05, + "loss": 0.9499, + "step": 2160 + }, + { + "epoch": 1.2, + "learning_rate": 3.278874557583807e-05, + "loss": 0.9568, + "step": 2165 + }, + { + "epoch": 1.2, + "learning_rate": 3.271991611196117e-05, + "loss": 0.9642, + "step": 2170 + }, + { + "epoch": 1.2, + "learning_rate": 3.265102190212497e-05, + "loss": 0.9526, + "step": 2175 + }, + { + "epoch": 1.21, + "learning_rate": 3.258206352413648e-05, + "loss": 0.933, + "step": 2180 + }, + { + "epoch": 1.21, + "learning_rate": 3.2513041556340887e-05, + "loss": 0.9683, + "step": 2185 + }, + { + "epoch": 1.21, + "learning_rate": 3.244395657761671e-05, + "loss": 0.9155, + "step": 2190 + }, + { + "epoch": 1.21, + "learning_rate": 3.2374809167370924e-05, + "loss": 0.9262, + "step": 2195 + }, + { + "epoch": 1.22, + "learning_rate": 3.230559990553409e-05, + "loss": 0.9778, + "step": 2200 + }, + { + "epoch": 1.22, + "learning_rate": 3.2236329372555544e-05, + "loss": 0.9577, + "step": 2205 + }, + { + "epoch": 1.22, + "learning_rate": 3.2166998149398465e-05, + "loss": 0.9286, + "step": 2210 + }, + { + "epoch": 1.22, + "learning_rate": 3.209760681753505e-05, + "loss": 0.9634, + "step": 2215 + }, + { + "epoch": 1.23, + "learning_rate": 3.2028155958941615e-05, + "loss": 0.9467, + "step": 2220 + }, + { + "epoch": 1.23, + "learning_rate": 3.195864615609373e-05, + "loss": 0.9543, + "step": 2225 + }, + { + "epoch": 1.23, + "learning_rate": 3.1889077991961304e-05, + "loss": 0.9914, + "step": 2230 + }, + { + "epoch": 1.24, + "learning_rate": 3.181945205000373e-05, + "loss": 0.9309, + "step": 2235 + }, + { + "epoch": 1.24, + "learning_rate": 3.1749768914164955e-05, + "loss": 0.9299, + "step": 2240 + }, + { + "epoch": 1.24, + "learning_rate": 3.168002916886864e-05, + "loss": 0.9462, + "step": 2245 + }, + { + "epoch": 1.24, + "learning_rate": 3.1610233399013194e-05, + "loss": 0.948, + "step": 2250 + }, + { + "epoch": 1.25, + "learning_rate": 3.15403821899669e-05, + "loss": 0.9335, + "step": 2255 + }, + { + "epoch": 1.25, + "learning_rate": 3.147047612756302e-05, + "loss": 0.952, + "step": 2260 + }, + { + "epoch": 1.25, + "learning_rate": 3.140051579809484e-05, + "loss": 0.9532, + "step": 2265 + }, + { + "epoch": 1.26, + "learning_rate": 3.133050178831079e-05, + "loss": 0.9853, + "step": 2270 + }, + { + "epoch": 1.26, + "learning_rate": 3.12604346854095e-05, + "loss": 0.8925, + "step": 2275 + }, + { + "epoch": 1.26, + "learning_rate": 3.119031507703491e-05, + "loss": 0.942, + "step": 2280 + }, + { + "epoch": 1.26, + "learning_rate": 3.112014355127129e-05, + "loss": 0.9132, + "step": 2285 + }, + { + "epoch": 1.27, + "learning_rate": 3.104992069663835e-05, + "loss": 0.9335, + "step": 2290 + }, + { + "epoch": 1.27, + "learning_rate": 3.0979647102086273e-05, + "loss": 0.9403, + "step": 2295 + }, + { + "epoch": 1.27, + "learning_rate": 3.090932335699081e-05, + "loss": 0.9246, + "step": 2300 + }, + { + "epoch": 1.27, + "learning_rate": 3.083895005114831e-05, + "loss": 0.912, + "step": 2305 + }, + { + "epoch": 1.28, + "learning_rate": 3.076852777477079e-05, + "loss": 0.9334, + "step": 2310 + }, + { + "epoch": 1.28, + "learning_rate": 3.069805711848096e-05, + "loss": 0.933, + "step": 2315 + }, + { + "epoch": 1.28, + "learning_rate": 3.062753867330729e-05, + "loss": 0.9348, + "step": 2320 + }, + { + "epoch": 1.29, + "learning_rate": 3.055697303067905e-05, + "loss": 0.9997, + "step": 2325 + }, + { + "epoch": 1.29, + "learning_rate": 3.048636078242137e-05, + "loss": 0.9196, + "step": 2330 + }, + { + "epoch": 1.29, + "learning_rate": 3.0415702520750235e-05, + "loss": 0.9735, + "step": 2335 + }, + { + "epoch": 1.29, + "learning_rate": 3.0344998838267525e-05, + "loss": 0.9395, + "step": 2340 + }, + { + "epoch": 1.3, + "learning_rate": 3.0274250327956093e-05, + "loss": 0.9455, + "step": 2345 + }, + { + "epoch": 1.3, + "learning_rate": 3.020345758317474e-05, + "loss": 0.968, + "step": 2350 + }, + { + "epoch": 1.3, + "learning_rate": 3.0132621197653245e-05, + "loss": 0.9403, + "step": 2355 + }, + { + "epoch": 1.3, + "learning_rate": 3.0061741765487418e-05, + "loss": 0.9267, + "step": 2360 + }, + { + "epoch": 1.31, + "learning_rate": 2.9990819881134073e-05, + "loss": 0.9734, + "step": 2365 + }, + { + "epoch": 1.31, + "learning_rate": 2.9919856139406093e-05, + "loss": 0.949, + "step": 2370 + }, + { + "epoch": 1.31, + "learning_rate": 2.9848851135467386e-05, + "loss": 0.9464, + "step": 2375 + }, + { + "epoch": 1.32, + "learning_rate": 2.977780546482794e-05, + "loss": 0.9709, + "step": 2380 + }, + { + "epoch": 1.32, + "learning_rate": 2.9706719723338795e-05, + "loss": 0.9202, + "step": 2385 + }, + { + "epoch": 1.32, + "learning_rate": 2.9635594507187074e-05, + "loss": 0.9625, + "step": 2390 + }, + { + "epoch": 1.32, + "learning_rate": 2.956443041289096e-05, + "loss": 0.9307, + "step": 2395 + }, + { + "epoch": 1.33, + "learning_rate": 2.9493228037294702e-05, + "loss": 0.907, + "step": 2400 + }, + { + "epoch": 1.33, + "learning_rate": 2.9421987977563613e-05, + "loss": 0.9487, + "step": 2405 + }, + { + "epoch": 1.33, + "learning_rate": 2.935071083117907e-05, + "loss": 0.9259, + "step": 2410 + }, + { + "epoch": 1.34, + "learning_rate": 2.9279397195933457e-05, + "loss": 0.9357, + "step": 2415 + }, + { + "epoch": 1.34, + "learning_rate": 2.920804766992521e-05, + "loss": 0.9149, + "step": 2420 + }, + { + "epoch": 1.34, + "learning_rate": 2.9136662851553787e-05, + "loss": 0.9664, + "step": 2425 + }, + { + "epoch": 1.34, + "learning_rate": 2.906524333951461e-05, + "loss": 0.973, + "step": 2430 + }, + { + "epoch": 1.35, + "learning_rate": 2.899378973279409e-05, + "loss": 0.9797, + "step": 2435 + }, + { + "epoch": 1.35, + "learning_rate": 2.892230263066459e-05, + "loss": 0.9604, + "step": 2440 + }, + { + "epoch": 1.35, + "learning_rate": 2.885078263267938e-05, + "loss": 0.9147, + "step": 2445 + }, + { + "epoch": 1.35, + "learning_rate": 2.8779230338667634e-05, + "loss": 0.9827, + "step": 2450 + }, + { + "epoch": 1.36, + "learning_rate": 2.870764634872939e-05, + "loss": 0.9362, + "step": 2455 + }, + { + "epoch": 1.36, + "learning_rate": 2.86360312632305e-05, + "loss": 0.9373, + "step": 2460 + }, + { + "epoch": 1.36, + "learning_rate": 2.8564385682797622e-05, + "loss": 0.955, + "step": 2465 + }, + { + "epoch": 1.37, + "learning_rate": 2.8492710208313177e-05, + "loss": 0.9108, + "step": 2470 + }, + { + "epoch": 1.37, + "learning_rate": 2.8421005440910303e-05, + "loss": 0.9259, + "step": 2475 + }, + { + "epoch": 1.37, + "learning_rate": 2.8349271981967797e-05, + "loss": 0.9333, + "step": 2480 + }, + { + "epoch": 1.37, + "learning_rate": 2.8277510433105102e-05, + "loss": 0.9437, + "step": 2485 + }, + { + "epoch": 1.38, + "learning_rate": 2.820572139617725e-05, + "loss": 0.9189, + "step": 2490 + }, + { + "epoch": 1.38, + "learning_rate": 2.8133905473269802e-05, + "loss": 0.9522, + "step": 2495 + }, + { + "epoch": 1.38, + "learning_rate": 2.8062063266693818e-05, + "loss": 0.9352, + "step": 2500 + }, + { + "epoch": 1.39, + "learning_rate": 2.7990195378980784e-05, + "loss": 0.9232, + "step": 2505 + }, + { + "epoch": 1.39, + "learning_rate": 2.7918302412877583e-05, + "loss": 0.8807, + "step": 2510 + }, + { + "epoch": 1.39, + "learning_rate": 2.7846384971341427e-05, + "loss": 0.9231, + "step": 2515 + }, + { + "epoch": 1.39, + "learning_rate": 2.7774443657534788e-05, + "loss": 0.9253, + "step": 2520 + }, + { + "epoch": 1.4, + "learning_rate": 2.770247907482036e-05, + "loss": 0.9646, + "step": 2525 + }, + { + "epoch": 1.4, + "learning_rate": 2.763049182675599e-05, + "loss": 0.9575, + "step": 2530 + }, + { + "epoch": 1.4, + "learning_rate": 2.7558482517089617e-05, + "loss": 0.9234, + "step": 2535 + }, + { + "epoch": 1.4, + "learning_rate": 2.748645174975421e-05, + "loss": 0.9215, + "step": 2540 + }, + { + "epoch": 1.41, + "learning_rate": 2.74144001288627e-05, + "loss": 0.959, + "step": 2545 + }, + { + "epoch": 1.41, + "learning_rate": 2.7342328258702894e-05, + "loss": 0.9509, + "step": 2550 + }, + { + "epoch": 1.41, + "learning_rate": 2.727023674373246e-05, + "loss": 0.8989, + "step": 2555 + }, + { + "epoch": 1.42, + "learning_rate": 2.7198126188573807e-05, + "loss": 0.9653, + "step": 2560 + }, + { + "epoch": 1.42, + "learning_rate": 2.7125997198009028e-05, + "loss": 0.9046, + "step": 2565 + }, + { + "epoch": 1.42, + "learning_rate": 2.7053850376974848e-05, + "loss": 0.9318, + "step": 2570 + }, + { + "epoch": 1.42, + "learning_rate": 2.6981686330557516e-05, + "loss": 0.9292, + "step": 2575 + }, + { + "epoch": 1.43, + "learning_rate": 2.6909505663987756e-05, + "loss": 0.99, + "step": 2580 + }, + { + "epoch": 1.43, + "learning_rate": 2.6837308982635678e-05, + "loss": 0.9737, + "step": 2585 + }, + { + "epoch": 1.43, + "learning_rate": 2.6765096892005726e-05, + "loss": 0.9649, + "step": 2590 + }, + { + "epoch": 1.43, + "learning_rate": 2.6692869997731545e-05, + "loss": 0.9687, + "step": 2595 + }, + { + "epoch": 1.44, + "learning_rate": 2.6620628905570964e-05, + "loss": 0.9708, + "step": 2600 + }, + { + "epoch": 1.44, + "learning_rate": 2.6548374221400884e-05, + "loss": 0.9498, + "step": 2605 + }, + { + "epoch": 1.44, + "learning_rate": 2.6476106551212188e-05, + "loss": 0.9512, + "step": 2610 + }, + { + "epoch": 1.45, + "learning_rate": 2.6403826501104682e-05, + "loss": 0.964, + "step": 2615 + }, + { + "epoch": 1.45, + "learning_rate": 2.6331534677281998e-05, + "loss": 0.9321, + "step": 2620 + }, + { + "epoch": 1.45, + "learning_rate": 2.6259231686046508e-05, + "loss": 0.9032, + "step": 2625 + }, + { + "epoch": 1.45, + "learning_rate": 2.6186918133794252e-05, + "loss": 0.9543, + "step": 2630 + }, + { + "epoch": 1.46, + "learning_rate": 2.6114594627009847e-05, + "loss": 0.9355, + "step": 2635 + }, + { + "epoch": 1.46, + "learning_rate": 2.604226177226137e-05, + "loss": 0.9111, + "step": 2640 + }, + { + "epoch": 1.46, + "learning_rate": 2.596992017619534e-05, + "loss": 0.9896, + "step": 2645 + }, + { + "epoch": 1.47, + "learning_rate": 2.589757044553155e-05, + "loss": 0.9715, + "step": 2650 + }, + { + "epoch": 1.47, + "learning_rate": 2.5825213187058045e-05, + "loss": 0.8911, + "step": 2655 + }, + { + "epoch": 1.47, + "learning_rate": 2.5752849007625986e-05, + "loss": 0.9446, + "step": 2660 + }, + { + "epoch": 1.47, + "learning_rate": 2.568047851414459e-05, + "loss": 0.9453, + "step": 2665 + }, + { + "epoch": 1.48, + "learning_rate": 2.5608102313576027e-05, + "loss": 0.9244, + "step": 2670 + }, + { + "epoch": 1.48, + "learning_rate": 2.553572101293033e-05, + "loss": 0.9478, + "step": 2675 + }, + { + "epoch": 1.48, + "learning_rate": 2.546333521926031e-05, + "loss": 0.9708, + "step": 2680 + }, + { + "epoch": 1.48, + "learning_rate": 2.5390945539656445e-05, + "loss": 0.9266, + "step": 2685 + }, + { + "epoch": 1.49, + "learning_rate": 2.5318552581241822e-05, + "loss": 0.9199, + "step": 2690 + }, + { + "epoch": 1.49, + "learning_rate": 2.524615695116702e-05, + "loss": 0.9666, + "step": 2695 + }, + { + "epoch": 1.49, + "learning_rate": 2.5173759256605027e-05, + "loss": 0.9247, + "step": 2700 + }, + { + "epoch": 1.5, + "learning_rate": 2.510136010474614e-05, + "loss": 0.9534, + "step": 2705 + }, + { + "epoch": 1.5, + "learning_rate": 2.5028960102792887e-05, + "loss": 0.9502, + "step": 2710 + }, + { + "epoch": 1.5, + "learning_rate": 2.4971039897207112e-05, + "loss": 0.9331, + "step": 2715 + }, + { + "epoch": 1.5, + "learning_rate": 2.4898639895253865e-05, + "loss": 0.9806, + "step": 2720 + }, + { + "epoch": 1.51, + "learning_rate": 2.4826240743394982e-05, + "loss": 0.9525, + "step": 2725 + }, + { + "epoch": 1.51, + "learning_rate": 2.4753843048832985e-05, + "loss": 0.937, + "step": 2730 + }, + { + "epoch": 1.51, + "learning_rate": 2.4681447418758187e-05, + "loss": 0.935, + "step": 2735 + }, + { + "epoch": 1.52, + "learning_rate": 2.460905446034356e-05, + "loss": 0.9662, + "step": 2740 + }, + { + "epoch": 1.52, + "learning_rate": 2.45366647807397e-05, + "loss": 0.9249, + "step": 2745 + }, + { + "epoch": 1.52, + "learning_rate": 2.446427898706967e-05, + "loss": 0.9673, + "step": 2750 + }, + { + "epoch": 1.52, + "learning_rate": 2.439189768642398e-05, + "loss": 0.9316, + "step": 2755 + }, + { + "epoch": 1.53, + "learning_rate": 2.431952148585541e-05, + "loss": 0.9297, + "step": 2760 + }, + { + "epoch": 1.53, + "learning_rate": 2.424715099237402e-05, + "loss": 0.9278, + "step": 2765 + }, + { + "epoch": 1.53, + "learning_rate": 2.4174786812941968e-05, + "loss": 0.8954, + "step": 2770 + }, + { + "epoch": 1.53, + "learning_rate": 2.4102429554468456e-05, + "loss": 0.9586, + "step": 2775 + }, + { + "epoch": 1.54, + "learning_rate": 2.4030079823804673e-05, + "loss": 0.9119, + "step": 2780 + }, + { + "epoch": 1.54, + "learning_rate": 2.395773822773863e-05, + "loss": 0.8949, + "step": 2785 + }, + { + "epoch": 1.54, + "learning_rate": 2.3885405372990166e-05, + "loss": 0.9506, + "step": 2790 + }, + { + "epoch": 1.55, + "learning_rate": 2.3813081866205754e-05, + "loss": 0.9087, + "step": 2795 + }, + { + "epoch": 1.55, + "learning_rate": 2.3740768313953494e-05, + "loss": 0.923, + "step": 2800 + }, + { + "epoch": 1.55, + "learning_rate": 2.3668465322718004e-05, + "loss": 1.0212, + "step": 2805 + }, + { + "epoch": 1.55, + "learning_rate": 2.359617349889532e-05, + "loss": 0.9182, + "step": 2810 + }, + { + "epoch": 1.56, + "learning_rate": 2.3523893448787818e-05, + "loss": 0.9316, + "step": 2815 + }, + { + "epoch": 1.56, + "learning_rate": 2.3451625778599122e-05, + "loss": 0.94, + "step": 2820 + }, + { + "epoch": 1.56, + "learning_rate": 2.3379371094429038e-05, + "loss": 0.9181, + "step": 2825 + }, + { + "epoch": 1.56, + "learning_rate": 2.3307130002268457e-05, + "loss": 0.937, + "step": 2830 + }, + { + "epoch": 1.57, + "learning_rate": 2.3234903107994287e-05, + "loss": 0.9026, + "step": 2835 + }, + { + "epoch": 1.57, + "learning_rate": 2.3162691017364317e-05, + "loss": 0.954, + "step": 2840 + }, + { + "epoch": 1.57, + "learning_rate": 2.3090494336012253e-05, + "loss": 0.9661, + "step": 2845 + }, + { + "epoch": 1.58, + "learning_rate": 2.3018313669442483e-05, + "loss": 0.9127, + "step": 2850 + }, + { + "epoch": 1.58, + "learning_rate": 2.2946149623025158e-05, + "loss": 0.9317, + "step": 2855 + }, + { + "epoch": 1.58, + "learning_rate": 2.2874002801990978e-05, + "loss": 0.9856, + "step": 2860 + }, + { + "epoch": 1.58, + "learning_rate": 2.28018738114262e-05, + "loss": 1.0021, + "step": 2865 + }, + { + "epoch": 1.59, + "learning_rate": 2.272976325626755e-05, + "loss": 0.9655, + "step": 2870 + }, + { + "epoch": 1.59, + "learning_rate": 2.265767174129711e-05, + "loss": 0.9619, + "step": 2875 + }, + { + "epoch": 1.59, + "learning_rate": 2.2585599871137313e-05, + "loss": 0.9383, + "step": 2880 + }, + { + "epoch": 1.6, + "learning_rate": 2.251354825024579e-05, + "loss": 0.9332, + "step": 2885 + }, + { + "epoch": 1.6, + "learning_rate": 2.244151748291039e-05, + "loss": 0.9544, + "step": 2890 + }, + { + "epoch": 1.6, + "learning_rate": 2.236950817324401e-05, + "loss": 0.9343, + "step": 2895 + }, + { + "epoch": 1.6, + "learning_rate": 2.2297520925179647e-05, + "loss": 0.9189, + "step": 2900 + }, + { + "epoch": 1.61, + "learning_rate": 2.222555634246521e-05, + "loss": 0.9341, + "step": 2905 + }, + { + "epoch": 1.61, + "learning_rate": 2.215361502865858e-05, + "loss": 0.9567, + "step": 2910 + }, + { + "epoch": 1.61, + "learning_rate": 2.2081697587122423e-05, + "loss": 0.9047, + "step": 2915 + }, + { + "epoch": 1.61, + "learning_rate": 2.200980462101922e-05, + "loss": 0.9126, + "step": 2920 + }, + { + "epoch": 1.62, + "learning_rate": 2.1937936733306195e-05, + "loss": 0.9523, + "step": 2925 + }, + { + "epoch": 1.62, + "learning_rate": 2.18660945267302e-05, + "loss": 0.8802, + "step": 2930 + }, + { + "epoch": 1.62, + "learning_rate": 2.179427860382276e-05, + "loss": 0.9197, + "step": 2935 + }, + { + "epoch": 1.63, + "learning_rate": 2.1722489566894903e-05, + "loss": 0.9255, + "step": 2940 + }, + { + "epoch": 1.63, + "learning_rate": 2.1650728018032206e-05, + "loss": 0.8921, + "step": 2945 + }, + { + "epoch": 1.63, + "learning_rate": 2.15789945590897e-05, + "loss": 0.9607, + "step": 2950 + }, + { + "epoch": 1.63, + "learning_rate": 2.150728979168683e-05, + "loss": 0.9755, + "step": 2955 + }, + { + "epoch": 1.64, + "learning_rate": 2.1435614317202384e-05, + "loss": 0.9943, + "step": 2960 + }, + { + "epoch": 1.64, + "learning_rate": 2.1363968736769508e-05, + "loss": 0.9463, + "step": 2965 + }, + { + "epoch": 1.64, + "learning_rate": 2.1292353651270617e-05, + "loss": 0.9107, + "step": 2970 + }, + { + "epoch": 1.65, + "learning_rate": 2.1220769661332365e-05, + "loss": 0.9311, + "step": 2975 + }, + { + "epoch": 1.65, + "learning_rate": 2.1149217367320622e-05, + "loss": 0.9459, + "step": 2980 + }, + { + "epoch": 1.65, + "learning_rate": 2.107769736933541e-05, + "loss": 0.9439, + "step": 2985 + }, + { + "epoch": 1.65, + "learning_rate": 2.100621026720591e-05, + "loss": 0.9719, + "step": 2990 + }, + { + "epoch": 1.66, + "learning_rate": 2.093475666048539e-05, + "loss": 0.9569, + "step": 2995 + }, + { + "epoch": 1.66, + "learning_rate": 2.0863337148446222e-05, + "loss": 0.9308, + "step": 3000 + }, + { + "epoch": 1.66, + "learning_rate": 2.07919523300748e-05, + "loss": 0.9269, + "step": 3005 + }, + { + "epoch": 1.66, + "learning_rate": 2.0720602804066552e-05, + "loss": 0.9358, + "step": 3010 + }, + { + "epoch": 1.67, + "learning_rate": 2.0649289168820943e-05, + "loss": 0.9291, + "step": 3015 + }, + { + "epoch": 1.67, + "learning_rate": 2.0578012022436386e-05, + "loss": 0.969, + "step": 3020 + }, + { + "epoch": 1.67, + "learning_rate": 2.0506771962705304e-05, + "loss": 0.9319, + "step": 3025 + }, + { + "epoch": 1.68, + "learning_rate": 2.0435569587109042e-05, + "loss": 0.9574, + "step": 3030 + }, + { + "epoch": 1.68, + "learning_rate": 2.036440549281293e-05, + "loss": 0.9654, + "step": 3035 + }, + { + "epoch": 1.68, + "learning_rate": 2.0293280276661204e-05, + "loss": 0.9742, + "step": 3040 + }, + { + "epoch": 1.68, + "learning_rate": 2.0222194535172067e-05, + "loss": 0.9014, + "step": 3045 + }, + { + "epoch": 1.69, + "learning_rate": 2.0151148864532623e-05, + "loss": 0.9246, + "step": 3050 + }, + { + "epoch": 1.69, + "learning_rate": 2.0080143860593913e-05, + "loss": 0.9522, + "step": 3055 + }, + { + "epoch": 1.69, + "learning_rate": 2.0009180118865933e-05, + "loss": 0.9567, + "step": 3060 + }, + { + "epoch": 1.69, + "learning_rate": 1.9938258234512588e-05, + "loss": 0.983, + "step": 3065 + }, + { + "epoch": 1.7, + "learning_rate": 1.9867378802346764e-05, + "loss": 0.8722, + "step": 3070 + }, + { + "epoch": 1.7, + "learning_rate": 1.979654241682527e-05, + "loss": 0.9122, + "step": 3075 + }, + { + "epoch": 1.7, + "learning_rate": 1.972574967204391e-05, + "loss": 0.9362, + "step": 3080 + }, + { + "epoch": 1.71, + "learning_rate": 1.9655001161732478e-05, + "loss": 0.8944, + "step": 3085 + }, + { + "epoch": 1.71, + "learning_rate": 1.9584297479249774e-05, + "loss": 0.9329, + "step": 3090 + }, + { + "epoch": 1.71, + "learning_rate": 1.9513639217578636e-05, + "loss": 0.9707, + "step": 3095 + }, + { + "epoch": 1.71, + "learning_rate": 1.9443026969320955e-05, + "loss": 0.9367, + "step": 3100 + }, + { + "epoch": 1.72, + "learning_rate": 1.937246132669272e-05, + "loss": 0.896, + "step": 3105 + }, + { + "epoch": 1.72, + "learning_rate": 1.9301942881519047e-05, + "loss": 0.97, + "step": 3110 + }, + { + "epoch": 1.72, + "learning_rate": 1.9231472225229216e-05, + "loss": 0.9638, + "step": 3115 + }, + { + "epoch": 1.73, + "learning_rate": 1.9161049948851684e-05, + "loss": 0.9561, + "step": 3120 + }, + { + "epoch": 1.73, + "learning_rate": 1.9090676643009193e-05, + "loss": 0.9734, + "step": 3125 + }, + { + "epoch": 1.73, + "learning_rate": 1.902035289791373e-05, + "loss": 0.9651, + "step": 3130 + }, + { + "epoch": 1.73, + "learning_rate": 1.8950079303361658e-05, + "loss": 0.9489, + "step": 3135 + }, + { + "epoch": 1.74, + "learning_rate": 1.8879856448728723e-05, + "loss": 0.9893, + "step": 3140 + }, + { + "epoch": 1.74, + "learning_rate": 1.8809684922965097e-05, + "loss": 0.9549, + "step": 3145 + }, + { + "epoch": 1.74, + "learning_rate": 1.8739565314590507e-05, + "loss": 0.9196, + "step": 3150 + }, + { + "epoch": 1.74, + "learning_rate": 1.8669498211689216e-05, + "loss": 0.9568, + "step": 3155 + }, + { + "epoch": 1.75, + "learning_rate": 1.859948420190517e-05, + "loss": 0.904, + "step": 3160 + }, + { + "epoch": 1.75, + "learning_rate": 1.852952387243698e-05, + "loss": 0.9375, + "step": 3165 + }, + { + "epoch": 1.75, + "learning_rate": 1.8459617810033096e-05, + "loss": 0.9048, + "step": 3170 + }, + { + "epoch": 1.76, + "learning_rate": 1.83897666009868e-05, + "loss": 0.9514, + "step": 3175 + }, + { + "epoch": 1.76, + "learning_rate": 1.8319970831131363e-05, + "loss": 0.8855, + "step": 3180 + }, + { + "epoch": 1.76, + "learning_rate": 1.825023108583505e-05, + "loss": 0.9119, + "step": 3185 + }, + { + "epoch": 1.76, + "learning_rate": 1.818054794999628e-05, + "loss": 0.9733, + "step": 3190 + }, + { + "epoch": 1.77, + "learning_rate": 1.8110922008038705e-05, + "loss": 0.9597, + "step": 3195 + }, + { + "epoch": 1.77, + "learning_rate": 1.8041353843906275e-05, + "loss": 0.8952, + "step": 3200 + }, + { + "epoch": 1.77, + "learning_rate": 1.797184404105839e-05, + "loss": 0.8934, + "step": 3205 + }, + { + "epoch": 1.77, + "learning_rate": 1.7902393182464955e-05, + "loss": 0.9299, + "step": 3210 + }, + { + "epoch": 1.78, + "learning_rate": 1.7833001850601544e-05, + "loss": 0.9247, + "step": 3215 + }, + { + "epoch": 1.78, + "learning_rate": 1.7763670627444465e-05, + "loss": 0.9672, + "step": 3220 + }, + { + "epoch": 1.78, + "learning_rate": 1.7694400094465913e-05, + "loss": 0.9451, + "step": 3225 + }, + { + "epoch": 1.79, + "learning_rate": 1.7625190832629085e-05, + "loss": 0.9294, + "step": 3230 + }, + { + "epoch": 1.79, + "learning_rate": 1.7556043422383293e-05, + "loss": 0.9145, + "step": 3235 + }, + { + "epoch": 1.79, + "learning_rate": 1.7486958443659112e-05, + "loss": 0.9508, + "step": 3240 + }, + { + "epoch": 1.79, + "learning_rate": 1.7417936475863526e-05, + "loss": 0.8725, + "step": 3245 + }, + { + "epoch": 1.8, + "learning_rate": 1.7348978097875036e-05, + "loss": 0.9195, + "step": 3250 + }, + { + "epoch": 1.8, + "learning_rate": 1.728008388803883e-05, + "loss": 0.933, + "step": 3255 + }, + { + "epoch": 1.8, + "learning_rate": 1.7211254424161933e-05, + "loss": 0.9747, + "step": 3260 + }, + { + "epoch": 1.81, + "learning_rate": 1.7142490283508324e-05, + "loss": 0.9168, + "step": 3265 + }, + { + "epoch": 1.81, + "learning_rate": 1.707379204279418e-05, + "loss": 0.9844, + "step": 3270 + }, + { + "epoch": 1.81, + "learning_rate": 1.700516027818293e-05, + "loss": 0.9071, + "step": 3275 + }, + { + "epoch": 1.81, + "learning_rate": 1.6936595565280488e-05, + "loss": 0.9311, + "step": 3280 + }, + { + "epoch": 1.82, + "learning_rate": 1.686809847913045e-05, + "loss": 0.9419, + "step": 3285 + }, + { + "epoch": 1.82, + "learning_rate": 1.679966959420918e-05, + "loss": 0.9679, + "step": 3290 + }, + { + "epoch": 1.82, + "learning_rate": 1.67313094844211e-05, + "loss": 0.9601, + "step": 3295 + }, + { + "epoch": 1.82, + "learning_rate": 1.6663018723093774e-05, + "loss": 0.9022, + "step": 3300 + }, + { + "epoch": 1.83, + "learning_rate": 1.6594797882973196e-05, + "loss": 0.8925, + "step": 3305 + }, + { + "epoch": 1.83, + "learning_rate": 1.6526647536218894e-05, + "loss": 0.9238, + "step": 3310 + }, + { + "epoch": 1.83, + "learning_rate": 1.6458568254399225e-05, + "loss": 0.9632, + "step": 3315 + }, + { + "epoch": 1.84, + "learning_rate": 1.6390560608486496e-05, + "loss": 0.9164, + "step": 3320 + }, + { + "epoch": 1.84, + "learning_rate": 1.6322625168852217e-05, + "loss": 0.9505, + "step": 3325 + }, + { + "epoch": 1.84, + "learning_rate": 1.6254762505262338e-05, + "loss": 0.9622, + "step": 3330 + }, + { + "epoch": 1.84, + "learning_rate": 1.618697318687241e-05, + "loss": 0.9204, + "step": 3335 + }, + { + "epoch": 1.85, + "learning_rate": 1.6119257782222895e-05, + "loss": 0.9504, + "step": 3340 + }, + { + "epoch": 1.85, + "learning_rate": 1.6051616859234285e-05, + "loss": 0.9384, + "step": 3345 + }, + { + "epoch": 1.85, + "learning_rate": 1.5984050985202474e-05, + "loss": 0.9374, + "step": 3350 + }, + { + "epoch": 1.86, + "learning_rate": 1.591656072679387e-05, + "loss": 0.9937, + "step": 3355 + }, + { + "epoch": 1.86, + "learning_rate": 1.5849146650040737e-05, + "loss": 0.9587, + "step": 3360 + }, + { + "epoch": 1.86, + "learning_rate": 1.5781809320336412e-05, + "loss": 0.9312, + "step": 3365 + }, + { + "epoch": 1.86, + "learning_rate": 1.5714549302430536e-05, + "loss": 0.9343, + "step": 3370 + }, + { + "epoch": 1.87, + "learning_rate": 1.5647367160424393e-05, + "loss": 0.9419, + "step": 3375 + }, + { + "epoch": 1.87, + "learning_rate": 1.558026345776608e-05, + "loss": 0.9371, + "step": 3380 + }, + { + "epoch": 1.87, + "learning_rate": 1.551323875724587e-05, + "loss": 0.913, + "step": 3385 + }, + { + "epoch": 1.87, + "learning_rate": 1.5446293620991437e-05, + "loss": 0.9195, + "step": 3390 + }, + { + "epoch": 1.88, + "learning_rate": 1.5379428610463174e-05, + "loss": 0.9164, + "step": 3395 + }, + { + "epoch": 1.88, + "learning_rate": 1.531264428644945e-05, + "loss": 0.9256, + "step": 3400 + }, + { + "epoch": 1.88, + "learning_rate": 1.5245941209061953e-05, + "loss": 0.9658, + "step": 3405 + }, + { + "epoch": 1.89, + "learning_rate": 1.517931993773094e-05, + "loss": 0.9397, + "step": 3410 + }, + { + "epoch": 1.89, + "learning_rate": 1.5112781031200569e-05, + "loss": 0.915, + "step": 3415 + }, + { + "epoch": 1.89, + "learning_rate": 1.5046325047524251e-05, + "loss": 0.9116, + "step": 3420 + }, + { + "epoch": 1.89, + "learning_rate": 1.4979952544059888e-05, + "loss": 0.9192, + "step": 3425 + }, + { + "epoch": 1.9, + "learning_rate": 1.4913664077465289e-05, + "loss": 0.924, + "step": 3430 + }, + { + "epoch": 1.9, + "learning_rate": 1.4847460203693408e-05, + "loss": 0.9579, + "step": 3435 + }, + { + "epoch": 1.9, + "learning_rate": 1.4781341477987776e-05, + "loss": 0.9816, + "step": 3440 + }, + { + "epoch": 1.9, + "learning_rate": 1.4715308454877758e-05, + "loss": 0.9412, + "step": 3445 + }, + { + "epoch": 1.91, + "learning_rate": 1.4649361688173979e-05, + "loss": 0.9413, + "step": 3450 + }, + { + "epoch": 1.91, + "learning_rate": 1.458350173096361e-05, + "loss": 0.9425, + "step": 3455 + }, + { + "epoch": 1.91, + "learning_rate": 1.4517729135605795e-05, + "loss": 0.9132, + "step": 3460 + }, + { + "epoch": 1.92, + "learning_rate": 1.4452044453726942e-05, + "loss": 0.9455, + "step": 3465 + }, + { + "epoch": 1.92, + "learning_rate": 1.4386448236216174e-05, + "loss": 0.9543, + "step": 3470 + }, + { + "epoch": 1.92, + "learning_rate": 1.4320941033220667e-05, + "loss": 0.9118, + "step": 3475 + }, + { + "epoch": 1.92, + "learning_rate": 1.4255523394141041e-05, + "loss": 0.9425, + "step": 3480 + }, + { + "epoch": 1.93, + "learning_rate": 1.4190195867626749e-05, + "loss": 0.8958, + "step": 3485 + }, + { + "epoch": 1.93, + "learning_rate": 1.4124959001571497e-05, + "loss": 0.944, + "step": 3490 + }, + { + "epoch": 1.93, + "learning_rate": 1.4059813343108616e-05, + "loss": 0.9611, + "step": 3495 + }, + { + "epoch": 1.94, + "learning_rate": 1.3994759438606501e-05, + "loss": 0.9448, + "step": 3500 + }, + { + "epoch": 1.94, + "learning_rate": 1.3929797833664013e-05, + "loss": 0.9659, + "step": 3505 + }, + { + "epoch": 1.94, + "learning_rate": 1.3864929073105922e-05, + "loss": 0.9178, + "step": 3510 + }, + { + "epoch": 1.94, + "learning_rate": 1.3800153700978282e-05, + "loss": 0.8965, + "step": 3515 + }, + { + "epoch": 1.95, + "learning_rate": 1.373547226054398e-05, + "loss": 0.9198, + "step": 3520 + }, + { + "epoch": 1.95, + "learning_rate": 1.367088529427803e-05, + "loss": 0.9398, + "step": 3525 + }, + { + "epoch": 1.95, + "learning_rate": 1.3606393343863182e-05, + "loss": 0.9423, + "step": 3530 + }, + { + "epoch": 1.95, + "learning_rate": 1.3541996950185227e-05, + "loss": 0.9592, + "step": 3535 + }, + { + "epoch": 1.96, + "learning_rate": 1.3477696653328598e-05, + "loss": 0.9489, + "step": 3540 + }, + { + "epoch": 1.96, + "learning_rate": 1.3413492992571713e-05, + "loss": 0.963, + "step": 3545 + }, + { + "epoch": 1.96, + "learning_rate": 1.3349386506382586e-05, + "loss": 0.9449, + "step": 3550 + }, + { + "epoch": 1.97, + "learning_rate": 1.3285377732414172e-05, + "loss": 0.9043, + "step": 3555 + }, + { + "epoch": 1.97, + "learning_rate": 1.3221467207499972e-05, + "loss": 0.9362, + "step": 3560 + }, + { + "epoch": 1.97, + "learning_rate": 1.3157655467649463e-05, + "loss": 0.945, + "step": 3565 + }, + { + "epoch": 1.97, + "learning_rate": 1.3093943048043634e-05, + "loss": 0.8693, + "step": 3570 + }, + { + "epoch": 1.98, + "learning_rate": 1.3030330483030479e-05, + "loss": 0.9659, + "step": 3575 + }, + { + "epoch": 1.98, + "learning_rate": 1.2966818306120535e-05, + "loss": 0.9036, + "step": 3580 + }, + { + "epoch": 1.98, + "learning_rate": 1.2903407049982386e-05, + "loss": 0.9282, + "step": 3585 + }, + { + "epoch": 1.99, + "learning_rate": 1.2840097246438215e-05, + "loss": 0.8747, + "step": 3590 + }, + { + "epoch": 1.99, + "learning_rate": 1.277688942645934e-05, + "loss": 0.936, + "step": 3595 + }, + { + "epoch": 1.99, + "learning_rate": 1.2713784120161725e-05, + "loss": 0.8945, + "step": 3600 + }, + { + "epoch": 1.99, + "learning_rate": 1.2650781856801598e-05, + "loss": 0.9273, + "step": 3605 + }, + { + "epoch": 2.0, + "learning_rate": 1.258788316477097e-05, + "loss": 0.9632, + "step": 3610 + }, + { + "epoch": 2.0, + "learning_rate": 1.2525088571593202e-05, + "loss": 0.8641, + "step": 3615 + }, + { + "epoch": 2.0, + "learning_rate": 1.2462398603918607e-05, + "loss": 0.9314, + "step": 3620 + }, + { + "epoch": 2.0, + "learning_rate": 1.2399813787520006e-05, + "loss": 0.9169, + "step": 3625 + }, + { + "epoch": 2.01, + "learning_rate": 1.2337334647288334e-05, + "loss": 0.9089, + "step": 3630 + }, + { + "epoch": 2.01, + "learning_rate": 1.2274961707228228e-05, + "loss": 0.9039, + "step": 3635 + }, + { + "epoch": 2.01, + "learning_rate": 1.2212695490453646e-05, + "loss": 0.9378, + "step": 3640 + }, + { + "epoch": 2.02, + "learning_rate": 1.2150536519183475e-05, + "loss": 0.9297, + "step": 3645 + }, + { + "epoch": 2.02, + "learning_rate": 1.2088485314737108e-05, + "loss": 0.9488, + "step": 3650 + }, + { + "epoch": 2.02, + "learning_rate": 1.2026542397530186e-05, + "loss": 0.9625, + "step": 3655 + }, + { + "epoch": 2.02, + "learning_rate": 1.1964708287070073e-05, + "loss": 0.8874, + "step": 3660 + }, + { + "epoch": 2.03, + "learning_rate": 1.1902983501951666e-05, + "loss": 0.9224, + "step": 3665 + }, + { + "epoch": 2.03, + "learning_rate": 1.1841368559852892e-05, + "loss": 0.9442, + "step": 3670 + }, + { + "epoch": 2.03, + "learning_rate": 1.17798639775305e-05, + "loss": 0.8688, + "step": 3675 + }, + { + "epoch": 2.03, + "learning_rate": 1.1718470270815608e-05, + "loss": 0.8912, + "step": 3680 + }, + { + "epoch": 2.04, + "learning_rate": 1.1657187954609496e-05, + "loss": 0.9131, + "step": 3685 + }, + { + "epoch": 2.04, + "learning_rate": 1.1596017542879168e-05, + "loss": 0.9827, + "step": 3690 + }, + { + "epoch": 2.04, + "learning_rate": 1.1534959548653132e-05, + "loss": 0.8588, + "step": 3695 + }, + { + "epoch": 2.05, + "learning_rate": 1.147401448401706e-05, + "loss": 0.9359, + "step": 3700 + }, + { + "epoch": 2.05, + "learning_rate": 1.1413182860109491e-05, + "loss": 0.9375, + "step": 3705 + }, + { + "epoch": 2.05, + "learning_rate": 1.1352465187117562e-05, + "loss": 0.9242, + "step": 3710 + }, + { + "epoch": 2.05, + "learning_rate": 1.1291861974272703e-05, + "loss": 0.9132, + "step": 3715 + }, + { + "epoch": 2.06, + "learning_rate": 1.1231373729846393e-05, + "loss": 0.9142, + "step": 3720 + }, + { + "epoch": 2.06, + "learning_rate": 1.1171000961145883e-05, + "loss": 0.9132, + "step": 3725 + }, + { + "epoch": 2.06, + "learning_rate": 1.1110744174509952e-05, + "loss": 0.9603, + "step": 3730 + }, + { + "epoch": 2.07, + "learning_rate": 1.1050603875304622e-05, + "loss": 0.9442, + "step": 3735 + }, + { + "epoch": 2.07, + "learning_rate": 1.0990580567918979e-05, + "loss": 0.9392, + "step": 3740 + }, + { + "epoch": 2.07, + "learning_rate": 1.0930674755760908e-05, + "loss": 0.9203, + "step": 3745 + }, + { + "epoch": 2.07, + "learning_rate": 1.0870886941252872e-05, + "loss": 0.9468, + "step": 3750 + }, + { + "epoch": 2.08, + "learning_rate": 1.0811217625827705e-05, + "loss": 0.9605, + "step": 3755 + }, + { + "epoch": 2.08, + "learning_rate": 1.0751667309924399e-05, + "loss": 0.9166, + "step": 3760 + }, + { + "epoch": 2.08, + "learning_rate": 1.0692236492983918e-05, + "loss": 0.939, + "step": 3765 + }, + { + "epoch": 2.08, + "learning_rate": 1.0632925673445001e-05, + "loss": 0.9409, + "step": 3770 + }, + { + "epoch": 2.09, + "learning_rate": 1.0573735348739987e-05, + "loss": 0.9523, + "step": 3775 + }, + { + "epoch": 2.09, + "learning_rate": 1.0514666015290645e-05, + "loss": 0.8867, + "step": 3780 + }, + { + "epoch": 2.09, + "learning_rate": 1.0455718168503967e-05, + "loss": 0.9816, + "step": 3785 + }, + { + "epoch": 2.1, + "learning_rate": 1.0396892302768127e-05, + "loss": 0.9465, + "step": 3790 + }, + { + "epoch": 2.1, + "learning_rate": 1.033818891144817e-05, + "loss": 0.9352, + "step": 3795 + }, + { + "epoch": 2.1, + "learning_rate": 1.0279608486882054e-05, + "loss": 0.8877, + "step": 3800 + }, + { + "epoch": 2.1, + "learning_rate": 1.0221151520376343e-05, + "loss": 0.9098, + "step": 3805 + }, + { + "epoch": 2.11, + "learning_rate": 1.0162818502202251e-05, + "loss": 0.8687, + "step": 3810 + }, + { + "epoch": 2.11, + "learning_rate": 1.0104609921591387e-05, + "loss": 0.9256, + "step": 3815 + }, + { + "epoch": 2.11, + "learning_rate": 1.0046526266731782e-05, + "loss": 0.92, + "step": 3820 + }, + { + "epoch": 2.12, + "learning_rate": 9.988568024763673e-06, + "loss": 0.8813, + "step": 3825 + }, + { + "epoch": 2.12, + "learning_rate": 9.930735681775505e-06, + "loss": 0.891, + "step": 3830 + }, + { + "epoch": 2.12, + "learning_rate": 9.87302972279982e-06, + "loss": 0.9479, + "step": 3835 + }, + { + "epoch": 2.12, + "learning_rate": 9.815450631809191e-06, + "loss": 0.8963, + "step": 3840 + }, + { + "epoch": 2.13, + "learning_rate": 9.757998891712172e-06, + "loss": 0.9178, + "step": 3845 + }, + { + "epoch": 2.13, + "learning_rate": 9.700674984349228e-06, + "loss": 0.9072, + "step": 3850 + }, + { + "epoch": 2.13, + "learning_rate": 9.643479390488717e-06, + "loss": 0.8879, + "step": 3855 + }, + { + "epoch": 2.13, + "learning_rate": 9.586412589822846e-06, + "loss": 0.9484, + "step": 3860 + }, + { + "epoch": 2.14, + "learning_rate": 9.529475060963649e-06, + "loss": 0.9173, + "step": 3865 + }, + { + "epoch": 2.14, + "learning_rate": 9.472667281438982e-06, + "loss": 0.9318, + "step": 3870 + }, + { + "epoch": 2.14, + "learning_rate": 9.415989727688484e-06, + "loss": 0.9242, + "step": 3875 + }, + { + "epoch": 2.15, + "learning_rate": 9.359442875059631e-06, + "loss": 0.9314, + "step": 3880 + }, + { + "epoch": 2.15, + "learning_rate": 9.303027197803726e-06, + "loss": 0.9227, + "step": 3885 + }, + { + "epoch": 2.15, + "learning_rate": 9.246743169071906e-06, + "loss": 0.9253, + "step": 3890 + }, + { + "epoch": 2.15, + "learning_rate": 9.190591260911201e-06, + "loss": 0.9457, + "step": 3895 + }, + { + "epoch": 2.16, + "learning_rate": 9.134571944260554e-06, + "loss": 0.9214, + "step": 3900 + }, + { + "epoch": 2.16, + "learning_rate": 9.078685688946884e-06, + "loss": 0.9534, + "step": 3905 + }, + { + "epoch": 2.16, + "learning_rate": 9.022932963681141e-06, + "loss": 0.9236, + "step": 3910 + }, + { + "epoch": 2.16, + "learning_rate": 8.967314236054384e-06, + "loss": 0.8905, + "step": 3915 + }, + { + "epoch": 2.17, + "learning_rate": 8.911829972533817e-06, + "loss": 0.9319, + "step": 3920 + }, + { + "epoch": 2.17, + "learning_rate": 8.856480638458966e-06, + "loss": 0.8989, + "step": 3925 + }, + { + "epoch": 2.17, + "learning_rate": 8.80126669803766e-06, + "loss": 0.9234, + "step": 3930 + }, + { + "epoch": 2.18, + "learning_rate": 8.746188614342263e-06, + "loss": 0.9619, + "step": 3935 + }, + { + "epoch": 2.18, + "learning_rate": 8.691246849305653e-06, + "loss": 0.9537, + "step": 3940 + }, + { + "epoch": 2.18, + "learning_rate": 8.636441863717499e-06, + "loss": 0.9809, + "step": 3945 + }, + { + "epoch": 2.18, + "learning_rate": 8.581774117220238e-06, + "loss": 0.9219, + "step": 3950 + }, + { + "epoch": 2.19, + "learning_rate": 8.52724406830538e-06, + "loss": 0.9502, + "step": 3955 + }, + { + "epoch": 2.19, + "learning_rate": 8.472852174309514e-06, + "loss": 0.9277, + "step": 3960 + }, + { + "epoch": 2.19, + "learning_rate": 8.418598891410581e-06, + "loss": 0.8978, + "step": 3965 + }, + { + "epoch": 2.2, + "learning_rate": 8.364484674624e-06, + "loss": 0.9026, + "step": 3970 + }, + { + "epoch": 2.2, + "learning_rate": 8.31050997779885e-06, + "loss": 0.9658, + "step": 3975 + }, + { + "epoch": 2.2, + "learning_rate": 8.25667525361409e-06, + "loss": 0.9288, + "step": 3980 + }, + { + "epoch": 2.2, + "learning_rate": 8.202980953574735e-06, + "loss": 0.9009, + "step": 3985 + }, + { + "epoch": 2.21, + "learning_rate": 8.14942752800808e-06, + "loss": 0.934, + "step": 3990 + }, + { + "epoch": 2.21, + "learning_rate": 8.09601542605993e-06, + "loss": 0.9261, + "step": 3995 + }, + { + "epoch": 2.21, + "learning_rate": 8.042745095690826e-06, + "loss": 0.9086, + "step": 4000 + }, + { + "epoch": 2.21, + "learning_rate": 7.989616983672288e-06, + "loss": 0.9397, + "step": 4005 + }, + { + "epoch": 2.22, + "learning_rate": 7.936631535583055e-06, + "loss": 0.8661, + "step": 4010 + }, + { + "epoch": 2.22, + "learning_rate": 7.88378919580538e-06, + "loss": 0.9433, + "step": 4015 + }, + { + "epoch": 2.22, + "learning_rate": 7.83109040752128e-06, + "loss": 0.9473, + "step": 4020 + }, + { + "epoch": 2.23, + "learning_rate": 7.778535612708824e-06, + "loss": 0.9177, + "step": 4025 + }, + { + "epoch": 2.23, + "learning_rate": 7.726125252138417e-06, + "loss": 0.8932, + "step": 4030 + }, + { + "epoch": 2.23, + "learning_rate": 7.673859765369126e-06, + "loss": 0.9187, + "step": 4035 + }, + { + "epoch": 2.23, + "learning_rate": 7.62173959074497e-06, + "loss": 0.9009, + "step": 4040 + }, + { + "epoch": 2.24, + "learning_rate": 7.569765165391257e-06, + "loss": 0.9348, + "step": 4045 + }, + { + "epoch": 2.24, + "learning_rate": 7.517936925210917e-06, + "loss": 0.8827, + "step": 4050 + }, + { + "epoch": 2.24, + "learning_rate": 7.466255304880834e-06, + "loss": 0.9668, + "step": 4055 + }, + { + "epoch": 2.24, + "learning_rate": 7.4147207378482295e-06, + "loss": 0.934, + "step": 4060 + }, + { + "epoch": 2.25, + "learning_rate": 7.36333365632697e-06, + "loss": 0.9251, + "step": 4065 + }, + { + "epoch": 2.25, + "learning_rate": 7.312094491294033e-06, + "loss": 0.9225, + "step": 4070 + }, + { + "epoch": 2.25, + "learning_rate": 7.261003672485783e-06, + "loss": 0.9198, + "step": 4075 + }, + { + "epoch": 2.26, + "learning_rate": 7.210061628394477e-06, + "loss": 0.9023, + "step": 4080 + }, + { + "epoch": 2.26, + "learning_rate": 7.159268786264564e-06, + "loss": 0.9378, + "step": 4085 + }, + { + "epoch": 2.26, + "learning_rate": 7.108625572089209e-06, + "loss": 0.9148, + "step": 4090 + }, + { + "epoch": 2.26, + "learning_rate": 7.05813241060661e-06, + "loss": 0.9308, + "step": 4095 + }, + { + "epoch": 2.27, + "learning_rate": 7.007789725296557e-06, + "loss": 0.8979, + "step": 4100 + }, + { + "epoch": 2.27, + "learning_rate": 6.957597938376748e-06, + "loss": 0.9329, + "step": 4105 + }, + { + "epoch": 2.27, + "learning_rate": 6.907557470799358e-06, + "loss": 0.8849, + "step": 4110 + }, + { + "epoch": 2.28, + "learning_rate": 6.857668742247458e-06, + "loss": 0.8879, + "step": 4115 + }, + { + "epoch": 2.28, + "learning_rate": 6.807932171131498e-06, + "loss": 0.9132, + "step": 4120 + }, + { + "epoch": 2.28, + "learning_rate": 6.758348174585804e-06, + "loss": 0.9273, + "step": 4125 + }, + { + "epoch": 2.28, + "learning_rate": 6.7089171684650785e-06, + "loss": 0.9115, + "step": 4130 + }, + { + "epoch": 2.29, + "learning_rate": 6.659639567340914e-06, + "loss": 0.9431, + "step": 4135 + }, + { + "epoch": 2.29, + "learning_rate": 6.610515784498314e-06, + "loss": 0.9515, + "step": 4140 + }, + { + "epoch": 2.29, + "learning_rate": 6.561546231932228e-06, + "loss": 0.9069, + "step": 4145 + }, + { + "epoch": 2.29, + "learning_rate": 6.512731320344101e-06, + "loss": 0.9178, + "step": 4150 + }, + { + "epoch": 2.3, + "learning_rate": 6.464071459138405e-06, + "loss": 0.9156, + "step": 4155 + }, + { + "epoch": 2.3, + "learning_rate": 6.415567056419244e-06, + "loss": 0.9637, + "step": 4160 + }, + { + "epoch": 2.3, + "learning_rate": 6.3672185189869e-06, + "loss": 0.9446, + "step": 4165 + }, + { + "epoch": 2.31, + "learning_rate": 6.319026252334445e-06, + "loss": 0.8726, + "step": 4170 + }, + { + "epoch": 2.31, + "learning_rate": 6.270990660644313e-06, + "loss": 0.9324, + "step": 4175 + }, + { + "epoch": 2.31, + "learning_rate": 6.223112146784935e-06, + "loss": 0.9252, + "step": 4180 + }, + { + "epoch": 2.31, + "learning_rate": 6.1753911123073435e-06, + "loss": 0.8924, + "step": 4185 + }, + { + "epoch": 2.32, + "learning_rate": 6.127827957441817e-06, + "loss": 0.9344, + "step": 4190 + }, + { + "epoch": 2.32, + "learning_rate": 6.080423081094522e-06, + "loss": 0.9341, + "step": 4195 + }, + { + "epoch": 2.32, + "learning_rate": 6.033176880844133e-06, + "loss": 0.905, + "step": 4200 + }, + { + "epoch": 2.33, + "learning_rate": 5.986089752938584e-06, + "loss": 0.9231, + "step": 4205 + }, + { + "epoch": 2.33, + "learning_rate": 5.939162092291622e-06, + "loss": 0.906, + "step": 4210 + }, + { + "epoch": 2.33, + "learning_rate": 5.892394292479633e-06, + "loss": 0.8957, + "step": 4215 + }, + { + "epoch": 2.33, + "learning_rate": 5.8457867457382024e-06, + "loss": 0.867, + "step": 4220 + }, + { + "epoch": 2.34, + "learning_rate": 5.7993398429589506e-06, + "loss": 0.9022, + "step": 4225 + }, + { + "epoch": 2.34, + "learning_rate": 5.753053973686148e-06, + "loss": 0.9064, + "step": 4230 + }, + { + "epoch": 2.34, + "learning_rate": 5.7069295261135525e-06, + "loss": 0.9019, + "step": 4235 + }, + { + "epoch": 2.34, + "learning_rate": 5.66096688708104e-06, + "loss": 0.9397, + "step": 4240 + }, + { + "epoch": 2.35, + "learning_rate": 5.615166442071457e-06, + "loss": 0.8971, + "step": 4245 + }, + { + "epoch": 2.35, + "learning_rate": 5.569528575207339e-06, + "loss": 0.8902, + "step": 4250 + }, + { + "epoch": 2.35, + "learning_rate": 5.5240536692477e-06, + "loss": 0.8982, + "step": 4255 + }, + { + "epoch": 2.36, + "learning_rate": 5.4787421055848164e-06, + "loss": 0.9135, + "step": 4260 + }, + { + "epoch": 2.36, + "learning_rate": 5.433594264241043e-06, + "loss": 0.8969, + "step": 4265 + }, + { + "epoch": 2.36, + "learning_rate": 5.3886105238656055e-06, + "loss": 0.9036, + "step": 4270 + }, + { + "epoch": 2.36, + "learning_rate": 5.3437912617314425e-06, + "loss": 0.9313, + "step": 4275 + }, + { + "epoch": 2.37, + "learning_rate": 5.299136853732034e-06, + "loss": 0.9034, + "step": 4280 + }, + { + "epoch": 2.37, + "learning_rate": 5.254647674378252e-06, + "loss": 0.9673, + "step": 4285 + }, + { + "epoch": 2.37, + "learning_rate": 5.210324096795202e-06, + "loss": 0.9044, + "step": 4290 + }, + { + "epoch": 2.37, + "learning_rate": 5.166166492719124e-06, + "loss": 0.937, + "step": 4295 + }, + { + "epoch": 2.38, + "learning_rate": 5.122175232494255e-06, + "loss": 0.9095, + "step": 4300 + }, + { + "epoch": 2.38, + "learning_rate": 5.078350685069727e-06, + "loss": 0.8787, + "step": 4305 + }, + { + "epoch": 2.38, + "learning_rate": 5.03469321799647e-06, + "loss": 0.8945, + "step": 4310 + }, + { + "epoch": 2.39, + "learning_rate": 4.9912031974241376e-06, + "loss": 0.9285, + "step": 4315 + }, + { + "epoch": 2.39, + "learning_rate": 4.947880988098025e-06, + "loss": 0.9318, + "step": 4320 + }, + { + "epoch": 2.39, + "learning_rate": 4.90472695335602e-06, + "loss": 0.9171, + "step": 4325 + }, + { + "epoch": 2.39, + "learning_rate": 4.8617414551255545e-06, + "loss": 0.923, + "step": 4330 + }, + { + "epoch": 2.4, + "learning_rate": 4.818924853920545e-06, + "loss": 0.9153, + "step": 4335 + }, + { + "epoch": 2.4, + "learning_rate": 4.776277508838428e-06, + "loss": 0.907, + "step": 4340 + }, + { + "epoch": 2.4, + "learning_rate": 4.733799777557069e-06, + "loss": 0.8869, + "step": 4345 + }, + { + "epoch": 2.41, + "learning_rate": 4.691492016331842e-06, + "loss": 0.95, + "step": 4350 + }, + { + "epoch": 2.41, + "learning_rate": 4.6493545799925655e-06, + "loss": 0.9145, + "step": 4355 + }, + { + "epoch": 2.41, + "learning_rate": 4.607387821940609e-06, + "loss": 0.9175, + "step": 4360 + }, + { + "epoch": 2.41, + "learning_rate": 4.565592094145835e-06, + "loss": 0.9125, + "step": 4365 + }, + { + "epoch": 2.42, + "learning_rate": 4.523967747143745e-06, + "loss": 0.8972, + "step": 4370 + }, + { + "epoch": 2.42, + "learning_rate": 4.482515130032453e-06, + "loss": 0.9113, + "step": 4375 + }, + { + "epoch": 2.42, + "learning_rate": 4.441234590469817e-06, + "loss": 0.9447, + "step": 4380 + }, + { + "epoch": 2.42, + "learning_rate": 4.400126474670499e-06, + "loss": 0.9472, + "step": 4385 + }, + { + "epoch": 2.43, + "learning_rate": 4.359191127403059e-06, + "loss": 0.9391, + "step": 4390 + }, + { + "epoch": 2.43, + "learning_rate": 4.318428891987078e-06, + "loss": 0.9171, + "step": 4395 + }, + { + "epoch": 2.43, + "learning_rate": 4.2778401102902595e-06, + "loss": 0.9468, + "step": 4400 + }, + { + "epoch": 2.44, + "learning_rate": 4.237425122725586e-06, + "loss": 0.9034, + "step": 4405 + }, + { + "epoch": 2.44, + "learning_rate": 4.197184268248436e-06, + "loss": 0.8667, + "step": 4410 + }, + { + "epoch": 2.44, + "learning_rate": 4.157117884353765e-06, + "loss": 0.9553, + "step": 4415 + }, + { + "epoch": 2.44, + "learning_rate": 4.117226307073268e-06, + "loss": 0.931, + "step": 4420 + }, + { + "epoch": 2.45, + "learning_rate": 4.07750987097254e-06, + "loss": 0.9287, + "step": 4425 + }, + { + "epoch": 2.45, + "learning_rate": 4.037968909148326e-06, + "loss": 0.8737, + "step": 4430 + }, + { + "epoch": 2.45, + "learning_rate": 3.998603753225647e-06, + "loss": 0.9677, + "step": 4435 + }, + { + "epoch": 2.46, + "learning_rate": 3.959414733355094e-06, + "loss": 0.9194, + "step": 4440 + }, + { + "epoch": 2.46, + "learning_rate": 3.9204021782100115e-06, + "loss": 0.9424, + "step": 4445 + }, + { + "epoch": 2.46, + "learning_rate": 3.8815664149837675e-06, + "loss": 0.9402, + "step": 4450 + }, + { + "epoch": 2.46, + "learning_rate": 3.8429077693869854e-06, + "loss": 0.9196, + "step": 4455 + }, + { + "epoch": 2.47, + "learning_rate": 3.804426565644839e-06, + "loss": 0.9357, + "step": 4460 + }, + { + "epoch": 2.47, + "learning_rate": 3.7661231264943086e-06, + "loss": 0.9234, + "step": 4465 + }, + { + "epoch": 2.47, + "learning_rate": 3.7279977731814963e-06, + "loss": 0.9135, + "step": 4470 + }, + { + "epoch": 2.47, + "learning_rate": 3.690050825458913e-06, + "loss": 0.9074, + "step": 4475 + }, + { + "epoch": 2.48, + "learning_rate": 3.652282601582793e-06, + "loss": 0.8658, + "step": 4480 + }, + { + "epoch": 2.48, + "learning_rate": 3.6146934183104748e-06, + "loss": 0.9522, + "step": 4485 + }, + { + "epoch": 2.48, + "learning_rate": 3.5772835908976538e-06, + "loss": 0.937, + "step": 4490 + }, + { + "epoch": 2.49, + "learning_rate": 3.540053433095841e-06, + "loss": 0.9336, + "step": 4495 + }, + { + "epoch": 2.49, + "learning_rate": 3.503003257149637e-06, + "loss": 0.9437, + "step": 4500 + }, + { + "epoch": 2.49, + "learning_rate": 3.4661333737941976e-06, + "loss": 0.9084, + "step": 4505 + }, + { + "epoch": 2.49, + "learning_rate": 3.429444092252554e-06, + "loss": 0.9378, + "step": 4510 + }, + { + "epoch": 2.5, + "learning_rate": 3.39293572023307e-06, + "loss": 0.8999, + "step": 4515 + }, + { + "epoch": 2.5, + "learning_rate": 3.3566085639268413e-06, + "loss": 0.9417, + "step": 4520 + }, + { + "epoch": 2.5, + "learning_rate": 3.32046292800513e-06, + "loss": 0.9288, + "step": 4525 + }, + { + "epoch": 2.5, + "learning_rate": 3.2844991156168097e-06, + "loss": 0.8957, + "step": 4530 + }, + { + "epoch": 2.51, + "learning_rate": 3.2487174283858223e-06, + "loss": 0.9002, + "step": 4535 + }, + { + "epoch": 2.51, + "learning_rate": 3.2131181664086517e-06, + "loss": 0.9315, + "step": 4540 + }, + { + "epoch": 2.51, + "learning_rate": 3.1777016282517975e-06, + "loss": 0.8948, + "step": 4545 + }, + { + "epoch": 2.52, + "learning_rate": 3.142468110949287e-06, + "loss": 0.9015, + "step": 4550 + }, + { + "epoch": 2.52, + "learning_rate": 3.1074179100001737e-06, + "loss": 0.9273, + "step": 4555 + }, + { + "epoch": 2.52, + "learning_rate": 3.0725513193660404e-06, + "loss": 0.9307, + "step": 4560 + }, + { + "epoch": 2.52, + "learning_rate": 3.0378686314685934e-06, + "loss": 0.9075, + "step": 4565 + }, + { + "epoch": 2.53, + "learning_rate": 3.003370137187128e-06, + "loss": 0.8821, + "step": 4570 + }, + { + "epoch": 2.53, + "learning_rate": 2.969056125856154e-06, + "loss": 0.9245, + "step": 4575 + }, + { + "epoch": 2.53, + "learning_rate": 2.93492688526294e-06, + "loss": 0.9346, + "step": 4580 + }, + { + "epoch": 2.54, + "learning_rate": 2.900982701645111e-06, + "loss": 0.9226, + "step": 4585 + }, + { + "epoch": 2.54, + "learning_rate": 2.867223859688237e-06, + "loss": 0.9215, + "step": 4590 + }, + { + "epoch": 2.54, + "learning_rate": 2.83365064252345e-06, + "loss": 0.878, + "step": 4595 + }, + { + "epoch": 2.54, + "learning_rate": 2.800263331725078e-06, + "loss": 0.9207, + "step": 4600 + }, + { + "epoch": 2.55, + "learning_rate": 2.7670622073082657e-06, + "loss": 0.9156, + "step": 4605 + }, + { + "epoch": 2.55, + "learning_rate": 2.7340475477266507e-06, + "loss": 0.9296, + "step": 4610 + }, + { + "epoch": 2.55, + "learning_rate": 2.701219629869986e-06, + "loss": 0.9346, + "step": 4615 + }, + { + "epoch": 2.55, + "learning_rate": 2.6685787290618825e-06, + "loss": 0.9237, + "step": 4620 + }, + { + "epoch": 2.56, + "learning_rate": 2.636125119057428e-06, + "loss": 0.9162, + "step": 4625 + }, + { + "epoch": 2.56, + "learning_rate": 2.6038590720409565e-06, + "loss": 0.9145, + "step": 4630 + }, + { + "epoch": 2.56, + "learning_rate": 2.5717808586237067e-06, + "loss": 0.9776, + "step": 4635 + }, + { + "epoch": 2.57, + "learning_rate": 2.539890747841611e-06, + "loss": 0.9519, + "step": 4640 + }, + { + "epoch": 2.57, + "learning_rate": 2.5081890071529695e-06, + "loss": 0.9116, + "step": 4645 + }, + { + "epoch": 2.57, + "learning_rate": 2.4766759024362927e-06, + "loss": 0.9217, + "step": 4650 + }, + { + "epoch": 2.57, + "learning_rate": 2.445351697987988e-06, + "loss": 0.9113, + "step": 4655 + }, + { + "epoch": 2.58, + "learning_rate": 2.414216656520191e-06, + "loss": 0.9089, + "step": 4660 + }, + { + "epoch": 2.58, + "learning_rate": 2.3832710391585605e-06, + "loss": 0.9367, + "step": 4665 + }, + { + "epoch": 2.58, + "learning_rate": 2.3525151054400675e-06, + "loss": 0.9453, + "step": 4670 + }, + { + "epoch": 2.59, + "learning_rate": 2.3219491133108394e-06, + "loss": 0.9187, + "step": 4675 + }, + { + "epoch": 2.59, + "learning_rate": 2.2915733191239824e-06, + "loss": 0.911, + "step": 4680 + }, + { + "epoch": 2.59, + "learning_rate": 2.261387977637436e-06, + "loss": 0.9309, + "step": 4685 + }, + { + "epoch": 2.59, + "learning_rate": 2.2313933420118395e-06, + "loss": 0.8921, + "step": 4690 + }, + { + "epoch": 2.6, + "learning_rate": 2.2015896638084037e-06, + "loss": 0.9332, + "step": 4695 + }, + { + "epoch": 2.6, + "learning_rate": 2.171977192986813e-06, + "loss": 0.9316, + "step": 4700 + }, + { + "epoch": 2.6, + "learning_rate": 2.142556177903096e-06, + "loss": 0.9453, + "step": 4705 + }, + { + "epoch": 2.6, + "learning_rate": 2.1133268653076022e-06, + "loss": 0.937, + "step": 4710 + }, + { + "epoch": 2.61, + "learning_rate": 2.084289500342862e-06, + "loss": 0.9067, + "step": 4715 + }, + { + "epoch": 2.61, + "learning_rate": 2.0554443265415864e-06, + "loss": 0.9694, + "step": 4720 + }, + { + "epoch": 2.61, + "learning_rate": 2.0267915858245943e-06, + "loss": 0.9217, + "step": 4725 + }, + { + "epoch": 2.62, + "learning_rate": 1.998331518498797e-06, + "loss": 0.9399, + "step": 4730 + }, + { + "epoch": 2.62, + "learning_rate": 1.970064363255175e-06, + "loss": 0.9164, + "step": 4735 + }, + { + "epoch": 2.62, + "learning_rate": 1.941990357166784e-06, + "loss": 0.9336, + "step": 4740 + }, + { + "epoch": 2.62, + "learning_rate": 1.9141097356867644e-06, + "loss": 0.926, + "step": 4745 + }, + { + "epoch": 2.63, + "learning_rate": 1.8864227326463452e-06, + "loss": 0.9109, + "step": 4750 + }, + { + "epoch": 2.63, + "learning_rate": 1.8589295802529328e-06, + "loss": 0.9528, + "step": 4755 + }, + { + "epoch": 2.63, + "learning_rate": 1.8316305090881003e-06, + "loss": 0.9142, + "step": 4760 + }, + { + "epoch": 2.63, + "learning_rate": 1.8045257481057204e-06, + "loss": 0.9247, + "step": 4765 + }, + { + "epoch": 2.64, + "learning_rate": 1.7776155246299747e-06, + "loss": 0.9162, + "step": 4770 + }, + { + "epoch": 2.64, + "learning_rate": 1.7509000643535167e-06, + "loss": 0.9084, + "step": 4775 + }, + { + "epoch": 2.64, + "learning_rate": 1.7243795913355148e-06, + "loss": 0.8427, + "step": 4780 + }, + { + "epoch": 2.65, + "learning_rate": 1.6980543279998401e-06, + "loss": 0.8851, + "step": 4785 + }, + { + "epoch": 2.65, + "learning_rate": 1.671924495133126e-06, + "loss": 0.9155, + "step": 4790 + }, + { + "epoch": 2.65, + "learning_rate": 1.6459903118829777e-06, + "loss": 0.9049, + "step": 4795 + }, + { + "epoch": 2.65, + "learning_rate": 1.6202519957561114e-06, + "loss": 0.8831, + "step": 4800 + } + ], + "max_steps": 5424, + "num_train_epochs": 3, + "total_flos": 1.5598168755614515e+18, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-4800/training_args.bin b/checkpoint-4800/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..77433459daca6603950afc463ff989e817f81ac3 --- /dev/null +++ b/checkpoint-4800/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0f6a8853599a52b1f15984d75ce4b6434c335fdc67d4509d0b2ca0d8c613590a +size 3362 diff --git a/checkpoint-4900/README.md b/checkpoint-4900/README.md new file mode 100644 index 0000000000000000000000000000000000000000..53c0fe6b436905ad7a2c060e98fe1ebdeac1a2fe --- /dev/null +++ b/checkpoint-4900/README.md @@ -0,0 +1,15 @@ +--- +library_name: peft +--- +## Training procedure + + +The following `bitsandbytes` quantization config was used during training: +- load_in_8bit: True +- llm_int8_threshold: 6.0 +- llm_int8_skip_modules: None +- llm_int8_enable_fp32_cpu_offload: False +### Framework versions + + +- PEFT 0.5.0 diff --git a/checkpoint-4900/adapter_config.json b/checkpoint-4900/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..514ae99ac7471cc26016f7575e44dd8304f29b07 --- /dev/null +++ b/checkpoint-4900/adapter_config.json @@ -0,0 +1,21 @@ +{ + "auto_mapping": null, + "base_model_name_or_path": "meta-llama/Llama-2-7b-chat-hf", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "q_proj", + "v_proj" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-4900/adapter_model.bin b/checkpoint-4900/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..98a4be47384cdd52314dfb7a1904af7660de5ce6 --- /dev/null +++ b/checkpoint-4900/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:237303a383021813dc369f82eb7d3ca180979aa1df62107e077398f04c44d696 +size 16821197 diff --git a/checkpoint-4900/finetuning_args.json b/checkpoint-4900/finetuning_args.json new file mode 100644 index 0000000000000000000000000000000000000000..d01efc206b59c6f88548e8f3940579f2ed2af33b --- /dev/null +++ b/checkpoint-4900/finetuning_args.json @@ -0,0 +1,16 @@ +{ + "dpo_beta": 0.1, + "finetuning_type": "lora", + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "lora_rank": 8, + "lora_target": [ + "q_proj", + "v_proj" + ], + "name_module_trainable": "mlp", + "num_hidden_layers": 32, + "num_layer_trainable": 3, + "ppo_score_norm": false, + "resume_lora_training": true +} diff --git a/checkpoint-4900/optimizer.pt b/checkpoint-4900/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..4b128746cfbb7a9f3dad784cce831c2206af3166 --- /dev/null +++ b/checkpoint-4900/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:82dd97501e579ebb7253aaf4c3c90162e307dd9aa332e3beca3ce6f0411d2d07 +size 33629893 diff --git a/checkpoint-4900/rng_state.pth b/checkpoint-4900/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..b5c30dd3455ebb9918a66b1348ffeb7b321dc1ca --- /dev/null +++ b/checkpoint-4900/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f21c0404f2a7f1c92fd1003ffcda8335e87b000bf3a73b6a4e60db857a30f94a +size 14575 diff --git a/checkpoint-4900/scaler.pt b/checkpoint-4900/scaler.pt new file mode 100644 index 0000000000000000000000000000000000000000..7f7aff4a5a0417e0d621075c8a8813aeae66090c --- /dev/null +++ b/checkpoint-4900/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3cd27fb19a16ac2bb074e1f61d50f152249e93985b23e84f00ac8b9d1a5c5217 +size 557 diff --git a/checkpoint-4900/scheduler.pt b/checkpoint-4900/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..efe47f70ec31e5f6646335945e6fb501efebcb25 --- /dev/null +++ b/checkpoint-4900/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:59aa6aebaef179d2472e6a56ef167e46fb262342761174eecae6fda90380c9e8 +size 627 diff --git a/checkpoint-4900/trainer_state.json b/checkpoint-4900/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..eaf2d4d6a31ca1c10dbca366a1e899e639740131 --- /dev/null +++ b/checkpoint-4900/trainer_state.json @@ -0,0 +1,5896 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 2.7094277025158973, + "global_step": 4900, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.9999895164082156e-05, + "loss": 1.6349, + "step": 5 + }, + { + "epoch": 0.01, + "learning_rate": 4.999958065720787e-05, + "loss": 1.6199, + "step": 10 + }, + { + "epoch": 0.01, + "learning_rate": 4.999905648201487e-05, + "loss": 1.4834, + "step": 15 + }, + { + "epoch": 0.01, + "learning_rate": 4.999832264289934e-05, + "loss": 1.3882, + "step": 20 + }, + { + "epoch": 0.01, + "learning_rate": 4.999737914601591e-05, + "loss": 1.3679, + "step": 25 + }, + { + "epoch": 0.02, + "learning_rate": 4.999622599927756e-05, + "loss": 1.2396, + "step": 30 + }, + { + "epoch": 0.02, + "learning_rate": 4.999486321235559e-05, + "loss": 1.321, + "step": 35 + }, + { + "epoch": 0.02, + "learning_rate": 4.9993290796679516e-05, + "loss": 1.2874, + "step": 40 + }, + { + "epoch": 0.02, + "learning_rate": 4.999150876543699e-05, + "loss": 1.2607, + "step": 45 + }, + { + "epoch": 0.03, + "learning_rate": 4.9989517133573694e-05, + "loss": 1.2454, + "step": 50 + }, + { + "epoch": 0.03, + "learning_rate": 4.9987315917793174e-05, + "loss": 1.2799, + "step": 55 + }, + { + "epoch": 0.03, + "learning_rate": 4.998490513655676e-05, + "loss": 1.2575, + "step": 60 + }, + { + "epoch": 0.04, + "learning_rate": 4.998228481008337e-05, + "loss": 1.2404, + "step": 65 + }, + { + "epoch": 0.04, + "learning_rate": 4.997945496034934e-05, + "loss": 1.2219, + "step": 70 + }, + { + "epoch": 0.04, + "learning_rate": 4.9976415611088267e-05, + "loss": 1.2241, + "step": 75 + }, + { + "epoch": 0.04, + "learning_rate": 4.997316678779079e-05, + "loss": 1.1716, + "step": 80 + }, + { + "epoch": 0.05, + "learning_rate": 4.996970851770438e-05, + "loss": 1.1883, + "step": 85 + }, + { + "epoch": 0.05, + "learning_rate": 4.9966040829833115e-05, + "loss": 1.205, + "step": 90 + }, + { + "epoch": 0.05, + "learning_rate": 4.9962163754937426e-05, + "loss": 1.1246, + "step": 95 + }, + { + "epoch": 0.06, + "learning_rate": 4.995807732553384e-05, + "loss": 1.1636, + "step": 100 + }, + { + "epoch": 0.06, + "learning_rate": 4.9953781575894723e-05, + "loss": 1.158, + "step": 105 + }, + { + "epoch": 0.06, + "learning_rate": 4.9949276542048e-05, + "loss": 1.1477, + "step": 110 + }, + { + "epoch": 0.06, + "learning_rate": 4.9944562261776805e-05, + "loss": 1.1678, + "step": 115 + }, + { + "epoch": 0.07, + "learning_rate": 4.9939638774619216e-05, + "loss": 1.1501, + "step": 120 + }, + { + "epoch": 0.07, + "learning_rate": 4.99345061218679e-05, + "loss": 1.1955, + "step": 125 + }, + { + "epoch": 0.07, + "learning_rate": 4.9929164346569756e-05, + "loss": 1.1724, + "step": 130 + }, + { + "epoch": 0.07, + "learning_rate": 4.9923613493525576e-05, + "loss": 1.177, + "step": 135 + }, + { + "epoch": 0.08, + "learning_rate": 4.991785360928968e-05, + "loss": 1.1418, + "step": 140 + }, + { + "epoch": 0.08, + "learning_rate": 4.991188474216947e-05, + "loss": 1.1898, + "step": 145 + }, + { + "epoch": 0.08, + "learning_rate": 4.9905706942225094e-05, + "loss": 1.1479, + "step": 150 + }, + { + "epoch": 0.09, + "learning_rate": 4.9899320261268966e-05, + "loss": 1.1356, + "step": 155 + }, + { + "epoch": 0.09, + "learning_rate": 4.989272475286537e-05, + "loss": 1.1397, + "step": 160 + }, + { + "epoch": 0.09, + "learning_rate": 4.9885920472330004e-05, + "loss": 1.1215, + "step": 165 + }, + { + "epoch": 0.09, + "learning_rate": 4.9878907476729516e-05, + "loss": 1.167, + "step": 170 + }, + { + "epoch": 0.1, + "learning_rate": 4.9871685824881e-05, + "loss": 1.1219, + "step": 175 + }, + { + "epoch": 0.1, + "learning_rate": 4.9864255577351534e-05, + "loss": 1.0835, + "step": 180 + }, + { + "epoch": 0.1, + "learning_rate": 4.985661679645769e-05, + "loss": 1.0721, + "step": 185 + }, + { + "epoch": 0.11, + "learning_rate": 4.9848769546264915e-05, + "loss": 1.0692, + "step": 190 + }, + { + "epoch": 0.11, + "learning_rate": 4.9840713892587146e-05, + "loss": 1.0488, + "step": 195 + }, + { + "epoch": 0.11, + "learning_rate": 4.983244990298609e-05, + "loss": 1.1285, + "step": 200 + }, + { + "epoch": 0.11, + "learning_rate": 4.982397764677081e-05, + "loss": 1.0832, + "step": 205 + }, + { + "epoch": 0.12, + "learning_rate": 4.981529719499704e-05, + "loss": 1.0652, + "step": 210 + }, + { + "epoch": 0.12, + "learning_rate": 4.980640862046663e-05, + "loss": 1.1043, + "step": 215 + }, + { + "epoch": 0.12, + "learning_rate": 4.979731199772693e-05, + "loss": 1.112, + "step": 220 + }, + { + "epoch": 0.12, + "learning_rate": 4.9788007403070146e-05, + "loss": 1.1029, + "step": 225 + }, + { + "epoch": 0.13, + "learning_rate": 4.977849491453277e-05, + "loss": 1.0869, + "step": 230 + }, + { + "epoch": 0.13, + "learning_rate": 4.976877461189481e-05, + "loss": 1.0843, + "step": 235 + }, + { + "epoch": 0.13, + "learning_rate": 4.975884657667922e-05, + "loss": 1.0789, + "step": 240 + }, + { + "epoch": 0.14, + "learning_rate": 4.974871089215118e-05, + "loss": 1.0449, + "step": 245 + }, + { + "epoch": 0.14, + "learning_rate": 4.9738367643317405e-05, + "loss": 1.1053, + "step": 250 + }, + { + "epoch": 0.14, + "learning_rate": 4.9727816916925395e-05, + "loss": 1.0651, + "step": 255 + }, + { + "epoch": 0.14, + "learning_rate": 4.971705880146276e-05, + "loss": 1.0828, + "step": 260 + }, + { + "epoch": 0.15, + "learning_rate": 4.970609338715646e-05, + "loss": 1.0932, + "step": 265 + }, + { + "epoch": 0.15, + "learning_rate": 4.969492076597203e-05, + "loss": 1.0648, + "step": 270 + }, + { + "epoch": 0.15, + "learning_rate": 4.968354103161283e-05, + "loss": 1.0948, + "step": 275 + }, + { + "epoch": 0.15, + "learning_rate": 4.967195427951926e-05, + "loss": 1.0721, + "step": 280 + }, + { + "epoch": 0.16, + "learning_rate": 4.9660160606867936e-05, + "loss": 1.124, + "step": 285 + }, + { + "epoch": 0.16, + "learning_rate": 4.9648160112570896e-05, + "loss": 1.0963, + "step": 290 + }, + { + "epoch": 0.16, + "learning_rate": 4.9635952897274773e-05, + "loss": 1.1078, + "step": 295 + }, + { + "epoch": 0.17, + "learning_rate": 4.9623539063359925e-05, + "loss": 1.1059, + "step": 300 + }, + { + "epoch": 0.17, + "learning_rate": 4.961091871493962e-05, + "loss": 1.1032, + "step": 305 + }, + { + "epoch": 0.17, + "learning_rate": 4.959809195785912e-05, + "loss": 1.0595, + "step": 310 + }, + { + "epoch": 0.17, + "learning_rate": 4.958505889969481e-05, + "loss": 1.1096, + "step": 315 + }, + { + "epoch": 0.18, + "learning_rate": 4.957181964975329e-05, + "loss": 1.0589, + "step": 320 + }, + { + "epoch": 0.18, + "learning_rate": 4.955837431907049e-05, + "loss": 1.0608, + "step": 325 + }, + { + "epoch": 0.18, + "learning_rate": 4.954472302041069e-05, + "loss": 1.0819, + "step": 330 + }, + { + "epoch": 0.19, + "learning_rate": 4.9530865868265605e-05, + "loss": 1.0759, + "step": 335 + }, + { + "epoch": 0.19, + "learning_rate": 4.951680297885342e-05, + "loss": 1.0515, + "step": 340 + }, + { + "epoch": 0.19, + "learning_rate": 4.950253447011779e-05, + "loss": 1.0371, + "step": 345 + }, + { + "epoch": 0.19, + "learning_rate": 4.948806046172691e-05, + "loss": 1.0619, + "step": 350 + }, + { + "epoch": 0.2, + "learning_rate": 4.947338107507245e-05, + "loss": 1.0757, + "step": 355 + }, + { + "epoch": 0.2, + "learning_rate": 4.945849643326857e-05, + "loss": 1.0686, + "step": 360 + }, + { + "epoch": 0.2, + "learning_rate": 4.9443406661150874e-05, + "loss": 1.0809, + "step": 365 + }, + { + "epoch": 0.2, + "learning_rate": 4.942811188527537e-05, + "loss": 1.0704, + "step": 370 + }, + { + "epoch": 0.21, + "learning_rate": 4.941261223391742e-05, + "loss": 1.0655, + "step": 375 + }, + { + "epoch": 0.21, + "learning_rate": 4.939690783707063e-05, + "loss": 1.1182, + "step": 380 + }, + { + "epoch": 0.21, + "learning_rate": 4.938099882644578e-05, + "loss": 1.081, + "step": 385 + }, + { + "epoch": 0.22, + "learning_rate": 4.9364885335469734e-05, + "loss": 1.0792, + "step": 390 + }, + { + "epoch": 0.22, + "learning_rate": 4.93485674992843e-05, + "loss": 1.0244, + "step": 395 + }, + { + "epoch": 0.22, + "learning_rate": 4.933204545474511e-05, + "loss": 1.0531, + "step": 400 + }, + { + "epoch": 0.22, + "learning_rate": 4.9315319340420465e-05, + "loss": 1.048, + "step": 405 + }, + { + "epoch": 0.23, + "learning_rate": 4.929838929659015e-05, + "loss": 1.04, + "step": 410 + }, + { + "epoch": 0.23, + "learning_rate": 4.9281255465244314e-05, + "loss": 1.0686, + "step": 415 + }, + { + "epoch": 0.23, + "learning_rate": 4.926391799008223e-05, + "loss": 1.0933, + "step": 420 + }, + { + "epoch": 0.24, + "learning_rate": 4.924637701651111e-05, + "loss": 1.0313, + "step": 425 + }, + { + "epoch": 0.24, + "learning_rate": 4.9228632691644874e-05, + "loss": 1.0811, + "step": 430 + }, + { + "epoch": 0.24, + "learning_rate": 4.921068516430293e-05, + "loss": 1.0617, + "step": 435 + }, + { + "epoch": 0.24, + "learning_rate": 4.919253458500892e-05, + "loss": 1.0482, + "step": 440 + }, + { + "epoch": 0.25, + "learning_rate": 4.9174181105989445e-05, + "loss": 1.0681, + "step": 445 + }, + { + "epoch": 0.25, + "learning_rate": 4.9155624881172834e-05, + "loss": 1.0476, + "step": 450 + }, + { + "epoch": 0.25, + "learning_rate": 4.913686606618777e-05, + "loss": 1.0463, + "step": 455 + }, + { + "epoch": 0.25, + "learning_rate": 4.911790481836208e-05, + "loss": 1.0213, + "step": 460 + }, + { + "epoch": 0.26, + "learning_rate": 4.909874129672133e-05, + "loss": 0.9855, + "step": 465 + }, + { + "epoch": 0.26, + "learning_rate": 4.907937566198757e-05, + "loss": 1.045, + "step": 470 + }, + { + "epoch": 0.26, + "learning_rate": 4.9059808076577914e-05, + "loss": 1.0442, + "step": 475 + }, + { + "epoch": 0.27, + "learning_rate": 4.904003870460323e-05, + "loss": 1.0354, + "step": 480 + }, + { + "epoch": 0.27, + "learning_rate": 4.9020067711866735e-05, + "loss": 1.0331, + "step": 485 + }, + { + "epoch": 0.27, + "learning_rate": 4.899989526586261e-05, + "loss": 1.0585, + "step": 490 + }, + { + "epoch": 0.27, + "learning_rate": 4.8979521535774636e-05, + "loss": 1.0223, + "step": 495 + }, + { + "epoch": 0.28, + "learning_rate": 4.895894669247468e-05, + "loss": 1.0118, + "step": 500 + }, + { + "epoch": 0.28, + "learning_rate": 4.8938170908521356e-05, + "loss": 1.0508, + "step": 505 + }, + { + "epoch": 0.28, + "learning_rate": 4.8917194358158534e-05, + "loss": 1.0656, + "step": 510 + }, + { + "epoch": 0.28, + "learning_rate": 4.8896017217313886e-05, + "loss": 1.0655, + "step": 515 + }, + { + "epoch": 0.29, + "learning_rate": 4.887463966359741e-05, + "loss": 1.0833, + "step": 520 + }, + { + "epoch": 0.29, + "learning_rate": 4.8853061876299956e-05, + "loss": 1.068, + "step": 525 + }, + { + "epoch": 0.29, + "learning_rate": 4.8831284036391684e-05, + "loss": 1.0487, + "step": 530 + }, + { + "epoch": 0.3, + "learning_rate": 4.880930632652058e-05, + "loss": 1.059, + "step": 535 + }, + { + "epoch": 0.3, + "learning_rate": 4.878712893101092e-05, + "loss": 1.0408, + "step": 540 + }, + { + "epoch": 0.3, + "learning_rate": 4.876475203586171e-05, + "loss": 0.9976, + "step": 545 + }, + { + "epoch": 0.3, + "learning_rate": 4.874217582874514e-05, + "loss": 1.0358, + "step": 550 + }, + { + "epoch": 0.31, + "learning_rate": 4.8719400499005e-05, + "loss": 1.0073, + "step": 555 + }, + { + "epoch": 0.31, + "learning_rate": 4.869642623765509e-05, + "loss": 1.0742, + "step": 560 + }, + { + "epoch": 0.31, + "learning_rate": 4.867325323737765e-05, + "loss": 1.0463, + "step": 565 + }, + { + "epoch": 0.32, + "learning_rate": 4.864988169252168e-05, + "loss": 1.0242, + "step": 570 + }, + { + "epoch": 0.32, + "learning_rate": 4.8626311799101375e-05, + "loss": 1.0773, + "step": 575 + }, + { + "epoch": 0.32, + "learning_rate": 4.860254375479446e-05, + "loss": 1.0371, + "step": 580 + }, + { + "epoch": 0.32, + "learning_rate": 4.8578577758940504e-05, + "loss": 1.0399, + "step": 585 + }, + { + "epoch": 0.33, + "learning_rate": 4.855441401253928e-05, + "loss": 1.0411, + "step": 590 + }, + { + "epoch": 0.33, + "learning_rate": 4.8530052718249076e-05, + "loss": 1.0476, + "step": 595 + }, + { + "epoch": 0.33, + "learning_rate": 4.850549408038498e-05, + "loss": 1.0075, + "step": 600 + }, + { + "epoch": 0.33, + "learning_rate": 4.848073830491717e-05, + "loss": 1.0045, + "step": 605 + }, + { + "epoch": 0.34, + "learning_rate": 4.845578559946923e-05, + "loss": 1.0368, + "step": 610 + }, + { + "epoch": 0.34, + "learning_rate": 4.8430636173316306e-05, + "loss": 1.0156, + "step": 615 + }, + { + "epoch": 0.34, + "learning_rate": 4.840529023738348e-05, + "loss": 1.0334, + "step": 620 + }, + { + "epoch": 0.35, + "learning_rate": 4.837974800424389e-05, + "loss": 0.9994, + "step": 625 + }, + { + "epoch": 0.35, + "learning_rate": 4.8354009688117026e-05, + "loss": 1.0467, + "step": 630 + }, + { + "epoch": 0.35, + "learning_rate": 4.8328075504866874e-05, + "loss": 0.9968, + "step": 635 + }, + { + "epoch": 0.35, + "learning_rate": 4.8301945672000164e-05, + "loss": 1.041, + "step": 640 + }, + { + "epoch": 0.36, + "learning_rate": 4.8275620408664487e-05, + "loss": 0.9811, + "step": 645 + }, + { + "epoch": 0.36, + "learning_rate": 4.8249099935646494e-05, + "loss": 1.0235, + "step": 650 + }, + { + "epoch": 0.36, + "learning_rate": 4.822238447537003e-05, + "loss": 1.0152, + "step": 655 + }, + { + "epoch": 0.36, + "learning_rate": 4.819547425189429e-05, + "loss": 1.038, + "step": 660 + }, + { + "epoch": 0.37, + "learning_rate": 4.81683694909119e-05, + "loss": 1.0387, + "step": 665 + }, + { + "epoch": 0.37, + "learning_rate": 4.814107041974707e-05, + "loss": 1.019, + "step": 670 + }, + { + "epoch": 0.37, + "learning_rate": 4.811357726735366e-05, + "loss": 1.0403, + "step": 675 + }, + { + "epoch": 0.38, + "learning_rate": 4.808589026431324e-05, + "loss": 1.0828, + "step": 680 + }, + { + "epoch": 0.38, + "learning_rate": 4.805800964283322e-05, + "loss": 1.0128, + "step": 685 + }, + { + "epoch": 0.38, + "learning_rate": 4.802993563674483e-05, + "loss": 1.0502, + "step": 690 + }, + { + "epoch": 0.38, + "learning_rate": 4.80016684815012e-05, + "loss": 1.0169, + "step": 695 + }, + { + "epoch": 0.39, + "learning_rate": 4.7973208414175406e-05, + "loss": 1.0316, + "step": 700 + }, + { + "epoch": 0.39, + "learning_rate": 4.794455567345842e-05, + "loss": 1.0263, + "step": 705 + }, + { + "epoch": 0.39, + "learning_rate": 4.791571049965714e-05, + "loss": 1.0801, + "step": 710 + }, + { + "epoch": 0.4, + "learning_rate": 4.7886673134692404e-05, + "loss": 1.0575, + "step": 715 + }, + { + "epoch": 0.4, + "learning_rate": 4.7857443822096905e-05, + "loss": 0.9767, + "step": 720 + }, + { + "epoch": 0.4, + "learning_rate": 4.782802280701319e-05, + "loss": 0.9666, + "step": 725 + }, + { + "epoch": 0.4, + "learning_rate": 4.77984103361916e-05, + "loss": 0.9983, + "step": 730 + }, + { + "epoch": 0.41, + "learning_rate": 4.776860665798816e-05, + "loss": 1.0219, + "step": 735 + }, + { + "epoch": 0.41, + "learning_rate": 4.773861202236257e-05, + "loss": 0.9963, + "step": 740 + }, + { + "epoch": 0.41, + "learning_rate": 4.770842668087602e-05, + "loss": 1.0219, + "step": 745 + }, + { + "epoch": 0.41, + "learning_rate": 4.767805088668916e-05, + "loss": 1.0029, + "step": 750 + }, + { + "epoch": 0.42, + "learning_rate": 4.7647484894559936e-05, + "loss": 1.0077, + "step": 755 + }, + { + "epoch": 0.42, + "learning_rate": 4.7616728960841444e-05, + "loss": 0.9908, + "step": 760 + }, + { + "epoch": 0.42, + "learning_rate": 4.758578334347981e-05, + "loss": 1.0092, + "step": 765 + }, + { + "epoch": 0.43, + "learning_rate": 4.7554648302012015e-05, + "loss": 0.9874, + "step": 770 + }, + { + "epoch": 0.43, + "learning_rate": 4.7523324097563706e-05, + "loss": 1.0185, + "step": 775 + }, + { + "epoch": 0.43, + "learning_rate": 4.749181099284703e-05, + "loss": 1.0533, + "step": 780 + }, + { + "epoch": 0.43, + "learning_rate": 4.746010925215839e-05, + "loss": 1.0083, + "step": 785 + }, + { + "epoch": 0.44, + "learning_rate": 4.74282191413763e-05, + "loss": 1.0191, + "step": 790 + }, + { + "epoch": 0.44, + "learning_rate": 4.7396140927959045e-05, + "loss": 0.9786, + "step": 795 + }, + { + "epoch": 0.44, + "learning_rate": 4.7363874880942574e-05, + "loss": 1.0197, + "step": 800 + }, + { + "epoch": 0.45, + "learning_rate": 4.733142127093813e-05, + "loss": 1.0471, + "step": 805 + }, + { + "epoch": 0.45, + "learning_rate": 4.7298780370130014e-05, + "loss": 1.0134, + "step": 810 + }, + { + "epoch": 0.45, + "learning_rate": 4.726595245227336e-05, + "loss": 0.9801, + "step": 815 + }, + { + "epoch": 0.45, + "learning_rate": 4.723293779269173e-05, + "loss": 0.9944, + "step": 820 + }, + { + "epoch": 0.46, + "learning_rate": 4.7199736668274924e-05, + "loss": 1.0358, + "step": 825 + }, + { + "epoch": 0.46, + "learning_rate": 4.716634935747655e-05, + "loss": 0.9907, + "step": 830 + }, + { + "epoch": 0.46, + "learning_rate": 4.713277614031177e-05, + "loss": 1.0698, + "step": 835 + }, + { + "epoch": 0.46, + "learning_rate": 4.70990172983549e-05, + "loss": 1.0466, + "step": 840 + }, + { + "epoch": 0.47, + "learning_rate": 4.706507311473707e-05, + "loss": 0.9489, + "step": 845 + }, + { + "epoch": 0.47, + "learning_rate": 4.703094387414385e-05, + "loss": 0.9936, + "step": 850 + }, + { + "epoch": 0.47, + "learning_rate": 4.699662986281288e-05, + "loss": 0.9672, + "step": 855 + }, + { + "epoch": 0.48, + "learning_rate": 4.696213136853141e-05, + "loss": 1.0164, + "step": 860 + }, + { + "epoch": 0.48, + "learning_rate": 4.6927448680633954e-05, + "loss": 0.9817, + "step": 865 + }, + { + "epoch": 0.48, + "learning_rate": 4.689258208999983e-05, + "loss": 1.028, + "step": 870 + }, + { + "epoch": 0.48, + "learning_rate": 4.6857531889050716e-05, + "loss": 1.0239, + "step": 875 + }, + { + "epoch": 0.49, + "learning_rate": 4.682229837174821e-05, + "loss": 1.0378, + "step": 880 + }, + { + "epoch": 0.49, + "learning_rate": 4.678688183359135e-05, + "loss": 0.9908, + "step": 885 + }, + { + "epoch": 0.49, + "learning_rate": 4.675128257161418e-05, + "loss": 0.9384, + "step": 890 + }, + { + "epoch": 0.49, + "learning_rate": 4.671550088438319e-05, + "loss": 0.9481, + "step": 895 + }, + { + "epoch": 0.5, + "learning_rate": 4.6679537071994874e-05, + "loss": 0.9541, + "step": 900 + }, + { + "epoch": 0.5, + "learning_rate": 4.6643391436073165e-05, + "loss": 0.9787, + "step": 905 + }, + { + "epoch": 0.5, + "learning_rate": 4.660706427976693e-05, + "loss": 0.9897, + "step": 910 + }, + { + "epoch": 0.51, + "learning_rate": 4.657055590774745e-05, + "loss": 0.9947, + "step": 915 + }, + { + "epoch": 0.51, + "learning_rate": 4.6533866626205805e-05, + "loss": 0.9614, + "step": 920 + }, + { + "epoch": 0.51, + "learning_rate": 4.649699674285036e-05, + "loss": 1.0323, + "step": 925 + }, + { + "epoch": 0.51, + "learning_rate": 4.645994656690417e-05, + "loss": 1.0345, + "step": 930 + }, + { + "epoch": 0.52, + "learning_rate": 4.642271640910235e-05, + "loss": 1.0432, + "step": 935 + }, + { + "epoch": 0.52, + "learning_rate": 4.638530658168954e-05, + "loss": 1.0102, + "step": 940 + }, + { + "epoch": 0.52, + "learning_rate": 4.6347717398417203e-05, + "loss": 1.0361, + "step": 945 + }, + { + "epoch": 0.53, + "learning_rate": 4.6309949174541096e-05, + "loss": 0.9818, + "step": 950 + }, + { + "epoch": 0.53, + "learning_rate": 4.627200222681851e-05, + "loss": 1.0293, + "step": 955 + }, + { + "epoch": 0.53, + "learning_rate": 4.6233876873505694e-05, + "loss": 1.0125, + "step": 960 + }, + { + "epoch": 0.53, + "learning_rate": 4.619557343435516e-05, + "loss": 1.0039, + "step": 965 + }, + { + "epoch": 0.54, + "learning_rate": 4.615709223061302e-05, + "loss": 1.0146, + "step": 970 + }, + { + "epoch": 0.54, + "learning_rate": 4.611843358501624e-05, + "loss": 1.0352, + "step": 975 + }, + { + "epoch": 0.54, + "learning_rate": 4.6079597821789993e-05, + "loss": 1.0405, + "step": 980 + }, + { + "epoch": 0.54, + "learning_rate": 4.604058526664491e-05, + "loss": 0.9834, + "step": 985 + }, + { + "epoch": 0.55, + "learning_rate": 4.600139624677436e-05, + "loss": 1.0252, + "step": 990 + }, + { + "epoch": 0.55, + "learning_rate": 4.596203109085168e-05, + "loss": 1.0516, + "step": 995 + }, + { + "epoch": 0.55, + "learning_rate": 4.5922490129027464e-05, + "loss": 0.9762, + "step": 1000 + }, + { + "epoch": 0.56, + "learning_rate": 4.588277369292674e-05, + "loss": 0.9801, + "step": 1005 + }, + { + "epoch": 0.56, + "learning_rate": 4.5842882115646234e-05, + "loss": 1.0127, + "step": 1010 + }, + { + "epoch": 0.56, + "learning_rate": 4.580281573175157e-05, + "loss": 1.0415, + "step": 1015 + }, + { + "epoch": 0.56, + "learning_rate": 4.576257487727442e-05, + "loss": 0.9974, + "step": 1020 + }, + { + "epoch": 0.57, + "learning_rate": 4.572215988970974e-05, + "loss": 1.0097, + "step": 1025 + }, + { + "epoch": 0.57, + "learning_rate": 4.568157110801293e-05, + "loss": 1.0004, + "step": 1030 + }, + { + "epoch": 0.57, + "learning_rate": 4.5640808872596944e-05, + "loss": 0.9932, + "step": 1035 + }, + { + "epoch": 0.58, + "learning_rate": 4.5599873525329505e-05, + "loss": 0.9437, + "step": 1040 + }, + { + "epoch": 0.58, + "learning_rate": 4.555876540953019e-05, + "loss": 1.0004, + "step": 1045 + }, + { + "epoch": 0.58, + "learning_rate": 4.551748486996755e-05, + "loss": 0.9743, + "step": 1050 + }, + { + "epoch": 0.58, + "learning_rate": 4.547603225285626e-05, + "loss": 1.0303, + "step": 1055 + }, + { + "epoch": 0.59, + "learning_rate": 4.543440790585417e-05, + "loss": 1.0225, + "step": 1060 + }, + { + "epoch": 0.59, + "learning_rate": 4.539261217805939e-05, + "loss": 1.0102, + "step": 1065 + }, + { + "epoch": 0.59, + "learning_rate": 4.535064542000743e-05, + "loss": 1.0087, + "step": 1070 + }, + { + "epoch": 0.59, + "learning_rate": 4.5308507983668165e-05, + "loss": 0.952, + "step": 1075 + }, + { + "epoch": 0.6, + "learning_rate": 4.526620022244293e-05, + "loss": 0.9767, + "step": 1080 + }, + { + "epoch": 0.6, + "learning_rate": 4.522372249116158e-05, + "loss": 1.0049, + "step": 1085 + }, + { + "epoch": 0.6, + "learning_rate": 4.5181075146079456e-05, + "loss": 1.0397, + "step": 1090 + }, + { + "epoch": 0.61, + "learning_rate": 4.5138258544874455e-05, + "loss": 1.0167, + "step": 1095 + }, + { + "epoch": 0.61, + "learning_rate": 4.5095273046643985e-05, + "loss": 0.9966, + "step": 1100 + }, + { + "epoch": 0.61, + "learning_rate": 4.5052119011901986e-05, + "loss": 0.983, + "step": 1105 + }, + { + "epoch": 0.61, + "learning_rate": 4.500879680257587e-05, + "loss": 1.0301, + "step": 1110 + }, + { + "epoch": 0.62, + "learning_rate": 4.4965306782003535e-05, + "loss": 1.0371, + "step": 1115 + }, + { + "epoch": 0.62, + "learning_rate": 4.492164931493028e-05, + "loss": 1.0083, + "step": 1120 + }, + { + "epoch": 0.62, + "learning_rate": 4.487782476750575e-05, + "loss": 0.9728, + "step": 1125 + }, + { + "epoch": 0.62, + "learning_rate": 4.4833833507280884e-05, + "loss": 0.9892, + "step": 1130 + }, + { + "epoch": 0.63, + "learning_rate": 4.4789675903204805e-05, + "loss": 0.9847, + "step": 1135 + }, + { + "epoch": 0.63, + "learning_rate": 4.474535232562176e-05, + "loss": 1.0545, + "step": 1140 + }, + { + "epoch": 0.63, + "learning_rate": 4.470086314626797e-05, + "loss": 1.0204, + "step": 1145 + }, + { + "epoch": 0.64, + "learning_rate": 4.465620873826856e-05, + "loss": 0.9648, + "step": 1150 + }, + { + "epoch": 0.64, + "learning_rate": 4.46113894761344e-05, + "loss": 0.9545, + "step": 1155 + }, + { + "epoch": 0.64, + "learning_rate": 4.456640573575896e-05, + "loss": 1.0256, + "step": 1160 + }, + { + "epoch": 0.64, + "learning_rate": 4.4521257894415183e-05, + "loss": 0.99, + "step": 1165 + }, + { + "epoch": 0.65, + "learning_rate": 4.44759463307523e-05, + "loss": 0.9906, + "step": 1170 + }, + { + "epoch": 0.65, + "learning_rate": 4.443047142479266e-05, + "loss": 0.9876, + "step": 1175 + }, + { + "epoch": 0.65, + "learning_rate": 4.4384833557928553e-05, + "loss": 1.0495, + "step": 1180 + }, + { + "epoch": 0.66, + "learning_rate": 4.4339033112918966e-05, + "loss": 0.9869, + "step": 1185 + }, + { + "epoch": 0.66, + "learning_rate": 4.4293070473886456e-05, + "loss": 1.0299, + "step": 1190 + }, + { + "epoch": 0.66, + "learning_rate": 4.424694602631385e-05, + "loss": 1.0073, + "step": 1195 + }, + { + "epoch": 0.66, + "learning_rate": 4.420066015704105e-05, + "loss": 1.0023, + "step": 1200 + }, + { + "epoch": 0.67, + "learning_rate": 4.41542132542618e-05, + "loss": 0.9462, + "step": 1205 + }, + { + "epoch": 0.67, + "learning_rate": 4.410760570752037e-05, + "loss": 1.0116, + "step": 1210 + }, + { + "epoch": 0.67, + "learning_rate": 4.4060837907708375e-05, + "loss": 0.9652, + "step": 1215 + }, + { + "epoch": 0.67, + "learning_rate": 4.401391024706142e-05, + "loss": 1.0411, + "step": 1220 + }, + { + "epoch": 0.68, + "learning_rate": 4.396682311915586e-05, + "loss": 0.9691, + "step": 1225 + }, + { + "epoch": 0.68, + "learning_rate": 4.3919576918905495e-05, + "loss": 0.961, + "step": 1230 + }, + { + "epoch": 0.68, + "learning_rate": 4.387217204255819e-05, + "loss": 0.9602, + "step": 1235 + }, + { + "epoch": 0.69, + "learning_rate": 4.3824608887692666e-05, + "loss": 1.0197, + "step": 1240 + }, + { + "epoch": 0.69, + "learning_rate": 4.377688785321507e-05, + "loss": 0.9982, + "step": 1245 + }, + { + "epoch": 0.69, + "learning_rate": 4.372900933935569e-05, + "loss": 0.986, + "step": 1250 + }, + { + "epoch": 0.69, + "learning_rate": 4.368097374766556e-05, + "loss": 0.9744, + "step": 1255 + }, + { + "epoch": 0.7, + "learning_rate": 4.3632781481013105e-05, + "loss": 1.0078, + "step": 1260 + }, + { + "epoch": 0.7, + "learning_rate": 4.358443294358077e-05, + "loss": 0.9865, + "step": 1265 + }, + { + "epoch": 0.7, + "learning_rate": 4.35359285408616e-05, + "loss": 0.976, + "step": 1270 + }, + { + "epoch": 0.71, + "learning_rate": 4.348726867965591e-05, + "loss": 1.0141, + "step": 1275 + }, + { + "epoch": 0.71, + "learning_rate": 4.343845376806777e-05, + "loss": 0.952, + "step": 1280 + }, + { + "epoch": 0.71, + "learning_rate": 4.338948421550169e-05, + "loss": 1.0239, + "step": 1285 + }, + { + "epoch": 0.71, + "learning_rate": 4.334036043265909e-05, + "loss": 0.994, + "step": 1290 + }, + { + "epoch": 0.72, + "learning_rate": 4.329108283153492e-05, + "loss": 1.0505, + "step": 1295 + }, + { + "epoch": 0.72, + "learning_rate": 4.3241651825414195e-05, + "loss": 0.9919, + "step": 1300 + }, + { + "epoch": 0.72, + "learning_rate": 4.31920678288685e-05, + "loss": 1.0169, + "step": 1305 + }, + { + "epoch": 0.72, + "learning_rate": 4.3142331257752546e-05, + "loss": 0.9938, + "step": 1310 + }, + { + "epoch": 0.73, + "learning_rate": 4.309244252920064e-05, + "loss": 0.92, + "step": 1315 + }, + { + "epoch": 0.73, + "learning_rate": 4.304240206162326e-05, + "loss": 0.9537, + "step": 1320 + }, + { + "epoch": 0.73, + "learning_rate": 4.299221027470345e-05, + "loss": 0.996, + "step": 1325 + }, + { + "epoch": 0.74, + "learning_rate": 4.294186758939339e-05, + "loss": 0.9652, + "step": 1330 + }, + { + "epoch": 0.74, + "learning_rate": 4.2891374427910795e-05, + "loss": 1.0138, + "step": 1335 + }, + { + "epoch": 0.74, + "learning_rate": 4.284073121373544e-05, + "loss": 0.9267, + "step": 1340 + }, + { + "epoch": 0.74, + "learning_rate": 4.278993837160553e-05, + "loss": 0.9899, + "step": 1345 + }, + { + "epoch": 0.75, + "learning_rate": 4.273899632751422e-05, + "loss": 0.963, + "step": 1350 + }, + { + "epoch": 0.75, + "learning_rate": 4.2687905508705974e-05, + "loss": 1.0066, + "step": 1355 + }, + { + "epoch": 0.75, + "learning_rate": 4.263666634367303e-05, + "loss": 0.9939, + "step": 1360 + }, + { + "epoch": 0.75, + "learning_rate": 4.258527926215178e-05, + "loss": 0.9667, + "step": 1365 + }, + { + "epoch": 0.76, + "learning_rate": 4.253374469511917e-05, + "loss": 0.9999, + "step": 1370 + }, + { + "epoch": 0.76, + "learning_rate": 4.248206307478909e-05, + "loss": 0.9738, + "step": 1375 + }, + { + "epoch": 0.76, + "learning_rate": 4.243023483460875e-05, + "loss": 0.9829, + "step": 1380 + }, + { + "epoch": 0.77, + "learning_rate": 4.237826040925503e-05, + "loss": 1.0107, + "step": 1385 + }, + { + "epoch": 0.77, + "learning_rate": 4.232614023463088e-05, + "loss": 0.9992, + "step": 1390 + }, + { + "epoch": 0.77, + "learning_rate": 4.227387474786159e-05, + "loss": 0.9566, + "step": 1395 + }, + { + "epoch": 0.77, + "learning_rate": 4.222146438729119e-05, + "loss": 0.9722, + "step": 1400 + }, + { + "epoch": 0.78, + "learning_rate": 4.216890959247873e-05, + "loss": 0.988, + "step": 1405 + }, + { + "epoch": 0.78, + "learning_rate": 4.211621080419463e-05, + "loss": 1.0103, + "step": 1410 + }, + { + "epoch": 0.78, + "learning_rate": 4.206336846441695e-05, + "loss": 0.9805, + "step": 1415 + }, + { + "epoch": 0.79, + "learning_rate": 4.201038301632772e-05, + "loss": 1.0177, + "step": 1420 + }, + { + "epoch": 0.79, + "learning_rate": 4.195725490430917e-05, + "loss": 0.9938, + "step": 1425 + }, + { + "epoch": 0.79, + "learning_rate": 4.190398457394007e-05, + "loss": 1.0276, + "step": 1430 + }, + { + "epoch": 0.79, + "learning_rate": 4.1850572471991924e-05, + "loss": 0.9909, + "step": 1435 + }, + { + "epoch": 0.8, + "learning_rate": 4.1797019046425264e-05, + "loss": 0.9794, + "step": 1440 + }, + { + "epoch": 0.8, + "learning_rate": 4.1743324746385914e-05, + "loss": 1.0073, + "step": 1445 + }, + { + "epoch": 0.8, + "learning_rate": 4.1689490022201154e-05, + "loss": 1.0106, + "step": 1450 + }, + { + "epoch": 0.8, + "learning_rate": 4.163551532537601e-05, + "loss": 1.0241, + "step": 1455 + }, + { + "epoch": 0.81, + "learning_rate": 4.1581401108589425e-05, + "loss": 1.0178, + "step": 1460 + }, + { + "epoch": 0.81, + "learning_rate": 4.1527147825690495e-05, + "loss": 0.9426, + "step": 1465 + }, + { + "epoch": 0.81, + "learning_rate": 4.1472755931694626e-05, + "loss": 0.9642, + "step": 1470 + }, + { + "epoch": 0.82, + "learning_rate": 4.141822588277976e-05, + "loss": 0.9678, + "step": 1475 + }, + { + "epoch": 0.82, + "learning_rate": 4.136355813628251e-05, + "loss": 0.9654, + "step": 1480 + }, + { + "epoch": 0.82, + "learning_rate": 4.130875315069435e-05, + "loss": 0.9748, + "step": 1485 + }, + { + "epoch": 0.82, + "learning_rate": 4.125381138565775e-05, + "loss": 0.9321, + "step": 1490 + }, + { + "epoch": 0.83, + "learning_rate": 4.1198733301962346e-05, + "loss": 0.9885, + "step": 1495 + }, + { + "epoch": 0.83, + "learning_rate": 4.114351936154105e-05, + "loss": 0.9385, + "step": 1500 + }, + { + "epoch": 0.83, + "learning_rate": 4.108817002746619e-05, + "loss": 0.9362, + "step": 1505 + }, + { + "epoch": 0.83, + "learning_rate": 4.1032685763945625e-05, + "loss": 0.9764, + "step": 1510 + }, + { + "epoch": 0.84, + "learning_rate": 4.097706703631886e-05, + "loss": 0.961, + "step": 1515 + }, + { + "epoch": 0.84, + "learning_rate": 4.092131431105312e-05, + "loss": 0.9818, + "step": 1520 + }, + { + "epoch": 0.84, + "learning_rate": 4.086542805573945e-05, + "loss": 0.9855, + "step": 1525 + }, + { + "epoch": 0.85, + "learning_rate": 4.080940873908881e-05, + "loss": 0.9618, + "step": 1530 + }, + { + "epoch": 0.85, + "learning_rate": 4.07532568309281e-05, + "loss": 1.0102, + "step": 1535 + }, + { + "epoch": 0.85, + "learning_rate": 4.069697280219628e-05, + "loss": 0.9773, + "step": 1540 + }, + { + "epoch": 0.85, + "learning_rate": 4.0640557124940376e-05, + "loss": 0.9687, + "step": 1545 + }, + { + "epoch": 0.86, + "learning_rate": 4.058401027231152e-05, + "loss": 0.9818, + "step": 1550 + }, + { + "epoch": 0.86, + "learning_rate": 4.052733271856103e-05, + "loss": 0.9911, + "step": 1555 + }, + { + "epoch": 0.86, + "learning_rate": 4.0470524939036355e-05, + "loss": 0.9694, + "step": 1560 + }, + { + "epoch": 0.87, + "learning_rate": 4.0413587410177155e-05, + "loss": 0.9374, + "step": 1565 + }, + { + "epoch": 0.87, + "learning_rate": 4.035652060951128e-05, + "loss": 0.956, + "step": 1570 + }, + { + "epoch": 0.87, + "learning_rate": 4.0299325015650774e-05, + "loss": 0.9491, + "step": 1575 + }, + { + "epoch": 0.87, + "learning_rate": 4.024200110828783e-05, + "loss": 0.9569, + "step": 1580 + }, + { + "epoch": 0.88, + "learning_rate": 4.018454936819082e-05, + "loss": 0.9627, + "step": 1585 + }, + { + "epoch": 0.88, + "learning_rate": 4.012697027720018e-05, + "loss": 0.9922, + "step": 1590 + }, + { + "epoch": 0.88, + "learning_rate": 4.0069264318224506e-05, + "loss": 0.9703, + "step": 1595 + }, + { + "epoch": 0.88, + "learning_rate": 4.0011431975236337e-05, + "loss": 0.9436, + "step": 1600 + }, + { + "epoch": 0.89, + "learning_rate": 3.995347373326822e-05, + "loss": 0.961, + "step": 1605 + }, + { + "epoch": 0.89, + "learning_rate": 3.989539007840861e-05, + "loss": 0.9247, + "step": 1610 + }, + { + "epoch": 0.89, + "learning_rate": 3.983718149779775e-05, + "loss": 0.9537, + "step": 1615 + }, + { + "epoch": 0.9, + "learning_rate": 3.9778848479623656e-05, + "loss": 0.9415, + "step": 1620 + }, + { + "epoch": 0.9, + "learning_rate": 3.972039151311795e-05, + "loss": 0.9954, + "step": 1625 + }, + { + "epoch": 0.9, + "learning_rate": 3.966181108855183e-05, + "loss": 0.9451, + "step": 1630 + }, + { + "epoch": 0.9, + "learning_rate": 3.960310769723189e-05, + "loss": 0.9977, + "step": 1635 + }, + { + "epoch": 0.91, + "learning_rate": 3.9544281831496034e-05, + "loss": 0.9806, + "step": 1640 + }, + { + "epoch": 0.91, + "learning_rate": 3.9485333984709374e-05, + "loss": 0.9851, + "step": 1645 + }, + { + "epoch": 0.91, + "learning_rate": 3.942626465126001e-05, + "loss": 1.0089, + "step": 1650 + }, + { + "epoch": 0.92, + "learning_rate": 3.9367074326555e-05, + "loss": 0.9562, + "step": 1655 + }, + { + "epoch": 0.92, + "learning_rate": 3.930776350701609e-05, + "loss": 0.9892, + "step": 1660 + }, + { + "epoch": 0.92, + "learning_rate": 3.92483326900756e-05, + "loss": 0.9875, + "step": 1665 + }, + { + "epoch": 0.92, + "learning_rate": 3.91887823741723e-05, + "loss": 1.0012, + "step": 1670 + }, + { + "epoch": 0.93, + "learning_rate": 3.9129113058747136e-05, + "loss": 0.97, + "step": 1675 + }, + { + "epoch": 0.93, + "learning_rate": 3.9069325244239095e-05, + "loss": 0.9793, + "step": 1680 + }, + { + "epoch": 0.93, + "learning_rate": 3.900941943208103e-05, + "loss": 0.9864, + "step": 1685 + }, + { + "epoch": 0.93, + "learning_rate": 3.894939612469539e-05, + "loss": 0.9596, + "step": 1690 + }, + { + "epoch": 0.94, + "learning_rate": 3.888925582549006e-05, + "loss": 0.9679, + "step": 1695 + }, + { + "epoch": 0.94, + "learning_rate": 3.882899903885412e-05, + "loss": 0.9395, + "step": 1700 + }, + { + "epoch": 0.94, + "learning_rate": 3.876862627015361e-05, + "loss": 0.9774, + "step": 1705 + }, + { + "epoch": 0.95, + "learning_rate": 3.87081380257273e-05, + "loss": 0.9686, + "step": 1710 + }, + { + "epoch": 0.95, + "learning_rate": 3.864753481288244e-05, + "loss": 0.953, + "step": 1715 + }, + { + "epoch": 0.95, + "learning_rate": 3.8586817139890515e-05, + "loss": 0.9719, + "step": 1720 + }, + { + "epoch": 0.95, + "learning_rate": 3.852598551598294e-05, + "loss": 0.9748, + "step": 1725 + }, + { + "epoch": 0.96, + "learning_rate": 3.8465040451346874e-05, + "loss": 0.9741, + "step": 1730 + }, + { + "epoch": 0.96, + "learning_rate": 3.8403982457120836e-05, + "loss": 0.9747, + "step": 1735 + }, + { + "epoch": 0.96, + "learning_rate": 3.834281204539051e-05, + "loss": 0.9791, + "step": 1740 + }, + { + "epoch": 0.96, + "learning_rate": 3.828152972918438e-05, + "loss": 0.9704, + "step": 1745 + }, + { + "epoch": 0.97, + "learning_rate": 3.82201360224695e-05, + "loss": 0.9556, + "step": 1750 + }, + { + "epoch": 0.97, + "learning_rate": 3.815863144014711e-05, + "loss": 0.9552, + "step": 1755 + }, + { + "epoch": 0.97, + "learning_rate": 3.809701649804834e-05, + "loss": 0.9594, + "step": 1760 + }, + { + "epoch": 0.98, + "learning_rate": 3.8035291712929926e-05, + "loss": 0.959, + "step": 1765 + }, + { + "epoch": 0.98, + "learning_rate": 3.797345760246982e-05, + "loss": 1.0254, + "step": 1770 + }, + { + "epoch": 0.98, + "learning_rate": 3.791151468526289e-05, + "loss": 0.9925, + "step": 1775 + }, + { + "epoch": 0.98, + "learning_rate": 3.784946348081654e-05, + "loss": 0.9516, + "step": 1780 + }, + { + "epoch": 0.99, + "learning_rate": 3.7787304509546365e-05, + "loss": 0.954, + "step": 1785 + }, + { + "epoch": 0.99, + "learning_rate": 3.7725038292771774e-05, + "loss": 0.9465, + "step": 1790 + }, + { + "epoch": 0.99, + "learning_rate": 3.766266535271167e-05, + "loss": 0.9792, + "step": 1795 + }, + { + "epoch": 1.0, + "learning_rate": 3.760018621248e-05, + "loss": 0.964, + "step": 1800 + }, + { + "epoch": 1.0, + "learning_rate": 3.75376013960814e-05, + "loss": 0.9419, + "step": 1805 + }, + { + "epoch": 1.0, + "learning_rate": 3.747491142840681e-05, + "loss": 0.9818, + "step": 1810 + }, + { + "epoch": 1.0, + "learning_rate": 3.741211683522904e-05, + "loss": 0.9153, + "step": 1815 + }, + { + "epoch": 1.01, + "learning_rate": 3.734921814319841e-05, + "loss": 0.955, + "step": 1820 + }, + { + "epoch": 1.01, + "learning_rate": 3.728621587983828e-05, + "loss": 0.9467, + "step": 1825 + }, + { + "epoch": 1.01, + "learning_rate": 3.722311057354067e-05, + "loss": 0.9816, + "step": 1830 + }, + { + "epoch": 1.01, + "learning_rate": 3.715990275356178e-05, + "loss": 0.9727, + "step": 1835 + }, + { + "epoch": 1.02, + "learning_rate": 3.7096592950017617e-05, + "loss": 0.9822, + "step": 1840 + }, + { + "epoch": 1.02, + "learning_rate": 3.703318169387947e-05, + "loss": 0.942, + "step": 1845 + }, + { + "epoch": 1.02, + "learning_rate": 3.696966951696952e-05, + "loss": 0.9306, + "step": 1850 + }, + { + "epoch": 1.03, + "learning_rate": 3.690605695195637e-05, + "loss": 0.9654, + "step": 1855 + }, + { + "epoch": 1.03, + "learning_rate": 3.684234453235054e-05, + "loss": 0.9592, + "step": 1860 + }, + { + "epoch": 1.03, + "learning_rate": 3.677853279250003e-05, + "loss": 0.988, + "step": 1865 + }, + { + "epoch": 1.03, + "learning_rate": 3.671462226758583e-05, + "loss": 0.9462, + "step": 1870 + }, + { + "epoch": 1.04, + "learning_rate": 3.665061349361742e-05, + "loss": 0.9685, + "step": 1875 + }, + { + "epoch": 1.04, + "learning_rate": 3.658650700742828e-05, + "loss": 0.9772, + "step": 1880 + }, + { + "epoch": 1.04, + "learning_rate": 3.6522303346671404e-05, + "loss": 0.9482, + "step": 1885 + }, + { + "epoch": 1.05, + "learning_rate": 3.645800304981477e-05, + "loss": 1.0069, + "step": 1890 + }, + { + "epoch": 1.05, + "learning_rate": 3.639360665613683e-05, + "loss": 0.9003, + "step": 1895 + }, + { + "epoch": 1.05, + "learning_rate": 3.632911470572197e-05, + "loss": 0.9279, + "step": 1900 + }, + { + "epoch": 1.05, + "learning_rate": 3.626452773945603e-05, + "loss": 0.9237, + "step": 1905 + }, + { + "epoch": 1.06, + "learning_rate": 3.619984629902172e-05, + "loss": 1.0086, + "step": 1910 + }, + { + "epoch": 1.06, + "learning_rate": 3.613507092689409e-05, + "loss": 0.9625, + "step": 1915 + }, + { + "epoch": 1.06, + "learning_rate": 3.607020216633599e-05, + "loss": 0.9297, + "step": 1920 + }, + { + "epoch": 1.06, + "learning_rate": 3.60052405613935e-05, + "loss": 0.9718, + "step": 1925 + }, + { + "epoch": 1.07, + "learning_rate": 3.594018665689139e-05, + "loss": 0.9512, + "step": 1930 + }, + { + "epoch": 1.07, + "learning_rate": 3.5875040998428513e-05, + "loss": 0.9923, + "step": 1935 + }, + { + "epoch": 1.07, + "learning_rate": 3.5809804132373253e-05, + "loss": 0.9518, + "step": 1940 + }, + { + "epoch": 1.08, + "learning_rate": 3.574447660585897e-05, + "loss": 0.9324, + "step": 1945 + }, + { + "epoch": 1.08, + "learning_rate": 3.5679058966779344e-05, + "loss": 0.9724, + "step": 1950 + }, + { + "epoch": 1.08, + "learning_rate": 3.561355176378384e-05, + "loss": 0.978, + "step": 1955 + }, + { + "epoch": 1.08, + "learning_rate": 3.554795554627307e-05, + "loss": 0.9893, + "step": 1960 + }, + { + "epoch": 1.09, + "learning_rate": 3.548227086439422e-05, + "loss": 0.967, + "step": 1965 + }, + { + "epoch": 1.09, + "learning_rate": 3.541649826903639e-05, + "loss": 0.9529, + "step": 1970 + }, + { + "epoch": 1.09, + "learning_rate": 3.535063831182602e-05, + "loss": 0.9506, + "step": 1975 + }, + { + "epoch": 1.09, + "learning_rate": 3.528469154512224e-05, + "loss": 0.9525, + "step": 1980 + }, + { + "epoch": 1.1, + "learning_rate": 3.521865852201223e-05, + "loss": 0.9258, + "step": 1985 + }, + { + "epoch": 1.1, + "learning_rate": 3.5152539796306596e-05, + "loss": 0.9417, + "step": 1990 + }, + { + "epoch": 1.1, + "learning_rate": 3.508633592253472e-05, + "loss": 0.9465, + "step": 1995 + }, + { + "epoch": 1.11, + "learning_rate": 3.502004745594011e-05, + "loss": 0.9494, + "step": 2000 + }, + { + "epoch": 1.11, + "learning_rate": 3.4953674952475755e-05, + "loss": 0.9709, + "step": 2005 + }, + { + "epoch": 1.11, + "learning_rate": 3.488721896879943e-05, + "loss": 0.9581, + "step": 2010 + }, + { + "epoch": 1.11, + "learning_rate": 3.4820680062269074e-05, + "loss": 0.974, + "step": 2015 + }, + { + "epoch": 1.12, + "learning_rate": 3.4754058790938046e-05, + "loss": 0.9768, + "step": 2020 + }, + { + "epoch": 1.12, + "learning_rate": 3.468735571355055e-05, + "loss": 0.982, + "step": 2025 + }, + { + "epoch": 1.12, + "learning_rate": 3.4620571389536825e-05, + "loss": 0.984, + "step": 2030 + }, + { + "epoch": 1.13, + "learning_rate": 3.455370637900856e-05, + "loss": 0.9604, + "step": 2035 + }, + { + "epoch": 1.13, + "learning_rate": 3.448676124275414e-05, + "loss": 0.897, + "step": 2040 + }, + { + "epoch": 1.13, + "learning_rate": 3.4419736542233925e-05, + "loss": 0.9968, + "step": 2045 + }, + { + "epoch": 1.13, + "learning_rate": 3.4352632839575616e-05, + "loss": 0.9479, + "step": 2050 + }, + { + "epoch": 1.14, + "learning_rate": 3.428545069756946e-05, + "loss": 0.9724, + "step": 2055 + }, + { + "epoch": 1.14, + "learning_rate": 3.42181906796636e-05, + "loss": 0.9493, + "step": 2060 + }, + { + "epoch": 1.14, + "learning_rate": 3.415085334995927e-05, + "loss": 0.9348, + "step": 2065 + }, + { + "epoch": 1.14, + "learning_rate": 3.408343927320613e-05, + "loss": 0.9702, + "step": 2070 + }, + { + "epoch": 1.15, + "learning_rate": 3.401594901479753e-05, + "loss": 0.9089, + "step": 2075 + }, + { + "epoch": 1.15, + "learning_rate": 3.394838314076572e-05, + "loss": 0.9606, + "step": 2080 + }, + { + "epoch": 1.15, + "learning_rate": 3.3880742217777115e-05, + "loss": 0.9743, + "step": 2085 + }, + { + "epoch": 1.16, + "learning_rate": 3.381302681312759e-05, + "loss": 0.9469, + "step": 2090 + }, + { + "epoch": 1.16, + "learning_rate": 3.374523749473767e-05, + "loss": 0.949, + "step": 2095 + }, + { + "epoch": 1.16, + "learning_rate": 3.367737483114779e-05, + "loss": 0.9567, + "step": 2100 + }, + { + "epoch": 1.16, + "learning_rate": 3.360943939151351e-05, + "loss": 0.9718, + "step": 2105 + }, + { + "epoch": 1.17, + "learning_rate": 3.354143174560078e-05, + "loss": 0.9626, + "step": 2110 + }, + { + "epoch": 1.17, + "learning_rate": 3.3473352463781105e-05, + "loss": 0.9346, + "step": 2115 + }, + { + "epoch": 1.17, + "learning_rate": 3.340520211702681e-05, + "loss": 0.9208, + "step": 2120 + }, + { + "epoch": 1.18, + "learning_rate": 3.333698127690623e-05, + "loss": 0.9856, + "step": 2125 + }, + { + "epoch": 1.18, + "learning_rate": 3.326869051557891e-05, + "loss": 0.9049, + "step": 2130 + }, + { + "epoch": 1.18, + "learning_rate": 3.320033040579082e-05, + "loss": 0.9222, + "step": 2135 + }, + { + "epoch": 1.18, + "learning_rate": 3.3131901520869565e-05, + "loss": 0.9648, + "step": 2140 + }, + { + "epoch": 1.19, + "learning_rate": 3.306340443471951e-05, + "loss": 0.9538, + "step": 2145 + }, + { + "epoch": 1.19, + "learning_rate": 3.299483972181708e-05, + "loss": 0.9314, + "step": 2150 + }, + { + "epoch": 1.19, + "learning_rate": 3.292620795720583e-05, + "loss": 0.9576, + "step": 2155 + }, + { + "epoch": 1.19, + "learning_rate": 3.285750971649167e-05, + "loss": 0.9499, + "step": 2160 + }, + { + "epoch": 1.2, + "learning_rate": 3.278874557583807e-05, + "loss": 0.9568, + "step": 2165 + }, + { + "epoch": 1.2, + "learning_rate": 3.271991611196117e-05, + "loss": 0.9642, + "step": 2170 + }, + { + "epoch": 1.2, + "learning_rate": 3.265102190212497e-05, + "loss": 0.9526, + "step": 2175 + }, + { + "epoch": 1.21, + "learning_rate": 3.258206352413648e-05, + "loss": 0.933, + "step": 2180 + }, + { + "epoch": 1.21, + "learning_rate": 3.2513041556340887e-05, + "loss": 0.9683, + "step": 2185 + }, + { + "epoch": 1.21, + "learning_rate": 3.244395657761671e-05, + "loss": 0.9155, + "step": 2190 + }, + { + "epoch": 1.21, + "learning_rate": 3.2374809167370924e-05, + "loss": 0.9262, + "step": 2195 + }, + { + "epoch": 1.22, + "learning_rate": 3.230559990553409e-05, + "loss": 0.9778, + "step": 2200 + }, + { + "epoch": 1.22, + "learning_rate": 3.2236329372555544e-05, + "loss": 0.9577, + "step": 2205 + }, + { + "epoch": 1.22, + "learning_rate": 3.2166998149398465e-05, + "loss": 0.9286, + "step": 2210 + }, + { + "epoch": 1.22, + "learning_rate": 3.209760681753505e-05, + "loss": 0.9634, + "step": 2215 + }, + { + "epoch": 1.23, + "learning_rate": 3.2028155958941615e-05, + "loss": 0.9467, + "step": 2220 + }, + { + "epoch": 1.23, + "learning_rate": 3.195864615609373e-05, + "loss": 0.9543, + "step": 2225 + }, + { + "epoch": 1.23, + "learning_rate": 3.1889077991961304e-05, + "loss": 0.9914, + "step": 2230 + }, + { + "epoch": 1.24, + "learning_rate": 3.181945205000373e-05, + "loss": 0.9309, + "step": 2235 + }, + { + "epoch": 1.24, + "learning_rate": 3.1749768914164955e-05, + "loss": 0.9299, + "step": 2240 + }, + { + "epoch": 1.24, + "learning_rate": 3.168002916886864e-05, + "loss": 0.9462, + "step": 2245 + }, + { + "epoch": 1.24, + "learning_rate": 3.1610233399013194e-05, + "loss": 0.948, + "step": 2250 + }, + { + "epoch": 1.25, + "learning_rate": 3.15403821899669e-05, + "loss": 0.9335, + "step": 2255 + }, + { + "epoch": 1.25, + "learning_rate": 3.147047612756302e-05, + "loss": 0.952, + "step": 2260 + }, + { + "epoch": 1.25, + "learning_rate": 3.140051579809484e-05, + "loss": 0.9532, + "step": 2265 + }, + { + "epoch": 1.26, + "learning_rate": 3.133050178831079e-05, + "loss": 0.9853, + "step": 2270 + }, + { + "epoch": 1.26, + "learning_rate": 3.12604346854095e-05, + "loss": 0.8925, + "step": 2275 + }, + { + "epoch": 1.26, + "learning_rate": 3.119031507703491e-05, + "loss": 0.942, + "step": 2280 + }, + { + "epoch": 1.26, + "learning_rate": 3.112014355127129e-05, + "loss": 0.9132, + "step": 2285 + }, + { + "epoch": 1.27, + "learning_rate": 3.104992069663835e-05, + "loss": 0.9335, + "step": 2290 + }, + { + "epoch": 1.27, + "learning_rate": 3.0979647102086273e-05, + "loss": 0.9403, + "step": 2295 + }, + { + "epoch": 1.27, + "learning_rate": 3.090932335699081e-05, + "loss": 0.9246, + "step": 2300 + }, + { + "epoch": 1.27, + "learning_rate": 3.083895005114831e-05, + "loss": 0.912, + "step": 2305 + }, + { + "epoch": 1.28, + "learning_rate": 3.076852777477079e-05, + "loss": 0.9334, + "step": 2310 + }, + { + "epoch": 1.28, + "learning_rate": 3.069805711848096e-05, + "loss": 0.933, + "step": 2315 + }, + { + "epoch": 1.28, + "learning_rate": 3.062753867330729e-05, + "loss": 0.9348, + "step": 2320 + }, + { + "epoch": 1.29, + "learning_rate": 3.055697303067905e-05, + "loss": 0.9997, + "step": 2325 + }, + { + "epoch": 1.29, + "learning_rate": 3.048636078242137e-05, + "loss": 0.9196, + "step": 2330 + }, + { + "epoch": 1.29, + "learning_rate": 3.0415702520750235e-05, + "loss": 0.9735, + "step": 2335 + }, + { + "epoch": 1.29, + "learning_rate": 3.0344998838267525e-05, + "loss": 0.9395, + "step": 2340 + }, + { + "epoch": 1.3, + "learning_rate": 3.0274250327956093e-05, + "loss": 0.9455, + "step": 2345 + }, + { + "epoch": 1.3, + "learning_rate": 3.020345758317474e-05, + "loss": 0.968, + "step": 2350 + }, + { + "epoch": 1.3, + "learning_rate": 3.0132621197653245e-05, + "loss": 0.9403, + "step": 2355 + }, + { + "epoch": 1.3, + "learning_rate": 3.0061741765487418e-05, + "loss": 0.9267, + "step": 2360 + }, + { + "epoch": 1.31, + "learning_rate": 2.9990819881134073e-05, + "loss": 0.9734, + "step": 2365 + }, + { + "epoch": 1.31, + "learning_rate": 2.9919856139406093e-05, + "loss": 0.949, + "step": 2370 + }, + { + "epoch": 1.31, + "learning_rate": 2.9848851135467386e-05, + "loss": 0.9464, + "step": 2375 + }, + { + "epoch": 1.32, + "learning_rate": 2.977780546482794e-05, + "loss": 0.9709, + "step": 2380 + }, + { + "epoch": 1.32, + "learning_rate": 2.9706719723338795e-05, + "loss": 0.9202, + "step": 2385 + }, + { + "epoch": 1.32, + "learning_rate": 2.9635594507187074e-05, + "loss": 0.9625, + "step": 2390 + }, + { + "epoch": 1.32, + "learning_rate": 2.956443041289096e-05, + "loss": 0.9307, + "step": 2395 + }, + { + "epoch": 1.33, + "learning_rate": 2.9493228037294702e-05, + "loss": 0.907, + "step": 2400 + }, + { + "epoch": 1.33, + "learning_rate": 2.9421987977563613e-05, + "loss": 0.9487, + "step": 2405 + }, + { + "epoch": 1.33, + "learning_rate": 2.935071083117907e-05, + "loss": 0.9259, + "step": 2410 + }, + { + "epoch": 1.34, + "learning_rate": 2.9279397195933457e-05, + "loss": 0.9357, + "step": 2415 + }, + { + "epoch": 1.34, + "learning_rate": 2.920804766992521e-05, + "loss": 0.9149, + "step": 2420 + }, + { + "epoch": 1.34, + "learning_rate": 2.9136662851553787e-05, + "loss": 0.9664, + "step": 2425 + }, + { + "epoch": 1.34, + "learning_rate": 2.906524333951461e-05, + "loss": 0.973, + "step": 2430 + }, + { + "epoch": 1.35, + "learning_rate": 2.899378973279409e-05, + "loss": 0.9797, + "step": 2435 + }, + { + "epoch": 1.35, + "learning_rate": 2.892230263066459e-05, + "loss": 0.9604, + "step": 2440 + }, + { + "epoch": 1.35, + "learning_rate": 2.885078263267938e-05, + "loss": 0.9147, + "step": 2445 + }, + { + "epoch": 1.35, + "learning_rate": 2.8779230338667634e-05, + "loss": 0.9827, + "step": 2450 + }, + { + "epoch": 1.36, + "learning_rate": 2.870764634872939e-05, + "loss": 0.9362, + "step": 2455 + }, + { + "epoch": 1.36, + "learning_rate": 2.86360312632305e-05, + "loss": 0.9373, + "step": 2460 + }, + { + "epoch": 1.36, + "learning_rate": 2.8564385682797622e-05, + "loss": 0.955, + "step": 2465 + }, + { + "epoch": 1.37, + "learning_rate": 2.8492710208313177e-05, + "loss": 0.9108, + "step": 2470 + }, + { + "epoch": 1.37, + "learning_rate": 2.8421005440910303e-05, + "loss": 0.9259, + "step": 2475 + }, + { + "epoch": 1.37, + "learning_rate": 2.8349271981967797e-05, + "loss": 0.9333, + "step": 2480 + }, + { + "epoch": 1.37, + "learning_rate": 2.8277510433105102e-05, + "loss": 0.9437, + "step": 2485 + }, + { + "epoch": 1.38, + "learning_rate": 2.820572139617725e-05, + "loss": 0.9189, + "step": 2490 + }, + { + "epoch": 1.38, + "learning_rate": 2.8133905473269802e-05, + "loss": 0.9522, + "step": 2495 + }, + { + "epoch": 1.38, + "learning_rate": 2.8062063266693818e-05, + "loss": 0.9352, + "step": 2500 + }, + { + "epoch": 1.39, + "learning_rate": 2.7990195378980784e-05, + "loss": 0.9232, + "step": 2505 + }, + { + "epoch": 1.39, + "learning_rate": 2.7918302412877583e-05, + "loss": 0.8807, + "step": 2510 + }, + { + "epoch": 1.39, + "learning_rate": 2.7846384971341427e-05, + "loss": 0.9231, + "step": 2515 + }, + { + "epoch": 1.39, + "learning_rate": 2.7774443657534788e-05, + "loss": 0.9253, + "step": 2520 + }, + { + "epoch": 1.4, + "learning_rate": 2.770247907482036e-05, + "loss": 0.9646, + "step": 2525 + }, + { + "epoch": 1.4, + "learning_rate": 2.763049182675599e-05, + "loss": 0.9575, + "step": 2530 + }, + { + "epoch": 1.4, + "learning_rate": 2.7558482517089617e-05, + "loss": 0.9234, + "step": 2535 + }, + { + "epoch": 1.4, + "learning_rate": 2.748645174975421e-05, + "loss": 0.9215, + "step": 2540 + }, + { + "epoch": 1.41, + "learning_rate": 2.74144001288627e-05, + "loss": 0.959, + "step": 2545 + }, + { + "epoch": 1.41, + "learning_rate": 2.7342328258702894e-05, + "loss": 0.9509, + "step": 2550 + }, + { + "epoch": 1.41, + "learning_rate": 2.727023674373246e-05, + "loss": 0.8989, + "step": 2555 + }, + { + "epoch": 1.42, + "learning_rate": 2.7198126188573807e-05, + "loss": 0.9653, + "step": 2560 + }, + { + "epoch": 1.42, + "learning_rate": 2.7125997198009028e-05, + "loss": 0.9046, + "step": 2565 + }, + { + "epoch": 1.42, + "learning_rate": 2.7053850376974848e-05, + "loss": 0.9318, + "step": 2570 + }, + { + "epoch": 1.42, + "learning_rate": 2.6981686330557516e-05, + "loss": 0.9292, + "step": 2575 + }, + { + "epoch": 1.43, + "learning_rate": 2.6909505663987756e-05, + "loss": 0.99, + "step": 2580 + }, + { + "epoch": 1.43, + "learning_rate": 2.6837308982635678e-05, + "loss": 0.9737, + "step": 2585 + }, + { + "epoch": 1.43, + "learning_rate": 2.6765096892005726e-05, + "loss": 0.9649, + "step": 2590 + }, + { + "epoch": 1.43, + "learning_rate": 2.6692869997731545e-05, + "loss": 0.9687, + "step": 2595 + }, + { + "epoch": 1.44, + "learning_rate": 2.6620628905570964e-05, + "loss": 0.9708, + "step": 2600 + }, + { + "epoch": 1.44, + "learning_rate": 2.6548374221400884e-05, + "loss": 0.9498, + "step": 2605 + }, + { + "epoch": 1.44, + "learning_rate": 2.6476106551212188e-05, + "loss": 0.9512, + "step": 2610 + }, + { + "epoch": 1.45, + "learning_rate": 2.6403826501104682e-05, + "loss": 0.964, + "step": 2615 + }, + { + "epoch": 1.45, + "learning_rate": 2.6331534677281998e-05, + "loss": 0.9321, + "step": 2620 + }, + { + "epoch": 1.45, + "learning_rate": 2.6259231686046508e-05, + "loss": 0.9032, + "step": 2625 + }, + { + "epoch": 1.45, + "learning_rate": 2.6186918133794252e-05, + "loss": 0.9543, + "step": 2630 + }, + { + "epoch": 1.46, + "learning_rate": 2.6114594627009847e-05, + "loss": 0.9355, + "step": 2635 + }, + { + "epoch": 1.46, + "learning_rate": 2.604226177226137e-05, + "loss": 0.9111, + "step": 2640 + }, + { + "epoch": 1.46, + "learning_rate": 2.596992017619534e-05, + "loss": 0.9896, + "step": 2645 + }, + { + "epoch": 1.47, + "learning_rate": 2.589757044553155e-05, + "loss": 0.9715, + "step": 2650 + }, + { + "epoch": 1.47, + "learning_rate": 2.5825213187058045e-05, + "loss": 0.8911, + "step": 2655 + }, + { + "epoch": 1.47, + "learning_rate": 2.5752849007625986e-05, + "loss": 0.9446, + "step": 2660 + }, + { + "epoch": 1.47, + "learning_rate": 2.568047851414459e-05, + "loss": 0.9453, + "step": 2665 + }, + { + "epoch": 1.48, + "learning_rate": 2.5608102313576027e-05, + "loss": 0.9244, + "step": 2670 + }, + { + "epoch": 1.48, + "learning_rate": 2.553572101293033e-05, + "loss": 0.9478, + "step": 2675 + }, + { + "epoch": 1.48, + "learning_rate": 2.546333521926031e-05, + "loss": 0.9708, + "step": 2680 + }, + { + "epoch": 1.48, + "learning_rate": 2.5390945539656445e-05, + "loss": 0.9266, + "step": 2685 + }, + { + "epoch": 1.49, + "learning_rate": 2.5318552581241822e-05, + "loss": 0.9199, + "step": 2690 + }, + { + "epoch": 1.49, + "learning_rate": 2.524615695116702e-05, + "loss": 0.9666, + "step": 2695 + }, + { + "epoch": 1.49, + "learning_rate": 2.5173759256605027e-05, + "loss": 0.9247, + "step": 2700 + }, + { + "epoch": 1.5, + "learning_rate": 2.510136010474614e-05, + "loss": 0.9534, + "step": 2705 + }, + { + "epoch": 1.5, + "learning_rate": 2.5028960102792887e-05, + "loss": 0.9502, + "step": 2710 + }, + { + "epoch": 1.5, + "learning_rate": 2.4971039897207112e-05, + "loss": 0.9331, + "step": 2715 + }, + { + "epoch": 1.5, + "learning_rate": 2.4898639895253865e-05, + "loss": 0.9806, + "step": 2720 + }, + { + "epoch": 1.51, + "learning_rate": 2.4826240743394982e-05, + "loss": 0.9525, + "step": 2725 + }, + { + "epoch": 1.51, + "learning_rate": 2.4753843048832985e-05, + "loss": 0.937, + "step": 2730 + }, + { + "epoch": 1.51, + "learning_rate": 2.4681447418758187e-05, + "loss": 0.935, + "step": 2735 + }, + { + "epoch": 1.52, + "learning_rate": 2.460905446034356e-05, + "loss": 0.9662, + "step": 2740 + }, + { + "epoch": 1.52, + "learning_rate": 2.45366647807397e-05, + "loss": 0.9249, + "step": 2745 + }, + { + "epoch": 1.52, + "learning_rate": 2.446427898706967e-05, + "loss": 0.9673, + "step": 2750 + }, + { + "epoch": 1.52, + "learning_rate": 2.439189768642398e-05, + "loss": 0.9316, + "step": 2755 + }, + { + "epoch": 1.53, + "learning_rate": 2.431952148585541e-05, + "loss": 0.9297, + "step": 2760 + }, + { + "epoch": 1.53, + "learning_rate": 2.424715099237402e-05, + "loss": 0.9278, + "step": 2765 + }, + { + "epoch": 1.53, + "learning_rate": 2.4174786812941968e-05, + "loss": 0.8954, + "step": 2770 + }, + { + "epoch": 1.53, + "learning_rate": 2.4102429554468456e-05, + "loss": 0.9586, + "step": 2775 + }, + { + "epoch": 1.54, + "learning_rate": 2.4030079823804673e-05, + "loss": 0.9119, + "step": 2780 + }, + { + "epoch": 1.54, + "learning_rate": 2.395773822773863e-05, + "loss": 0.8949, + "step": 2785 + }, + { + "epoch": 1.54, + "learning_rate": 2.3885405372990166e-05, + "loss": 0.9506, + "step": 2790 + }, + { + "epoch": 1.55, + "learning_rate": 2.3813081866205754e-05, + "loss": 0.9087, + "step": 2795 + }, + { + "epoch": 1.55, + "learning_rate": 2.3740768313953494e-05, + "loss": 0.923, + "step": 2800 + }, + { + "epoch": 1.55, + "learning_rate": 2.3668465322718004e-05, + "loss": 1.0212, + "step": 2805 + }, + { + "epoch": 1.55, + "learning_rate": 2.359617349889532e-05, + "loss": 0.9182, + "step": 2810 + }, + { + "epoch": 1.56, + "learning_rate": 2.3523893448787818e-05, + "loss": 0.9316, + "step": 2815 + }, + { + "epoch": 1.56, + "learning_rate": 2.3451625778599122e-05, + "loss": 0.94, + "step": 2820 + }, + { + "epoch": 1.56, + "learning_rate": 2.3379371094429038e-05, + "loss": 0.9181, + "step": 2825 + }, + { + "epoch": 1.56, + "learning_rate": 2.3307130002268457e-05, + "loss": 0.937, + "step": 2830 + }, + { + "epoch": 1.57, + "learning_rate": 2.3234903107994287e-05, + "loss": 0.9026, + "step": 2835 + }, + { + "epoch": 1.57, + "learning_rate": 2.3162691017364317e-05, + "loss": 0.954, + "step": 2840 + }, + { + "epoch": 1.57, + "learning_rate": 2.3090494336012253e-05, + "loss": 0.9661, + "step": 2845 + }, + { + "epoch": 1.58, + "learning_rate": 2.3018313669442483e-05, + "loss": 0.9127, + "step": 2850 + }, + { + "epoch": 1.58, + "learning_rate": 2.2946149623025158e-05, + "loss": 0.9317, + "step": 2855 + }, + { + "epoch": 1.58, + "learning_rate": 2.2874002801990978e-05, + "loss": 0.9856, + "step": 2860 + }, + { + "epoch": 1.58, + "learning_rate": 2.28018738114262e-05, + "loss": 1.0021, + "step": 2865 + }, + { + "epoch": 1.59, + "learning_rate": 2.272976325626755e-05, + "loss": 0.9655, + "step": 2870 + }, + { + "epoch": 1.59, + "learning_rate": 2.265767174129711e-05, + "loss": 0.9619, + "step": 2875 + }, + { + "epoch": 1.59, + "learning_rate": 2.2585599871137313e-05, + "loss": 0.9383, + "step": 2880 + }, + { + "epoch": 1.6, + "learning_rate": 2.251354825024579e-05, + "loss": 0.9332, + "step": 2885 + }, + { + "epoch": 1.6, + "learning_rate": 2.244151748291039e-05, + "loss": 0.9544, + "step": 2890 + }, + { + "epoch": 1.6, + "learning_rate": 2.236950817324401e-05, + "loss": 0.9343, + "step": 2895 + }, + { + "epoch": 1.6, + "learning_rate": 2.2297520925179647e-05, + "loss": 0.9189, + "step": 2900 + }, + { + "epoch": 1.61, + "learning_rate": 2.222555634246521e-05, + "loss": 0.9341, + "step": 2905 + }, + { + "epoch": 1.61, + "learning_rate": 2.215361502865858e-05, + "loss": 0.9567, + "step": 2910 + }, + { + "epoch": 1.61, + "learning_rate": 2.2081697587122423e-05, + "loss": 0.9047, + "step": 2915 + }, + { + "epoch": 1.61, + "learning_rate": 2.200980462101922e-05, + "loss": 0.9126, + "step": 2920 + }, + { + "epoch": 1.62, + "learning_rate": 2.1937936733306195e-05, + "loss": 0.9523, + "step": 2925 + }, + { + "epoch": 1.62, + "learning_rate": 2.18660945267302e-05, + "loss": 0.8802, + "step": 2930 + }, + { + "epoch": 1.62, + "learning_rate": 2.179427860382276e-05, + "loss": 0.9197, + "step": 2935 + }, + { + "epoch": 1.63, + "learning_rate": 2.1722489566894903e-05, + "loss": 0.9255, + "step": 2940 + }, + { + "epoch": 1.63, + "learning_rate": 2.1650728018032206e-05, + "loss": 0.8921, + "step": 2945 + }, + { + "epoch": 1.63, + "learning_rate": 2.15789945590897e-05, + "loss": 0.9607, + "step": 2950 + }, + { + "epoch": 1.63, + "learning_rate": 2.150728979168683e-05, + "loss": 0.9755, + "step": 2955 + }, + { + "epoch": 1.64, + "learning_rate": 2.1435614317202384e-05, + "loss": 0.9943, + "step": 2960 + }, + { + "epoch": 1.64, + "learning_rate": 2.1363968736769508e-05, + "loss": 0.9463, + "step": 2965 + }, + { + "epoch": 1.64, + "learning_rate": 2.1292353651270617e-05, + "loss": 0.9107, + "step": 2970 + }, + { + "epoch": 1.65, + "learning_rate": 2.1220769661332365e-05, + "loss": 0.9311, + "step": 2975 + }, + { + "epoch": 1.65, + "learning_rate": 2.1149217367320622e-05, + "loss": 0.9459, + "step": 2980 + }, + { + "epoch": 1.65, + "learning_rate": 2.107769736933541e-05, + "loss": 0.9439, + "step": 2985 + }, + { + "epoch": 1.65, + "learning_rate": 2.100621026720591e-05, + "loss": 0.9719, + "step": 2990 + }, + { + "epoch": 1.66, + "learning_rate": 2.093475666048539e-05, + "loss": 0.9569, + "step": 2995 + }, + { + "epoch": 1.66, + "learning_rate": 2.0863337148446222e-05, + "loss": 0.9308, + "step": 3000 + }, + { + "epoch": 1.66, + "learning_rate": 2.07919523300748e-05, + "loss": 0.9269, + "step": 3005 + }, + { + "epoch": 1.66, + "learning_rate": 2.0720602804066552e-05, + "loss": 0.9358, + "step": 3010 + }, + { + "epoch": 1.67, + "learning_rate": 2.0649289168820943e-05, + "loss": 0.9291, + "step": 3015 + }, + { + "epoch": 1.67, + "learning_rate": 2.0578012022436386e-05, + "loss": 0.969, + "step": 3020 + }, + { + "epoch": 1.67, + "learning_rate": 2.0506771962705304e-05, + "loss": 0.9319, + "step": 3025 + }, + { + "epoch": 1.68, + "learning_rate": 2.0435569587109042e-05, + "loss": 0.9574, + "step": 3030 + }, + { + "epoch": 1.68, + "learning_rate": 2.036440549281293e-05, + "loss": 0.9654, + "step": 3035 + }, + { + "epoch": 1.68, + "learning_rate": 2.0293280276661204e-05, + "loss": 0.9742, + "step": 3040 + }, + { + "epoch": 1.68, + "learning_rate": 2.0222194535172067e-05, + "loss": 0.9014, + "step": 3045 + }, + { + "epoch": 1.69, + "learning_rate": 2.0151148864532623e-05, + "loss": 0.9246, + "step": 3050 + }, + { + "epoch": 1.69, + "learning_rate": 2.0080143860593913e-05, + "loss": 0.9522, + "step": 3055 + }, + { + "epoch": 1.69, + "learning_rate": 2.0009180118865933e-05, + "loss": 0.9567, + "step": 3060 + }, + { + "epoch": 1.69, + "learning_rate": 1.9938258234512588e-05, + "loss": 0.983, + "step": 3065 + }, + { + "epoch": 1.7, + "learning_rate": 1.9867378802346764e-05, + "loss": 0.8722, + "step": 3070 + }, + { + "epoch": 1.7, + "learning_rate": 1.979654241682527e-05, + "loss": 0.9122, + "step": 3075 + }, + { + "epoch": 1.7, + "learning_rate": 1.972574967204391e-05, + "loss": 0.9362, + "step": 3080 + }, + { + "epoch": 1.71, + "learning_rate": 1.9655001161732478e-05, + "loss": 0.8944, + "step": 3085 + }, + { + "epoch": 1.71, + "learning_rate": 1.9584297479249774e-05, + "loss": 0.9329, + "step": 3090 + }, + { + "epoch": 1.71, + "learning_rate": 1.9513639217578636e-05, + "loss": 0.9707, + "step": 3095 + }, + { + "epoch": 1.71, + "learning_rate": 1.9443026969320955e-05, + "loss": 0.9367, + "step": 3100 + }, + { + "epoch": 1.72, + "learning_rate": 1.937246132669272e-05, + "loss": 0.896, + "step": 3105 + }, + { + "epoch": 1.72, + "learning_rate": 1.9301942881519047e-05, + "loss": 0.97, + "step": 3110 + }, + { + "epoch": 1.72, + "learning_rate": 1.9231472225229216e-05, + "loss": 0.9638, + "step": 3115 + }, + { + "epoch": 1.73, + "learning_rate": 1.9161049948851684e-05, + "loss": 0.9561, + "step": 3120 + }, + { + "epoch": 1.73, + "learning_rate": 1.9090676643009193e-05, + "loss": 0.9734, + "step": 3125 + }, + { + "epoch": 1.73, + "learning_rate": 1.902035289791373e-05, + "loss": 0.9651, + "step": 3130 + }, + { + "epoch": 1.73, + "learning_rate": 1.8950079303361658e-05, + "loss": 0.9489, + "step": 3135 + }, + { + "epoch": 1.74, + "learning_rate": 1.8879856448728723e-05, + "loss": 0.9893, + "step": 3140 + }, + { + "epoch": 1.74, + "learning_rate": 1.8809684922965097e-05, + "loss": 0.9549, + "step": 3145 + }, + { + "epoch": 1.74, + "learning_rate": 1.8739565314590507e-05, + "loss": 0.9196, + "step": 3150 + }, + { + "epoch": 1.74, + "learning_rate": 1.8669498211689216e-05, + "loss": 0.9568, + "step": 3155 + }, + { + "epoch": 1.75, + "learning_rate": 1.859948420190517e-05, + "loss": 0.904, + "step": 3160 + }, + { + "epoch": 1.75, + "learning_rate": 1.852952387243698e-05, + "loss": 0.9375, + "step": 3165 + }, + { + "epoch": 1.75, + "learning_rate": 1.8459617810033096e-05, + "loss": 0.9048, + "step": 3170 + }, + { + "epoch": 1.76, + "learning_rate": 1.83897666009868e-05, + "loss": 0.9514, + "step": 3175 + }, + { + "epoch": 1.76, + "learning_rate": 1.8319970831131363e-05, + "loss": 0.8855, + "step": 3180 + }, + { + "epoch": 1.76, + "learning_rate": 1.825023108583505e-05, + "loss": 0.9119, + "step": 3185 + }, + { + "epoch": 1.76, + "learning_rate": 1.818054794999628e-05, + "loss": 0.9733, + "step": 3190 + }, + { + "epoch": 1.77, + "learning_rate": 1.8110922008038705e-05, + "loss": 0.9597, + "step": 3195 + }, + { + "epoch": 1.77, + "learning_rate": 1.8041353843906275e-05, + "loss": 0.8952, + "step": 3200 + }, + { + "epoch": 1.77, + "learning_rate": 1.797184404105839e-05, + "loss": 0.8934, + "step": 3205 + }, + { + "epoch": 1.77, + "learning_rate": 1.7902393182464955e-05, + "loss": 0.9299, + "step": 3210 + }, + { + "epoch": 1.78, + "learning_rate": 1.7833001850601544e-05, + "loss": 0.9247, + "step": 3215 + }, + { + "epoch": 1.78, + "learning_rate": 1.7763670627444465e-05, + "loss": 0.9672, + "step": 3220 + }, + { + "epoch": 1.78, + "learning_rate": 1.7694400094465913e-05, + "loss": 0.9451, + "step": 3225 + }, + { + "epoch": 1.79, + "learning_rate": 1.7625190832629085e-05, + "loss": 0.9294, + "step": 3230 + }, + { + "epoch": 1.79, + "learning_rate": 1.7556043422383293e-05, + "loss": 0.9145, + "step": 3235 + }, + { + "epoch": 1.79, + "learning_rate": 1.7486958443659112e-05, + "loss": 0.9508, + "step": 3240 + }, + { + "epoch": 1.79, + "learning_rate": 1.7417936475863526e-05, + "loss": 0.8725, + "step": 3245 + }, + { + "epoch": 1.8, + "learning_rate": 1.7348978097875036e-05, + "loss": 0.9195, + "step": 3250 + }, + { + "epoch": 1.8, + "learning_rate": 1.728008388803883e-05, + "loss": 0.933, + "step": 3255 + }, + { + "epoch": 1.8, + "learning_rate": 1.7211254424161933e-05, + "loss": 0.9747, + "step": 3260 + }, + { + "epoch": 1.81, + "learning_rate": 1.7142490283508324e-05, + "loss": 0.9168, + "step": 3265 + }, + { + "epoch": 1.81, + "learning_rate": 1.707379204279418e-05, + "loss": 0.9844, + "step": 3270 + }, + { + "epoch": 1.81, + "learning_rate": 1.700516027818293e-05, + "loss": 0.9071, + "step": 3275 + }, + { + "epoch": 1.81, + "learning_rate": 1.6936595565280488e-05, + "loss": 0.9311, + "step": 3280 + }, + { + "epoch": 1.82, + "learning_rate": 1.686809847913045e-05, + "loss": 0.9419, + "step": 3285 + }, + { + "epoch": 1.82, + "learning_rate": 1.679966959420918e-05, + "loss": 0.9679, + "step": 3290 + }, + { + "epoch": 1.82, + "learning_rate": 1.67313094844211e-05, + "loss": 0.9601, + "step": 3295 + }, + { + "epoch": 1.82, + "learning_rate": 1.6663018723093774e-05, + "loss": 0.9022, + "step": 3300 + }, + { + "epoch": 1.83, + "learning_rate": 1.6594797882973196e-05, + "loss": 0.8925, + "step": 3305 + }, + { + "epoch": 1.83, + "learning_rate": 1.6526647536218894e-05, + "loss": 0.9238, + "step": 3310 + }, + { + "epoch": 1.83, + "learning_rate": 1.6458568254399225e-05, + "loss": 0.9632, + "step": 3315 + }, + { + "epoch": 1.84, + "learning_rate": 1.6390560608486496e-05, + "loss": 0.9164, + "step": 3320 + }, + { + "epoch": 1.84, + "learning_rate": 1.6322625168852217e-05, + "loss": 0.9505, + "step": 3325 + }, + { + "epoch": 1.84, + "learning_rate": 1.6254762505262338e-05, + "loss": 0.9622, + "step": 3330 + }, + { + "epoch": 1.84, + "learning_rate": 1.618697318687241e-05, + "loss": 0.9204, + "step": 3335 + }, + { + "epoch": 1.85, + "learning_rate": 1.6119257782222895e-05, + "loss": 0.9504, + "step": 3340 + }, + { + "epoch": 1.85, + "learning_rate": 1.6051616859234285e-05, + "loss": 0.9384, + "step": 3345 + }, + { + "epoch": 1.85, + "learning_rate": 1.5984050985202474e-05, + "loss": 0.9374, + "step": 3350 + }, + { + "epoch": 1.86, + "learning_rate": 1.591656072679387e-05, + "loss": 0.9937, + "step": 3355 + }, + { + "epoch": 1.86, + "learning_rate": 1.5849146650040737e-05, + "loss": 0.9587, + "step": 3360 + }, + { + "epoch": 1.86, + "learning_rate": 1.5781809320336412e-05, + "loss": 0.9312, + "step": 3365 + }, + { + "epoch": 1.86, + "learning_rate": 1.5714549302430536e-05, + "loss": 0.9343, + "step": 3370 + }, + { + "epoch": 1.87, + "learning_rate": 1.5647367160424393e-05, + "loss": 0.9419, + "step": 3375 + }, + { + "epoch": 1.87, + "learning_rate": 1.558026345776608e-05, + "loss": 0.9371, + "step": 3380 + }, + { + "epoch": 1.87, + "learning_rate": 1.551323875724587e-05, + "loss": 0.913, + "step": 3385 + }, + { + "epoch": 1.87, + "learning_rate": 1.5446293620991437e-05, + "loss": 0.9195, + "step": 3390 + }, + { + "epoch": 1.88, + "learning_rate": 1.5379428610463174e-05, + "loss": 0.9164, + "step": 3395 + }, + { + "epoch": 1.88, + "learning_rate": 1.531264428644945e-05, + "loss": 0.9256, + "step": 3400 + }, + { + "epoch": 1.88, + "learning_rate": 1.5245941209061953e-05, + "loss": 0.9658, + "step": 3405 + }, + { + "epoch": 1.89, + "learning_rate": 1.517931993773094e-05, + "loss": 0.9397, + "step": 3410 + }, + { + "epoch": 1.89, + "learning_rate": 1.5112781031200569e-05, + "loss": 0.915, + "step": 3415 + }, + { + "epoch": 1.89, + "learning_rate": 1.5046325047524251e-05, + "loss": 0.9116, + "step": 3420 + }, + { + "epoch": 1.89, + "learning_rate": 1.4979952544059888e-05, + "loss": 0.9192, + "step": 3425 + }, + { + "epoch": 1.9, + "learning_rate": 1.4913664077465289e-05, + "loss": 0.924, + "step": 3430 + }, + { + "epoch": 1.9, + "learning_rate": 1.4847460203693408e-05, + "loss": 0.9579, + "step": 3435 + }, + { + "epoch": 1.9, + "learning_rate": 1.4781341477987776e-05, + "loss": 0.9816, + "step": 3440 + }, + { + "epoch": 1.9, + "learning_rate": 1.4715308454877758e-05, + "loss": 0.9412, + "step": 3445 + }, + { + "epoch": 1.91, + "learning_rate": 1.4649361688173979e-05, + "loss": 0.9413, + "step": 3450 + }, + { + "epoch": 1.91, + "learning_rate": 1.458350173096361e-05, + "loss": 0.9425, + "step": 3455 + }, + { + "epoch": 1.91, + "learning_rate": 1.4517729135605795e-05, + "loss": 0.9132, + "step": 3460 + }, + { + "epoch": 1.92, + "learning_rate": 1.4452044453726942e-05, + "loss": 0.9455, + "step": 3465 + }, + { + "epoch": 1.92, + "learning_rate": 1.4386448236216174e-05, + "loss": 0.9543, + "step": 3470 + }, + { + "epoch": 1.92, + "learning_rate": 1.4320941033220667e-05, + "loss": 0.9118, + "step": 3475 + }, + { + "epoch": 1.92, + "learning_rate": 1.4255523394141041e-05, + "loss": 0.9425, + "step": 3480 + }, + { + "epoch": 1.93, + "learning_rate": 1.4190195867626749e-05, + "loss": 0.8958, + "step": 3485 + }, + { + "epoch": 1.93, + "learning_rate": 1.4124959001571497e-05, + "loss": 0.944, + "step": 3490 + }, + { + "epoch": 1.93, + "learning_rate": 1.4059813343108616e-05, + "loss": 0.9611, + "step": 3495 + }, + { + "epoch": 1.94, + "learning_rate": 1.3994759438606501e-05, + "loss": 0.9448, + "step": 3500 + }, + { + "epoch": 1.94, + "learning_rate": 1.3929797833664013e-05, + "loss": 0.9659, + "step": 3505 + }, + { + "epoch": 1.94, + "learning_rate": 1.3864929073105922e-05, + "loss": 0.9178, + "step": 3510 + }, + { + "epoch": 1.94, + "learning_rate": 1.3800153700978282e-05, + "loss": 0.8965, + "step": 3515 + }, + { + "epoch": 1.95, + "learning_rate": 1.373547226054398e-05, + "loss": 0.9198, + "step": 3520 + }, + { + "epoch": 1.95, + "learning_rate": 1.367088529427803e-05, + "loss": 0.9398, + "step": 3525 + }, + { + "epoch": 1.95, + "learning_rate": 1.3606393343863182e-05, + "loss": 0.9423, + "step": 3530 + }, + { + "epoch": 1.95, + "learning_rate": 1.3541996950185227e-05, + "loss": 0.9592, + "step": 3535 + }, + { + "epoch": 1.96, + "learning_rate": 1.3477696653328598e-05, + "loss": 0.9489, + "step": 3540 + }, + { + "epoch": 1.96, + "learning_rate": 1.3413492992571713e-05, + "loss": 0.963, + "step": 3545 + }, + { + "epoch": 1.96, + "learning_rate": 1.3349386506382586e-05, + "loss": 0.9449, + "step": 3550 + }, + { + "epoch": 1.97, + "learning_rate": 1.3285377732414172e-05, + "loss": 0.9043, + "step": 3555 + }, + { + "epoch": 1.97, + "learning_rate": 1.3221467207499972e-05, + "loss": 0.9362, + "step": 3560 + }, + { + "epoch": 1.97, + "learning_rate": 1.3157655467649463e-05, + "loss": 0.945, + "step": 3565 + }, + { + "epoch": 1.97, + "learning_rate": 1.3093943048043634e-05, + "loss": 0.8693, + "step": 3570 + }, + { + "epoch": 1.98, + "learning_rate": 1.3030330483030479e-05, + "loss": 0.9659, + "step": 3575 + }, + { + "epoch": 1.98, + "learning_rate": 1.2966818306120535e-05, + "loss": 0.9036, + "step": 3580 + }, + { + "epoch": 1.98, + "learning_rate": 1.2903407049982386e-05, + "loss": 0.9282, + "step": 3585 + }, + { + "epoch": 1.99, + "learning_rate": 1.2840097246438215e-05, + "loss": 0.8747, + "step": 3590 + }, + { + "epoch": 1.99, + "learning_rate": 1.277688942645934e-05, + "loss": 0.936, + "step": 3595 + }, + { + "epoch": 1.99, + "learning_rate": 1.2713784120161725e-05, + "loss": 0.8945, + "step": 3600 + }, + { + "epoch": 1.99, + "learning_rate": 1.2650781856801598e-05, + "loss": 0.9273, + "step": 3605 + }, + { + "epoch": 2.0, + "learning_rate": 1.258788316477097e-05, + "loss": 0.9632, + "step": 3610 + }, + { + "epoch": 2.0, + "learning_rate": 1.2525088571593202e-05, + "loss": 0.8641, + "step": 3615 + }, + { + "epoch": 2.0, + "learning_rate": 1.2462398603918607e-05, + "loss": 0.9314, + "step": 3620 + }, + { + "epoch": 2.0, + "learning_rate": 1.2399813787520006e-05, + "loss": 0.9169, + "step": 3625 + }, + { + "epoch": 2.01, + "learning_rate": 1.2337334647288334e-05, + "loss": 0.9089, + "step": 3630 + }, + { + "epoch": 2.01, + "learning_rate": 1.2274961707228228e-05, + "loss": 0.9039, + "step": 3635 + }, + { + "epoch": 2.01, + "learning_rate": 1.2212695490453646e-05, + "loss": 0.9378, + "step": 3640 + }, + { + "epoch": 2.02, + "learning_rate": 1.2150536519183475e-05, + "loss": 0.9297, + "step": 3645 + }, + { + "epoch": 2.02, + "learning_rate": 1.2088485314737108e-05, + "loss": 0.9488, + "step": 3650 + }, + { + "epoch": 2.02, + "learning_rate": 1.2026542397530186e-05, + "loss": 0.9625, + "step": 3655 + }, + { + "epoch": 2.02, + "learning_rate": 1.1964708287070073e-05, + "loss": 0.8874, + "step": 3660 + }, + { + "epoch": 2.03, + "learning_rate": 1.1902983501951666e-05, + "loss": 0.9224, + "step": 3665 + }, + { + "epoch": 2.03, + "learning_rate": 1.1841368559852892e-05, + "loss": 0.9442, + "step": 3670 + }, + { + "epoch": 2.03, + "learning_rate": 1.17798639775305e-05, + "loss": 0.8688, + "step": 3675 + }, + { + "epoch": 2.03, + "learning_rate": 1.1718470270815608e-05, + "loss": 0.8912, + "step": 3680 + }, + { + "epoch": 2.04, + "learning_rate": 1.1657187954609496e-05, + "loss": 0.9131, + "step": 3685 + }, + { + "epoch": 2.04, + "learning_rate": 1.1596017542879168e-05, + "loss": 0.9827, + "step": 3690 + }, + { + "epoch": 2.04, + "learning_rate": 1.1534959548653132e-05, + "loss": 0.8588, + "step": 3695 + }, + { + "epoch": 2.05, + "learning_rate": 1.147401448401706e-05, + "loss": 0.9359, + "step": 3700 + }, + { + "epoch": 2.05, + "learning_rate": 1.1413182860109491e-05, + "loss": 0.9375, + "step": 3705 + }, + { + "epoch": 2.05, + "learning_rate": 1.1352465187117562e-05, + "loss": 0.9242, + "step": 3710 + }, + { + "epoch": 2.05, + "learning_rate": 1.1291861974272703e-05, + "loss": 0.9132, + "step": 3715 + }, + { + "epoch": 2.06, + "learning_rate": 1.1231373729846393e-05, + "loss": 0.9142, + "step": 3720 + }, + { + "epoch": 2.06, + "learning_rate": 1.1171000961145883e-05, + "loss": 0.9132, + "step": 3725 + }, + { + "epoch": 2.06, + "learning_rate": 1.1110744174509952e-05, + "loss": 0.9603, + "step": 3730 + }, + { + "epoch": 2.07, + "learning_rate": 1.1050603875304622e-05, + "loss": 0.9442, + "step": 3735 + }, + { + "epoch": 2.07, + "learning_rate": 1.0990580567918979e-05, + "loss": 0.9392, + "step": 3740 + }, + { + "epoch": 2.07, + "learning_rate": 1.0930674755760908e-05, + "loss": 0.9203, + "step": 3745 + }, + { + "epoch": 2.07, + "learning_rate": 1.0870886941252872e-05, + "loss": 0.9468, + "step": 3750 + }, + { + "epoch": 2.08, + "learning_rate": 1.0811217625827705e-05, + "loss": 0.9605, + "step": 3755 + }, + { + "epoch": 2.08, + "learning_rate": 1.0751667309924399e-05, + "loss": 0.9166, + "step": 3760 + }, + { + "epoch": 2.08, + "learning_rate": 1.0692236492983918e-05, + "loss": 0.939, + "step": 3765 + }, + { + "epoch": 2.08, + "learning_rate": 1.0632925673445001e-05, + "loss": 0.9409, + "step": 3770 + }, + { + "epoch": 2.09, + "learning_rate": 1.0573735348739987e-05, + "loss": 0.9523, + "step": 3775 + }, + { + "epoch": 2.09, + "learning_rate": 1.0514666015290645e-05, + "loss": 0.8867, + "step": 3780 + }, + { + "epoch": 2.09, + "learning_rate": 1.0455718168503967e-05, + "loss": 0.9816, + "step": 3785 + }, + { + "epoch": 2.1, + "learning_rate": 1.0396892302768127e-05, + "loss": 0.9465, + "step": 3790 + }, + { + "epoch": 2.1, + "learning_rate": 1.033818891144817e-05, + "loss": 0.9352, + "step": 3795 + }, + { + "epoch": 2.1, + "learning_rate": 1.0279608486882054e-05, + "loss": 0.8877, + "step": 3800 + }, + { + "epoch": 2.1, + "learning_rate": 1.0221151520376343e-05, + "loss": 0.9098, + "step": 3805 + }, + { + "epoch": 2.11, + "learning_rate": 1.0162818502202251e-05, + "loss": 0.8687, + "step": 3810 + }, + { + "epoch": 2.11, + "learning_rate": 1.0104609921591387e-05, + "loss": 0.9256, + "step": 3815 + }, + { + "epoch": 2.11, + "learning_rate": 1.0046526266731782e-05, + "loss": 0.92, + "step": 3820 + }, + { + "epoch": 2.12, + "learning_rate": 9.988568024763673e-06, + "loss": 0.8813, + "step": 3825 + }, + { + "epoch": 2.12, + "learning_rate": 9.930735681775505e-06, + "loss": 0.891, + "step": 3830 + }, + { + "epoch": 2.12, + "learning_rate": 9.87302972279982e-06, + "loss": 0.9479, + "step": 3835 + }, + { + "epoch": 2.12, + "learning_rate": 9.815450631809191e-06, + "loss": 0.8963, + "step": 3840 + }, + { + "epoch": 2.13, + "learning_rate": 9.757998891712172e-06, + "loss": 0.9178, + "step": 3845 + }, + { + "epoch": 2.13, + "learning_rate": 9.700674984349228e-06, + "loss": 0.9072, + "step": 3850 + }, + { + "epoch": 2.13, + "learning_rate": 9.643479390488717e-06, + "loss": 0.8879, + "step": 3855 + }, + { + "epoch": 2.13, + "learning_rate": 9.586412589822846e-06, + "loss": 0.9484, + "step": 3860 + }, + { + "epoch": 2.14, + "learning_rate": 9.529475060963649e-06, + "loss": 0.9173, + "step": 3865 + }, + { + "epoch": 2.14, + "learning_rate": 9.472667281438982e-06, + "loss": 0.9318, + "step": 3870 + }, + { + "epoch": 2.14, + "learning_rate": 9.415989727688484e-06, + "loss": 0.9242, + "step": 3875 + }, + { + "epoch": 2.15, + "learning_rate": 9.359442875059631e-06, + "loss": 0.9314, + "step": 3880 + }, + { + "epoch": 2.15, + "learning_rate": 9.303027197803726e-06, + "loss": 0.9227, + "step": 3885 + }, + { + "epoch": 2.15, + "learning_rate": 9.246743169071906e-06, + "loss": 0.9253, + "step": 3890 + }, + { + "epoch": 2.15, + "learning_rate": 9.190591260911201e-06, + "loss": 0.9457, + "step": 3895 + }, + { + "epoch": 2.16, + "learning_rate": 9.134571944260554e-06, + "loss": 0.9214, + "step": 3900 + }, + { + "epoch": 2.16, + "learning_rate": 9.078685688946884e-06, + "loss": 0.9534, + "step": 3905 + }, + { + "epoch": 2.16, + "learning_rate": 9.022932963681141e-06, + "loss": 0.9236, + "step": 3910 + }, + { + "epoch": 2.16, + "learning_rate": 8.967314236054384e-06, + "loss": 0.8905, + "step": 3915 + }, + { + "epoch": 2.17, + "learning_rate": 8.911829972533817e-06, + "loss": 0.9319, + "step": 3920 + }, + { + "epoch": 2.17, + "learning_rate": 8.856480638458966e-06, + "loss": 0.8989, + "step": 3925 + }, + { + "epoch": 2.17, + "learning_rate": 8.80126669803766e-06, + "loss": 0.9234, + "step": 3930 + }, + { + "epoch": 2.18, + "learning_rate": 8.746188614342263e-06, + "loss": 0.9619, + "step": 3935 + }, + { + "epoch": 2.18, + "learning_rate": 8.691246849305653e-06, + "loss": 0.9537, + "step": 3940 + }, + { + "epoch": 2.18, + "learning_rate": 8.636441863717499e-06, + "loss": 0.9809, + "step": 3945 + }, + { + "epoch": 2.18, + "learning_rate": 8.581774117220238e-06, + "loss": 0.9219, + "step": 3950 + }, + { + "epoch": 2.19, + "learning_rate": 8.52724406830538e-06, + "loss": 0.9502, + "step": 3955 + }, + { + "epoch": 2.19, + "learning_rate": 8.472852174309514e-06, + "loss": 0.9277, + "step": 3960 + }, + { + "epoch": 2.19, + "learning_rate": 8.418598891410581e-06, + "loss": 0.8978, + "step": 3965 + }, + { + "epoch": 2.2, + "learning_rate": 8.364484674624e-06, + "loss": 0.9026, + "step": 3970 + }, + { + "epoch": 2.2, + "learning_rate": 8.31050997779885e-06, + "loss": 0.9658, + "step": 3975 + }, + { + "epoch": 2.2, + "learning_rate": 8.25667525361409e-06, + "loss": 0.9288, + "step": 3980 + }, + { + "epoch": 2.2, + "learning_rate": 8.202980953574735e-06, + "loss": 0.9009, + "step": 3985 + }, + { + "epoch": 2.21, + "learning_rate": 8.14942752800808e-06, + "loss": 0.934, + "step": 3990 + }, + { + "epoch": 2.21, + "learning_rate": 8.09601542605993e-06, + "loss": 0.9261, + "step": 3995 + }, + { + "epoch": 2.21, + "learning_rate": 8.042745095690826e-06, + "loss": 0.9086, + "step": 4000 + }, + { + "epoch": 2.21, + "learning_rate": 7.989616983672288e-06, + "loss": 0.9397, + "step": 4005 + }, + { + "epoch": 2.22, + "learning_rate": 7.936631535583055e-06, + "loss": 0.8661, + "step": 4010 + }, + { + "epoch": 2.22, + "learning_rate": 7.88378919580538e-06, + "loss": 0.9433, + "step": 4015 + }, + { + "epoch": 2.22, + "learning_rate": 7.83109040752128e-06, + "loss": 0.9473, + "step": 4020 + }, + { + "epoch": 2.23, + "learning_rate": 7.778535612708824e-06, + "loss": 0.9177, + "step": 4025 + }, + { + "epoch": 2.23, + "learning_rate": 7.726125252138417e-06, + "loss": 0.8932, + "step": 4030 + }, + { + "epoch": 2.23, + "learning_rate": 7.673859765369126e-06, + "loss": 0.9187, + "step": 4035 + }, + { + "epoch": 2.23, + "learning_rate": 7.62173959074497e-06, + "loss": 0.9009, + "step": 4040 + }, + { + "epoch": 2.24, + "learning_rate": 7.569765165391257e-06, + "loss": 0.9348, + "step": 4045 + }, + { + "epoch": 2.24, + "learning_rate": 7.517936925210917e-06, + "loss": 0.8827, + "step": 4050 + }, + { + "epoch": 2.24, + "learning_rate": 7.466255304880834e-06, + "loss": 0.9668, + "step": 4055 + }, + { + "epoch": 2.24, + "learning_rate": 7.4147207378482295e-06, + "loss": 0.934, + "step": 4060 + }, + { + "epoch": 2.25, + "learning_rate": 7.36333365632697e-06, + "loss": 0.9251, + "step": 4065 + }, + { + "epoch": 2.25, + "learning_rate": 7.312094491294033e-06, + "loss": 0.9225, + "step": 4070 + }, + { + "epoch": 2.25, + "learning_rate": 7.261003672485783e-06, + "loss": 0.9198, + "step": 4075 + }, + { + "epoch": 2.26, + "learning_rate": 7.210061628394477e-06, + "loss": 0.9023, + "step": 4080 + }, + { + "epoch": 2.26, + "learning_rate": 7.159268786264564e-06, + "loss": 0.9378, + "step": 4085 + }, + { + "epoch": 2.26, + "learning_rate": 7.108625572089209e-06, + "loss": 0.9148, + "step": 4090 + }, + { + "epoch": 2.26, + "learning_rate": 7.05813241060661e-06, + "loss": 0.9308, + "step": 4095 + }, + { + "epoch": 2.27, + "learning_rate": 7.007789725296557e-06, + "loss": 0.8979, + "step": 4100 + }, + { + "epoch": 2.27, + "learning_rate": 6.957597938376748e-06, + "loss": 0.9329, + "step": 4105 + }, + { + "epoch": 2.27, + "learning_rate": 6.907557470799358e-06, + "loss": 0.8849, + "step": 4110 + }, + { + "epoch": 2.28, + "learning_rate": 6.857668742247458e-06, + "loss": 0.8879, + "step": 4115 + }, + { + "epoch": 2.28, + "learning_rate": 6.807932171131498e-06, + "loss": 0.9132, + "step": 4120 + }, + { + "epoch": 2.28, + "learning_rate": 6.758348174585804e-06, + "loss": 0.9273, + "step": 4125 + }, + { + "epoch": 2.28, + "learning_rate": 6.7089171684650785e-06, + "loss": 0.9115, + "step": 4130 + }, + { + "epoch": 2.29, + "learning_rate": 6.659639567340914e-06, + "loss": 0.9431, + "step": 4135 + }, + { + "epoch": 2.29, + "learning_rate": 6.610515784498314e-06, + "loss": 0.9515, + "step": 4140 + }, + { + "epoch": 2.29, + "learning_rate": 6.561546231932228e-06, + "loss": 0.9069, + "step": 4145 + }, + { + "epoch": 2.29, + "learning_rate": 6.512731320344101e-06, + "loss": 0.9178, + "step": 4150 + }, + { + "epoch": 2.3, + "learning_rate": 6.464071459138405e-06, + "loss": 0.9156, + "step": 4155 + }, + { + "epoch": 2.3, + "learning_rate": 6.415567056419244e-06, + "loss": 0.9637, + "step": 4160 + }, + { + "epoch": 2.3, + "learning_rate": 6.3672185189869e-06, + "loss": 0.9446, + "step": 4165 + }, + { + "epoch": 2.31, + "learning_rate": 6.319026252334445e-06, + "loss": 0.8726, + "step": 4170 + }, + { + "epoch": 2.31, + "learning_rate": 6.270990660644313e-06, + "loss": 0.9324, + "step": 4175 + }, + { + "epoch": 2.31, + "learning_rate": 6.223112146784935e-06, + "loss": 0.9252, + "step": 4180 + }, + { + "epoch": 2.31, + "learning_rate": 6.1753911123073435e-06, + "loss": 0.8924, + "step": 4185 + }, + { + "epoch": 2.32, + "learning_rate": 6.127827957441817e-06, + "loss": 0.9344, + "step": 4190 + }, + { + "epoch": 2.32, + "learning_rate": 6.080423081094522e-06, + "loss": 0.9341, + "step": 4195 + }, + { + "epoch": 2.32, + "learning_rate": 6.033176880844133e-06, + "loss": 0.905, + "step": 4200 + }, + { + "epoch": 2.33, + "learning_rate": 5.986089752938584e-06, + "loss": 0.9231, + "step": 4205 + }, + { + "epoch": 2.33, + "learning_rate": 5.939162092291622e-06, + "loss": 0.906, + "step": 4210 + }, + { + "epoch": 2.33, + "learning_rate": 5.892394292479633e-06, + "loss": 0.8957, + "step": 4215 + }, + { + "epoch": 2.33, + "learning_rate": 5.8457867457382024e-06, + "loss": 0.867, + "step": 4220 + }, + { + "epoch": 2.34, + "learning_rate": 5.7993398429589506e-06, + "loss": 0.9022, + "step": 4225 + }, + { + "epoch": 2.34, + "learning_rate": 5.753053973686148e-06, + "loss": 0.9064, + "step": 4230 + }, + { + "epoch": 2.34, + "learning_rate": 5.7069295261135525e-06, + "loss": 0.9019, + "step": 4235 + }, + { + "epoch": 2.34, + "learning_rate": 5.66096688708104e-06, + "loss": 0.9397, + "step": 4240 + }, + { + "epoch": 2.35, + "learning_rate": 5.615166442071457e-06, + "loss": 0.8971, + "step": 4245 + }, + { + "epoch": 2.35, + "learning_rate": 5.569528575207339e-06, + "loss": 0.8902, + "step": 4250 + }, + { + "epoch": 2.35, + "learning_rate": 5.5240536692477e-06, + "loss": 0.8982, + "step": 4255 + }, + { + "epoch": 2.36, + "learning_rate": 5.4787421055848164e-06, + "loss": 0.9135, + "step": 4260 + }, + { + "epoch": 2.36, + "learning_rate": 5.433594264241043e-06, + "loss": 0.8969, + "step": 4265 + }, + { + "epoch": 2.36, + "learning_rate": 5.3886105238656055e-06, + "loss": 0.9036, + "step": 4270 + }, + { + "epoch": 2.36, + "learning_rate": 5.3437912617314425e-06, + "loss": 0.9313, + "step": 4275 + }, + { + "epoch": 2.37, + "learning_rate": 5.299136853732034e-06, + "loss": 0.9034, + "step": 4280 + }, + { + "epoch": 2.37, + "learning_rate": 5.254647674378252e-06, + "loss": 0.9673, + "step": 4285 + }, + { + "epoch": 2.37, + "learning_rate": 5.210324096795202e-06, + "loss": 0.9044, + "step": 4290 + }, + { + "epoch": 2.37, + "learning_rate": 5.166166492719124e-06, + "loss": 0.937, + "step": 4295 + }, + { + "epoch": 2.38, + "learning_rate": 5.122175232494255e-06, + "loss": 0.9095, + "step": 4300 + }, + { + "epoch": 2.38, + "learning_rate": 5.078350685069727e-06, + "loss": 0.8787, + "step": 4305 + }, + { + "epoch": 2.38, + "learning_rate": 5.03469321799647e-06, + "loss": 0.8945, + "step": 4310 + }, + { + "epoch": 2.39, + "learning_rate": 4.9912031974241376e-06, + "loss": 0.9285, + "step": 4315 + }, + { + "epoch": 2.39, + "learning_rate": 4.947880988098025e-06, + "loss": 0.9318, + "step": 4320 + }, + { + "epoch": 2.39, + "learning_rate": 4.90472695335602e-06, + "loss": 0.9171, + "step": 4325 + }, + { + "epoch": 2.39, + "learning_rate": 4.8617414551255545e-06, + "loss": 0.923, + "step": 4330 + }, + { + "epoch": 2.4, + "learning_rate": 4.818924853920545e-06, + "loss": 0.9153, + "step": 4335 + }, + { + "epoch": 2.4, + "learning_rate": 4.776277508838428e-06, + "loss": 0.907, + "step": 4340 + }, + { + "epoch": 2.4, + "learning_rate": 4.733799777557069e-06, + "loss": 0.8869, + "step": 4345 + }, + { + "epoch": 2.41, + "learning_rate": 4.691492016331842e-06, + "loss": 0.95, + "step": 4350 + }, + { + "epoch": 2.41, + "learning_rate": 4.6493545799925655e-06, + "loss": 0.9145, + "step": 4355 + }, + { + "epoch": 2.41, + "learning_rate": 4.607387821940609e-06, + "loss": 0.9175, + "step": 4360 + }, + { + "epoch": 2.41, + "learning_rate": 4.565592094145835e-06, + "loss": 0.9125, + "step": 4365 + }, + { + "epoch": 2.42, + "learning_rate": 4.523967747143745e-06, + "loss": 0.8972, + "step": 4370 + }, + { + "epoch": 2.42, + "learning_rate": 4.482515130032453e-06, + "loss": 0.9113, + "step": 4375 + }, + { + "epoch": 2.42, + "learning_rate": 4.441234590469817e-06, + "loss": 0.9447, + "step": 4380 + }, + { + "epoch": 2.42, + "learning_rate": 4.400126474670499e-06, + "loss": 0.9472, + "step": 4385 + }, + { + "epoch": 2.43, + "learning_rate": 4.359191127403059e-06, + "loss": 0.9391, + "step": 4390 + }, + { + "epoch": 2.43, + "learning_rate": 4.318428891987078e-06, + "loss": 0.9171, + "step": 4395 + }, + { + "epoch": 2.43, + "learning_rate": 4.2778401102902595e-06, + "loss": 0.9468, + "step": 4400 + }, + { + "epoch": 2.44, + "learning_rate": 4.237425122725586e-06, + "loss": 0.9034, + "step": 4405 + }, + { + "epoch": 2.44, + "learning_rate": 4.197184268248436e-06, + "loss": 0.8667, + "step": 4410 + }, + { + "epoch": 2.44, + "learning_rate": 4.157117884353765e-06, + "loss": 0.9553, + "step": 4415 + }, + { + "epoch": 2.44, + "learning_rate": 4.117226307073268e-06, + "loss": 0.931, + "step": 4420 + }, + { + "epoch": 2.45, + "learning_rate": 4.07750987097254e-06, + "loss": 0.9287, + "step": 4425 + }, + { + "epoch": 2.45, + "learning_rate": 4.037968909148326e-06, + "loss": 0.8737, + "step": 4430 + }, + { + "epoch": 2.45, + "learning_rate": 3.998603753225647e-06, + "loss": 0.9677, + "step": 4435 + }, + { + "epoch": 2.46, + "learning_rate": 3.959414733355094e-06, + "loss": 0.9194, + "step": 4440 + }, + { + "epoch": 2.46, + "learning_rate": 3.9204021782100115e-06, + "loss": 0.9424, + "step": 4445 + }, + { + "epoch": 2.46, + "learning_rate": 3.8815664149837675e-06, + "loss": 0.9402, + "step": 4450 + }, + { + "epoch": 2.46, + "learning_rate": 3.8429077693869854e-06, + "loss": 0.9196, + "step": 4455 + }, + { + "epoch": 2.47, + "learning_rate": 3.804426565644839e-06, + "loss": 0.9357, + "step": 4460 + }, + { + "epoch": 2.47, + "learning_rate": 3.7661231264943086e-06, + "loss": 0.9234, + "step": 4465 + }, + { + "epoch": 2.47, + "learning_rate": 3.7279977731814963e-06, + "loss": 0.9135, + "step": 4470 + }, + { + "epoch": 2.47, + "learning_rate": 3.690050825458913e-06, + "loss": 0.9074, + "step": 4475 + }, + { + "epoch": 2.48, + "learning_rate": 3.652282601582793e-06, + "loss": 0.8658, + "step": 4480 + }, + { + "epoch": 2.48, + "learning_rate": 3.6146934183104748e-06, + "loss": 0.9522, + "step": 4485 + }, + { + "epoch": 2.48, + "learning_rate": 3.5772835908976538e-06, + "loss": 0.937, + "step": 4490 + }, + { + "epoch": 2.49, + "learning_rate": 3.540053433095841e-06, + "loss": 0.9336, + "step": 4495 + }, + { + "epoch": 2.49, + "learning_rate": 3.503003257149637e-06, + "loss": 0.9437, + "step": 4500 + }, + { + "epoch": 2.49, + "learning_rate": 3.4661333737941976e-06, + "loss": 0.9084, + "step": 4505 + }, + { + "epoch": 2.49, + "learning_rate": 3.429444092252554e-06, + "loss": 0.9378, + "step": 4510 + }, + { + "epoch": 2.5, + "learning_rate": 3.39293572023307e-06, + "loss": 0.8999, + "step": 4515 + }, + { + "epoch": 2.5, + "learning_rate": 3.3566085639268413e-06, + "loss": 0.9417, + "step": 4520 + }, + { + "epoch": 2.5, + "learning_rate": 3.32046292800513e-06, + "loss": 0.9288, + "step": 4525 + }, + { + "epoch": 2.5, + "learning_rate": 3.2844991156168097e-06, + "loss": 0.8957, + "step": 4530 + }, + { + "epoch": 2.51, + "learning_rate": 3.2487174283858223e-06, + "loss": 0.9002, + "step": 4535 + }, + { + "epoch": 2.51, + "learning_rate": 3.2131181664086517e-06, + "loss": 0.9315, + "step": 4540 + }, + { + "epoch": 2.51, + "learning_rate": 3.1777016282517975e-06, + "loss": 0.8948, + "step": 4545 + }, + { + "epoch": 2.52, + "learning_rate": 3.142468110949287e-06, + "loss": 0.9015, + "step": 4550 + }, + { + "epoch": 2.52, + "learning_rate": 3.1074179100001737e-06, + "loss": 0.9273, + "step": 4555 + }, + { + "epoch": 2.52, + "learning_rate": 3.0725513193660404e-06, + "loss": 0.9307, + "step": 4560 + }, + { + "epoch": 2.52, + "learning_rate": 3.0378686314685934e-06, + "loss": 0.9075, + "step": 4565 + }, + { + "epoch": 2.53, + "learning_rate": 3.003370137187128e-06, + "loss": 0.8821, + "step": 4570 + }, + { + "epoch": 2.53, + "learning_rate": 2.969056125856154e-06, + "loss": 0.9245, + "step": 4575 + }, + { + "epoch": 2.53, + "learning_rate": 2.93492688526294e-06, + "loss": 0.9346, + "step": 4580 + }, + { + "epoch": 2.54, + "learning_rate": 2.900982701645111e-06, + "loss": 0.9226, + "step": 4585 + }, + { + "epoch": 2.54, + "learning_rate": 2.867223859688237e-06, + "loss": 0.9215, + "step": 4590 + }, + { + "epoch": 2.54, + "learning_rate": 2.83365064252345e-06, + "loss": 0.878, + "step": 4595 + }, + { + "epoch": 2.54, + "learning_rate": 2.800263331725078e-06, + "loss": 0.9207, + "step": 4600 + }, + { + "epoch": 2.55, + "learning_rate": 2.7670622073082657e-06, + "loss": 0.9156, + "step": 4605 + }, + { + "epoch": 2.55, + "learning_rate": 2.7340475477266507e-06, + "loss": 0.9296, + "step": 4610 + }, + { + "epoch": 2.55, + "learning_rate": 2.701219629869986e-06, + "loss": 0.9346, + "step": 4615 + }, + { + "epoch": 2.55, + "learning_rate": 2.6685787290618825e-06, + "loss": 0.9237, + "step": 4620 + }, + { + "epoch": 2.56, + "learning_rate": 2.636125119057428e-06, + "loss": 0.9162, + "step": 4625 + }, + { + "epoch": 2.56, + "learning_rate": 2.6038590720409565e-06, + "loss": 0.9145, + "step": 4630 + }, + { + "epoch": 2.56, + "learning_rate": 2.5717808586237067e-06, + "loss": 0.9776, + "step": 4635 + }, + { + "epoch": 2.57, + "learning_rate": 2.539890747841611e-06, + "loss": 0.9519, + "step": 4640 + }, + { + "epoch": 2.57, + "learning_rate": 2.5081890071529695e-06, + "loss": 0.9116, + "step": 4645 + }, + { + "epoch": 2.57, + "learning_rate": 2.4766759024362927e-06, + "loss": 0.9217, + "step": 4650 + }, + { + "epoch": 2.57, + "learning_rate": 2.445351697987988e-06, + "loss": 0.9113, + "step": 4655 + }, + { + "epoch": 2.58, + "learning_rate": 2.414216656520191e-06, + "loss": 0.9089, + "step": 4660 + }, + { + "epoch": 2.58, + "learning_rate": 2.3832710391585605e-06, + "loss": 0.9367, + "step": 4665 + }, + { + "epoch": 2.58, + "learning_rate": 2.3525151054400675e-06, + "loss": 0.9453, + "step": 4670 + }, + { + "epoch": 2.59, + "learning_rate": 2.3219491133108394e-06, + "loss": 0.9187, + "step": 4675 + }, + { + "epoch": 2.59, + "learning_rate": 2.2915733191239824e-06, + "loss": 0.911, + "step": 4680 + }, + { + "epoch": 2.59, + "learning_rate": 2.261387977637436e-06, + "loss": 0.9309, + "step": 4685 + }, + { + "epoch": 2.59, + "learning_rate": 2.2313933420118395e-06, + "loss": 0.8921, + "step": 4690 + }, + { + "epoch": 2.6, + "learning_rate": 2.2015896638084037e-06, + "loss": 0.9332, + "step": 4695 + }, + { + "epoch": 2.6, + "learning_rate": 2.171977192986813e-06, + "loss": 0.9316, + "step": 4700 + }, + { + "epoch": 2.6, + "learning_rate": 2.142556177903096e-06, + "loss": 0.9453, + "step": 4705 + }, + { + "epoch": 2.6, + "learning_rate": 2.1133268653076022e-06, + "loss": 0.937, + "step": 4710 + }, + { + "epoch": 2.61, + "learning_rate": 2.084289500342862e-06, + "loss": 0.9067, + "step": 4715 + }, + { + "epoch": 2.61, + "learning_rate": 2.0554443265415864e-06, + "loss": 0.9694, + "step": 4720 + }, + { + "epoch": 2.61, + "learning_rate": 2.0267915858245943e-06, + "loss": 0.9217, + "step": 4725 + }, + { + "epoch": 2.62, + "learning_rate": 1.998331518498797e-06, + "loss": 0.9399, + "step": 4730 + }, + { + "epoch": 2.62, + "learning_rate": 1.970064363255175e-06, + "loss": 0.9164, + "step": 4735 + }, + { + "epoch": 2.62, + "learning_rate": 1.941990357166784e-06, + "loss": 0.9336, + "step": 4740 + }, + { + "epoch": 2.62, + "learning_rate": 1.9141097356867644e-06, + "loss": 0.926, + "step": 4745 + }, + { + "epoch": 2.63, + "learning_rate": 1.8864227326463452e-06, + "loss": 0.9109, + "step": 4750 + }, + { + "epoch": 2.63, + "learning_rate": 1.8589295802529328e-06, + "loss": 0.9528, + "step": 4755 + }, + { + "epoch": 2.63, + "learning_rate": 1.8316305090881003e-06, + "loss": 0.9142, + "step": 4760 + }, + { + "epoch": 2.63, + "learning_rate": 1.8045257481057204e-06, + "loss": 0.9247, + "step": 4765 + }, + { + "epoch": 2.64, + "learning_rate": 1.7776155246299747e-06, + "loss": 0.9162, + "step": 4770 + }, + { + "epoch": 2.64, + "learning_rate": 1.7509000643535167e-06, + "loss": 0.9084, + "step": 4775 + }, + { + "epoch": 2.64, + "learning_rate": 1.7243795913355148e-06, + "loss": 0.8427, + "step": 4780 + }, + { + "epoch": 2.65, + "learning_rate": 1.6980543279998401e-06, + "loss": 0.8851, + "step": 4785 + }, + { + "epoch": 2.65, + "learning_rate": 1.671924495133126e-06, + "loss": 0.9155, + "step": 4790 + }, + { + "epoch": 2.65, + "learning_rate": 1.6459903118829777e-06, + "loss": 0.9049, + "step": 4795 + }, + { + "epoch": 2.65, + "learning_rate": 1.6202519957561114e-06, + "loss": 0.8831, + "step": 4800 + }, + { + "epoch": 2.66, + "learning_rate": 1.5947097626165252e-06, + "loss": 0.9286, + "step": 4805 + }, + { + "epoch": 2.66, + "learning_rate": 1.5693638266836952e-06, + "loss": 0.9299, + "step": 4810 + }, + { + "epoch": 2.66, + "learning_rate": 1.5442144005307774e-06, + "loss": 0.8739, + "step": 4815 + }, + { + "epoch": 2.67, + "learning_rate": 1.519261695082827e-06, + "loss": 0.9201, + "step": 4820 + }, + { + "epoch": 2.67, + "learning_rate": 1.4945059196150247e-06, + "loss": 0.9062, + "step": 4825 + }, + { + "epoch": 2.67, + "learning_rate": 1.4699472817509248e-06, + "loss": 0.9304, + "step": 4830 + }, + { + "epoch": 2.67, + "learning_rate": 1.4455859874607235e-06, + "loss": 0.9253, + "step": 4835 + }, + { + "epoch": 2.68, + "learning_rate": 1.4214222410594947e-06, + "loss": 0.9261, + "step": 4840 + }, + { + "epoch": 2.68, + "learning_rate": 1.3974562452055418e-06, + "loss": 0.8764, + "step": 4845 + }, + { + "epoch": 2.68, + "learning_rate": 1.3736882008986262e-06, + "loss": 0.8906, + "step": 4850 + }, + { + "epoch": 2.68, + "learning_rate": 1.3501183074783263e-06, + "loss": 0.9671, + "step": 4855 + }, + { + "epoch": 2.69, + "learning_rate": 1.3267467626223606e-06, + "loss": 0.8979, + "step": 4860 + }, + { + "epoch": 2.69, + "learning_rate": 1.3035737623449146e-06, + "loss": 0.9284, + "step": 4865 + }, + { + "epoch": 2.69, + "learning_rate": 1.2805995009950083e-06, + "loss": 0.9338, + "step": 4870 + }, + { + "epoch": 2.7, + "learning_rate": 1.257824171254865e-06, + "loss": 0.9474, + "step": 4875 + }, + { + "epoch": 2.7, + "learning_rate": 1.2352479641382919e-06, + "loss": 0.9307, + "step": 4880 + }, + { + "epoch": 2.7, + "learning_rate": 1.2128710689890826e-06, + "loss": 0.9578, + "step": 4885 + }, + { + "epoch": 2.7, + "learning_rate": 1.1906936734794233e-06, + "loss": 0.9123, + "step": 4890 + }, + { + "epoch": 2.71, + "learning_rate": 1.1687159636083161e-06, + "loss": 0.9157, + "step": 4895 + }, + { + "epoch": 2.71, + "learning_rate": 1.1469381237000476e-06, + "loss": 0.9135, + "step": 4900 + } + ], + "max_steps": 5424, + "num_train_epochs": 3, + "total_flos": 1.5923139067458355e+18, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-4900/training_args.bin b/checkpoint-4900/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..77433459daca6603950afc463ff989e817f81ac3 --- /dev/null +++ b/checkpoint-4900/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0f6a8853599a52b1f15984d75ce4b6434c335fdc67d4509d0b2ca0d8c613590a +size 3362 diff --git a/checkpoint-500/README.md b/checkpoint-500/README.md new file mode 100644 index 0000000000000000000000000000000000000000..53c0fe6b436905ad7a2c060e98fe1ebdeac1a2fe --- /dev/null +++ b/checkpoint-500/README.md @@ -0,0 +1,15 @@ +--- +library_name: peft +--- +## Training procedure + + +The following `bitsandbytes` quantization config was used during training: +- load_in_8bit: True +- llm_int8_threshold: 6.0 +- llm_int8_skip_modules: None +- llm_int8_enable_fp32_cpu_offload: False +### Framework versions + + +- PEFT 0.5.0 diff --git a/checkpoint-500/adapter_config.json b/checkpoint-500/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..514ae99ac7471cc26016f7575e44dd8304f29b07 --- /dev/null +++ b/checkpoint-500/adapter_config.json @@ -0,0 +1,21 @@ +{ + "auto_mapping": null, + "base_model_name_or_path": "meta-llama/Llama-2-7b-chat-hf", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "q_proj", + "v_proj" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-500/adapter_model.bin b/checkpoint-500/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..ff386bbb11d072e10df0d1471e4586c317c687ba --- /dev/null +++ b/checkpoint-500/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ad932954f71b60d7c44554696ebd2df172d24b30841ecb5888ba057fa3f6370f +size 16821197 diff --git a/checkpoint-500/finetuning_args.json b/checkpoint-500/finetuning_args.json new file mode 100644 index 0000000000000000000000000000000000000000..d01efc206b59c6f88548e8f3940579f2ed2af33b --- /dev/null +++ b/checkpoint-500/finetuning_args.json @@ -0,0 +1,16 @@ +{ + "dpo_beta": 0.1, + "finetuning_type": "lora", + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "lora_rank": 8, + "lora_target": [ + "q_proj", + "v_proj" + ], + "name_module_trainable": "mlp", + "num_hidden_layers": 32, + "num_layer_trainable": 3, + "ppo_score_norm": false, + "resume_lora_training": true +} diff --git a/checkpoint-500/optimizer.pt b/checkpoint-500/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..24a3ecac86dc6759c9a9d6b39be83e67948521f6 --- /dev/null +++ b/checkpoint-500/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:303e479cc3d66d0d4d065a93cf34e575ffcbd7809e54156cd2552cf21db80b2e +size 33629893 diff --git a/checkpoint-500/rng_state.pth b/checkpoint-500/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..ae2e0584c60a848f7beae78eb94de4aa0dadcbeb --- /dev/null +++ b/checkpoint-500/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:355984b476d00dd4f156686f73e833557e23fd7b0e5f80526a208f891ed5e453 +size 14575 diff --git a/checkpoint-500/scaler.pt b/checkpoint-500/scaler.pt new file mode 100644 index 0000000000000000000000000000000000000000..b5f2e5ee77c2cef5eff816170558ce9209855685 --- /dev/null +++ b/checkpoint-500/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a3f196a54202bb4ba1220e8c59f42f9cda0702d68ea83147d814c2fb2f36b8f2 +size 557 diff --git a/checkpoint-500/scheduler.pt b/checkpoint-500/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..8c6a01d5ef88ab4e89aa53b1cee6f066f29d2a10 --- /dev/null +++ b/checkpoint-500/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be78d2c8c1067138eb2333c1a97fa65f974d4f3acd31f4e79965497c65336a3a +size 627 diff --git a/checkpoint-500/trainer_state.json b/checkpoint-500/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..cd4cd497571f90cb4e44b09ed1a6afceb2ac071b --- /dev/null +++ b/checkpoint-500/trainer_state.json @@ -0,0 +1,616 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.2764722145424385, + "global_step": 500, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.9999895164082156e-05, + "loss": 1.6349, + "step": 5 + }, + { + "epoch": 0.01, + "learning_rate": 4.999958065720787e-05, + "loss": 1.6199, + "step": 10 + }, + { + "epoch": 0.01, + "learning_rate": 4.999905648201487e-05, + "loss": 1.4834, + "step": 15 + }, + { + "epoch": 0.01, + "learning_rate": 4.999832264289934e-05, + "loss": 1.3882, + "step": 20 + }, + { + "epoch": 0.01, + "learning_rate": 4.999737914601591e-05, + "loss": 1.3679, + "step": 25 + }, + { + "epoch": 0.02, + "learning_rate": 4.999622599927756e-05, + "loss": 1.2396, + "step": 30 + }, + { + "epoch": 0.02, + "learning_rate": 4.999486321235559e-05, + "loss": 1.321, + "step": 35 + }, + { + "epoch": 0.02, + "learning_rate": 4.9993290796679516e-05, + "loss": 1.2874, + "step": 40 + }, + { + "epoch": 0.02, + "learning_rate": 4.999150876543699e-05, + "loss": 1.2607, + "step": 45 + }, + { + "epoch": 0.03, + "learning_rate": 4.9989517133573694e-05, + "loss": 1.2454, + "step": 50 + }, + { + "epoch": 0.03, + "learning_rate": 4.9987315917793174e-05, + "loss": 1.2799, + "step": 55 + }, + { + "epoch": 0.03, + "learning_rate": 4.998490513655676e-05, + "loss": 1.2575, + "step": 60 + }, + { + "epoch": 0.04, + "learning_rate": 4.998228481008337e-05, + "loss": 1.2404, + "step": 65 + }, + { + "epoch": 0.04, + "learning_rate": 4.997945496034934e-05, + "loss": 1.2219, + "step": 70 + }, + { + "epoch": 0.04, + "learning_rate": 4.9976415611088267e-05, + "loss": 1.2241, + "step": 75 + }, + { + "epoch": 0.04, + "learning_rate": 4.997316678779079e-05, + "loss": 1.1716, + "step": 80 + }, + { + "epoch": 0.05, + "learning_rate": 4.996970851770438e-05, + "loss": 1.1883, + "step": 85 + }, + { + "epoch": 0.05, + "learning_rate": 4.9966040829833115e-05, + "loss": 1.205, + "step": 90 + }, + { + "epoch": 0.05, + "learning_rate": 4.9962163754937426e-05, + "loss": 1.1246, + "step": 95 + }, + { + "epoch": 0.06, + "learning_rate": 4.995807732553384e-05, + "loss": 1.1636, + "step": 100 + }, + { + "epoch": 0.06, + "learning_rate": 4.9953781575894723e-05, + "loss": 1.158, + "step": 105 + }, + { + "epoch": 0.06, + "learning_rate": 4.9949276542048e-05, + "loss": 1.1477, + "step": 110 + }, + { + "epoch": 0.06, + "learning_rate": 4.9944562261776805e-05, + "loss": 1.1678, + "step": 115 + }, + { + "epoch": 0.07, + "learning_rate": 4.9939638774619216e-05, + "loss": 1.1501, + "step": 120 + }, + { + "epoch": 0.07, + "learning_rate": 4.99345061218679e-05, + "loss": 1.1955, + "step": 125 + }, + { + "epoch": 0.07, + "learning_rate": 4.9929164346569756e-05, + "loss": 1.1724, + "step": 130 + }, + { + "epoch": 0.07, + "learning_rate": 4.9923613493525576e-05, + "loss": 1.177, + "step": 135 + }, + { + "epoch": 0.08, + "learning_rate": 4.991785360928968e-05, + "loss": 1.1418, + "step": 140 + }, + { + "epoch": 0.08, + "learning_rate": 4.991188474216947e-05, + "loss": 1.1898, + "step": 145 + }, + { + "epoch": 0.08, + "learning_rate": 4.9905706942225094e-05, + "loss": 1.1479, + "step": 150 + }, + { + "epoch": 0.09, + "learning_rate": 4.9899320261268966e-05, + "loss": 1.1356, + "step": 155 + }, + { + "epoch": 0.09, + "learning_rate": 4.989272475286537e-05, + "loss": 1.1397, + "step": 160 + }, + { + "epoch": 0.09, + "learning_rate": 4.9885920472330004e-05, + "loss": 1.1215, + "step": 165 + }, + { + "epoch": 0.09, + "learning_rate": 4.9878907476729516e-05, + "loss": 1.167, + "step": 170 + }, + { + "epoch": 0.1, + "learning_rate": 4.9871685824881e-05, + "loss": 1.1219, + "step": 175 + }, + { + "epoch": 0.1, + "learning_rate": 4.9864255577351534e-05, + "loss": 1.0835, + "step": 180 + }, + { + "epoch": 0.1, + "learning_rate": 4.985661679645769e-05, + "loss": 1.0721, + "step": 185 + }, + { + "epoch": 0.11, + "learning_rate": 4.9848769546264915e-05, + "loss": 1.0692, + "step": 190 + }, + { + "epoch": 0.11, + "learning_rate": 4.9840713892587146e-05, + "loss": 1.0488, + "step": 195 + }, + { + "epoch": 0.11, + "learning_rate": 4.983244990298609e-05, + "loss": 1.1285, + "step": 200 + }, + { + "epoch": 0.11, + "learning_rate": 4.982397764677081e-05, + "loss": 1.0832, + "step": 205 + }, + { + "epoch": 0.12, + "learning_rate": 4.981529719499704e-05, + "loss": 1.0652, + "step": 210 + }, + { + "epoch": 0.12, + "learning_rate": 4.980640862046663e-05, + "loss": 1.1043, + "step": 215 + }, + { + "epoch": 0.12, + "learning_rate": 4.979731199772693e-05, + "loss": 1.112, + "step": 220 + }, + { + "epoch": 0.12, + "learning_rate": 4.9788007403070146e-05, + "loss": 1.1029, + "step": 225 + }, + { + "epoch": 0.13, + "learning_rate": 4.977849491453277e-05, + "loss": 1.0869, + "step": 230 + }, + { + "epoch": 0.13, + "learning_rate": 4.976877461189481e-05, + "loss": 1.0843, + "step": 235 + }, + { + "epoch": 0.13, + "learning_rate": 4.975884657667922e-05, + "loss": 1.0789, + "step": 240 + }, + { + "epoch": 0.14, + "learning_rate": 4.974871089215118e-05, + "loss": 1.0449, + "step": 245 + }, + { + "epoch": 0.14, + "learning_rate": 4.9738367643317405e-05, + "loss": 1.1053, + "step": 250 + }, + { + "epoch": 0.14, + "learning_rate": 4.9727816916925395e-05, + "loss": 1.0651, + "step": 255 + }, + { + "epoch": 0.14, + "learning_rate": 4.971705880146276e-05, + "loss": 1.0828, + "step": 260 + }, + { + "epoch": 0.15, + "learning_rate": 4.970609338715646e-05, + "loss": 1.0932, + "step": 265 + }, + { + "epoch": 0.15, + "learning_rate": 4.969492076597203e-05, + "loss": 1.0648, + "step": 270 + }, + { + "epoch": 0.15, + "learning_rate": 4.968354103161283e-05, + "loss": 1.0948, + "step": 275 + }, + { + "epoch": 0.15, + "learning_rate": 4.967195427951926e-05, + "loss": 1.0721, + "step": 280 + }, + { + "epoch": 0.16, + "learning_rate": 4.9660160606867936e-05, + "loss": 1.124, + "step": 285 + }, + { + "epoch": 0.16, + "learning_rate": 4.9648160112570896e-05, + "loss": 1.0963, + "step": 290 + }, + { + "epoch": 0.16, + "learning_rate": 4.9635952897274773e-05, + "loss": 1.1078, + "step": 295 + }, + { + "epoch": 0.17, + "learning_rate": 4.9623539063359925e-05, + "loss": 1.1059, + "step": 300 + }, + { + "epoch": 0.17, + "learning_rate": 4.961091871493962e-05, + "loss": 1.1032, + "step": 305 + }, + { + "epoch": 0.17, + "learning_rate": 4.959809195785912e-05, + "loss": 1.0595, + "step": 310 + }, + { + "epoch": 0.17, + "learning_rate": 4.958505889969481e-05, + "loss": 1.1096, + "step": 315 + }, + { + "epoch": 0.18, + "learning_rate": 4.957181964975329e-05, + "loss": 1.0589, + "step": 320 + }, + { + "epoch": 0.18, + "learning_rate": 4.955837431907049e-05, + "loss": 1.0608, + "step": 325 + }, + { + "epoch": 0.18, + "learning_rate": 4.954472302041069e-05, + "loss": 1.0819, + "step": 330 + }, + { + "epoch": 0.19, + "learning_rate": 4.9530865868265605e-05, + "loss": 1.0759, + "step": 335 + }, + { + "epoch": 0.19, + "learning_rate": 4.951680297885342e-05, + "loss": 1.0515, + "step": 340 + }, + { + "epoch": 0.19, + "learning_rate": 4.950253447011779e-05, + "loss": 1.0371, + "step": 345 + }, + { + "epoch": 0.19, + "learning_rate": 4.948806046172691e-05, + "loss": 1.0619, + "step": 350 + }, + { + "epoch": 0.2, + "learning_rate": 4.947338107507245e-05, + "loss": 1.0757, + "step": 355 + }, + { + "epoch": 0.2, + "learning_rate": 4.945849643326857e-05, + "loss": 1.0686, + "step": 360 + }, + { + "epoch": 0.2, + "learning_rate": 4.9443406661150874e-05, + "loss": 1.0809, + "step": 365 + }, + { + "epoch": 0.2, + "learning_rate": 4.942811188527537e-05, + "loss": 1.0704, + "step": 370 + }, + { + "epoch": 0.21, + "learning_rate": 4.941261223391742e-05, + "loss": 1.0655, + "step": 375 + }, + { + "epoch": 0.21, + "learning_rate": 4.939690783707063e-05, + "loss": 1.1182, + "step": 380 + }, + { + "epoch": 0.21, + "learning_rate": 4.938099882644578e-05, + "loss": 1.081, + "step": 385 + }, + { + "epoch": 0.22, + "learning_rate": 4.9364885335469734e-05, + "loss": 1.0792, + "step": 390 + }, + { + "epoch": 0.22, + "learning_rate": 4.93485674992843e-05, + "loss": 1.0244, + "step": 395 + }, + { + "epoch": 0.22, + "learning_rate": 4.933204545474511e-05, + "loss": 1.0531, + "step": 400 + }, + { + "epoch": 0.22, + "learning_rate": 4.9315319340420465e-05, + "loss": 1.048, + "step": 405 + }, + { + "epoch": 0.23, + "learning_rate": 4.929838929659015e-05, + "loss": 1.04, + "step": 410 + }, + { + "epoch": 0.23, + "learning_rate": 4.9281255465244314e-05, + "loss": 1.0686, + "step": 415 + }, + { + "epoch": 0.23, + "learning_rate": 4.926391799008223e-05, + "loss": 1.0933, + "step": 420 + }, + { + "epoch": 0.24, + "learning_rate": 4.924637701651111e-05, + "loss": 1.0313, + "step": 425 + }, + { + "epoch": 0.24, + "learning_rate": 4.9228632691644874e-05, + "loss": 1.0811, + "step": 430 + }, + { + "epoch": 0.24, + "learning_rate": 4.921068516430293e-05, + "loss": 1.0617, + "step": 435 + }, + { + "epoch": 0.24, + "learning_rate": 4.919253458500892e-05, + "loss": 1.0482, + "step": 440 + }, + { + "epoch": 0.25, + "learning_rate": 4.9174181105989445e-05, + "loss": 1.0681, + "step": 445 + }, + { + "epoch": 0.25, + "learning_rate": 4.9155624881172834e-05, + "loss": 1.0476, + "step": 450 + }, + { + "epoch": 0.25, + "learning_rate": 4.913686606618777e-05, + "loss": 1.0463, + "step": 455 + }, + { + "epoch": 0.25, + "learning_rate": 4.911790481836208e-05, + "loss": 1.0213, + "step": 460 + }, + { + "epoch": 0.26, + "learning_rate": 4.909874129672133e-05, + "loss": 0.9855, + "step": 465 + }, + { + "epoch": 0.26, + "learning_rate": 4.907937566198757e-05, + "loss": 1.045, + "step": 470 + }, + { + "epoch": 0.26, + "learning_rate": 4.9059808076577914e-05, + "loss": 1.0442, + "step": 475 + }, + { + "epoch": 0.27, + "learning_rate": 4.904003870460323e-05, + "loss": 1.0354, + "step": 480 + }, + { + "epoch": 0.27, + "learning_rate": 4.9020067711866735e-05, + "loss": 1.0331, + "step": 485 + }, + { + "epoch": 0.27, + "learning_rate": 4.899989526586261e-05, + "loss": 1.0585, + "step": 490 + }, + { + "epoch": 0.27, + "learning_rate": 4.8979521535774636e-05, + "loss": 1.0223, + "step": 495 + }, + { + "epoch": 0.28, + "learning_rate": 4.895894669247468e-05, + "loss": 1.0118, + "step": 500 + } + ], + "max_steps": 5424, + "num_train_epochs": 3, + "total_flos": 1.6248515592192e+17, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-500/training_args.bin b/checkpoint-500/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..77433459daca6603950afc463ff989e817f81ac3 --- /dev/null +++ b/checkpoint-500/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0f6a8853599a52b1f15984d75ce4b6434c335fdc67d4509d0b2ca0d8c613590a +size 3362 diff --git a/checkpoint-5000/README.md b/checkpoint-5000/README.md new file mode 100644 index 0000000000000000000000000000000000000000..53c0fe6b436905ad7a2c060e98fe1ebdeac1a2fe --- /dev/null +++ b/checkpoint-5000/README.md @@ -0,0 +1,15 @@ +--- +library_name: peft +--- +## Training procedure + + +The following `bitsandbytes` quantization config was used during training: +- load_in_8bit: True +- llm_int8_threshold: 6.0 +- llm_int8_skip_modules: None +- llm_int8_enable_fp32_cpu_offload: False +### Framework versions + + +- PEFT 0.5.0 diff --git a/checkpoint-5000/adapter_config.json b/checkpoint-5000/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..514ae99ac7471cc26016f7575e44dd8304f29b07 --- /dev/null +++ b/checkpoint-5000/adapter_config.json @@ -0,0 +1,21 @@ +{ + "auto_mapping": null, + "base_model_name_or_path": "meta-llama/Llama-2-7b-chat-hf", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "q_proj", + "v_proj" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-5000/adapter_model.bin b/checkpoint-5000/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..0e05977fddc2cf889d558eb9b28d4a79c18bde03 --- /dev/null +++ b/checkpoint-5000/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:06224298f41aec729d2b8cda79bdd75d8e31e31370e18633b26eea4735ea8378 +size 16821197 diff --git a/checkpoint-5000/finetuning_args.json b/checkpoint-5000/finetuning_args.json new file mode 100644 index 0000000000000000000000000000000000000000..d01efc206b59c6f88548e8f3940579f2ed2af33b --- /dev/null +++ b/checkpoint-5000/finetuning_args.json @@ -0,0 +1,16 @@ +{ + "dpo_beta": 0.1, + "finetuning_type": "lora", + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "lora_rank": 8, + "lora_target": [ + "q_proj", + "v_proj" + ], + "name_module_trainable": "mlp", + "num_hidden_layers": 32, + "num_layer_trainable": 3, + "ppo_score_norm": false, + "resume_lora_training": true +} diff --git a/checkpoint-5000/optimizer.pt b/checkpoint-5000/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..6aab8112961f7e2cc954400a7a224a392aff4710 --- /dev/null +++ b/checkpoint-5000/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:81494c573186fb6bfb977940ecf0206d2b9a197fad925599d7598fa7a3d51c65 +size 33629893 diff --git a/checkpoint-5000/rng_state.pth b/checkpoint-5000/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..fb115774d4bc33ed0cd8a186dea76892e28424ea --- /dev/null +++ b/checkpoint-5000/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4380204d28884247de7d7f44b8905a722fceb081f130d5e50416893c7fad14a8 +size 14575 diff --git a/checkpoint-5000/scaler.pt b/checkpoint-5000/scaler.pt new file mode 100644 index 0000000000000000000000000000000000000000..ab82f3d3e55d082d5183e0e78c0ecace914590c2 --- /dev/null +++ b/checkpoint-5000/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0ed308f53e68b6f576907584838b938f8d640f8dec0127b2cf14ba0917756d13 +size 557 diff --git a/checkpoint-5000/scheduler.pt b/checkpoint-5000/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..f9e74aaeffb10ec1dbd8b186531bb710eff68604 --- /dev/null +++ b/checkpoint-5000/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:40562eb8fcb912e073116210f416da4c2203b0095783bf7185c2b9ae1f934ce1 +size 627 diff --git a/checkpoint-5000/trainer_state.json b/checkpoint-5000/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..d961c3406759cfdbc5b558d886aaf232a862729a --- /dev/null +++ b/checkpoint-5000/trainer_state.json @@ -0,0 +1,6016 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 2.764722145424385, + "global_step": 5000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.9999895164082156e-05, + "loss": 1.6349, + "step": 5 + }, + { + "epoch": 0.01, + "learning_rate": 4.999958065720787e-05, + "loss": 1.6199, + "step": 10 + }, + { + "epoch": 0.01, + "learning_rate": 4.999905648201487e-05, + "loss": 1.4834, + "step": 15 + }, + { + "epoch": 0.01, + "learning_rate": 4.999832264289934e-05, + "loss": 1.3882, + "step": 20 + }, + { + "epoch": 0.01, + "learning_rate": 4.999737914601591e-05, + "loss": 1.3679, + "step": 25 + }, + { + "epoch": 0.02, + "learning_rate": 4.999622599927756e-05, + "loss": 1.2396, + "step": 30 + }, + { + "epoch": 0.02, + "learning_rate": 4.999486321235559e-05, + "loss": 1.321, + "step": 35 + }, + { + "epoch": 0.02, + "learning_rate": 4.9993290796679516e-05, + "loss": 1.2874, + "step": 40 + }, + { + "epoch": 0.02, + "learning_rate": 4.999150876543699e-05, + "loss": 1.2607, + "step": 45 + }, + { + "epoch": 0.03, + "learning_rate": 4.9989517133573694e-05, + "loss": 1.2454, + "step": 50 + }, + { + "epoch": 0.03, + "learning_rate": 4.9987315917793174e-05, + "loss": 1.2799, + "step": 55 + }, + { + "epoch": 0.03, + "learning_rate": 4.998490513655676e-05, + "loss": 1.2575, + "step": 60 + }, + { + "epoch": 0.04, + "learning_rate": 4.998228481008337e-05, + "loss": 1.2404, + "step": 65 + }, + { + "epoch": 0.04, + "learning_rate": 4.997945496034934e-05, + "loss": 1.2219, + "step": 70 + }, + { + "epoch": 0.04, + "learning_rate": 4.9976415611088267e-05, + "loss": 1.2241, + "step": 75 + }, + { + "epoch": 0.04, + "learning_rate": 4.997316678779079e-05, + "loss": 1.1716, + "step": 80 + }, + { + "epoch": 0.05, + "learning_rate": 4.996970851770438e-05, + "loss": 1.1883, + "step": 85 + }, + { + "epoch": 0.05, + "learning_rate": 4.9966040829833115e-05, + "loss": 1.205, + "step": 90 + }, + { + "epoch": 0.05, + "learning_rate": 4.9962163754937426e-05, + "loss": 1.1246, + "step": 95 + }, + { + "epoch": 0.06, + "learning_rate": 4.995807732553384e-05, + "loss": 1.1636, + "step": 100 + }, + { + "epoch": 0.06, + "learning_rate": 4.9953781575894723e-05, + "loss": 1.158, + "step": 105 + }, + { + "epoch": 0.06, + "learning_rate": 4.9949276542048e-05, + "loss": 1.1477, + "step": 110 + }, + { + "epoch": 0.06, + "learning_rate": 4.9944562261776805e-05, + "loss": 1.1678, + "step": 115 + }, + { + "epoch": 0.07, + "learning_rate": 4.9939638774619216e-05, + "loss": 1.1501, + "step": 120 + }, + { + "epoch": 0.07, + "learning_rate": 4.99345061218679e-05, + "loss": 1.1955, + "step": 125 + }, + { + "epoch": 0.07, + "learning_rate": 4.9929164346569756e-05, + "loss": 1.1724, + "step": 130 + }, + { + "epoch": 0.07, + "learning_rate": 4.9923613493525576e-05, + "loss": 1.177, + "step": 135 + }, + { + "epoch": 0.08, + "learning_rate": 4.991785360928968e-05, + "loss": 1.1418, + "step": 140 + }, + { + "epoch": 0.08, + "learning_rate": 4.991188474216947e-05, + "loss": 1.1898, + "step": 145 + }, + { + "epoch": 0.08, + "learning_rate": 4.9905706942225094e-05, + "loss": 1.1479, + "step": 150 + }, + { + "epoch": 0.09, + "learning_rate": 4.9899320261268966e-05, + "loss": 1.1356, + "step": 155 + }, + { + "epoch": 0.09, + "learning_rate": 4.989272475286537e-05, + "loss": 1.1397, + "step": 160 + }, + { + "epoch": 0.09, + "learning_rate": 4.9885920472330004e-05, + "loss": 1.1215, + "step": 165 + }, + { + "epoch": 0.09, + "learning_rate": 4.9878907476729516e-05, + "loss": 1.167, + "step": 170 + }, + { + "epoch": 0.1, + "learning_rate": 4.9871685824881e-05, + "loss": 1.1219, + "step": 175 + }, + { + "epoch": 0.1, + "learning_rate": 4.9864255577351534e-05, + "loss": 1.0835, + "step": 180 + }, + { + "epoch": 0.1, + "learning_rate": 4.985661679645769e-05, + "loss": 1.0721, + "step": 185 + }, + { + "epoch": 0.11, + "learning_rate": 4.9848769546264915e-05, + "loss": 1.0692, + "step": 190 + }, + { + "epoch": 0.11, + "learning_rate": 4.9840713892587146e-05, + "loss": 1.0488, + "step": 195 + }, + { + "epoch": 0.11, + "learning_rate": 4.983244990298609e-05, + "loss": 1.1285, + "step": 200 + }, + { + "epoch": 0.11, + "learning_rate": 4.982397764677081e-05, + "loss": 1.0832, + "step": 205 + }, + { + "epoch": 0.12, + "learning_rate": 4.981529719499704e-05, + "loss": 1.0652, + "step": 210 + }, + { + "epoch": 0.12, + "learning_rate": 4.980640862046663e-05, + "loss": 1.1043, + "step": 215 + }, + { + "epoch": 0.12, + "learning_rate": 4.979731199772693e-05, + "loss": 1.112, + "step": 220 + }, + { + "epoch": 0.12, + "learning_rate": 4.9788007403070146e-05, + "loss": 1.1029, + "step": 225 + }, + { + "epoch": 0.13, + "learning_rate": 4.977849491453277e-05, + "loss": 1.0869, + "step": 230 + }, + { + "epoch": 0.13, + "learning_rate": 4.976877461189481e-05, + "loss": 1.0843, + "step": 235 + }, + { + "epoch": 0.13, + "learning_rate": 4.975884657667922e-05, + "loss": 1.0789, + "step": 240 + }, + { + "epoch": 0.14, + "learning_rate": 4.974871089215118e-05, + "loss": 1.0449, + "step": 245 + }, + { + "epoch": 0.14, + "learning_rate": 4.9738367643317405e-05, + "loss": 1.1053, + "step": 250 + }, + { + "epoch": 0.14, + "learning_rate": 4.9727816916925395e-05, + "loss": 1.0651, + "step": 255 + }, + { + "epoch": 0.14, + "learning_rate": 4.971705880146276e-05, + "loss": 1.0828, + "step": 260 + }, + { + "epoch": 0.15, + "learning_rate": 4.970609338715646e-05, + "loss": 1.0932, + "step": 265 + }, + { + "epoch": 0.15, + "learning_rate": 4.969492076597203e-05, + "loss": 1.0648, + "step": 270 + }, + { + "epoch": 0.15, + "learning_rate": 4.968354103161283e-05, + "loss": 1.0948, + "step": 275 + }, + { + "epoch": 0.15, + "learning_rate": 4.967195427951926e-05, + "loss": 1.0721, + "step": 280 + }, + { + "epoch": 0.16, + "learning_rate": 4.9660160606867936e-05, + "loss": 1.124, + "step": 285 + }, + { + "epoch": 0.16, + "learning_rate": 4.9648160112570896e-05, + "loss": 1.0963, + "step": 290 + }, + { + "epoch": 0.16, + "learning_rate": 4.9635952897274773e-05, + "loss": 1.1078, + "step": 295 + }, + { + "epoch": 0.17, + "learning_rate": 4.9623539063359925e-05, + "loss": 1.1059, + "step": 300 + }, + { + "epoch": 0.17, + "learning_rate": 4.961091871493962e-05, + "loss": 1.1032, + "step": 305 + }, + { + "epoch": 0.17, + "learning_rate": 4.959809195785912e-05, + "loss": 1.0595, + "step": 310 + }, + { + "epoch": 0.17, + "learning_rate": 4.958505889969481e-05, + "loss": 1.1096, + "step": 315 + }, + { + "epoch": 0.18, + "learning_rate": 4.957181964975329e-05, + "loss": 1.0589, + "step": 320 + }, + { + "epoch": 0.18, + "learning_rate": 4.955837431907049e-05, + "loss": 1.0608, + "step": 325 + }, + { + "epoch": 0.18, + "learning_rate": 4.954472302041069e-05, + "loss": 1.0819, + "step": 330 + }, + { + "epoch": 0.19, + "learning_rate": 4.9530865868265605e-05, + "loss": 1.0759, + "step": 335 + }, + { + "epoch": 0.19, + "learning_rate": 4.951680297885342e-05, + "loss": 1.0515, + "step": 340 + }, + { + "epoch": 0.19, + "learning_rate": 4.950253447011779e-05, + "loss": 1.0371, + "step": 345 + }, + { + "epoch": 0.19, + "learning_rate": 4.948806046172691e-05, + "loss": 1.0619, + "step": 350 + }, + { + "epoch": 0.2, + "learning_rate": 4.947338107507245e-05, + "loss": 1.0757, + "step": 355 + }, + { + "epoch": 0.2, + "learning_rate": 4.945849643326857e-05, + "loss": 1.0686, + "step": 360 + }, + { + "epoch": 0.2, + "learning_rate": 4.9443406661150874e-05, + "loss": 1.0809, + "step": 365 + }, + { + "epoch": 0.2, + "learning_rate": 4.942811188527537e-05, + "loss": 1.0704, + "step": 370 + }, + { + "epoch": 0.21, + "learning_rate": 4.941261223391742e-05, + "loss": 1.0655, + "step": 375 + }, + { + "epoch": 0.21, + "learning_rate": 4.939690783707063e-05, + "loss": 1.1182, + "step": 380 + }, + { + "epoch": 0.21, + "learning_rate": 4.938099882644578e-05, + "loss": 1.081, + "step": 385 + }, + { + "epoch": 0.22, + "learning_rate": 4.9364885335469734e-05, + "loss": 1.0792, + "step": 390 + }, + { + "epoch": 0.22, + "learning_rate": 4.93485674992843e-05, + "loss": 1.0244, + "step": 395 + }, + { + "epoch": 0.22, + "learning_rate": 4.933204545474511e-05, + "loss": 1.0531, + "step": 400 + }, + { + "epoch": 0.22, + "learning_rate": 4.9315319340420465e-05, + "loss": 1.048, + "step": 405 + }, + { + "epoch": 0.23, + "learning_rate": 4.929838929659015e-05, + "loss": 1.04, + "step": 410 + }, + { + "epoch": 0.23, + "learning_rate": 4.9281255465244314e-05, + "loss": 1.0686, + "step": 415 + }, + { + "epoch": 0.23, + "learning_rate": 4.926391799008223e-05, + "loss": 1.0933, + "step": 420 + }, + { + "epoch": 0.24, + "learning_rate": 4.924637701651111e-05, + "loss": 1.0313, + "step": 425 + }, + { + "epoch": 0.24, + "learning_rate": 4.9228632691644874e-05, + "loss": 1.0811, + "step": 430 + }, + { + "epoch": 0.24, + "learning_rate": 4.921068516430293e-05, + "loss": 1.0617, + "step": 435 + }, + { + "epoch": 0.24, + "learning_rate": 4.919253458500892e-05, + "loss": 1.0482, + "step": 440 + }, + { + "epoch": 0.25, + "learning_rate": 4.9174181105989445e-05, + "loss": 1.0681, + "step": 445 + }, + { + "epoch": 0.25, + "learning_rate": 4.9155624881172834e-05, + "loss": 1.0476, + "step": 450 + }, + { + "epoch": 0.25, + "learning_rate": 4.913686606618777e-05, + "loss": 1.0463, + "step": 455 + }, + { + "epoch": 0.25, + "learning_rate": 4.911790481836208e-05, + "loss": 1.0213, + "step": 460 + }, + { + "epoch": 0.26, + "learning_rate": 4.909874129672133e-05, + "loss": 0.9855, + "step": 465 + }, + { + "epoch": 0.26, + "learning_rate": 4.907937566198757e-05, + "loss": 1.045, + "step": 470 + }, + { + "epoch": 0.26, + "learning_rate": 4.9059808076577914e-05, + "loss": 1.0442, + "step": 475 + }, + { + "epoch": 0.27, + "learning_rate": 4.904003870460323e-05, + "loss": 1.0354, + "step": 480 + }, + { + "epoch": 0.27, + "learning_rate": 4.9020067711866735e-05, + "loss": 1.0331, + "step": 485 + }, + { + "epoch": 0.27, + "learning_rate": 4.899989526586261e-05, + "loss": 1.0585, + "step": 490 + }, + { + "epoch": 0.27, + "learning_rate": 4.8979521535774636e-05, + "loss": 1.0223, + "step": 495 + }, + { + "epoch": 0.28, + "learning_rate": 4.895894669247468e-05, + "loss": 1.0118, + "step": 500 + }, + { + "epoch": 0.28, + "learning_rate": 4.8938170908521356e-05, + "loss": 1.0508, + "step": 505 + }, + { + "epoch": 0.28, + "learning_rate": 4.8917194358158534e-05, + "loss": 1.0656, + "step": 510 + }, + { + "epoch": 0.28, + "learning_rate": 4.8896017217313886e-05, + "loss": 1.0655, + "step": 515 + }, + { + "epoch": 0.29, + "learning_rate": 4.887463966359741e-05, + "loss": 1.0833, + "step": 520 + }, + { + "epoch": 0.29, + "learning_rate": 4.8853061876299956e-05, + "loss": 1.068, + "step": 525 + }, + { + "epoch": 0.29, + "learning_rate": 4.8831284036391684e-05, + "loss": 1.0487, + "step": 530 + }, + { + "epoch": 0.3, + "learning_rate": 4.880930632652058e-05, + "loss": 1.059, + "step": 535 + }, + { + "epoch": 0.3, + "learning_rate": 4.878712893101092e-05, + "loss": 1.0408, + "step": 540 + }, + { + "epoch": 0.3, + "learning_rate": 4.876475203586171e-05, + "loss": 0.9976, + "step": 545 + }, + { + "epoch": 0.3, + "learning_rate": 4.874217582874514e-05, + "loss": 1.0358, + "step": 550 + }, + { + "epoch": 0.31, + "learning_rate": 4.8719400499005e-05, + "loss": 1.0073, + "step": 555 + }, + { + "epoch": 0.31, + "learning_rate": 4.869642623765509e-05, + "loss": 1.0742, + "step": 560 + }, + { + "epoch": 0.31, + "learning_rate": 4.867325323737765e-05, + "loss": 1.0463, + "step": 565 + }, + { + "epoch": 0.32, + "learning_rate": 4.864988169252168e-05, + "loss": 1.0242, + "step": 570 + }, + { + "epoch": 0.32, + "learning_rate": 4.8626311799101375e-05, + "loss": 1.0773, + "step": 575 + }, + { + "epoch": 0.32, + "learning_rate": 4.860254375479446e-05, + "loss": 1.0371, + "step": 580 + }, + { + "epoch": 0.32, + "learning_rate": 4.8578577758940504e-05, + "loss": 1.0399, + "step": 585 + }, + { + "epoch": 0.33, + "learning_rate": 4.855441401253928e-05, + "loss": 1.0411, + "step": 590 + }, + { + "epoch": 0.33, + "learning_rate": 4.8530052718249076e-05, + "loss": 1.0476, + "step": 595 + }, + { + "epoch": 0.33, + "learning_rate": 4.850549408038498e-05, + "loss": 1.0075, + "step": 600 + }, + { + "epoch": 0.33, + "learning_rate": 4.848073830491717e-05, + "loss": 1.0045, + "step": 605 + }, + { + "epoch": 0.34, + "learning_rate": 4.845578559946923e-05, + "loss": 1.0368, + "step": 610 + }, + { + "epoch": 0.34, + "learning_rate": 4.8430636173316306e-05, + "loss": 1.0156, + "step": 615 + }, + { + "epoch": 0.34, + "learning_rate": 4.840529023738348e-05, + "loss": 1.0334, + "step": 620 + }, + { + "epoch": 0.35, + "learning_rate": 4.837974800424389e-05, + "loss": 0.9994, + "step": 625 + }, + { + "epoch": 0.35, + "learning_rate": 4.8354009688117026e-05, + "loss": 1.0467, + "step": 630 + }, + { + "epoch": 0.35, + "learning_rate": 4.8328075504866874e-05, + "loss": 0.9968, + "step": 635 + }, + { + "epoch": 0.35, + "learning_rate": 4.8301945672000164e-05, + "loss": 1.041, + "step": 640 + }, + { + "epoch": 0.36, + "learning_rate": 4.8275620408664487e-05, + "loss": 0.9811, + "step": 645 + }, + { + "epoch": 0.36, + "learning_rate": 4.8249099935646494e-05, + "loss": 1.0235, + "step": 650 + }, + { + "epoch": 0.36, + "learning_rate": 4.822238447537003e-05, + "loss": 1.0152, + "step": 655 + }, + { + "epoch": 0.36, + "learning_rate": 4.819547425189429e-05, + "loss": 1.038, + "step": 660 + }, + { + "epoch": 0.37, + "learning_rate": 4.81683694909119e-05, + "loss": 1.0387, + "step": 665 + }, + { + "epoch": 0.37, + "learning_rate": 4.814107041974707e-05, + "loss": 1.019, + "step": 670 + }, + { + "epoch": 0.37, + "learning_rate": 4.811357726735366e-05, + "loss": 1.0403, + "step": 675 + }, + { + "epoch": 0.38, + "learning_rate": 4.808589026431324e-05, + "loss": 1.0828, + "step": 680 + }, + { + "epoch": 0.38, + "learning_rate": 4.805800964283322e-05, + "loss": 1.0128, + "step": 685 + }, + { + "epoch": 0.38, + "learning_rate": 4.802993563674483e-05, + "loss": 1.0502, + "step": 690 + }, + { + "epoch": 0.38, + "learning_rate": 4.80016684815012e-05, + "loss": 1.0169, + "step": 695 + }, + { + "epoch": 0.39, + "learning_rate": 4.7973208414175406e-05, + "loss": 1.0316, + "step": 700 + }, + { + "epoch": 0.39, + "learning_rate": 4.794455567345842e-05, + "loss": 1.0263, + "step": 705 + }, + { + "epoch": 0.39, + "learning_rate": 4.791571049965714e-05, + "loss": 1.0801, + "step": 710 + }, + { + "epoch": 0.4, + "learning_rate": 4.7886673134692404e-05, + "loss": 1.0575, + "step": 715 + }, + { + "epoch": 0.4, + "learning_rate": 4.7857443822096905e-05, + "loss": 0.9767, + "step": 720 + }, + { + "epoch": 0.4, + "learning_rate": 4.782802280701319e-05, + "loss": 0.9666, + "step": 725 + }, + { + "epoch": 0.4, + "learning_rate": 4.77984103361916e-05, + "loss": 0.9983, + "step": 730 + }, + { + "epoch": 0.41, + "learning_rate": 4.776860665798816e-05, + "loss": 1.0219, + "step": 735 + }, + { + "epoch": 0.41, + "learning_rate": 4.773861202236257e-05, + "loss": 0.9963, + "step": 740 + }, + { + "epoch": 0.41, + "learning_rate": 4.770842668087602e-05, + "loss": 1.0219, + "step": 745 + }, + { + "epoch": 0.41, + "learning_rate": 4.767805088668916e-05, + "loss": 1.0029, + "step": 750 + }, + { + "epoch": 0.42, + "learning_rate": 4.7647484894559936e-05, + "loss": 1.0077, + "step": 755 + }, + { + "epoch": 0.42, + "learning_rate": 4.7616728960841444e-05, + "loss": 0.9908, + "step": 760 + }, + { + "epoch": 0.42, + "learning_rate": 4.758578334347981e-05, + "loss": 1.0092, + "step": 765 + }, + { + "epoch": 0.43, + "learning_rate": 4.7554648302012015e-05, + "loss": 0.9874, + "step": 770 + }, + { + "epoch": 0.43, + "learning_rate": 4.7523324097563706e-05, + "loss": 1.0185, + "step": 775 + }, + { + "epoch": 0.43, + "learning_rate": 4.749181099284703e-05, + "loss": 1.0533, + "step": 780 + }, + { + "epoch": 0.43, + "learning_rate": 4.746010925215839e-05, + "loss": 1.0083, + "step": 785 + }, + { + "epoch": 0.44, + "learning_rate": 4.74282191413763e-05, + "loss": 1.0191, + "step": 790 + }, + { + "epoch": 0.44, + "learning_rate": 4.7396140927959045e-05, + "loss": 0.9786, + "step": 795 + }, + { + "epoch": 0.44, + "learning_rate": 4.7363874880942574e-05, + "loss": 1.0197, + "step": 800 + }, + { + "epoch": 0.45, + "learning_rate": 4.733142127093813e-05, + "loss": 1.0471, + "step": 805 + }, + { + "epoch": 0.45, + "learning_rate": 4.7298780370130014e-05, + "loss": 1.0134, + "step": 810 + }, + { + "epoch": 0.45, + "learning_rate": 4.726595245227336e-05, + "loss": 0.9801, + "step": 815 + }, + { + "epoch": 0.45, + "learning_rate": 4.723293779269173e-05, + "loss": 0.9944, + "step": 820 + }, + { + "epoch": 0.46, + "learning_rate": 4.7199736668274924e-05, + "loss": 1.0358, + "step": 825 + }, + { + "epoch": 0.46, + "learning_rate": 4.716634935747655e-05, + "loss": 0.9907, + "step": 830 + }, + { + "epoch": 0.46, + "learning_rate": 4.713277614031177e-05, + "loss": 1.0698, + "step": 835 + }, + { + "epoch": 0.46, + "learning_rate": 4.70990172983549e-05, + "loss": 1.0466, + "step": 840 + }, + { + "epoch": 0.47, + "learning_rate": 4.706507311473707e-05, + "loss": 0.9489, + "step": 845 + }, + { + "epoch": 0.47, + "learning_rate": 4.703094387414385e-05, + "loss": 0.9936, + "step": 850 + }, + { + "epoch": 0.47, + "learning_rate": 4.699662986281288e-05, + "loss": 0.9672, + "step": 855 + }, + { + "epoch": 0.48, + "learning_rate": 4.696213136853141e-05, + "loss": 1.0164, + "step": 860 + }, + { + "epoch": 0.48, + "learning_rate": 4.6927448680633954e-05, + "loss": 0.9817, + "step": 865 + }, + { + "epoch": 0.48, + "learning_rate": 4.689258208999983e-05, + "loss": 1.028, + "step": 870 + }, + { + "epoch": 0.48, + "learning_rate": 4.6857531889050716e-05, + "loss": 1.0239, + "step": 875 + }, + { + "epoch": 0.49, + "learning_rate": 4.682229837174821e-05, + "loss": 1.0378, + "step": 880 + }, + { + "epoch": 0.49, + "learning_rate": 4.678688183359135e-05, + "loss": 0.9908, + "step": 885 + }, + { + "epoch": 0.49, + "learning_rate": 4.675128257161418e-05, + "loss": 0.9384, + "step": 890 + }, + { + "epoch": 0.49, + "learning_rate": 4.671550088438319e-05, + "loss": 0.9481, + "step": 895 + }, + { + "epoch": 0.5, + "learning_rate": 4.6679537071994874e-05, + "loss": 0.9541, + "step": 900 + }, + { + "epoch": 0.5, + "learning_rate": 4.6643391436073165e-05, + "loss": 0.9787, + "step": 905 + }, + { + "epoch": 0.5, + "learning_rate": 4.660706427976693e-05, + "loss": 0.9897, + "step": 910 + }, + { + "epoch": 0.51, + "learning_rate": 4.657055590774745e-05, + "loss": 0.9947, + "step": 915 + }, + { + "epoch": 0.51, + "learning_rate": 4.6533866626205805e-05, + "loss": 0.9614, + "step": 920 + }, + { + "epoch": 0.51, + "learning_rate": 4.649699674285036e-05, + "loss": 1.0323, + "step": 925 + }, + { + "epoch": 0.51, + "learning_rate": 4.645994656690417e-05, + "loss": 1.0345, + "step": 930 + }, + { + "epoch": 0.52, + "learning_rate": 4.642271640910235e-05, + "loss": 1.0432, + "step": 935 + }, + { + "epoch": 0.52, + "learning_rate": 4.638530658168954e-05, + "loss": 1.0102, + "step": 940 + }, + { + "epoch": 0.52, + "learning_rate": 4.6347717398417203e-05, + "loss": 1.0361, + "step": 945 + }, + { + "epoch": 0.53, + "learning_rate": 4.6309949174541096e-05, + "loss": 0.9818, + "step": 950 + }, + { + "epoch": 0.53, + "learning_rate": 4.627200222681851e-05, + "loss": 1.0293, + "step": 955 + }, + { + "epoch": 0.53, + "learning_rate": 4.6233876873505694e-05, + "loss": 1.0125, + "step": 960 + }, + { + "epoch": 0.53, + "learning_rate": 4.619557343435516e-05, + "loss": 1.0039, + "step": 965 + }, + { + "epoch": 0.54, + "learning_rate": 4.615709223061302e-05, + "loss": 1.0146, + "step": 970 + }, + { + "epoch": 0.54, + "learning_rate": 4.611843358501624e-05, + "loss": 1.0352, + "step": 975 + }, + { + "epoch": 0.54, + "learning_rate": 4.6079597821789993e-05, + "loss": 1.0405, + "step": 980 + }, + { + "epoch": 0.54, + "learning_rate": 4.604058526664491e-05, + "loss": 0.9834, + "step": 985 + }, + { + "epoch": 0.55, + "learning_rate": 4.600139624677436e-05, + "loss": 1.0252, + "step": 990 + }, + { + "epoch": 0.55, + "learning_rate": 4.596203109085168e-05, + "loss": 1.0516, + "step": 995 + }, + { + "epoch": 0.55, + "learning_rate": 4.5922490129027464e-05, + "loss": 0.9762, + "step": 1000 + }, + { + "epoch": 0.56, + "learning_rate": 4.588277369292674e-05, + "loss": 0.9801, + "step": 1005 + }, + { + "epoch": 0.56, + "learning_rate": 4.5842882115646234e-05, + "loss": 1.0127, + "step": 1010 + }, + { + "epoch": 0.56, + "learning_rate": 4.580281573175157e-05, + "loss": 1.0415, + "step": 1015 + }, + { + "epoch": 0.56, + "learning_rate": 4.576257487727442e-05, + "loss": 0.9974, + "step": 1020 + }, + { + "epoch": 0.57, + "learning_rate": 4.572215988970974e-05, + "loss": 1.0097, + "step": 1025 + }, + { + "epoch": 0.57, + "learning_rate": 4.568157110801293e-05, + "loss": 1.0004, + "step": 1030 + }, + { + "epoch": 0.57, + "learning_rate": 4.5640808872596944e-05, + "loss": 0.9932, + "step": 1035 + }, + { + "epoch": 0.58, + "learning_rate": 4.5599873525329505e-05, + "loss": 0.9437, + "step": 1040 + }, + { + "epoch": 0.58, + "learning_rate": 4.555876540953019e-05, + "loss": 1.0004, + "step": 1045 + }, + { + "epoch": 0.58, + "learning_rate": 4.551748486996755e-05, + "loss": 0.9743, + "step": 1050 + }, + { + "epoch": 0.58, + "learning_rate": 4.547603225285626e-05, + "loss": 1.0303, + "step": 1055 + }, + { + "epoch": 0.59, + "learning_rate": 4.543440790585417e-05, + "loss": 1.0225, + "step": 1060 + }, + { + "epoch": 0.59, + "learning_rate": 4.539261217805939e-05, + "loss": 1.0102, + "step": 1065 + }, + { + "epoch": 0.59, + "learning_rate": 4.535064542000743e-05, + "loss": 1.0087, + "step": 1070 + }, + { + "epoch": 0.59, + "learning_rate": 4.5308507983668165e-05, + "loss": 0.952, + "step": 1075 + }, + { + "epoch": 0.6, + "learning_rate": 4.526620022244293e-05, + "loss": 0.9767, + "step": 1080 + }, + { + "epoch": 0.6, + "learning_rate": 4.522372249116158e-05, + "loss": 1.0049, + "step": 1085 + }, + { + "epoch": 0.6, + "learning_rate": 4.5181075146079456e-05, + "loss": 1.0397, + "step": 1090 + }, + { + "epoch": 0.61, + "learning_rate": 4.5138258544874455e-05, + "loss": 1.0167, + "step": 1095 + }, + { + "epoch": 0.61, + "learning_rate": 4.5095273046643985e-05, + "loss": 0.9966, + "step": 1100 + }, + { + "epoch": 0.61, + "learning_rate": 4.5052119011901986e-05, + "loss": 0.983, + "step": 1105 + }, + { + "epoch": 0.61, + "learning_rate": 4.500879680257587e-05, + "loss": 1.0301, + "step": 1110 + }, + { + "epoch": 0.62, + "learning_rate": 4.4965306782003535e-05, + "loss": 1.0371, + "step": 1115 + }, + { + "epoch": 0.62, + "learning_rate": 4.492164931493028e-05, + "loss": 1.0083, + "step": 1120 + }, + { + "epoch": 0.62, + "learning_rate": 4.487782476750575e-05, + "loss": 0.9728, + "step": 1125 + }, + { + "epoch": 0.62, + "learning_rate": 4.4833833507280884e-05, + "loss": 0.9892, + "step": 1130 + }, + { + "epoch": 0.63, + "learning_rate": 4.4789675903204805e-05, + "loss": 0.9847, + "step": 1135 + }, + { + "epoch": 0.63, + "learning_rate": 4.474535232562176e-05, + "loss": 1.0545, + "step": 1140 + }, + { + "epoch": 0.63, + "learning_rate": 4.470086314626797e-05, + "loss": 1.0204, + "step": 1145 + }, + { + "epoch": 0.64, + "learning_rate": 4.465620873826856e-05, + "loss": 0.9648, + "step": 1150 + }, + { + "epoch": 0.64, + "learning_rate": 4.46113894761344e-05, + "loss": 0.9545, + "step": 1155 + }, + { + "epoch": 0.64, + "learning_rate": 4.456640573575896e-05, + "loss": 1.0256, + "step": 1160 + }, + { + "epoch": 0.64, + "learning_rate": 4.4521257894415183e-05, + "loss": 0.99, + "step": 1165 + }, + { + "epoch": 0.65, + "learning_rate": 4.44759463307523e-05, + "loss": 0.9906, + "step": 1170 + }, + { + "epoch": 0.65, + "learning_rate": 4.443047142479266e-05, + "loss": 0.9876, + "step": 1175 + }, + { + "epoch": 0.65, + "learning_rate": 4.4384833557928553e-05, + "loss": 1.0495, + "step": 1180 + }, + { + "epoch": 0.66, + "learning_rate": 4.4339033112918966e-05, + "loss": 0.9869, + "step": 1185 + }, + { + "epoch": 0.66, + "learning_rate": 4.4293070473886456e-05, + "loss": 1.0299, + "step": 1190 + }, + { + "epoch": 0.66, + "learning_rate": 4.424694602631385e-05, + "loss": 1.0073, + "step": 1195 + }, + { + "epoch": 0.66, + "learning_rate": 4.420066015704105e-05, + "loss": 1.0023, + "step": 1200 + }, + { + "epoch": 0.67, + "learning_rate": 4.41542132542618e-05, + "loss": 0.9462, + "step": 1205 + }, + { + "epoch": 0.67, + "learning_rate": 4.410760570752037e-05, + "loss": 1.0116, + "step": 1210 + }, + { + "epoch": 0.67, + "learning_rate": 4.4060837907708375e-05, + "loss": 0.9652, + "step": 1215 + }, + { + "epoch": 0.67, + "learning_rate": 4.401391024706142e-05, + "loss": 1.0411, + "step": 1220 + }, + { + "epoch": 0.68, + "learning_rate": 4.396682311915586e-05, + "loss": 0.9691, + "step": 1225 + }, + { + "epoch": 0.68, + "learning_rate": 4.3919576918905495e-05, + "loss": 0.961, + "step": 1230 + }, + { + "epoch": 0.68, + "learning_rate": 4.387217204255819e-05, + "loss": 0.9602, + "step": 1235 + }, + { + "epoch": 0.69, + "learning_rate": 4.3824608887692666e-05, + "loss": 1.0197, + "step": 1240 + }, + { + "epoch": 0.69, + "learning_rate": 4.377688785321507e-05, + "loss": 0.9982, + "step": 1245 + }, + { + "epoch": 0.69, + "learning_rate": 4.372900933935569e-05, + "loss": 0.986, + "step": 1250 + }, + { + "epoch": 0.69, + "learning_rate": 4.368097374766556e-05, + "loss": 0.9744, + "step": 1255 + }, + { + "epoch": 0.7, + "learning_rate": 4.3632781481013105e-05, + "loss": 1.0078, + "step": 1260 + }, + { + "epoch": 0.7, + "learning_rate": 4.358443294358077e-05, + "loss": 0.9865, + "step": 1265 + }, + { + "epoch": 0.7, + "learning_rate": 4.35359285408616e-05, + "loss": 0.976, + "step": 1270 + }, + { + "epoch": 0.71, + "learning_rate": 4.348726867965591e-05, + "loss": 1.0141, + "step": 1275 + }, + { + "epoch": 0.71, + "learning_rate": 4.343845376806777e-05, + "loss": 0.952, + "step": 1280 + }, + { + "epoch": 0.71, + "learning_rate": 4.338948421550169e-05, + "loss": 1.0239, + "step": 1285 + }, + { + "epoch": 0.71, + "learning_rate": 4.334036043265909e-05, + "loss": 0.994, + "step": 1290 + }, + { + "epoch": 0.72, + "learning_rate": 4.329108283153492e-05, + "loss": 1.0505, + "step": 1295 + }, + { + "epoch": 0.72, + "learning_rate": 4.3241651825414195e-05, + "loss": 0.9919, + "step": 1300 + }, + { + "epoch": 0.72, + "learning_rate": 4.31920678288685e-05, + "loss": 1.0169, + "step": 1305 + }, + { + "epoch": 0.72, + "learning_rate": 4.3142331257752546e-05, + "loss": 0.9938, + "step": 1310 + }, + { + "epoch": 0.73, + "learning_rate": 4.309244252920064e-05, + "loss": 0.92, + "step": 1315 + }, + { + "epoch": 0.73, + "learning_rate": 4.304240206162326e-05, + "loss": 0.9537, + "step": 1320 + }, + { + "epoch": 0.73, + "learning_rate": 4.299221027470345e-05, + "loss": 0.996, + "step": 1325 + }, + { + "epoch": 0.74, + "learning_rate": 4.294186758939339e-05, + "loss": 0.9652, + "step": 1330 + }, + { + "epoch": 0.74, + "learning_rate": 4.2891374427910795e-05, + "loss": 1.0138, + "step": 1335 + }, + { + "epoch": 0.74, + "learning_rate": 4.284073121373544e-05, + "loss": 0.9267, + "step": 1340 + }, + { + "epoch": 0.74, + "learning_rate": 4.278993837160553e-05, + "loss": 0.9899, + "step": 1345 + }, + { + "epoch": 0.75, + "learning_rate": 4.273899632751422e-05, + "loss": 0.963, + "step": 1350 + }, + { + "epoch": 0.75, + "learning_rate": 4.2687905508705974e-05, + "loss": 1.0066, + "step": 1355 + }, + { + "epoch": 0.75, + "learning_rate": 4.263666634367303e-05, + "loss": 0.9939, + "step": 1360 + }, + { + "epoch": 0.75, + "learning_rate": 4.258527926215178e-05, + "loss": 0.9667, + "step": 1365 + }, + { + "epoch": 0.76, + "learning_rate": 4.253374469511917e-05, + "loss": 0.9999, + "step": 1370 + }, + { + "epoch": 0.76, + "learning_rate": 4.248206307478909e-05, + "loss": 0.9738, + "step": 1375 + }, + { + "epoch": 0.76, + "learning_rate": 4.243023483460875e-05, + "loss": 0.9829, + "step": 1380 + }, + { + "epoch": 0.77, + "learning_rate": 4.237826040925503e-05, + "loss": 1.0107, + "step": 1385 + }, + { + "epoch": 0.77, + "learning_rate": 4.232614023463088e-05, + "loss": 0.9992, + "step": 1390 + }, + { + "epoch": 0.77, + "learning_rate": 4.227387474786159e-05, + "loss": 0.9566, + "step": 1395 + }, + { + "epoch": 0.77, + "learning_rate": 4.222146438729119e-05, + "loss": 0.9722, + "step": 1400 + }, + { + "epoch": 0.78, + "learning_rate": 4.216890959247873e-05, + "loss": 0.988, + "step": 1405 + }, + { + "epoch": 0.78, + "learning_rate": 4.211621080419463e-05, + "loss": 1.0103, + "step": 1410 + }, + { + "epoch": 0.78, + "learning_rate": 4.206336846441695e-05, + "loss": 0.9805, + "step": 1415 + }, + { + "epoch": 0.79, + "learning_rate": 4.201038301632772e-05, + "loss": 1.0177, + "step": 1420 + }, + { + "epoch": 0.79, + "learning_rate": 4.195725490430917e-05, + "loss": 0.9938, + "step": 1425 + }, + { + "epoch": 0.79, + "learning_rate": 4.190398457394007e-05, + "loss": 1.0276, + "step": 1430 + }, + { + "epoch": 0.79, + "learning_rate": 4.1850572471991924e-05, + "loss": 0.9909, + "step": 1435 + }, + { + "epoch": 0.8, + "learning_rate": 4.1797019046425264e-05, + "loss": 0.9794, + "step": 1440 + }, + { + "epoch": 0.8, + "learning_rate": 4.1743324746385914e-05, + "loss": 1.0073, + "step": 1445 + }, + { + "epoch": 0.8, + "learning_rate": 4.1689490022201154e-05, + "loss": 1.0106, + "step": 1450 + }, + { + "epoch": 0.8, + "learning_rate": 4.163551532537601e-05, + "loss": 1.0241, + "step": 1455 + }, + { + "epoch": 0.81, + "learning_rate": 4.1581401108589425e-05, + "loss": 1.0178, + "step": 1460 + }, + { + "epoch": 0.81, + "learning_rate": 4.1527147825690495e-05, + "loss": 0.9426, + "step": 1465 + }, + { + "epoch": 0.81, + "learning_rate": 4.1472755931694626e-05, + "loss": 0.9642, + "step": 1470 + }, + { + "epoch": 0.82, + "learning_rate": 4.141822588277976e-05, + "loss": 0.9678, + "step": 1475 + }, + { + "epoch": 0.82, + "learning_rate": 4.136355813628251e-05, + "loss": 0.9654, + "step": 1480 + }, + { + "epoch": 0.82, + "learning_rate": 4.130875315069435e-05, + "loss": 0.9748, + "step": 1485 + }, + { + "epoch": 0.82, + "learning_rate": 4.125381138565775e-05, + "loss": 0.9321, + "step": 1490 + }, + { + "epoch": 0.83, + "learning_rate": 4.1198733301962346e-05, + "loss": 0.9885, + "step": 1495 + }, + { + "epoch": 0.83, + "learning_rate": 4.114351936154105e-05, + "loss": 0.9385, + "step": 1500 + }, + { + "epoch": 0.83, + "learning_rate": 4.108817002746619e-05, + "loss": 0.9362, + "step": 1505 + }, + { + "epoch": 0.83, + "learning_rate": 4.1032685763945625e-05, + "loss": 0.9764, + "step": 1510 + }, + { + "epoch": 0.84, + "learning_rate": 4.097706703631886e-05, + "loss": 0.961, + "step": 1515 + }, + { + "epoch": 0.84, + "learning_rate": 4.092131431105312e-05, + "loss": 0.9818, + "step": 1520 + }, + { + "epoch": 0.84, + "learning_rate": 4.086542805573945e-05, + "loss": 0.9855, + "step": 1525 + }, + { + "epoch": 0.85, + "learning_rate": 4.080940873908881e-05, + "loss": 0.9618, + "step": 1530 + }, + { + "epoch": 0.85, + "learning_rate": 4.07532568309281e-05, + "loss": 1.0102, + "step": 1535 + }, + { + "epoch": 0.85, + "learning_rate": 4.069697280219628e-05, + "loss": 0.9773, + "step": 1540 + }, + { + "epoch": 0.85, + "learning_rate": 4.0640557124940376e-05, + "loss": 0.9687, + "step": 1545 + }, + { + "epoch": 0.86, + "learning_rate": 4.058401027231152e-05, + "loss": 0.9818, + "step": 1550 + }, + { + "epoch": 0.86, + "learning_rate": 4.052733271856103e-05, + "loss": 0.9911, + "step": 1555 + }, + { + "epoch": 0.86, + "learning_rate": 4.0470524939036355e-05, + "loss": 0.9694, + "step": 1560 + }, + { + "epoch": 0.87, + "learning_rate": 4.0413587410177155e-05, + "loss": 0.9374, + "step": 1565 + }, + { + "epoch": 0.87, + "learning_rate": 4.035652060951128e-05, + "loss": 0.956, + "step": 1570 + }, + { + "epoch": 0.87, + "learning_rate": 4.0299325015650774e-05, + "loss": 0.9491, + "step": 1575 + }, + { + "epoch": 0.87, + "learning_rate": 4.024200110828783e-05, + "loss": 0.9569, + "step": 1580 + }, + { + "epoch": 0.88, + "learning_rate": 4.018454936819082e-05, + "loss": 0.9627, + "step": 1585 + }, + { + "epoch": 0.88, + "learning_rate": 4.012697027720018e-05, + "loss": 0.9922, + "step": 1590 + }, + { + "epoch": 0.88, + "learning_rate": 4.0069264318224506e-05, + "loss": 0.9703, + "step": 1595 + }, + { + "epoch": 0.88, + "learning_rate": 4.0011431975236337e-05, + "loss": 0.9436, + "step": 1600 + }, + { + "epoch": 0.89, + "learning_rate": 3.995347373326822e-05, + "loss": 0.961, + "step": 1605 + }, + { + "epoch": 0.89, + "learning_rate": 3.989539007840861e-05, + "loss": 0.9247, + "step": 1610 + }, + { + "epoch": 0.89, + "learning_rate": 3.983718149779775e-05, + "loss": 0.9537, + "step": 1615 + }, + { + "epoch": 0.9, + "learning_rate": 3.9778848479623656e-05, + "loss": 0.9415, + "step": 1620 + }, + { + "epoch": 0.9, + "learning_rate": 3.972039151311795e-05, + "loss": 0.9954, + "step": 1625 + }, + { + "epoch": 0.9, + "learning_rate": 3.966181108855183e-05, + "loss": 0.9451, + "step": 1630 + }, + { + "epoch": 0.9, + "learning_rate": 3.960310769723189e-05, + "loss": 0.9977, + "step": 1635 + }, + { + "epoch": 0.91, + "learning_rate": 3.9544281831496034e-05, + "loss": 0.9806, + "step": 1640 + }, + { + "epoch": 0.91, + "learning_rate": 3.9485333984709374e-05, + "loss": 0.9851, + "step": 1645 + }, + { + "epoch": 0.91, + "learning_rate": 3.942626465126001e-05, + "loss": 1.0089, + "step": 1650 + }, + { + "epoch": 0.92, + "learning_rate": 3.9367074326555e-05, + "loss": 0.9562, + "step": 1655 + }, + { + "epoch": 0.92, + "learning_rate": 3.930776350701609e-05, + "loss": 0.9892, + "step": 1660 + }, + { + "epoch": 0.92, + "learning_rate": 3.92483326900756e-05, + "loss": 0.9875, + "step": 1665 + }, + { + "epoch": 0.92, + "learning_rate": 3.91887823741723e-05, + "loss": 1.0012, + "step": 1670 + }, + { + "epoch": 0.93, + "learning_rate": 3.9129113058747136e-05, + "loss": 0.97, + "step": 1675 + }, + { + "epoch": 0.93, + "learning_rate": 3.9069325244239095e-05, + "loss": 0.9793, + "step": 1680 + }, + { + "epoch": 0.93, + "learning_rate": 3.900941943208103e-05, + "loss": 0.9864, + "step": 1685 + }, + { + "epoch": 0.93, + "learning_rate": 3.894939612469539e-05, + "loss": 0.9596, + "step": 1690 + }, + { + "epoch": 0.94, + "learning_rate": 3.888925582549006e-05, + "loss": 0.9679, + "step": 1695 + }, + { + "epoch": 0.94, + "learning_rate": 3.882899903885412e-05, + "loss": 0.9395, + "step": 1700 + }, + { + "epoch": 0.94, + "learning_rate": 3.876862627015361e-05, + "loss": 0.9774, + "step": 1705 + }, + { + "epoch": 0.95, + "learning_rate": 3.87081380257273e-05, + "loss": 0.9686, + "step": 1710 + }, + { + "epoch": 0.95, + "learning_rate": 3.864753481288244e-05, + "loss": 0.953, + "step": 1715 + }, + { + "epoch": 0.95, + "learning_rate": 3.8586817139890515e-05, + "loss": 0.9719, + "step": 1720 + }, + { + "epoch": 0.95, + "learning_rate": 3.852598551598294e-05, + "loss": 0.9748, + "step": 1725 + }, + { + "epoch": 0.96, + "learning_rate": 3.8465040451346874e-05, + "loss": 0.9741, + "step": 1730 + }, + { + "epoch": 0.96, + "learning_rate": 3.8403982457120836e-05, + "loss": 0.9747, + "step": 1735 + }, + { + "epoch": 0.96, + "learning_rate": 3.834281204539051e-05, + "loss": 0.9791, + "step": 1740 + }, + { + "epoch": 0.96, + "learning_rate": 3.828152972918438e-05, + "loss": 0.9704, + "step": 1745 + }, + { + "epoch": 0.97, + "learning_rate": 3.82201360224695e-05, + "loss": 0.9556, + "step": 1750 + }, + { + "epoch": 0.97, + "learning_rate": 3.815863144014711e-05, + "loss": 0.9552, + "step": 1755 + }, + { + "epoch": 0.97, + "learning_rate": 3.809701649804834e-05, + "loss": 0.9594, + "step": 1760 + }, + { + "epoch": 0.98, + "learning_rate": 3.8035291712929926e-05, + "loss": 0.959, + "step": 1765 + }, + { + "epoch": 0.98, + "learning_rate": 3.797345760246982e-05, + "loss": 1.0254, + "step": 1770 + }, + { + "epoch": 0.98, + "learning_rate": 3.791151468526289e-05, + "loss": 0.9925, + "step": 1775 + }, + { + "epoch": 0.98, + "learning_rate": 3.784946348081654e-05, + "loss": 0.9516, + "step": 1780 + }, + { + "epoch": 0.99, + "learning_rate": 3.7787304509546365e-05, + "loss": 0.954, + "step": 1785 + }, + { + "epoch": 0.99, + "learning_rate": 3.7725038292771774e-05, + "loss": 0.9465, + "step": 1790 + }, + { + "epoch": 0.99, + "learning_rate": 3.766266535271167e-05, + "loss": 0.9792, + "step": 1795 + }, + { + "epoch": 1.0, + "learning_rate": 3.760018621248e-05, + "loss": 0.964, + "step": 1800 + }, + { + "epoch": 1.0, + "learning_rate": 3.75376013960814e-05, + "loss": 0.9419, + "step": 1805 + }, + { + "epoch": 1.0, + "learning_rate": 3.747491142840681e-05, + "loss": 0.9818, + "step": 1810 + }, + { + "epoch": 1.0, + "learning_rate": 3.741211683522904e-05, + "loss": 0.9153, + "step": 1815 + }, + { + "epoch": 1.01, + "learning_rate": 3.734921814319841e-05, + "loss": 0.955, + "step": 1820 + }, + { + "epoch": 1.01, + "learning_rate": 3.728621587983828e-05, + "loss": 0.9467, + "step": 1825 + }, + { + "epoch": 1.01, + "learning_rate": 3.722311057354067e-05, + "loss": 0.9816, + "step": 1830 + }, + { + "epoch": 1.01, + "learning_rate": 3.715990275356178e-05, + "loss": 0.9727, + "step": 1835 + }, + { + "epoch": 1.02, + "learning_rate": 3.7096592950017617e-05, + "loss": 0.9822, + "step": 1840 + }, + { + "epoch": 1.02, + "learning_rate": 3.703318169387947e-05, + "loss": 0.942, + "step": 1845 + }, + { + "epoch": 1.02, + "learning_rate": 3.696966951696952e-05, + "loss": 0.9306, + "step": 1850 + }, + { + "epoch": 1.03, + "learning_rate": 3.690605695195637e-05, + "loss": 0.9654, + "step": 1855 + }, + { + "epoch": 1.03, + "learning_rate": 3.684234453235054e-05, + "loss": 0.9592, + "step": 1860 + }, + { + "epoch": 1.03, + "learning_rate": 3.677853279250003e-05, + "loss": 0.988, + "step": 1865 + }, + { + "epoch": 1.03, + "learning_rate": 3.671462226758583e-05, + "loss": 0.9462, + "step": 1870 + }, + { + "epoch": 1.04, + "learning_rate": 3.665061349361742e-05, + "loss": 0.9685, + "step": 1875 + }, + { + "epoch": 1.04, + "learning_rate": 3.658650700742828e-05, + "loss": 0.9772, + "step": 1880 + }, + { + "epoch": 1.04, + "learning_rate": 3.6522303346671404e-05, + "loss": 0.9482, + "step": 1885 + }, + { + "epoch": 1.05, + "learning_rate": 3.645800304981477e-05, + "loss": 1.0069, + "step": 1890 + }, + { + "epoch": 1.05, + "learning_rate": 3.639360665613683e-05, + "loss": 0.9003, + "step": 1895 + }, + { + "epoch": 1.05, + "learning_rate": 3.632911470572197e-05, + "loss": 0.9279, + "step": 1900 + }, + { + "epoch": 1.05, + "learning_rate": 3.626452773945603e-05, + "loss": 0.9237, + "step": 1905 + }, + { + "epoch": 1.06, + "learning_rate": 3.619984629902172e-05, + "loss": 1.0086, + "step": 1910 + }, + { + "epoch": 1.06, + "learning_rate": 3.613507092689409e-05, + "loss": 0.9625, + "step": 1915 + }, + { + "epoch": 1.06, + "learning_rate": 3.607020216633599e-05, + "loss": 0.9297, + "step": 1920 + }, + { + "epoch": 1.06, + "learning_rate": 3.60052405613935e-05, + "loss": 0.9718, + "step": 1925 + }, + { + "epoch": 1.07, + "learning_rate": 3.594018665689139e-05, + "loss": 0.9512, + "step": 1930 + }, + { + "epoch": 1.07, + "learning_rate": 3.5875040998428513e-05, + "loss": 0.9923, + "step": 1935 + }, + { + "epoch": 1.07, + "learning_rate": 3.5809804132373253e-05, + "loss": 0.9518, + "step": 1940 + }, + { + "epoch": 1.08, + "learning_rate": 3.574447660585897e-05, + "loss": 0.9324, + "step": 1945 + }, + { + "epoch": 1.08, + "learning_rate": 3.5679058966779344e-05, + "loss": 0.9724, + "step": 1950 + }, + { + "epoch": 1.08, + "learning_rate": 3.561355176378384e-05, + "loss": 0.978, + "step": 1955 + }, + { + "epoch": 1.08, + "learning_rate": 3.554795554627307e-05, + "loss": 0.9893, + "step": 1960 + }, + { + "epoch": 1.09, + "learning_rate": 3.548227086439422e-05, + "loss": 0.967, + "step": 1965 + }, + { + "epoch": 1.09, + "learning_rate": 3.541649826903639e-05, + "loss": 0.9529, + "step": 1970 + }, + { + "epoch": 1.09, + "learning_rate": 3.535063831182602e-05, + "loss": 0.9506, + "step": 1975 + }, + { + "epoch": 1.09, + "learning_rate": 3.528469154512224e-05, + "loss": 0.9525, + "step": 1980 + }, + { + "epoch": 1.1, + "learning_rate": 3.521865852201223e-05, + "loss": 0.9258, + "step": 1985 + }, + { + "epoch": 1.1, + "learning_rate": 3.5152539796306596e-05, + "loss": 0.9417, + "step": 1990 + }, + { + "epoch": 1.1, + "learning_rate": 3.508633592253472e-05, + "loss": 0.9465, + "step": 1995 + }, + { + "epoch": 1.11, + "learning_rate": 3.502004745594011e-05, + "loss": 0.9494, + "step": 2000 + }, + { + "epoch": 1.11, + "learning_rate": 3.4953674952475755e-05, + "loss": 0.9709, + "step": 2005 + }, + { + "epoch": 1.11, + "learning_rate": 3.488721896879943e-05, + "loss": 0.9581, + "step": 2010 + }, + { + "epoch": 1.11, + "learning_rate": 3.4820680062269074e-05, + "loss": 0.974, + "step": 2015 + }, + { + "epoch": 1.12, + "learning_rate": 3.4754058790938046e-05, + "loss": 0.9768, + "step": 2020 + }, + { + "epoch": 1.12, + "learning_rate": 3.468735571355055e-05, + "loss": 0.982, + "step": 2025 + }, + { + "epoch": 1.12, + "learning_rate": 3.4620571389536825e-05, + "loss": 0.984, + "step": 2030 + }, + { + "epoch": 1.13, + "learning_rate": 3.455370637900856e-05, + "loss": 0.9604, + "step": 2035 + }, + { + "epoch": 1.13, + "learning_rate": 3.448676124275414e-05, + "loss": 0.897, + "step": 2040 + }, + { + "epoch": 1.13, + "learning_rate": 3.4419736542233925e-05, + "loss": 0.9968, + "step": 2045 + }, + { + "epoch": 1.13, + "learning_rate": 3.4352632839575616e-05, + "loss": 0.9479, + "step": 2050 + }, + { + "epoch": 1.14, + "learning_rate": 3.428545069756946e-05, + "loss": 0.9724, + "step": 2055 + }, + { + "epoch": 1.14, + "learning_rate": 3.42181906796636e-05, + "loss": 0.9493, + "step": 2060 + }, + { + "epoch": 1.14, + "learning_rate": 3.415085334995927e-05, + "loss": 0.9348, + "step": 2065 + }, + { + "epoch": 1.14, + "learning_rate": 3.408343927320613e-05, + "loss": 0.9702, + "step": 2070 + }, + { + "epoch": 1.15, + "learning_rate": 3.401594901479753e-05, + "loss": 0.9089, + "step": 2075 + }, + { + "epoch": 1.15, + "learning_rate": 3.394838314076572e-05, + "loss": 0.9606, + "step": 2080 + }, + { + "epoch": 1.15, + "learning_rate": 3.3880742217777115e-05, + "loss": 0.9743, + "step": 2085 + }, + { + "epoch": 1.16, + "learning_rate": 3.381302681312759e-05, + "loss": 0.9469, + "step": 2090 + }, + { + "epoch": 1.16, + "learning_rate": 3.374523749473767e-05, + "loss": 0.949, + "step": 2095 + }, + { + "epoch": 1.16, + "learning_rate": 3.367737483114779e-05, + "loss": 0.9567, + "step": 2100 + }, + { + "epoch": 1.16, + "learning_rate": 3.360943939151351e-05, + "loss": 0.9718, + "step": 2105 + }, + { + "epoch": 1.17, + "learning_rate": 3.354143174560078e-05, + "loss": 0.9626, + "step": 2110 + }, + { + "epoch": 1.17, + "learning_rate": 3.3473352463781105e-05, + "loss": 0.9346, + "step": 2115 + }, + { + "epoch": 1.17, + "learning_rate": 3.340520211702681e-05, + "loss": 0.9208, + "step": 2120 + }, + { + "epoch": 1.18, + "learning_rate": 3.333698127690623e-05, + "loss": 0.9856, + "step": 2125 + }, + { + "epoch": 1.18, + "learning_rate": 3.326869051557891e-05, + "loss": 0.9049, + "step": 2130 + }, + { + "epoch": 1.18, + "learning_rate": 3.320033040579082e-05, + "loss": 0.9222, + "step": 2135 + }, + { + "epoch": 1.18, + "learning_rate": 3.3131901520869565e-05, + "loss": 0.9648, + "step": 2140 + }, + { + "epoch": 1.19, + "learning_rate": 3.306340443471951e-05, + "loss": 0.9538, + "step": 2145 + }, + { + "epoch": 1.19, + "learning_rate": 3.299483972181708e-05, + "loss": 0.9314, + "step": 2150 + }, + { + "epoch": 1.19, + "learning_rate": 3.292620795720583e-05, + "loss": 0.9576, + "step": 2155 + }, + { + "epoch": 1.19, + "learning_rate": 3.285750971649167e-05, + "loss": 0.9499, + "step": 2160 + }, + { + "epoch": 1.2, + "learning_rate": 3.278874557583807e-05, + "loss": 0.9568, + "step": 2165 + }, + { + "epoch": 1.2, + "learning_rate": 3.271991611196117e-05, + "loss": 0.9642, + "step": 2170 + }, + { + "epoch": 1.2, + "learning_rate": 3.265102190212497e-05, + "loss": 0.9526, + "step": 2175 + }, + { + "epoch": 1.21, + "learning_rate": 3.258206352413648e-05, + "loss": 0.933, + "step": 2180 + }, + { + "epoch": 1.21, + "learning_rate": 3.2513041556340887e-05, + "loss": 0.9683, + "step": 2185 + }, + { + "epoch": 1.21, + "learning_rate": 3.244395657761671e-05, + "loss": 0.9155, + "step": 2190 + }, + { + "epoch": 1.21, + "learning_rate": 3.2374809167370924e-05, + "loss": 0.9262, + "step": 2195 + }, + { + "epoch": 1.22, + "learning_rate": 3.230559990553409e-05, + "loss": 0.9778, + "step": 2200 + }, + { + "epoch": 1.22, + "learning_rate": 3.2236329372555544e-05, + "loss": 0.9577, + "step": 2205 + }, + { + "epoch": 1.22, + "learning_rate": 3.2166998149398465e-05, + "loss": 0.9286, + "step": 2210 + }, + { + "epoch": 1.22, + "learning_rate": 3.209760681753505e-05, + "loss": 0.9634, + "step": 2215 + }, + { + "epoch": 1.23, + "learning_rate": 3.2028155958941615e-05, + "loss": 0.9467, + "step": 2220 + }, + { + "epoch": 1.23, + "learning_rate": 3.195864615609373e-05, + "loss": 0.9543, + "step": 2225 + }, + { + "epoch": 1.23, + "learning_rate": 3.1889077991961304e-05, + "loss": 0.9914, + "step": 2230 + }, + { + "epoch": 1.24, + "learning_rate": 3.181945205000373e-05, + "loss": 0.9309, + "step": 2235 + }, + { + "epoch": 1.24, + "learning_rate": 3.1749768914164955e-05, + "loss": 0.9299, + "step": 2240 + }, + { + "epoch": 1.24, + "learning_rate": 3.168002916886864e-05, + "loss": 0.9462, + "step": 2245 + }, + { + "epoch": 1.24, + "learning_rate": 3.1610233399013194e-05, + "loss": 0.948, + "step": 2250 + }, + { + "epoch": 1.25, + "learning_rate": 3.15403821899669e-05, + "loss": 0.9335, + "step": 2255 + }, + { + "epoch": 1.25, + "learning_rate": 3.147047612756302e-05, + "loss": 0.952, + "step": 2260 + }, + { + "epoch": 1.25, + "learning_rate": 3.140051579809484e-05, + "loss": 0.9532, + "step": 2265 + }, + { + "epoch": 1.26, + "learning_rate": 3.133050178831079e-05, + "loss": 0.9853, + "step": 2270 + }, + { + "epoch": 1.26, + "learning_rate": 3.12604346854095e-05, + "loss": 0.8925, + "step": 2275 + }, + { + "epoch": 1.26, + "learning_rate": 3.119031507703491e-05, + "loss": 0.942, + "step": 2280 + }, + { + "epoch": 1.26, + "learning_rate": 3.112014355127129e-05, + "loss": 0.9132, + "step": 2285 + }, + { + "epoch": 1.27, + "learning_rate": 3.104992069663835e-05, + "loss": 0.9335, + "step": 2290 + }, + { + "epoch": 1.27, + "learning_rate": 3.0979647102086273e-05, + "loss": 0.9403, + "step": 2295 + }, + { + "epoch": 1.27, + "learning_rate": 3.090932335699081e-05, + "loss": 0.9246, + "step": 2300 + }, + { + "epoch": 1.27, + "learning_rate": 3.083895005114831e-05, + "loss": 0.912, + "step": 2305 + }, + { + "epoch": 1.28, + "learning_rate": 3.076852777477079e-05, + "loss": 0.9334, + "step": 2310 + }, + { + "epoch": 1.28, + "learning_rate": 3.069805711848096e-05, + "loss": 0.933, + "step": 2315 + }, + { + "epoch": 1.28, + "learning_rate": 3.062753867330729e-05, + "loss": 0.9348, + "step": 2320 + }, + { + "epoch": 1.29, + "learning_rate": 3.055697303067905e-05, + "loss": 0.9997, + "step": 2325 + }, + { + "epoch": 1.29, + "learning_rate": 3.048636078242137e-05, + "loss": 0.9196, + "step": 2330 + }, + { + "epoch": 1.29, + "learning_rate": 3.0415702520750235e-05, + "loss": 0.9735, + "step": 2335 + }, + { + "epoch": 1.29, + "learning_rate": 3.0344998838267525e-05, + "loss": 0.9395, + "step": 2340 + }, + { + "epoch": 1.3, + "learning_rate": 3.0274250327956093e-05, + "loss": 0.9455, + "step": 2345 + }, + { + "epoch": 1.3, + "learning_rate": 3.020345758317474e-05, + "loss": 0.968, + "step": 2350 + }, + { + "epoch": 1.3, + "learning_rate": 3.0132621197653245e-05, + "loss": 0.9403, + "step": 2355 + }, + { + "epoch": 1.3, + "learning_rate": 3.0061741765487418e-05, + "loss": 0.9267, + "step": 2360 + }, + { + "epoch": 1.31, + "learning_rate": 2.9990819881134073e-05, + "loss": 0.9734, + "step": 2365 + }, + { + "epoch": 1.31, + "learning_rate": 2.9919856139406093e-05, + "loss": 0.949, + "step": 2370 + }, + { + "epoch": 1.31, + "learning_rate": 2.9848851135467386e-05, + "loss": 0.9464, + "step": 2375 + }, + { + "epoch": 1.32, + "learning_rate": 2.977780546482794e-05, + "loss": 0.9709, + "step": 2380 + }, + { + "epoch": 1.32, + "learning_rate": 2.9706719723338795e-05, + "loss": 0.9202, + "step": 2385 + }, + { + "epoch": 1.32, + "learning_rate": 2.9635594507187074e-05, + "loss": 0.9625, + "step": 2390 + }, + { + "epoch": 1.32, + "learning_rate": 2.956443041289096e-05, + "loss": 0.9307, + "step": 2395 + }, + { + "epoch": 1.33, + "learning_rate": 2.9493228037294702e-05, + "loss": 0.907, + "step": 2400 + }, + { + "epoch": 1.33, + "learning_rate": 2.9421987977563613e-05, + "loss": 0.9487, + "step": 2405 + }, + { + "epoch": 1.33, + "learning_rate": 2.935071083117907e-05, + "loss": 0.9259, + "step": 2410 + }, + { + "epoch": 1.34, + "learning_rate": 2.9279397195933457e-05, + "loss": 0.9357, + "step": 2415 + }, + { + "epoch": 1.34, + "learning_rate": 2.920804766992521e-05, + "loss": 0.9149, + "step": 2420 + }, + { + "epoch": 1.34, + "learning_rate": 2.9136662851553787e-05, + "loss": 0.9664, + "step": 2425 + }, + { + "epoch": 1.34, + "learning_rate": 2.906524333951461e-05, + "loss": 0.973, + "step": 2430 + }, + { + "epoch": 1.35, + "learning_rate": 2.899378973279409e-05, + "loss": 0.9797, + "step": 2435 + }, + { + "epoch": 1.35, + "learning_rate": 2.892230263066459e-05, + "loss": 0.9604, + "step": 2440 + }, + { + "epoch": 1.35, + "learning_rate": 2.885078263267938e-05, + "loss": 0.9147, + "step": 2445 + }, + { + "epoch": 1.35, + "learning_rate": 2.8779230338667634e-05, + "loss": 0.9827, + "step": 2450 + }, + { + "epoch": 1.36, + "learning_rate": 2.870764634872939e-05, + "loss": 0.9362, + "step": 2455 + }, + { + "epoch": 1.36, + "learning_rate": 2.86360312632305e-05, + "loss": 0.9373, + "step": 2460 + }, + { + "epoch": 1.36, + "learning_rate": 2.8564385682797622e-05, + "loss": 0.955, + "step": 2465 + }, + { + "epoch": 1.37, + "learning_rate": 2.8492710208313177e-05, + "loss": 0.9108, + "step": 2470 + }, + { + "epoch": 1.37, + "learning_rate": 2.8421005440910303e-05, + "loss": 0.9259, + "step": 2475 + }, + { + "epoch": 1.37, + "learning_rate": 2.8349271981967797e-05, + "loss": 0.9333, + "step": 2480 + }, + { + "epoch": 1.37, + "learning_rate": 2.8277510433105102e-05, + "loss": 0.9437, + "step": 2485 + }, + { + "epoch": 1.38, + "learning_rate": 2.820572139617725e-05, + "loss": 0.9189, + "step": 2490 + }, + { + "epoch": 1.38, + "learning_rate": 2.8133905473269802e-05, + "loss": 0.9522, + "step": 2495 + }, + { + "epoch": 1.38, + "learning_rate": 2.8062063266693818e-05, + "loss": 0.9352, + "step": 2500 + }, + { + "epoch": 1.39, + "learning_rate": 2.7990195378980784e-05, + "loss": 0.9232, + "step": 2505 + }, + { + "epoch": 1.39, + "learning_rate": 2.7918302412877583e-05, + "loss": 0.8807, + "step": 2510 + }, + { + "epoch": 1.39, + "learning_rate": 2.7846384971341427e-05, + "loss": 0.9231, + "step": 2515 + }, + { + "epoch": 1.39, + "learning_rate": 2.7774443657534788e-05, + "loss": 0.9253, + "step": 2520 + }, + { + "epoch": 1.4, + "learning_rate": 2.770247907482036e-05, + "loss": 0.9646, + "step": 2525 + }, + { + "epoch": 1.4, + "learning_rate": 2.763049182675599e-05, + "loss": 0.9575, + "step": 2530 + }, + { + "epoch": 1.4, + "learning_rate": 2.7558482517089617e-05, + "loss": 0.9234, + "step": 2535 + }, + { + "epoch": 1.4, + "learning_rate": 2.748645174975421e-05, + "loss": 0.9215, + "step": 2540 + }, + { + "epoch": 1.41, + "learning_rate": 2.74144001288627e-05, + "loss": 0.959, + "step": 2545 + }, + { + "epoch": 1.41, + "learning_rate": 2.7342328258702894e-05, + "loss": 0.9509, + "step": 2550 + }, + { + "epoch": 1.41, + "learning_rate": 2.727023674373246e-05, + "loss": 0.8989, + "step": 2555 + }, + { + "epoch": 1.42, + "learning_rate": 2.7198126188573807e-05, + "loss": 0.9653, + "step": 2560 + }, + { + "epoch": 1.42, + "learning_rate": 2.7125997198009028e-05, + "loss": 0.9046, + "step": 2565 + }, + { + "epoch": 1.42, + "learning_rate": 2.7053850376974848e-05, + "loss": 0.9318, + "step": 2570 + }, + { + "epoch": 1.42, + "learning_rate": 2.6981686330557516e-05, + "loss": 0.9292, + "step": 2575 + }, + { + "epoch": 1.43, + "learning_rate": 2.6909505663987756e-05, + "loss": 0.99, + "step": 2580 + }, + { + "epoch": 1.43, + "learning_rate": 2.6837308982635678e-05, + "loss": 0.9737, + "step": 2585 + }, + { + "epoch": 1.43, + "learning_rate": 2.6765096892005726e-05, + "loss": 0.9649, + "step": 2590 + }, + { + "epoch": 1.43, + "learning_rate": 2.6692869997731545e-05, + "loss": 0.9687, + "step": 2595 + }, + { + "epoch": 1.44, + "learning_rate": 2.6620628905570964e-05, + "loss": 0.9708, + "step": 2600 + }, + { + "epoch": 1.44, + "learning_rate": 2.6548374221400884e-05, + "loss": 0.9498, + "step": 2605 + }, + { + "epoch": 1.44, + "learning_rate": 2.6476106551212188e-05, + "loss": 0.9512, + "step": 2610 + }, + { + "epoch": 1.45, + "learning_rate": 2.6403826501104682e-05, + "loss": 0.964, + "step": 2615 + }, + { + "epoch": 1.45, + "learning_rate": 2.6331534677281998e-05, + "loss": 0.9321, + "step": 2620 + }, + { + "epoch": 1.45, + "learning_rate": 2.6259231686046508e-05, + "loss": 0.9032, + "step": 2625 + }, + { + "epoch": 1.45, + "learning_rate": 2.6186918133794252e-05, + "loss": 0.9543, + "step": 2630 + }, + { + "epoch": 1.46, + "learning_rate": 2.6114594627009847e-05, + "loss": 0.9355, + "step": 2635 + }, + { + "epoch": 1.46, + "learning_rate": 2.604226177226137e-05, + "loss": 0.9111, + "step": 2640 + }, + { + "epoch": 1.46, + "learning_rate": 2.596992017619534e-05, + "loss": 0.9896, + "step": 2645 + }, + { + "epoch": 1.47, + "learning_rate": 2.589757044553155e-05, + "loss": 0.9715, + "step": 2650 + }, + { + "epoch": 1.47, + "learning_rate": 2.5825213187058045e-05, + "loss": 0.8911, + "step": 2655 + }, + { + "epoch": 1.47, + "learning_rate": 2.5752849007625986e-05, + "loss": 0.9446, + "step": 2660 + }, + { + "epoch": 1.47, + "learning_rate": 2.568047851414459e-05, + "loss": 0.9453, + "step": 2665 + }, + { + "epoch": 1.48, + "learning_rate": 2.5608102313576027e-05, + "loss": 0.9244, + "step": 2670 + }, + { + "epoch": 1.48, + "learning_rate": 2.553572101293033e-05, + "loss": 0.9478, + "step": 2675 + }, + { + "epoch": 1.48, + "learning_rate": 2.546333521926031e-05, + "loss": 0.9708, + "step": 2680 + }, + { + "epoch": 1.48, + "learning_rate": 2.5390945539656445e-05, + "loss": 0.9266, + "step": 2685 + }, + { + "epoch": 1.49, + "learning_rate": 2.5318552581241822e-05, + "loss": 0.9199, + "step": 2690 + }, + { + "epoch": 1.49, + "learning_rate": 2.524615695116702e-05, + "loss": 0.9666, + "step": 2695 + }, + { + "epoch": 1.49, + "learning_rate": 2.5173759256605027e-05, + "loss": 0.9247, + "step": 2700 + }, + { + "epoch": 1.5, + "learning_rate": 2.510136010474614e-05, + "loss": 0.9534, + "step": 2705 + }, + { + "epoch": 1.5, + "learning_rate": 2.5028960102792887e-05, + "loss": 0.9502, + "step": 2710 + }, + { + "epoch": 1.5, + "learning_rate": 2.4971039897207112e-05, + "loss": 0.9331, + "step": 2715 + }, + { + "epoch": 1.5, + "learning_rate": 2.4898639895253865e-05, + "loss": 0.9806, + "step": 2720 + }, + { + "epoch": 1.51, + "learning_rate": 2.4826240743394982e-05, + "loss": 0.9525, + "step": 2725 + }, + { + "epoch": 1.51, + "learning_rate": 2.4753843048832985e-05, + "loss": 0.937, + "step": 2730 + }, + { + "epoch": 1.51, + "learning_rate": 2.4681447418758187e-05, + "loss": 0.935, + "step": 2735 + }, + { + "epoch": 1.52, + "learning_rate": 2.460905446034356e-05, + "loss": 0.9662, + "step": 2740 + }, + { + "epoch": 1.52, + "learning_rate": 2.45366647807397e-05, + "loss": 0.9249, + "step": 2745 + }, + { + "epoch": 1.52, + "learning_rate": 2.446427898706967e-05, + "loss": 0.9673, + "step": 2750 + }, + { + "epoch": 1.52, + "learning_rate": 2.439189768642398e-05, + "loss": 0.9316, + "step": 2755 + }, + { + "epoch": 1.53, + "learning_rate": 2.431952148585541e-05, + "loss": 0.9297, + "step": 2760 + }, + { + "epoch": 1.53, + "learning_rate": 2.424715099237402e-05, + "loss": 0.9278, + "step": 2765 + }, + { + "epoch": 1.53, + "learning_rate": 2.4174786812941968e-05, + "loss": 0.8954, + "step": 2770 + }, + { + "epoch": 1.53, + "learning_rate": 2.4102429554468456e-05, + "loss": 0.9586, + "step": 2775 + }, + { + "epoch": 1.54, + "learning_rate": 2.4030079823804673e-05, + "loss": 0.9119, + "step": 2780 + }, + { + "epoch": 1.54, + "learning_rate": 2.395773822773863e-05, + "loss": 0.8949, + "step": 2785 + }, + { + "epoch": 1.54, + "learning_rate": 2.3885405372990166e-05, + "loss": 0.9506, + "step": 2790 + }, + { + "epoch": 1.55, + "learning_rate": 2.3813081866205754e-05, + "loss": 0.9087, + "step": 2795 + }, + { + "epoch": 1.55, + "learning_rate": 2.3740768313953494e-05, + "loss": 0.923, + "step": 2800 + }, + { + "epoch": 1.55, + "learning_rate": 2.3668465322718004e-05, + "loss": 1.0212, + "step": 2805 + }, + { + "epoch": 1.55, + "learning_rate": 2.359617349889532e-05, + "loss": 0.9182, + "step": 2810 + }, + { + "epoch": 1.56, + "learning_rate": 2.3523893448787818e-05, + "loss": 0.9316, + "step": 2815 + }, + { + "epoch": 1.56, + "learning_rate": 2.3451625778599122e-05, + "loss": 0.94, + "step": 2820 + }, + { + "epoch": 1.56, + "learning_rate": 2.3379371094429038e-05, + "loss": 0.9181, + "step": 2825 + }, + { + "epoch": 1.56, + "learning_rate": 2.3307130002268457e-05, + "loss": 0.937, + "step": 2830 + }, + { + "epoch": 1.57, + "learning_rate": 2.3234903107994287e-05, + "loss": 0.9026, + "step": 2835 + }, + { + "epoch": 1.57, + "learning_rate": 2.3162691017364317e-05, + "loss": 0.954, + "step": 2840 + }, + { + "epoch": 1.57, + "learning_rate": 2.3090494336012253e-05, + "loss": 0.9661, + "step": 2845 + }, + { + "epoch": 1.58, + "learning_rate": 2.3018313669442483e-05, + "loss": 0.9127, + "step": 2850 + }, + { + "epoch": 1.58, + "learning_rate": 2.2946149623025158e-05, + "loss": 0.9317, + "step": 2855 + }, + { + "epoch": 1.58, + "learning_rate": 2.2874002801990978e-05, + "loss": 0.9856, + "step": 2860 + }, + { + "epoch": 1.58, + "learning_rate": 2.28018738114262e-05, + "loss": 1.0021, + "step": 2865 + }, + { + "epoch": 1.59, + "learning_rate": 2.272976325626755e-05, + "loss": 0.9655, + "step": 2870 + }, + { + "epoch": 1.59, + "learning_rate": 2.265767174129711e-05, + "loss": 0.9619, + "step": 2875 + }, + { + "epoch": 1.59, + "learning_rate": 2.2585599871137313e-05, + "loss": 0.9383, + "step": 2880 + }, + { + "epoch": 1.6, + "learning_rate": 2.251354825024579e-05, + "loss": 0.9332, + "step": 2885 + }, + { + "epoch": 1.6, + "learning_rate": 2.244151748291039e-05, + "loss": 0.9544, + "step": 2890 + }, + { + "epoch": 1.6, + "learning_rate": 2.236950817324401e-05, + "loss": 0.9343, + "step": 2895 + }, + { + "epoch": 1.6, + "learning_rate": 2.2297520925179647e-05, + "loss": 0.9189, + "step": 2900 + }, + { + "epoch": 1.61, + "learning_rate": 2.222555634246521e-05, + "loss": 0.9341, + "step": 2905 + }, + { + "epoch": 1.61, + "learning_rate": 2.215361502865858e-05, + "loss": 0.9567, + "step": 2910 + }, + { + "epoch": 1.61, + "learning_rate": 2.2081697587122423e-05, + "loss": 0.9047, + "step": 2915 + }, + { + "epoch": 1.61, + "learning_rate": 2.200980462101922e-05, + "loss": 0.9126, + "step": 2920 + }, + { + "epoch": 1.62, + "learning_rate": 2.1937936733306195e-05, + "loss": 0.9523, + "step": 2925 + }, + { + "epoch": 1.62, + "learning_rate": 2.18660945267302e-05, + "loss": 0.8802, + "step": 2930 + }, + { + "epoch": 1.62, + "learning_rate": 2.179427860382276e-05, + "loss": 0.9197, + "step": 2935 + }, + { + "epoch": 1.63, + "learning_rate": 2.1722489566894903e-05, + "loss": 0.9255, + "step": 2940 + }, + { + "epoch": 1.63, + "learning_rate": 2.1650728018032206e-05, + "loss": 0.8921, + "step": 2945 + }, + { + "epoch": 1.63, + "learning_rate": 2.15789945590897e-05, + "loss": 0.9607, + "step": 2950 + }, + { + "epoch": 1.63, + "learning_rate": 2.150728979168683e-05, + "loss": 0.9755, + "step": 2955 + }, + { + "epoch": 1.64, + "learning_rate": 2.1435614317202384e-05, + "loss": 0.9943, + "step": 2960 + }, + { + "epoch": 1.64, + "learning_rate": 2.1363968736769508e-05, + "loss": 0.9463, + "step": 2965 + }, + { + "epoch": 1.64, + "learning_rate": 2.1292353651270617e-05, + "loss": 0.9107, + "step": 2970 + }, + { + "epoch": 1.65, + "learning_rate": 2.1220769661332365e-05, + "loss": 0.9311, + "step": 2975 + }, + { + "epoch": 1.65, + "learning_rate": 2.1149217367320622e-05, + "loss": 0.9459, + "step": 2980 + }, + { + "epoch": 1.65, + "learning_rate": 2.107769736933541e-05, + "loss": 0.9439, + "step": 2985 + }, + { + "epoch": 1.65, + "learning_rate": 2.100621026720591e-05, + "loss": 0.9719, + "step": 2990 + }, + { + "epoch": 1.66, + "learning_rate": 2.093475666048539e-05, + "loss": 0.9569, + "step": 2995 + }, + { + "epoch": 1.66, + "learning_rate": 2.0863337148446222e-05, + "loss": 0.9308, + "step": 3000 + }, + { + "epoch": 1.66, + "learning_rate": 2.07919523300748e-05, + "loss": 0.9269, + "step": 3005 + }, + { + "epoch": 1.66, + "learning_rate": 2.0720602804066552e-05, + "loss": 0.9358, + "step": 3010 + }, + { + "epoch": 1.67, + "learning_rate": 2.0649289168820943e-05, + "loss": 0.9291, + "step": 3015 + }, + { + "epoch": 1.67, + "learning_rate": 2.0578012022436386e-05, + "loss": 0.969, + "step": 3020 + }, + { + "epoch": 1.67, + "learning_rate": 2.0506771962705304e-05, + "loss": 0.9319, + "step": 3025 + }, + { + "epoch": 1.68, + "learning_rate": 2.0435569587109042e-05, + "loss": 0.9574, + "step": 3030 + }, + { + "epoch": 1.68, + "learning_rate": 2.036440549281293e-05, + "loss": 0.9654, + "step": 3035 + }, + { + "epoch": 1.68, + "learning_rate": 2.0293280276661204e-05, + "loss": 0.9742, + "step": 3040 + }, + { + "epoch": 1.68, + "learning_rate": 2.0222194535172067e-05, + "loss": 0.9014, + "step": 3045 + }, + { + "epoch": 1.69, + "learning_rate": 2.0151148864532623e-05, + "loss": 0.9246, + "step": 3050 + }, + { + "epoch": 1.69, + "learning_rate": 2.0080143860593913e-05, + "loss": 0.9522, + "step": 3055 + }, + { + "epoch": 1.69, + "learning_rate": 2.0009180118865933e-05, + "loss": 0.9567, + "step": 3060 + }, + { + "epoch": 1.69, + "learning_rate": 1.9938258234512588e-05, + "loss": 0.983, + "step": 3065 + }, + { + "epoch": 1.7, + "learning_rate": 1.9867378802346764e-05, + "loss": 0.8722, + "step": 3070 + }, + { + "epoch": 1.7, + "learning_rate": 1.979654241682527e-05, + "loss": 0.9122, + "step": 3075 + }, + { + "epoch": 1.7, + "learning_rate": 1.972574967204391e-05, + "loss": 0.9362, + "step": 3080 + }, + { + "epoch": 1.71, + "learning_rate": 1.9655001161732478e-05, + "loss": 0.8944, + "step": 3085 + }, + { + "epoch": 1.71, + "learning_rate": 1.9584297479249774e-05, + "loss": 0.9329, + "step": 3090 + }, + { + "epoch": 1.71, + "learning_rate": 1.9513639217578636e-05, + "loss": 0.9707, + "step": 3095 + }, + { + "epoch": 1.71, + "learning_rate": 1.9443026969320955e-05, + "loss": 0.9367, + "step": 3100 + }, + { + "epoch": 1.72, + "learning_rate": 1.937246132669272e-05, + "loss": 0.896, + "step": 3105 + }, + { + "epoch": 1.72, + "learning_rate": 1.9301942881519047e-05, + "loss": 0.97, + "step": 3110 + }, + { + "epoch": 1.72, + "learning_rate": 1.9231472225229216e-05, + "loss": 0.9638, + "step": 3115 + }, + { + "epoch": 1.73, + "learning_rate": 1.9161049948851684e-05, + "loss": 0.9561, + "step": 3120 + }, + { + "epoch": 1.73, + "learning_rate": 1.9090676643009193e-05, + "loss": 0.9734, + "step": 3125 + }, + { + "epoch": 1.73, + "learning_rate": 1.902035289791373e-05, + "loss": 0.9651, + "step": 3130 + }, + { + "epoch": 1.73, + "learning_rate": 1.8950079303361658e-05, + "loss": 0.9489, + "step": 3135 + }, + { + "epoch": 1.74, + "learning_rate": 1.8879856448728723e-05, + "loss": 0.9893, + "step": 3140 + }, + { + "epoch": 1.74, + "learning_rate": 1.8809684922965097e-05, + "loss": 0.9549, + "step": 3145 + }, + { + "epoch": 1.74, + "learning_rate": 1.8739565314590507e-05, + "loss": 0.9196, + "step": 3150 + }, + { + "epoch": 1.74, + "learning_rate": 1.8669498211689216e-05, + "loss": 0.9568, + "step": 3155 + }, + { + "epoch": 1.75, + "learning_rate": 1.859948420190517e-05, + "loss": 0.904, + "step": 3160 + }, + { + "epoch": 1.75, + "learning_rate": 1.852952387243698e-05, + "loss": 0.9375, + "step": 3165 + }, + { + "epoch": 1.75, + "learning_rate": 1.8459617810033096e-05, + "loss": 0.9048, + "step": 3170 + }, + { + "epoch": 1.76, + "learning_rate": 1.83897666009868e-05, + "loss": 0.9514, + "step": 3175 + }, + { + "epoch": 1.76, + "learning_rate": 1.8319970831131363e-05, + "loss": 0.8855, + "step": 3180 + }, + { + "epoch": 1.76, + "learning_rate": 1.825023108583505e-05, + "loss": 0.9119, + "step": 3185 + }, + { + "epoch": 1.76, + "learning_rate": 1.818054794999628e-05, + "loss": 0.9733, + "step": 3190 + }, + { + "epoch": 1.77, + "learning_rate": 1.8110922008038705e-05, + "loss": 0.9597, + "step": 3195 + }, + { + "epoch": 1.77, + "learning_rate": 1.8041353843906275e-05, + "loss": 0.8952, + "step": 3200 + }, + { + "epoch": 1.77, + "learning_rate": 1.797184404105839e-05, + "loss": 0.8934, + "step": 3205 + }, + { + "epoch": 1.77, + "learning_rate": 1.7902393182464955e-05, + "loss": 0.9299, + "step": 3210 + }, + { + "epoch": 1.78, + "learning_rate": 1.7833001850601544e-05, + "loss": 0.9247, + "step": 3215 + }, + { + "epoch": 1.78, + "learning_rate": 1.7763670627444465e-05, + "loss": 0.9672, + "step": 3220 + }, + { + "epoch": 1.78, + "learning_rate": 1.7694400094465913e-05, + "loss": 0.9451, + "step": 3225 + }, + { + "epoch": 1.79, + "learning_rate": 1.7625190832629085e-05, + "loss": 0.9294, + "step": 3230 + }, + { + "epoch": 1.79, + "learning_rate": 1.7556043422383293e-05, + "loss": 0.9145, + "step": 3235 + }, + { + "epoch": 1.79, + "learning_rate": 1.7486958443659112e-05, + "loss": 0.9508, + "step": 3240 + }, + { + "epoch": 1.79, + "learning_rate": 1.7417936475863526e-05, + "loss": 0.8725, + "step": 3245 + }, + { + "epoch": 1.8, + "learning_rate": 1.7348978097875036e-05, + "loss": 0.9195, + "step": 3250 + }, + { + "epoch": 1.8, + "learning_rate": 1.728008388803883e-05, + "loss": 0.933, + "step": 3255 + }, + { + "epoch": 1.8, + "learning_rate": 1.7211254424161933e-05, + "loss": 0.9747, + "step": 3260 + }, + { + "epoch": 1.81, + "learning_rate": 1.7142490283508324e-05, + "loss": 0.9168, + "step": 3265 + }, + { + "epoch": 1.81, + "learning_rate": 1.707379204279418e-05, + "loss": 0.9844, + "step": 3270 + }, + { + "epoch": 1.81, + "learning_rate": 1.700516027818293e-05, + "loss": 0.9071, + "step": 3275 + }, + { + "epoch": 1.81, + "learning_rate": 1.6936595565280488e-05, + "loss": 0.9311, + "step": 3280 + }, + { + "epoch": 1.82, + "learning_rate": 1.686809847913045e-05, + "loss": 0.9419, + "step": 3285 + }, + { + "epoch": 1.82, + "learning_rate": 1.679966959420918e-05, + "loss": 0.9679, + "step": 3290 + }, + { + "epoch": 1.82, + "learning_rate": 1.67313094844211e-05, + "loss": 0.9601, + "step": 3295 + }, + { + "epoch": 1.82, + "learning_rate": 1.6663018723093774e-05, + "loss": 0.9022, + "step": 3300 + }, + { + "epoch": 1.83, + "learning_rate": 1.6594797882973196e-05, + "loss": 0.8925, + "step": 3305 + }, + { + "epoch": 1.83, + "learning_rate": 1.6526647536218894e-05, + "loss": 0.9238, + "step": 3310 + }, + { + "epoch": 1.83, + "learning_rate": 1.6458568254399225e-05, + "loss": 0.9632, + "step": 3315 + }, + { + "epoch": 1.84, + "learning_rate": 1.6390560608486496e-05, + "loss": 0.9164, + "step": 3320 + }, + { + "epoch": 1.84, + "learning_rate": 1.6322625168852217e-05, + "loss": 0.9505, + "step": 3325 + }, + { + "epoch": 1.84, + "learning_rate": 1.6254762505262338e-05, + "loss": 0.9622, + "step": 3330 + }, + { + "epoch": 1.84, + "learning_rate": 1.618697318687241e-05, + "loss": 0.9204, + "step": 3335 + }, + { + "epoch": 1.85, + "learning_rate": 1.6119257782222895e-05, + "loss": 0.9504, + "step": 3340 + }, + { + "epoch": 1.85, + "learning_rate": 1.6051616859234285e-05, + "loss": 0.9384, + "step": 3345 + }, + { + "epoch": 1.85, + "learning_rate": 1.5984050985202474e-05, + "loss": 0.9374, + "step": 3350 + }, + { + "epoch": 1.86, + "learning_rate": 1.591656072679387e-05, + "loss": 0.9937, + "step": 3355 + }, + { + "epoch": 1.86, + "learning_rate": 1.5849146650040737e-05, + "loss": 0.9587, + "step": 3360 + }, + { + "epoch": 1.86, + "learning_rate": 1.5781809320336412e-05, + "loss": 0.9312, + "step": 3365 + }, + { + "epoch": 1.86, + "learning_rate": 1.5714549302430536e-05, + "loss": 0.9343, + "step": 3370 + }, + { + "epoch": 1.87, + "learning_rate": 1.5647367160424393e-05, + "loss": 0.9419, + "step": 3375 + }, + { + "epoch": 1.87, + "learning_rate": 1.558026345776608e-05, + "loss": 0.9371, + "step": 3380 + }, + { + "epoch": 1.87, + "learning_rate": 1.551323875724587e-05, + "loss": 0.913, + "step": 3385 + }, + { + "epoch": 1.87, + "learning_rate": 1.5446293620991437e-05, + "loss": 0.9195, + "step": 3390 + }, + { + "epoch": 1.88, + "learning_rate": 1.5379428610463174e-05, + "loss": 0.9164, + "step": 3395 + }, + { + "epoch": 1.88, + "learning_rate": 1.531264428644945e-05, + "loss": 0.9256, + "step": 3400 + }, + { + "epoch": 1.88, + "learning_rate": 1.5245941209061953e-05, + "loss": 0.9658, + "step": 3405 + }, + { + "epoch": 1.89, + "learning_rate": 1.517931993773094e-05, + "loss": 0.9397, + "step": 3410 + }, + { + "epoch": 1.89, + "learning_rate": 1.5112781031200569e-05, + "loss": 0.915, + "step": 3415 + }, + { + "epoch": 1.89, + "learning_rate": 1.5046325047524251e-05, + "loss": 0.9116, + "step": 3420 + }, + { + "epoch": 1.89, + "learning_rate": 1.4979952544059888e-05, + "loss": 0.9192, + "step": 3425 + }, + { + "epoch": 1.9, + "learning_rate": 1.4913664077465289e-05, + "loss": 0.924, + "step": 3430 + }, + { + "epoch": 1.9, + "learning_rate": 1.4847460203693408e-05, + "loss": 0.9579, + "step": 3435 + }, + { + "epoch": 1.9, + "learning_rate": 1.4781341477987776e-05, + "loss": 0.9816, + "step": 3440 + }, + { + "epoch": 1.9, + "learning_rate": 1.4715308454877758e-05, + "loss": 0.9412, + "step": 3445 + }, + { + "epoch": 1.91, + "learning_rate": 1.4649361688173979e-05, + "loss": 0.9413, + "step": 3450 + }, + { + "epoch": 1.91, + "learning_rate": 1.458350173096361e-05, + "loss": 0.9425, + "step": 3455 + }, + { + "epoch": 1.91, + "learning_rate": 1.4517729135605795e-05, + "loss": 0.9132, + "step": 3460 + }, + { + "epoch": 1.92, + "learning_rate": 1.4452044453726942e-05, + "loss": 0.9455, + "step": 3465 + }, + { + "epoch": 1.92, + "learning_rate": 1.4386448236216174e-05, + "loss": 0.9543, + "step": 3470 + }, + { + "epoch": 1.92, + "learning_rate": 1.4320941033220667e-05, + "loss": 0.9118, + "step": 3475 + }, + { + "epoch": 1.92, + "learning_rate": 1.4255523394141041e-05, + "loss": 0.9425, + "step": 3480 + }, + { + "epoch": 1.93, + "learning_rate": 1.4190195867626749e-05, + "loss": 0.8958, + "step": 3485 + }, + { + "epoch": 1.93, + "learning_rate": 1.4124959001571497e-05, + "loss": 0.944, + "step": 3490 + }, + { + "epoch": 1.93, + "learning_rate": 1.4059813343108616e-05, + "loss": 0.9611, + "step": 3495 + }, + { + "epoch": 1.94, + "learning_rate": 1.3994759438606501e-05, + "loss": 0.9448, + "step": 3500 + }, + { + "epoch": 1.94, + "learning_rate": 1.3929797833664013e-05, + "loss": 0.9659, + "step": 3505 + }, + { + "epoch": 1.94, + "learning_rate": 1.3864929073105922e-05, + "loss": 0.9178, + "step": 3510 + }, + { + "epoch": 1.94, + "learning_rate": 1.3800153700978282e-05, + "loss": 0.8965, + "step": 3515 + }, + { + "epoch": 1.95, + "learning_rate": 1.373547226054398e-05, + "loss": 0.9198, + "step": 3520 + }, + { + "epoch": 1.95, + "learning_rate": 1.367088529427803e-05, + "loss": 0.9398, + "step": 3525 + }, + { + "epoch": 1.95, + "learning_rate": 1.3606393343863182e-05, + "loss": 0.9423, + "step": 3530 + }, + { + "epoch": 1.95, + "learning_rate": 1.3541996950185227e-05, + "loss": 0.9592, + "step": 3535 + }, + { + "epoch": 1.96, + "learning_rate": 1.3477696653328598e-05, + "loss": 0.9489, + "step": 3540 + }, + { + "epoch": 1.96, + "learning_rate": 1.3413492992571713e-05, + "loss": 0.963, + "step": 3545 + }, + { + "epoch": 1.96, + "learning_rate": 1.3349386506382586e-05, + "loss": 0.9449, + "step": 3550 + }, + { + "epoch": 1.97, + "learning_rate": 1.3285377732414172e-05, + "loss": 0.9043, + "step": 3555 + }, + { + "epoch": 1.97, + "learning_rate": 1.3221467207499972e-05, + "loss": 0.9362, + "step": 3560 + }, + { + "epoch": 1.97, + "learning_rate": 1.3157655467649463e-05, + "loss": 0.945, + "step": 3565 + }, + { + "epoch": 1.97, + "learning_rate": 1.3093943048043634e-05, + "loss": 0.8693, + "step": 3570 + }, + { + "epoch": 1.98, + "learning_rate": 1.3030330483030479e-05, + "loss": 0.9659, + "step": 3575 + }, + { + "epoch": 1.98, + "learning_rate": 1.2966818306120535e-05, + "loss": 0.9036, + "step": 3580 + }, + { + "epoch": 1.98, + "learning_rate": 1.2903407049982386e-05, + "loss": 0.9282, + "step": 3585 + }, + { + "epoch": 1.99, + "learning_rate": 1.2840097246438215e-05, + "loss": 0.8747, + "step": 3590 + }, + { + "epoch": 1.99, + "learning_rate": 1.277688942645934e-05, + "loss": 0.936, + "step": 3595 + }, + { + "epoch": 1.99, + "learning_rate": 1.2713784120161725e-05, + "loss": 0.8945, + "step": 3600 + }, + { + "epoch": 1.99, + "learning_rate": 1.2650781856801598e-05, + "loss": 0.9273, + "step": 3605 + }, + { + "epoch": 2.0, + "learning_rate": 1.258788316477097e-05, + "loss": 0.9632, + "step": 3610 + }, + { + "epoch": 2.0, + "learning_rate": 1.2525088571593202e-05, + "loss": 0.8641, + "step": 3615 + }, + { + "epoch": 2.0, + "learning_rate": 1.2462398603918607e-05, + "loss": 0.9314, + "step": 3620 + }, + { + "epoch": 2.0, + "learning_rate": 1.2399813787520006e-05, + "loss": 0.9169, + "step": 3625 + }, + { + "epoch": 2.01, + "learning_rate": 1.2337334647288334e-05, + "loss": 0.9089, + "step": 3630 + }, + { + "epoch": 2.01, + "learning_rate": 1.2274961707228228e-05, + "loss": 0.9039, + "step": 3635 + }, + { + "epoch": 2.01, + "learning_rate": 1.2212695490453646e-05, + "loss": 0.9378, + "step": 3640 + }, + { + "epoch": 2.02, + "learning_rate": 1.2150536519183475e-05, + "loss": 0.9297, + "step": 3645 + }, + { + "epoch": 2.02, + "learning_rate": 1.2088485314737108e-05, + "loss": 0.9488, + "step": 3650 + }, + { + "epoch": 2.02, + "learning_rate": 1.2026542397530186e-05, + "loss": 0.9625, + "step": 3655 + }, + { + "epoch": 2.02, + "learning_rate": 1.1964708287070073e-05, + "loss": 0.8874, + "step": 3660 + }, + { + "epoch": 2.03, + "learning_rate": 1.1902983501951666e-05, + "loss": 0.9224, + "step": 3665 + }, + { + "epoch": 2.03, + "learning_rate": 1.1841368559852892e-05, + "loss": 0.9442, + "step": 3670 + }, + { + "epoch": 2.03, + "learning_rate": 1.17798639775305e-05, + "loss": 0.8688, + "step": 3675 + }, + { + "epoch": 2.03, + "learning_rate": 1.1718470270815608e-05, + "loss": 0.8912, + "step": 3680 + }, + { + "epoch": 2.04, + "learning_rate": 1.1657187954609496e-05, + "loss": 0.9131, + "step": 3685 + }, + { + "epoch": 2.04, + "learning_rate": 1.1596017542879168e-05, + "loss": 0.9827, + "step": 3690 + }, + { + "epoch": 2.04, + "learning_rate": 1.1534959548653132e-05, + "loss": 0.8588, + "step": 3695 + }, + { + "epoch": 2.05, + "learning_rate": 1.147401448401706e-05, + "loss": 0.9359, + "step": 3700 + }, + { + "epoch": 2.05, + "learning_rate": 1.1413182860109491e-05, + "loss": 0.9375, + "step": 3705 + }, + { + "epoch": 2.05, + "learning_rate": 1.1352465187117562e-05, + "loss": 0.9242, + "step": 3710 + }, + { + "epoch": 2.05, + "learning_rate": 1.1291861974272703e-05, + "loss": 0.9132, + "step": 3715 + }, + { + "epoch": 2.06, + "learning_rate": 1.1231373729846393e-05, + "loss": 0.9142, + "step": 3720 + }, + { + "epoch": 2.06, + "learning_rate": 1.1171000961145883e-05, + "loss": 0.9132, + "step": 3725 + }, + { + "epoch": 2.06, + "learning_rate": 1.1110744174509952e-05, + "loss": 0.9603, + "step": 3730 + }, + { + "epoch": 2.07, + "learning_rate": 1.1050603875304622e-05, + "loss": 0.9442, + "step": 3735 + }, + { + "epoch": 2.07, + "learning_rate": 1.0990580567918979e-05, + "loss": 0.9392, + "step": 3740 + }, + { + "epoch": 2.07, + "learning_rate": 1.0930674755760908e-05, + "loss": 0.9203, + "step": 3745 + }, + { + "epoch": 2.07, + "learning_rate": 1.0870886941252872e-05, + "loss": 0.9468, + "step": 3750 + }, + { + "epoch": 2.08, + "learning_rate": 1.0811217625827705e-05, + "loss": 0.9605, + "step": 3755 + }, + { + "epoch": 2.08, + "learning_rate": 1.0751667309924399e-05, + "loss": 0.9166, + "step": 3760 + }, + { + "epoch": 2.08, + "learning_rate": 1.0692236492983918e-05, + "loss": 0.939, + "step": 3765 + }, + { + "epoch": 2.08, + "learning_rate": 1.0632925673445001e-05, + "loss": 0.9409, + "step": 3770 + }, + { + "epoch": 2.09, + "learning_rate": 1.0573735348739987e-05, + "loss": 0.9523, + "step": 3775 + }, + { + "epoch": 2.09, + "learning_rate": 1.0514666015290645e-05, + "loss": 0.8867, + "step": 3780 + }, + { + "epoch": 2.09, + "learning_rate": 1.0455718168503967e-05, + "loss": 0.9816, + "step": 3785 + }, + { + "epoch": 2.1, + "learning_rate": 1.0396892302768127e-05, + "loss": 0.9465, + "step": 3790 + }, + { + "epoch": 2.1, + "learning_rate": 1.033818891144817e-05, + "loss": 0.9352, + "step": 3795 + }, + { + "epoch": 2.1, + "learning_rate": 1.0279608486882054e-05, + "loss": 0.8877, + "step": 3800 + }, + { + "epoch": 2.1, + "learning_rate": 1.0221151520376343e-05, + "loss": 0.9098, + "step": 3805 + }, + { + "epoch": 2.11, + "learning_rate": 1.0162818502202251e-05, + "loss": 0.8687, + "step": 3810 + }, + { + "epoch": 2.11, + "learning_rate": 1.0104609921591387e-05, + "loss": 0.9256, + "step": 3815 + }, + { + "epoch": 2.11, + "learning_rate": 1.0046526266731782e-05, + "loss": 0.92, + "step": 3820 + }, + { + "epoch": 2.12, + "learning_rate": 9.988568024763673e-06, + "loss": 0.8813, + "step": 3825 + }, + { + "epoch": 2.12, + "learning_rate": 9.930735681775505e-06, + "loss": 0.891, + "step": 3830 + }, + { + "epoch": 2.12, + "learning_rate": 9.87302972279982e-06, + "loss": 0.9479, + "step": 3835 + }, + { + "epoch": 2.12, + "learning_rate": 9.815450631809191e-06, + "loss": 0.8963, + "step": 3840 + }, + { + "epoch": 2.13, + "learning_rate": 9.757998891712172e-06, + "loss": 0.9178, + "step": 3845 + }, + { + "epoch": 2.13, + "learning_rate": 9.700674984349228e-06, + "loss": 0.9072, + "step": 3850 + }, + { + "epoch": 2.13, + "learning_rate": 9.643479390488717e-06, + "loss": 0.8879, + "step": 3855 + }, + { + "epoch": 2.13, + "learning_rate": 9.586412589822846e-06, + "loss": 0.9484, + "step": 3860 + }, + { + "epoch": 2.14, + "learning_rate": 9.529475060963649e-06, + "loss": 0.9173, + "step": 3865 + }, + { + "epoch": 2.14, + "learning_rate": 9.472667281438982e-06, + "loss": 0.9318, + "step": 3870 + }, + { + "epoch": 2.14, + "learning_rate": 9.415989727688484e-06, + "loss": 0.9242, + "step": 3875 + }, + { + "epoch": 2.15, + "learning_rate": 9.359442875059631e-06, + "loss": 0.9314, + "step": 3880 + }, + { + "epoch": 2.15, + "learning_rate": 9.303027197803726e-06, + "loss": 0.9227, + "step": 3885 + }, + { + "epoch": 2.15, + "learning_rate": 9.246743169071906e-06, + "loss": 0.9253, + "step": 3890 + }, + { + "epoch": 2.15, + "learning_rate": 9.190591260911201e-06, + "loss": 0.9457, + "step": 3895 + }, + { + "epoch": 2.16, + "learning_rate": 9.134571944260554e-06, + "loss": 0.9214, + "step": 3900 + }, + { + "epoch": 2.16, + "learning_rate": 9.078685688946884e-06, + "loss": 0.9534, + "step": 3905 + }, + { + "epoch": 2.16, + "learning_rate": 9.022932963681141e-06, + "loss": 0.9236, + "step": 3910 + }, + { + "epoch": 2.16, + "learning_rate": 8.967314236054384e-06, + "loss": 0.8905, + "step": 3915 + }, + { + "epoch": 2.17, + "learning_rate": 8.911829972533817e-06, + "loss": 0.9319, + "step": 3920 + }, + { + "epoch": 2.17, + "learning_rate": 8.856480638458966e-06, + "loss": 0.8989, + "step": 3925 + }, + { + "epoch": 2.17, + "learning_rate": 8.80126669803766e-06, + "loss": 0.9234, + "step": 3930 + }, + { + "epoch": 2.18, + "learning_rate": 8.746188614342263e-06, + "loss": 0.9619, + "step": 3935 + }, + { + "epoch": 2.18, + "learning_rate": 8.691246849305653e-06, + "loss": 0.9537, + "step": 3940 + }, + { + "epoch": 2.18, + "learning_rate": 8.636441863717499e-06, + "loss": 0.9809, + "step": 3945 + }, + { + "epoch": 2.18, + "learning_rate": 8.581774117220238e-06, + "loss": 0.9219, + "step": 3950 + }, + { + "epoch": 2.19, + "learning_rate": 8.52724406830538e-06, + "loss": 0.9502, + "step": 3955 + }, + { + "epoch": 2.19, + "learning_rate": 8.472852174309514e-06, + "loss": 0.9277, + "step": 3960 + }, + { + "epoch": 2.19, + "learning_rate": 8.418598891410581e-06, + "loss": 0.8978, + "step": 3965 + }, + { + "epoch": 2.2, + "learning_rate": 8.364484674624e-06, + "loss": 0.9026, + "step": 3970 + }, + { + "epoch": 2.2, + "learning_rate": 8.31050997779885e-06, + "loss": 0.9658, + "step": 3975 + }, + { + "epoch": 2.2, + "learning_rate": 8.25667525361409e-06, + "loss": 0.9288, + "step": 3980 + }, + { + "epoch": 2.2, + "learning_rate": 8.202980953574735e-06, + "loss": 0.9009, + "step": 3985 + }, + { + "epoch": 2.21, + "learning_rate": 8.14942752800808e-06, + "loss": 0.934, + "step": 3990 + }, + { + "epoch": 2.21, + "learning_rate": 8.09601542605993e-06, + "loss": 0.9261, + "step": 3995 + }, + { + "epoch": 2.21, + "learning_rate": 8.042745095690826e-06, + "loss": 0.9086, + "step": 4000 + }, + { + "epoch": 2.21, + "learning_rate": 7.989616983672288e-06, + "loss": 0.9397, + "step": 4005 + }, + { + "epoch": 2.22, + "learning_rate": 7.936631535583055e-06, + "loss": 0.8661, + "step": 4010 + }, + { + "epoch": 2.22, + "learning_rate": 7.88378919580538e-06, + "loss": 0.9433, + "step": 4015 + }, + { + "epoch": 2.22, + "learning_rate": 7.83109040752128e-06, + "loss": 0.9473, + "step": 4020 + }, + { + "epoch": 2.23, + "learning_rate": 7.778535612708824e-06, + "loss": 0.9177, + "step": 4025 + }, + { + "epoch": 2.23, + "learning_rate": 7.726125252138417e-06, + "loss": 0.8932, + "step": 4030 + }, + { + "epoch": 2.23, + "learning_rate": 7.673859765369126e-06, + "loss": 0.9187, + "step": 4035 + }, + { + "epoch": 2.23, + "learning_rate": 7.62173959074497e-06, + "loss": 0.9009, + "step": 4040 + }, + { + "epoch": 2.24, + "learning_rate": 7.569765165391257e-06, + "loss": 0.9348, + "step": 4045 + }, + { + "epoch": 2.24, + "learning_rate": 7.517936925210917e-06, + "loss": 0.8827, + "step": 4050 + }, + { + "epoch": 2.24, + "learning_rate": 7.466255304880834e-06, + "loss": 0.9668, + "step": 4055 + }, + { + "epoch": 2.24, + "learning_rate": 7.4147207378482295e-06, + "loss": 0.934, + "step": 4060 + }, + { + "epoch": 2.25, + "learning_rate": 7.36333365632697e-06, + "loss": 0.9251, + "step": 4065 + }, + { + "epoch": 2.25, + "learning_rate": 7.312094491294033e-06, + "loss": 0.9225, + "step": 4070 + }, + { + "epoch": 2.25, + "learning_rate": 7.261003672485783e-06, + "loss": 0.9198, + "step": 4075 + }, + { + "epoch": 2.26, + "learning_rate": 7.210061628394477e-06, + "loss": 0.9023, + "step": 4080 + }, + { + "epoch": 2.26, + "learning_rate": 7.159268786264564e-06, + "loss": 0.9378, + "step": 4085 + }, + { + "epoch": 2.26, + "learning_rate": 7.108625572089209e-06, + "loss": 0.9148, + "step": 4090 + }, + { + "epoch": 2.26, + "learning_rate": 7.05813241060661e-06, + "loss": 0.9308, + "step": 4095 + }, + { + "epoch": 2.27, + "learning_rate": 7.007789725296557e-06, + "loss": 0.8979, + "step": 4100 + }, + { + "epoch": 2.27, + "learning_rate": 6.957597938376748e-06, + "loss": 0.9329, + "step": 4105 + }, + { + "epoch": 2.27, + "learning_rate": 6.907557470799358e-06, + "loss": 0.8849, + "step": 4110 + }, + { + "epoch": 2.28, + "learning_rate": 6.857668742247458e-06, + "loss": 0.8879, + "step": 4115 + }, + { + "epoch": 2.28, + "learning_rate": 6.807932171131498e-06, + "loss": 0.9132, + "step": 4120 + }, + { + "epoch": 2.28, + "learning_rate": 6.758348174585804e-06, + "loss": 0.9273, + "step": 4125 + }, + { + "epoch": 2.28, + "learning_rate": 6.7089171684650785e-06, + "loss": 0.9115, + "step": 4130 + }, + { + "epoch": 2.29, + "learning_rate": 6.659639567340914e-06, + "loss": 0.9431, + "step": 4135 + }, + { + "epoch": 2.29, + "learning_rate": 6.610515784498314e-06, + "loss": 0.9515, + "step": 4140 + }, + { + "epoch": 2.29, + "learning_rate": 6.561546231932228e-06, + "loss": 0.9069, + "step": 4145 + }, + { + "epoch": 2.29, + "learning_rate": 6.512731320344101e-06, + "loss": 0.9178, + "step": 4150 + }, + { + "epoch": 2.3, + "learning_rate": 6.464071459138405e-06, + "loss": 0.9156, + "step": 4155 + }, + { + "epoch": 2.3, + "learning_rate": 6.415567056419244e-06, + "loss": 0.9637, + "step": 4160 + }, + { + "epoch": 2.3, + "learning_rate": 6.3672185189869e-06, + "loss": 0.9446, + "step": 4165 + }, + { + "epoch": 2.31, + "learning_rate": 6.319026252334445e-06, + "loss": 0.8726, + "step": 4170 + }, + { + "epoch": 2.31, + "learning_rate": 6.270990660644313e-06, + "loss": 0.9324, + "step": 4175 + }, + { + "epoch": 2.31, + "learning_rate": 6.223112146784935e-06, + "loss": 0.9252, + "step": 4180 + }, + { + "epoch": 2.31, + "learning_rate": 6.1753911123073435e-06, + "loss": 0.8924, + "step": 4185 + }, + { + "epoch": 2.32, + "learning_rate": 6.127827957441817e-06, + "loss": 0.9344, + "step": 4190 + }, + { + "epoch": 2.32, + "learning_rate": 6.080423081094522e-06, + "loss": 0.9341, + "step": 4195 + }, + { + "epoch": 2.32, + "learning_rate": 6.033176880844133e-06, + "loss": 0.905, + "step": 4200 + }, + { + "epoch": 2.33, + "learning_rate": 5.986089752938584e-06, + "loss": 0.9231, + "step": 4205 + }, + { + "epoch": 2.33, + "learning_rate": 5.939162092291622e-06, + "loss": 0.906, + "step": 4210 + }, + { + "epoch": 2.33, + "learning_rate": 5.892394292479633e-06, + "loss": 0.8957, + "step": 4215 + }, + { + "epoch": 2.33, + "learning_rate": 5.8457867457382024e-06, + "loss": 0.867, + "step": 4220 + }, + { + "epoch": 2.34, + "learning_rate": 5.7993398429589506e-06, + "loss": 0.9022, + "step": 4225 + }, + { + "epoch": 2.34, + "learning_rate": 5.753053973686148e-06, + "loss": 0.9064, + "step": 4230 + }, + { + "epoch": 2.34, + "learning_rate": 5.7069295261135525e-06, + "loss": 0.9019, + "step": 4235 + }, + { + "epoch": 2.34, + "learning_rate": 5.66096688708104e-06, + "loss": 0.9397, + "step": 4240 + }, + { + "epoch": 2.35, + "learning_rate": 5.615166442071457e-06, + "loss": 0.8971, + "step": 4245 + }, + { + "epoch": 2.35, + "learning_rate": 5.569528575207339e-06, + "loss": 0.8902, + "step": 4250 + }, + { + "epoch": 2.35, + "learning_rate": 5.5240536692477e-06, + "loss": 0.8982, + "step": 4255 + }, + { + "epoch": 2.36, + "learning_rate": 5.4787421055848164e-06, + "loss": 0.9135, + "step": 4260 + }, + { + "epoch": 2.36, + "learning_rate": 5.433594264241043e-06, + "loss": 0.8969, + "step": 4265 + }, + { + "epoch": 2.36, + "learning_rate": 5.3886105238656055e-06, + "loss": 0.9036, + "step": 4270 + }, + { + "epoch": 2.36, + "learning_rate": 5.3437912617314425e-06, + "loss": 0.9313, + "step": 4275 + }, + { + "epoch": 2.37, + "learning_rate": 5.299136853732034e-06, + "loss": 0.9034, + "step": 4280 + }, + { + "epoch": 2.37, + "learning_rate": 5.254647674378252e-06, + "loss": 0.9673, + "step": 4285 + }, + { + "epoch": 2.37, + "learning_rate": 5.210324096795202e-06, + "loss": 0.9044, + "step": 4290 + }, + { + "epoch": 2.37, + "learning_rate": 5.166166492719124e-06, + "loss": 0.937, + "step": 4295 + }, + { + "epoch": 2.38, + "learning_rate": 5.122175232494255e-06, + "loss": 0.9095, + "step": 4300 + }, + { + "epoch": 2.38, + "learning_rate": 5.078350685069727e-06, + "loss": 0.8787, + "step": 4305 + }, + { + "epoch": 2.38, + "learning_rate": 5.03469321799647e-06, + "loss": 0.8945, + "step": 4310 + }, + { + "epoch": 2.39, + "learning_rate": 4.9912031974241376e-06, + "loss": 0.9285, + "step": 4315 + }, + { + "epoch": 2.39, + "learning_rate": 4.947880988098025e-06, + "loss": 0.9318, + "step": 4320 + }, + { + "epoch": 2.39, + "learning_rate": 4.90472695335602e-06, + "loss": 0.9171, + "step": 4325 + }, + { + "epoch": 2.39, + "learning_rate": 4.8617414551255545e-06, + "loss": 0.923, + "step": 4330 + }, + { + "epoch": 2.4, + "learning_rate": 4.818924853920545e-06, + "loss": 0.9153, + "step": 4335 + }, + { + "epoch": 2.4, + "learning_rate": 4.776277508838428e-06, + "loss": 0.907, + "step": 4340 + }, + { + "epoch": 2.4, + "learning_rate": 4.733799777557069e-06, + "loss": 0.8869, + "step": 4345 + }, + { + "epoch": 2.41, + "learning_rate": 4.691492016331842e-06, + "loss": 0.95, + "step": 4350 + }, + { + "epoch": 2.41, + "learning_rate": 4.6493545799925655e-06, + "loss": 0.9145, + "step": 4355 + }, + { + "epoch": 2.41, + "learning_rate": 4.607387821940609e-06, + "loss": 0.9175, + "step": 4360 + }, + { + "epoch": 2.41, + "learning_rate": 4.565592094145835e-06, + "loss": 0.9125, + "step": 4365 + }, + { + "epoch": 2.42, + "learning_rate": 4.523967747143745e-06, + "loss": 0.8972, + "step": 4370 + }, + { + "epoch": 2.42, + "learning_rate": 4.482515130032453e-06, + "loss": 0.9113, + "step": 4375 + }, + { + "epoch": 2.42, + "learning_rate": 4.441234590469817e-06, + "loss": 0.9447, + "step": 4380 + }, + { + "epoch": 2.42, + "learning_rate": 4.400126474670499e-06, + "loss": 0.9472, + "step": 4385 + }, + { + "epoch": 2.43, + "learning_rate": 4.359191127403059e-06, + "loss": 0.9391, + "step": 4390 + }, + { + "epoch": 2.43, + "learning_rate": 4.318428891987078e-06, + "loss": 0.9171, + "step": 4395 + }, + { + "epoch": 2.43, + "learning_rate": 4.2778401102902595e-06, + "loss": 0.9468, + "step": 4400 + }, + { + "epoch": 2.44, + "learning_rate": 4.237425122725586e-06, + "loss": 0.9034, + "step": 4405 + }, + { + "epoch": 2.44, + "learning_rate": 4.197184268248436e-06, + "loss": 0.8667, + "step": 4410 + }, + { + "epoch": 2.44, + "learning_rate": 4.157117884353765e-06, + "loss": 0.9553, + "step": 4415 + }, + { + "epoch": 2.44, + "learning_rate": 4.117226307073268e-06, + "loss": 0.931, + "step": 4420 + }, + { + "epoch": 2.45, + "learning_rate": 4.07750987097254e-06, + "loss": 0.9287, + "step": 4425 + }, + { + "epoch": 2.45, + "learning_rate": 4.037968909148326e-06, + "loss": 0.8737, + "step": 4430 + }, + { + "epoch": 2.45, + "learning_rate": 3.998603753225647e-06, + "loss": 0.9677, + "step": 4435 + }, + { + "epoch": 2.46, + "learning_rate": 3.959414733355094e-06, + "loss": 0.9194, + "step": 4440 + }, + { + "epoch": 2.46, + "learning_rate": 3.9204021782100115e-06, + "loss": 0.9424, + "step": 4445 + }, + { + "epoch": 2.46, + "learning_rate": 3.8815664149837675e-06, + "loss": 0.9402, + "step": 4450 + }, + { + "epoch": 2.46, + "learning_rate": 3.8429077693869854e-06, + "loss": 0.9196, + "step": 4455 + }, + { + "epoch": 2.47, + "learning_rate": 3.804426565644839e-06, + "loss": 0.9357, + "step": 4460 + }, + { + "epoch": 2.47, + "learning_rate": 3.7661231264943086e-06, + "loss": 0.9234, + "step": 4465 + }, + { + "epoch": 2.47, + "learning_rate": 3.7279977731814963e-06, + "loss": 0.9135, + "step": 4470 + }, + { + "epoch": 2.47, + "learning_rate": 3.690050825458913e-06, + "loss": 0.9074, + "step": 4475 + }, + { + "epoch": 2.48, + "learning_rate": 3.652282601582793e-06, + "loss": 0.8658, + "step": 4480 + }, + { + "epoch": 2.48, + "learning_rate": 3.6146934183104748e-06, + "loss": 0.9522, + "step": 4485 + }, + { + "epoch": 2.48, + "learning_rate": 3.5772835908976538e-06, + "loss": 0.937, + "step": 4490 + }, + { + "epoch": 2.49, + "learning_rate": 3.540053433095841e-06, + "loss": 0.9336, + "step": 4495 + }, + { + "epoch": 2.49, + "learning_rate": 3.503003257149637e-06, + "loss": 0.9437, + "step": 4500 + }, + { + "epoch": 2.49, + "learning_rate": 3.4661333737941976e-06, + "loss": 0.9084, + "step": 4505 + }, + { + "epoch": 2.49, + "learning_rate": 3.429444092252554e-06, + "loss": 0.9378, + "step": 4510 + }, + { + "epoch": 2.5, + "learning_rate": 3.39293572023307e-06, + "loss": 0.8999, + "step": 4515 + }, + { + "epoch": 2.5, + "learning_rate": 3.3566085639268413e-06, + "loss": 0.9417, + "step": 4520 + }, + { + "epoch": 2.5, + "learning_rate": 3.32046292800513e-06, + "loss": 0.9288, + "step": 4525 + }, + { + "epoch": 2.5, + "learning_rate": 3.2844991156168097e-06, + "loss": 0.8957, + "step": 4530 + }, + { + "epoch": 2.51, + "learning_rate": 3.2487174283858223e-06, + "loss": 0.9002, + "step": 4535 + }, + { + "epoch": 2.51, + "learning_rate": 3.2131181664086517e-06, + "loss": 0.9315, + "step": 4540 + }, + { + "epoch": 2.51, + "learning_rate": 3.1777016282517975e-06, + "loss": 0.8948, + "step": 4545 + }, + { + "epoch": 2.52, + "learning_rate": 3.142468110949287e-06, + "loss": 0.9015, + "step": 4550 + }, + { + "epoch": 2.52, + "learning_rate": 3.1074179100001737e-06, + "loss": 0.9273, + "step": 4555 + }, + { + "epoch": 2.52, + "learning_rate": 3.0725513193660404e-06, + "loss": 0.9307, + "step": 4560 + }, + { + "epoch": 2.52, + "learning_rate": 3.0378686314685934e-06, + "loss": 0.9075, + "step": 4565 + }, + { + "epoch": 2.53, + "learning_rate": 3.003370137187128e-06, + "loss": 0.8821, + "step": 4570 + }, + { + "epoch": 2.53, + "learning_rate": 2.969056125856154e-06, + "loss": 0.9245, + "step": 4575 + }, + { + "epoch": 2.53, + "learning_rate": 2.93492688526294e-06, + "loss": 0.9346, + "step": 4580 + }, + { + "epoch": 2.54, + "learning_rate": 2.900982701645111e-06, + "loss": 0.9226, + "step": 4585 + }, + { + "epoch": 2.54, + "learning_rate": 2.867223859688237e-06, + "loss": 0.9215, + "step": 4590 + }, + { + "epoch": 2.54, + "learning_rate": 2.83365064252345e-06, + "loss": 0.878, + "step": 4595 + }, + { + "epoch": 2.54, + "learning_rate": 2.800263331725078e-06, + "loss": 0.9207, + "step": 4600 + }, + { + "epoch": 2.55, + "learning_rate": 2.7670622073082657e-06, + "loss": 0.9156, + "step": 4605 + }, + { + "epoch": 2.55, + "learning_rate": 2.7340475477266507e-06, + "loss": 0.9296, + "step": 4610 + }, + { + "epoch": 2.55, + "learning_rate": 2.701219629869986e-06, + "loss": 0.9346, + "step": 4615 + }, + { + "epoch": 2.55, + "learning_rate": 2.6685787290618825e-06, + "loss": 0.9237, + "step": 4620 + }, + { + "epoch": 2.56, + "learning_rate": 2.636125119057428e-06, + "loss": 0.9162, + "step": 4625 + }, + { + "epoch": 2.56, + "learning_rate": 2.6038590720409565e-06, + "loss": 0.9145, + "step": 4630 + }, + { + "epoch": 2.56, + "learning_rate": 2.5717808586237067e-06, + "loss": 0.9776, + "step": 4635 + }, + { + "epoch": 2.57, + "learning_rate": 2.539890747841611e-06, + "loss": 0.9519, + "step": 4640 + }, + { + "epoch": 2.57, + "learning_rate": 2.5081890071529695e-06, + "loss": 0.9116, + "step": 4645 + }, + { + "epoch": 2.57, + "learning_rate": 2.4766759024362927e-06, + "loss": 0.9217, + "step": 4650 + }, + { + "epoch": 2.57, + "learning_rate": 2.445351697987988e-06, + "loss": 0.9113, + "step": 4655 + }, + { + "epoch": 2.58, + "learning_rate": 2.414216656520191e-06, + "loss": 0.9089, + "step": 4660 + }, + { + "epoch": 2.58, + "learning_rate": 2.3832710391585605e-06, + "loss": 0.9367, + "step": 4665 + }, + { + "epoch": 2.58, + "learning_rate": 2.3525151054400675e-06, + "loss": 0.9453, + "step": 4670 + }, + { + "epoch": 2.59, + "learning_rate": 2.3219491133108394e-06, + "loss": 0.9187, + "step": 4675 + }, + { + "epoch": 2.59, + "learning_rate": 2.2915733191239824e-06, + "loss": 0.911, + "step": 4680 + }, + { + "epoch": 2.59, + "learning_rate": 2.261387977637436e-06, + "loss": 0.9309, + "step": 4685 + }, + { + "epoch": 2.59, + "learning_rate": 2.2313933420118395e-06, + "loss": 0.8921, + "step": 4690 + }, + { + "epoch": 2.6, + "learning_rate": 2.2015896638084037e-06, + "loss": 0.9332, + "step": 4695 + }, + { + "epoch": 2.6, + "learning_rate": 2.171977192986813e-06, + "loss": 0.9316, + "step": 4700 + }, + { + "epoch": 2.6, + "learning_rate": 2.142556177903096e-06, + "loss": 0.9453, + "step": 4705 + }, + { + "epoch": 2.6, + "learning_rate": 2.1133268653076022e-06, + "loss": 0.937, + "step": 4710 + }, + { + "epoch": 2.61, + "learning_rate": 2.084289500342862e-06, + "loss": 0.9067, + "step": 4715 + }, + { + "epoch": 2.61, + "learning_rate": 2.0554443265415864e-06, + "loss": 0.9694, + "step": 4720 + }, + { + "epoch": 2.61, + "learning_rate": 2.0267915858245943e-06, + "loss": 0.9217, + "step": 4725 + }, + { + "epoch": 2.62, + "learning_rate": 1.998331518498797e-06, + "loss": 0.9399, + "step": 4730 + }, + { + "epoch": 2.62, + "learning_rate": 1.970064363255175e-06, + "loss": 0.9164, + "step": 4735 + }, + { + "epoch": 2.62, + "learning_rate": 1.941990357166784e-06, + "loss": 0.9336, + "step": 4740 + }, + { + "epoch": 2.62, + "learning_rate": 1.9141097356867644e-06, + "loss": 0.926, + "step": 4745 + }, + { + "epoch": 2.63, + "learning_rate": 1.8864227326463452e-06, + "loss": 0.9109, + "step": 4750 + }, + { + "epoch": 2.63, + "learning_rate": 1.8589295802529328e-06, + "loss": 0.9528, + "step": 4755 + }, + { + "epoch": 2.63, + "learning_rate": 1.8316305090881003e-06, + "loss": 0.9142, + "step": 4760 + }, + { + "epoch": 2.63, + "learning_rate": 1.8045257481057204e-06, + "loss": 0.9247, + "step": 4765 + }, + { + "epoch": 2.64, + "learning_rate": 1.7776155246299747e-06, + "loss": 0.9162, + "step": 4770 + }, + { + "epoch": 2.64, + "learning_rate": 1.7509000643535167e-06, + "loss": 0.9084, + "step": 4775 + }, + { + "epoch": 2.64, + "learning_rate": 1.7243795913355148e-06, + "loss": 0.8427, + "step": 4780 + }, + { + "epoch": 2.65, + "learning_rate": 1.6980543279998401e-06, + "loss": 0.8851, + "step": 4785 + }, + { + "epoch": 2.65, + "learning_rate": 1.671924495133126e-06, + "loss": 0.9155, + "step": 4790 + }, + { + "epoch": 2.65, + "learning_rate": 1.6459903118829777e-06, + "loss": 0.9049, + "step": 4795 + }, + { + "epoch": 2.65, + "learning_rate": 1.6202519957561114e-06, + "loss": 0.8831, + "step": 4800 + }, + { + "epoch": 2.66, + "learning_rate": 1.5947097626165252e-06, + "loss": 0.9286, + "step": 4805 + }, + { + "epoch": 2.66, + "learning_rate": 1.5693638266836952e-06, + "loss": 0.9299, + "step": 4810 + }, + { + "epoch": 2.66, + "learning_rate": 1.5442144005307774e-06, + "loss": 0.8739, + "step": 4815 + }, + { + "epoch": 2.67, + "learning_rate": 1.519261695082827e-06, + "loss": 0.9201, + "step": 4820 + }, + { + "epoch": 2.67, + "learning_rate": 1.4945059196150247e-06, + "loss": 0.9062, + "step": 4825 + }, + { + "epoch": 2.67, + "learning_rate": 1.4699472817509248e-06, + "loss": 0.9304, + "step": 4830 + }, + { + "epoch": 2.67, + "learning_rate": 1.4455859874607235e-06, + "loss": 0.9253, + "step": 4835 + }, + { + "epoch": 2.68, + "learning_rate": 1.4214222410594947e-06, + "loss": 0.9261, + "step": 4840 + }, + { + "epoch": 2.68, + "learning_rate": 1.3974562452055418e-06, + "loss": 0.8764, + "step": 4845 + }, + { + "epoch": 2.68, + "learning_rate": 1.3736882008986262e-06, + "loss": 0.8906, + "step": 4850 + }, + { + "epoch": 2.68, + "learning_rate": 1.3501183074783263e-06, + "loss": 0.9671, + "step": 4855 + }, + { + "epoch": 2.69, + "learning_rate": 1.3267467626223606e-06, + "loss": 0.8979, + "step": 4860 + }, + { + "epoch": 2.69, + "learning_rate": 1.3035737623449146e-06, + "loss": 0.9284, + "step": 4865 + }, + { + "epoch": 2.69, + "learning_rate": 1.2805995009950083e-06, + "loss": 0.9338, + "step": 4870 + }, + { + "epoch": 2.7, + "learning_rate": 1.257824171254865e-06, + "loss": 0.9474, + "step": 4875 + }, + { + "epoch": 2.7, + "learning_rate": 1.2352479641382919e-06, + "loss": 0.9307, + "step": 4880 + }, + { + "epoch": 2.7, + "learning_rate": 1.2128710689890826e-06, + "loss": 0.9578, + "step": 4885 + }, + { + "epoch": 2.7, + "learning_rate": 1.1906936734794233e-06, + "loss": 0.9123, + "step": 4890 + }, + { + "epoch": 2.71, + "learning_rate": 1.1687159636083161e-06, + "loss": 0.9157, + "step": 4895 + }, + { + "epoch": 2.71, + "learning_rate": 1.1469381237000476e-06, + "loss": 0.9135, + "step": 4900 + }, + { + "epoch": 2.71, + "learning_rate": 1.1253603364025867e-06, + "loss": 0.9073, + "step": 4905 + }, + { + "epoch": 2.71, + "learning_rate": 1.1039827826861193e-06, + "loss": 0.9198, + "step": 4910 + }, + { + "epoch": 2.72, + "learning_rate": 1.0828056418414695e-06, + "loss": 0.911, + "step": 4915 + }, + { + "epoch": 2.72, + "learning_rate": 1.06182909147865e-06, + "loss": 0.9124, + "step": 4920 + }, + { + "epoch": 2.72, + "learning_rate": 1.0410533075253248e-06, + "loss": 0.9308, + "step": 4925 + }, + { + "epoch": 2.73, + "learning_rate": 1.020478464225369e-06, + "loss": 0.9005, + "step": 4930 + }, + { + "epoch": 2.73, + "learning_rate": 1.0001047341373832e-06, + "loss": 0.9326, + "step": 4935 + }, + { + "epoch": 2.73, + "learning_rate": 9.7993228813327e-07, + "loss": 0.8905, + "step": 4940 + }, + { + "epoch": 2.73, + "learning_rate": 9.599612953967746e-07, + "loss": 0.9238, + "step": 4945 + }, + { + "epoch": 2.74, + "learning_rate": 9.401919234220902e-07, + "loss": 0.8964, + "step": 4950 + }, + { + "epoch": 2.74, + "learning_rate": 9.206243380124352e-07, + "loss": 0.9183, + "step": 4955 + }, + { + "epoch": 2.74, + "learning_rate": 9.012587032786706e-07, + "loss": 0.9074, + "step": 4960 + }, + { + "epoch": 2.75, + "learning_rate": 8.820951816379263e-07, + "loss": 0.9255, + "step": 4965 + }, + { + "epoch": 2.75, + "learning_rate": 8.631339338122324e-07, + "loss": 0.9133, + "step": 4970 + }, + { + "epoch": 2.75, + "learning_rate": 8.443751188271703e-07, + "loss": 0.9056, + "step": 4975 + }, + { + "epoch": 2.75, + "learning_rate": 8.258188940105549e-07, + "loss": 0.8905, + "step": 4980 + }, + { + "epoch": 2.76, + "learning_rate": 8.074654149910821e-07, + "loss": 0.9088, + "step": 4985 + }, + { + "epoch": 2.76, + "learning_rate": 7.893148356970748e-07, + "loss": 0.8994, + "step": 4990 + }, + { + "epoch": 2.76, + "learning_rate": 7.713673083551281e-07, + "loss": 0.8946, + "step": 4995 + }, + { + "epoch": 2.76, + "learning_rate": 7.536229834888913e-07, + "loss": 0.924, + "step": 5000 + } + ], + "max_steps": 5424, + "num_train_epochs": 3, + "total_flos": 1.6248109379302195e+18, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-5000/training_args.bin b/checkpoint-5000/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..77433459daca6603950afc463ff989e817f81ac3 --- /dev/null +++ b/checkpoint-5000/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0f6a8853599a52b1f15984d75ce4b6434c335fdc67d4509d0b2ca0d8c613590a +size 3362 diff --git a/checkpoint-5100/README.md b/checkpoint-5100/README.md new file mode 100644 index 0000000000000000000000000000000000000000..53c0fe6b436905ad7a2c060e98fe1ebdeac1a2fe --- /dev/null +++ b/checkpoint-5100/README.md @@ -0,0 +1,15 @@ +--- +library_name: peft +--- +## Training procedure + + +The following `bitsandbytes` quantization config was used during training: +- load_in_8bit: True +- llm_int8_threshold: 6.0 +- llm_int8_skip_modules: None +- llm_int8_enable_fp32_cpu_offload: False +### Framework versions + + +- PEFT 0.5.0 diff --git a/checkpoint-5100/adapter_config.json b/checkpoint-5100/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..514ae99ac7471cc26016f7575e44dd8304f29b07 --- /dev/null +++ b/checkpoint-5100/adapter_config.json @@ -0,0 +1,21 @@ +{ + "auto_mapping": null, + "base_model_name_or_path": "meta-llama/Llama-2-7b-chat-hf", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "q_proj", + "v_proj" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-5100/adapter_model.bin b/checkpoint-5100/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..6daf1d81faccf7f03cdcaa9415c74e660aca9ed2 --- /dev/null +++ b/checkpoint-5100/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3dd6483d8ef790f2862eb7420a19cef3ff2e31dca8bb5f7fd925c7c8377c698b +size 16821197 diff --git a/checkpoint-5100/finetuning_args.json b/checkpoint-5100/finetuning_args.json new file mode 100644 index 0000000000000000000000000000000000000000..d01efc206b59c6f88548e8f3940579f2ed2af33b --- /dev/null +++ b/checkpoint-5100/finetuning_args.json @@ -0,0 +1,16 @@ +{ + "dpo_beta": 0.1, + "finetuning_type": "lora", + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "lora_rank": 8, + "lora_target": [ + "q_proj", + "v_proj" + ], + "name_module_trainable": "mlp", + "num_hidden_layers": 32, + "num_layer_trainable": 3, + "ppo_score_norm": false, + "resume_lora_training": true +} diff --git a/checkpoint-5100/optimizer.pt b/checkpoint-5100/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..af16742a8b5d9320a59afe9d531f920c7568a618 --- /dev/null +++ b/checkpoint-5100/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:56b76f1dcaa758c74a6733c7696ec46c7cb0248b01df411743c3e5d97bb34025 +size 33629893 diff --git a/checkpoint-5100/rng_state.pth b/checkpoint-5100/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..04e310fe7c2cd871f22a8699ef9dccf69976d8ac --- /dev/null +++ b/checkpoint-5100/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bdfbd5e7db7b513113c2ea3137ebf1895eed2524895a8862202dd854be9225e4 +size 14575 diff --git a/checkpoint-5100/scaler.pt b/checkpoint-5100/scaler.pt new file mode 100644 index 0000000000000000000000000000000000000000..5fb986e3b69f939dfa20264f4a43254920093aa6 --- /dev/null +++ b/checkpoint-5100/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:593bf2c6336d806980da7c170d40e010095f92cec7c82410f9e118c49250c01f +size 557 diff --git a/checkpoint-5100/scheduler.pt b/checkpoint-5100/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..a83978c630ae70e546f68d2fcb2fb6530e2a3396 --- /dev/null +++ b/checkpoint-5100/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5aa03409c5f52944c2936ca1457b671d7816ec89295d6313c5a0d5b6a5dda4e4 +size 627 diff --git a/checkpoint-5100/trainer_state.json b/checkpoint-5100/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..dae8e9557f7b7b43397b6605c021eefb9fc786d8 --- /dev/null +++ b/checkpoint-5100/trainer_state.json @@ -0,0 +1,6136 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 2.8200165883328725, + "global_step": 5100, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.9999895164082156e-05, + "loss": 1.6349, + "step": 5 + }, + { + "epoch": 0.01, + "learning_rate": 4.999958065720787e-05, + "loss": 1.6199, + "step": 10 + }, + { + "epoch": 0.01, + "learning_rate": 4.999905648201487e-05, + "loss": 1.4834, + "step": 15 + }, + { + "epoch": 0.01, + "learning_rate": 4.999832264289934e-05, + "loss": 1.3882, + "step": 20 + }, + { + "epoch": 0.01, + "learning_rate": 4.999737914601591e-05, + "loss": 1.3679, + "step": 25 + }, + { + "epoch": 0.02, + "learning_rate": 4.999622599927756e-05, + "loss": 1.2396, + "step": 30 + }, + { + "epoch": 0.02, + "learning_rate": 4.999486321235559e-05, + "loss": 1.321, + "step": 35 + }, + { + "epoch": 0.02, + "learning_rate": 4.9993290796679516e-05, + "loss": 1.2874, + "step": 40 + }, + { + "epoch": 0.02, + "learning_rate": 4.999150876543699e-05, + "loss": 1.2607, + "step": 45 + }, + { + "epoch": 0.03, + "learning_rate": 4.9989517133573694e-05, + "loss": 1.2454, + "step": 50 + }, + { + "epoch": 0.03, + "learning_rate": 4.9987315917793174e-05, + "loss": 1.2799, + "step": 55 + }, + { + "epoch": 0.03, + "learning_rate": 4.998490513655676e-05, + "loss": 1.2575, + "step": 60 + }, + { + "epoch": 0.04, + "learning_rate": 4.998228481008337e-05, + "loss": 1.2404, + "step": 65 + }, + { + "epoch": 0.04, + "learning_rate": 4.997945496034934e-05, + "loss": 1.2219, + "step": 70 + }, + { + "epoch": 0.04, + "learning_rate": 4.9976415611088267e-05, + "loss": 1.2241, + "step": 75 + }, + { + "epoch": 0.04, + "learning_rate": 4.997316678779079e-05, + "loss": 1.1716, + "step": 80 + }, + { + "epoch": 0.05, + "learning_rate": 4.996970851770438e-05, + "loss": 1.1883, + "step": 85 + }, + { + "epoch": 0.05, + "learning_rate": 4.9966040829833115e-05, + "loss": 1.205, + "step": 90 + }, + { + "epoch": 0.05, + "learning_rate": 4.9962163754937426e-05, + "loss": 1.1246, + "step": 95 + }, + { + "epoch": 0.06, + "learning_rate": 4.995807732553384e-05, + "loss": 1.1636, + "step": 100 + }, + { + "epoch": 0.06, + "learning_rate": 4.9953781575894723e-05, + "loss": 1.158, + "step": 105 + }, + { + "epoch": 0.06, + "learning_rate": 4.9949276542048e-05, + "loss": 1.1477, + "step": 110 + }, + { + "epoch": 0.06, + "learning_rate": 4.9944562261776805e-05, + "loss": 1.1678, + "step": 115 + }, + { + "epoch": 0.07, + "learning_rate": 4.9939638774619216e-05, + "loss": 1.1501, + "step": 120 + }, + { + "epoch": 0.07, + "learning_rate": 4.99345061218679e-05, + "loss": 1.1955, + "step": 125 + }, + { + "epoch": 0.07, + "learning_rate": 4.9929164346569756e-05, + "loss": 1.1724, + "step": 130 + }, + { + "epoch": 0.07, + "learning_rate": 4.9923613493525576e-05, + "loss": 1.177, + "step": 135 + }, + { + "epoch": 0.08, + "learning_rate": 4.991785360928968e-05, + "loss": 1.1418, + "step": 140 + }, + { + "epoch": 0.08, + "learning_rate": 4.991188474216947e-05, + "loss": 1.1898, + "step": 145 + }, + { + "epoch": 0.08, + "learning_rate": 4.9905706942225094e-05, + "loss": 1.1479, + "step": 150 + }, + { + "epoch": 0.09, + "learning_rate": 4.9899320261268966e-05, + "loss": 1.1356, + "step": 155 + }, + { + "epoch": 0.09, + "learning_rate": 4.989272475286537e-05, + "loss": 1.1397, + "step": 160 + }, + { + "epoch": 0.09, + "learning_rate": 4.9885920472330004e-05, + "loss": 1.1215, + "step": 165 + }, + { + "epoch": 0.09, + "learning_rate": 4.9878907476729516e-05, + "loss": 1.167, + "step": 170 + }, + { + "epoch": 0.1, + "learning_rate": 4.9871685824881e-05, + "loss": 1.1219, + "step": 175 + }, + { + "epoch": 0.1, + "learning_rate": 4.9864255577351534e-05, + "loss": 1.0835, + "step": 180 + }, + { + "epoch": 0.1, + "learning_rate": 4.985661679645769e-05, + "loss": 1.0721, + "step": 185 + }, + { + "epoch": 0.11, + "learning_rate": 4.9848769546264915e-05, + "loss": 1.0692, + "step": 190 + }, + { + "epoch": 0.11, + "learning_rate": 4.9840713892587146e-05, + "loss": 1.0488, + "step": 195 + }, + { + "epoch": 0.11, + "learning_rate": 4.983244990298609e-05, + "loss": 1.1285, + "step": 200 + }, + { + "epoch": 0.11, + "learning_rate": 4.982397764677081e-05, + "loss": 1.0832, + "step": 205 + }, + { + "epoch": 0.12, + "learning_rate": 4.981529719499704e-05, + "loss": 1.0652, + "step": 210 + }, + { + "epoch": 0.12, + "learning_rate": 4.980640862046663e-05, + "loss": 1.1043, + "step": 215 + }, + { + "epoch": 0.12, + "learning_rate": 4.979731199772693e-05, + "loss": 1.112, + "step": 220 + }, + { + "epoch": 0.12, + "learning_rate": 4.9788007403070146e-05, + "loss": 1.1029, + "step": 225 + }, + { + "epoch": 0.13, + "learning_rate": 4.977849491453277e-05, + "loss": 1.0869, + "step": 230 + }, + { + "epoch": 0.13, + "learning_rate": 4.976877461189481e-05, + "loss": 1.0843, + "step": 235 + }, + { + "epoch": 0.13, + "learning_rate": 4.975884657667922e-05, + "loss": 1.0789, + "step": 240 + }, + { + "epoch": 0.14, + "learning_rate": 4.974871089215118e-05, + "loss": 1.0449, + "step": 245 + }, + { + "epoch": 0.14, + "learning_rate": 4.9738367643317405e-05, + "loss": 1.1053, + "step": 250 + }, + { + "epoch": 0.14, + "learning_rate": 4.9727816916925395e-05, + "loss": 1.0651, + "step": 255 + }, + { + "epoch": 0.14, + "learning_rate": 4.971705880146276e-05, + "loss": 1.0828, + "step": 260 + }, + { + "epoch": 0.15, + "learning_rate": 4.970609338715646e-05, + "loss": 1.0932, + "step": 265 + }, + { + "epoch": 0.15, + "learning_rate": 4.969492076597203e-05, + "loss": 1.0648, + "step": 270 + }, + { + "epoch": 0.15, + "learning_rate": 4.968354103161283e-05, + "loss": 1.0948, + "step": 275 + }, + { + "epoch": 0.15, + "learning_rate": 4.967195427951926e-05, + "loss": 1.0721, + "step": 280 + }, + { + "epoch": 0.16, + "learning_rate": 4.9660160606867936e-05, + "loss": 1.124, + "step": 285 + }, + { + "epoch": 0.16, + "learning_rate": 4.9648160112570896e-05, + "loss": 1.0963, + "step": 290 + }, + { + "epoch": 0.16, + "learning_rate": 4.9635952897274773e-05, + "loss": 1.1078, + "step": 295 + }, + { + "epoch": 0.17, + "learning_rate": 4.9623539063359925e-05, + "loss": 1.1059, + "step": 300 + }, + { + "epoch": 0.17, + "learning_rate": 4.961091871493962e-05, + "loss": 1.1032, + "step": 305 + }, + { + "epoch": 0.17, + "learning_rate": 4.959809195785912e-05, + "loss": 1.0595, + "step": 310 + }, + { + "epoch": 0.17, + "learning_rate": 4.958505889969481e-05, + "loss": 1.1096, + "step": 315 + }, + { + "epoch": 0.18, + "learning_rate": 4.957181964975329e-05, + "loss": 1.0589, + "step": 320 + }, + { + "epoch": 0.18, + "learning_rate": 4.955837431907049e-05, + "loss": 1.0608, + "step": 325 + }, + { + "epoch": 0.18, + "learning_rate": 4.954472302041069e-05, + "loss": 1.0819, + "step": 330 + }, + { + "epoch": 0.19, + "learning_rate": 4.9530865868265605e-05, + "loss": 1.0759, + "step": 335 + }, + { + "epoch": 0.19, + "learning_rate": 4.951680297885342e-05, + "loss": 1.0515, + "step": 340 + }, + { + "epoch": 0.19, + "learning_rate": 4.950253447011779e-05, + "loss": 1.0371, + "step": 345 + }, + { + "epoch": 0.19, + "learning_rate": 4.948806046172691e-05, + "loss": 1.0619, + "step": 350 + }, + { + "epoch": 0.2, + "learning_rate": 4.947338107507245e-05, + "loss": 1.0757, + "step": 355 + }, + { + "epoch": 0.2, + "learning_rate": 4.945849643326857e-05, + "loss": 1.0686, + "step": 360 + }, + { + "epoch": 0.2, + "learning_rate": 4.9443406661150874e-05, + "loss": 1.0809, + "step": 365 + }, + { + "epoch": 0.2, + "learning_rate": 4.942811188527537e-05, + "loss": 1.0704, + "step": 370 + }, + { + "epoch": 0.21, + "learning_rate": 4.941261223391742e-05, + "loss": 1.0655, + "step": 375 + }, + { + "epoch": 0.21, + "learning_rate": 4.939690783707063e-05, + "loss": 1.1182, + "step": 380 + }, + { + "epoch": 0.21, + "learning_rate": 4.938099882644578e-05, + "loss": 1.081, + "step": 385 + }, + { + "epoch": 0.22, + "learning_rate": 4.9364885335469734e-05, + "loss": 1.0792, + "step": 390 + }, + { + "epoch": 0.22, + "learning_rate": 4.93485674992843e-05, + "loss": 1.0244, + "step": 395 + }, + { + "epoch": 0.22, + "learning_rate": 4.933204545474511e-05, + "loss": 1.0531, + "step": 400 + }, + { + "epoch": 0.22, + "learning_rate": 4.9315319340420465e-05, + "loss": 1.048, + "step": 405 + }, + { + "epoch": 0.23, + "learning_rate": 4.929838929659015e-05, + "loss": 1.04, + "step": 410 + }, + { + "epoch": 0.23, + "learning_rate": 4.9281255465244314e-05, + "loss": 1.0686, + "step": 415 + }, + { + "epoch": 0.23, + "learning_rate": 4.926391799008223e-05, + "loss": 1.0933, + "step": 420 + }, + { + "epoch": 0.24, + "learning_rate": 4.924637701651111e-05, + "loss": 1.0313, + "step": 425 + }, + { + "epoch": 0.24, + "learning_rate": 4.9228632691644874e-05, + "loss": 1.0811, + "step": 430 + }, + { + "epoch": 0.24, + "learning_rate": 4.921068516430293e-05, + "loss": 1.0617, + "step": 435 + }, + { + "epoch": 0.24, + "learning_rate": 4.919253458500892e-05, + "loss": 1.0482, + "step": 440 + }, + { + "epoch": 0.25, + "learning_rate": 4.9174181105989445e-05, + "loss": 1.0681, + "step": 445 + }, + { + "epoch": 0.25, + "learning_rate": 4.9155624881172834e-05, + "loss": 1.0476, + "step": 450 + }, + { + "epoch": 0.25, + "learning_rate": 4.913686606618777e-05, + "loss": 1.0463, + "step": 455 + }, + { + "epoch": 0.25, + "learning_rate": 4.911790481836208e-05, + "loss": 1.0213, + "step": 460 + }, + { + "epoch": 0.26, + "learning_rate": 4.909874129672133e-05, + "loss": 0.9855, + "step": 465 + }, + { + "epoch": 0.26, + "learning_rate": 4.907937566198757e-05, + "loss": 1.045, + "step": 470 + }, + { + "epoch": 0.26, + "learning_rate": 4.9059808076577914e-05, + "loss": 1.0442, + "step": 475 + }, + { + "epoch": 0.27, + "learning_rate": 4.904003870460323e-05, + "loss": 1.0354, + "step": 480 + }, + { + "epoch": 0.27, + "learning_rate": 4.9020067711866735e-05, + "loss": 1.0331, + "step": 485 + }, + { + "epoch": 0.27, + "learning_rate": 4.899989526586261e-05, + "loss": 1.0585, + "step": 490 + }, + { + "epoch": 0.27, + "learning_rate": 4.8979521535774636e-05, + "loss": 1.0223, + "step": 495 + }, + { + "epoch": 0.28, + "learning_rate": 4.895894669247468e-05, + "loss": 1.0118, + "step": 500 + }, + { + "epoch": 0.28, + "learning_rate": 4.8938170908521356e-05, + "loss": 1.0508, + "step": 505 + }, + { + "epoch": 0.28, + "learning_rate": 4.8917194358158534e-05, + "loss": 1.0656, + "step": 510 + }, + { + "epoch": 0.28, + "learning_rate": 4.8896017217313886e-05, + "loss": 1.0655, + "step": 515 + }, + { + "epoch": 0.29, + "learning_rate": 4.887463966359741e-05, + "loss": 1.0833, + "step": 520 + }, + { + "epoch": 0.29, + "learning_rate": 4.8853061876299956e-05, + "loss": 1.068, + "step": 525 + }, + { + "epoch": 0.29, + "learning_rate": 4.8831284036391684e-05, + "loss": 1.0487, + "step": 530 + }, + { + "epoch": 0.3, + "learning_rate": 4.880930632652058e-05, + "loss": 1.059, + "step": 535 + }, + { + "epoch": 0.3, + "learning_rate": 4.878712893101092e-05, + "loss": 1.0408, + "step": 540 + }, + { + "epoch": 0.3, + "learning_rate": 4.876475203586171e-05, + "loss": 0.9976, + "step": 545 + }, + { + "epoch": 0.3, + "learning_rate": 4.874217582874514e-05, + "loss": 1.0358, + "step": 550 + }, + { + "epoch": 0.31, + "learning_rate": 4.8719400499005e-05, + "loss": 1.0073, + "step": 555 + }, + { + "epoch": 0.31, + "learning_rate": 4.869642623765509e-05, + "loss": 1.0742, + "step": 560 + }, + { + "epoch": 0.31, + "learning_rate": 4.867325323737765e-05, + "loss": 1.0463, + "step": 565 + }, + { + "epoch": 0.32, + "learning_rate": 4.864988169252168e-05, + "loss": 1.0242, + "step": 570 + }, + { + "epoch": 0.32, + "learning_rate": 4.8626311799101375e-05, + "loss": 1.0773, + "step": 575 + }, + { + "epoch": 0.32, + "learning_rate": 4.860254375479446e-05, + "loss": 1.0371, + "step": 580 + }, + { + "epoch": 0.32, + "learning_rate": 4.8578577758940504e-05, + "loss": 1.0399, + "step": 585 + }, + { + "epoch": 0.33, + "learning_rate": 4.855441401253928e-05, + "loss": 1.0411, + "step": 590 + }, + { + "epoch": 0.33, + "learning_rate": 4.8530052718249076e-05, + "loss": 1.0476, + "step": 595 + }, + { + "epoch": 0.33, + "learning_rate": 4.850549408038498e-05, + "loss": 1.0075, + "step": 600 + }, + { + "epoch": 0.33, + "learning_rate": 4.848073830491717e-05, + "loss": 1.0045, + "step": 605 + }, + { + "epoch": 0.34, + "learning_rate": 4.845578559946923e-05, + "loss": 1.0368, + "step": 610 + }, + { + "epoch": 0.34, + "learning_rate": 4.8430636173316306e-05, + "loss": 1.0156, + "step": 615 + }, + { + "epoch": 0.34, + "learning_rate": 4.840529023738348e-05, + "loss": 1.0334, + "step": 620 + }, + { + "epoch": 0.35, + "learning_rate": 4.837974800424389e-05, + "loss": 0.9994, + "step": 625 + }, + { + "epoch": 0.35, + "learning_rate": 4.8354009688117026e-05, + "loss": 1.0467, + "step": 630 + }, + { + "epoch": 0.35, + "learning_rate": 4.8328075504866874e-05, + "loss": 0.9968, + "step": 635 + }, + { + "epoch": 0.35, + "learning_rate": 4.8301945672000164e-05, + "loss": 1.041, + "step": 640 + }, + { + "epoch": 0.36, + "learning_rate": 4.8275620408664487e-05, + "loss": 0.9811, + "step": 645 + }, + { + "epoch": 0.36, + "learning_rate": 4.8249099935646494e-05, + "loss": 1.0235, + "step": 650 + }, + { + "epoch": 0.36, + "learning_rate": 4.822238447537003e-05, + "loss": 1.0152, + "step": 655 + }, + { + "epoch": 0.36, + "learning_rate": 4.819547425189429e-05, + "loss": 1.038, + "step": 660 + }, + { + "epoch": 0.37, + "learning_rate": 4.81683694909119e-05, + "loss": 1.0387, + "step": 665 + }, + { + "epoch": 0.37, + "learning_rate": 4.814107041974707e-05, + "loss": 1.019, + "step": 670 + }, + { + "epoch": 0.37, + "learning_rate": 4.811357726735366e-05, + "loss": 1.0403, + "step": 675 + }, + { + "epoch": 0.38, + "learning_rate": 4.808589026431324e-05, + "loss": 1.0828, + "step": 680 + }, + { + "epoch": 0.38, + "learning_rate": 4.805800964283322e-05, + "loss": 1.0128, + "step": 685 + }, + { + "epoch": 0.38, + "learning_rate": 4.802993563674483e-05, + "loss": 1.0502, + "step": 690 + }, + { + "epoch": 0.38, + "learning_rate": 4.80016684815012e-05, + "loss": 1.0169, + "step": 695 + }, + { + "epoch": 0.39, + "learning_rate": 4.7973208414175406e-05, + "loss": 1.0316, + "step": 700 + }, + { + "epoch": 0.39, + "learning_rate": 4.794455567345842e-05, + "loss": 1.0263, + "step": 705 + }, + { + "epoch": 0.39, + "learning_rate": 4.791571049965714e-05, + "loss": 1.0801, + "step": 710 + }, + { + "epoch": 0.4, + "learning_rate": 4.7886673134692404e-05, + "loss": 1.0575, + "step": 715 + }, + { + "epoch": 0.4, + "learning_rate": 4.7857443822096905e-05, + "loss": 0.9767, + "step": 720 + }, + { + "epoch": 0.4, + "learning_rate": 4.782802280701319e-05, + "loss": 0.9666, + "step": 725 + }, + { + "epoch": 0.4, + "learning_rate": 4.77984103361916e-05, + "loss": 0.9983, + "step": 730 + }, + { + "epoch": 0.41, + "learning_rate": 4.776860665798816e-05, + "loss": 1.0219, + "step": 735 + }, + { + "epoch": 0.41, + "learning_rate": 4.773861202236257e-05, + "loss": 0.9963, + "step": 740 + }, + { + "epoch": 0.41, + "learning_rate": 4.770842668087602e-05, + "loss": 1.0219, + "step": 745 + }, + { + "epoch": 0.41, + "learning_rate": 4.767805088668916e-05, + "loss": 1.0029, + "step": 750 + }, + { + "epoch": 0.42, + "learning_rate": 4.7647484894559936e-05, + "loss": 1.0077, + "step": 755 + }, + { + "epoch": 0.42, + "learning_rate": 4.7616728960841444e-05, + "loss": 0.9908, + "step": 760 + }, + { + "epoch": 0.42, + "learning_rate": 4.758578334347981e-05, + "loss": 1.0092, + "step": 765 + }, + { + "epoch": 0.43, + "learning_rate": 4.7554648302012015e-05, + "loss": 0.9874, + "step": 770 + }, + { + "epoch": 0.43, + "learning_rate": 4.7523324097563706e-05, + "loss": 1.0185, + "step": 775 + }, + { + "epoch": 0.43, + "learning_rate": 4.749181099284703e-05, + "loss": 1.0533, + "step": 780 + }, + { + "epoch": 0.43, + "learning_rate": 4.746010925215839e-05, + "loss": 1.0083, + "step": 785 + }, + { + "epoch": 0.44, + "learning_rate": 4.74282191413763e-05, + "loss": 1.0191, + "step": 790 + }, + { + "epoch": 0.44, + "learning_rate": 4.7396140927959045e-05, + "loss": 0.9786, + "step": 795 + }, + { + "epoch": 0.44, + "learning_rate": 4.7363874880942574e-05, + "loss": 1.0197, + "step": 800 + }, + { + "epoch": 0.45, + "learning_rate": 4.733142127093813e-05, + "loss": 1.0471, + "step": 805 + }, + { + "epoch": 0.45, + "learning_rate": 4.7298780370130014e-05, + "loss": 1.0134, + "step": 810 + }, + { + "epoch": 0.45, + "learning_rate": 4.726595245227336e-05, + "loss": 0.9801, + "step": 815 + }, + { + "epoch": 0.45, + "learning_rate": 4.723293779269173e-05, + "loss": 0.9944, + "step": 820 + }, + { + "epoch": 0.46, + "learning_rate": 4.7199736668274924e-05, + "loss": 1.0358, + "step": 825 + }, + { + "epoch": 0.46, + "learning_rate": 4.716634935747655e-05, + "loss": 0.9907, + "step": 830 + }, + { + "epoch": 0.46, + "learning_rate": 4.713277614031177e-05, + "loss": 1.0698, + "step": 835 + }, + { + "epoch": 0.46, + "learning_rate": 4.70990172983549e-05, + "loss": 1.0466, + "step": 840 + }, + { + "epoch": 0.47, + "learning_rate": 4.706507311473707e-05, + "loss": 0.9489, + "step": 845 + }, + { + "epoch": 0.47, + "learning_rate": 4.703094387414385e-05, + "loss": 0.9936, + "step": 850 + }, + { + "epoch": 0.47, + "learning_rate": 4.699662986281288e-05, + "loss": 0.9672, + "step": 855 + }, + { + "epoch": 0.48, + "learning_rate": 4.696213136853141e-05, + "loss": 1.0164, + "step": 860 + }, + { + "epoch": 0.48, + "learning_rate": 4.6927448680633954e-05, + "loss": 0.9817, + "step": 865 + }, + { + "epoch": 0.48, + "learning_rate": 4.689258208999983e-05, + "loss": 1.028, + "step": 870 + }, + { + "epoch": 0.48, + "learning_rate": 4.6857531889050716e-05, + "loss": 1.0239, + "step": 875 + }, + { + "epoch": 0.49, + "learning_rate": 4.682229837174821e-05, + "loss": 1.0378, + "step": 880 + }, + { + "epoch": 0.49, + "learning_rate": 4.678688183359135e-05, + "loss": 0.9908, + "step": 885 + }, + { + "epoch": 0.49, + "learning_rate": 4.675128257161418e-05, + "loss": 0.9384, + "step": 890 + }, + { + "epoch": 0.49, + "learning_rate": 4.671550088438319e-05, + "loss": 0.9481, + "step": 895 + }, + { + "epoch": 0.5, + "learning_rate": 4.6679537071994874e-05, + "loss": 0.9541, + "step": 900 + }, + { + "epoch": 0.5, + "learning_rate": 4.6643391436073165e-05, + "loss": 0.9787, + "step": 905 + }, + { + "epoch": 0.5, + "learning_rate": 4.660706427976693e-05, + "loss": 0.9897, + "step": 910 + }, + { + "epoch": 0.51, + "learning_rate": 4.657055590774745e-05, + "loss": 0.9947, + "step": 915 + }, + { + "epoch": 0.51, + "learning_rate": 4.6533866626205805e-05, + "loss": 0.9614, + "step": 920 + }, + { + "epoch": 0.51, + "learning_rate": 4.649699674285036e-05, + "loss": 1.0323, + "step": 925 + }, + { + "epoch": 0.51, + "learning_rate": 4.645994656690417e-05, + "loss": 1.0345, + "step": 930 + }, + { + "epoch": 0.52, + "learning_rate": 4.642271640910235e-05, + "loss": 1.0432, + "step": 935 + }, + { + "epoch": 0.52, + "learning_rate": 4.638530658168954e-05, + "loss": 1.0102, + "step": 940 + }, + { + "epoch": 0.52, + "learning_rate": 4.6347717398417203e-05, + "loss": 1.0361, + "step": 945 + }, + { + "epoch": 0.53, + "learning_rate": 4.6309949174541096e-05, + "loss": 0.9818, + "step": 950 + }, + { + "epoch": 0.53, + "learning_rate": 4.627200222681851e-05, + "loss": 1.0293, + "step": 955 + }, + { + "epoch": 0.53, + "learning_rate": 4.6233876873505694e-05, + "loss": 1.0125, + "step": 960 + }, + { + "epoch": 0.53, + "learning_rate": 4.619557343435516e-05, + "loss": 1.0039, + "step": 965 + }, + { + "epoch": 0.54, + "learning_rate": 4.615709223061302e-05, + "loss": 1.0146, + "step": 970 + }, + { + "epoch": 0.54, + "learning_rate": 4.611843358501624e-05, + "loss": 1.0352, + "step": 975 + }, + { + "epoch": 0.54, + "learning_rate": 4.6079597821789993e-05, + "loss": 1.0405, + "step": 980 + }, + { + "epoch": 0.54, + "learning_rate": 4.604058526664491e-05, + "loss": 0.9834, + "step": 985 + }, + { + "epoch": 0.55, + "learning_rate": 4.600139624677436e-05, + "loss": 1.0252, + "step": 990 + }, + { + "epoch": 0.55, + "learning_rate": 4.596203109085168e-05, + "loss": 1.0516, + "step": 995 + }, + { + "epoch": 0.55, + "learning_rate": 4.5922490129027464e-05, + "loss": 0.9762, + "step": 1000 + }, + { + "epoch": 0.56, + "learning_rate": 4.588277369292674e-05, + "loss": 0.9801, + "step": 1005 + }, + { + "epoch": 0.56, + "learning_rate": 4.5842882115646234e-05, + "loss": 1.0127, + "step": 1010 + }, + { + "epoch": 0.56, + "learning_rate": 4.580281573175157e-05, + "loss": 1.0415, + "step": 1015 + }, + { + "epoch": 0.56, + "learning_rate": 4.576257487727442e-05, + "loss": 0.9974, + "step": 1020 + }, + { + "epoch": 0.57, + "learning_rate": 4.572215988970974e-05, + "loss": 1.0097, + "step": 1025 + }, + { + "epoch": 0.57, + "learning_rate": 4.568157110801293e-05, + "loss": 1.0004, + "step": 1030 + }, + { + "epoch": 0.57, + "learning_rate": 4.5640808872596944e-05, + "loss": 0.9932, + "step": 1035 + }, + { + "epoch": 0.58, + "learning_rate": 4.5599873525329505e-05, + "loss": 0.9437, + "step": 1040 + }, + { + "epoch": 0.58, + "learning_rate": 4.555876540953019e-05, + "loss": 1.0004, + "step": 1045 + }, + { + "epoch": 0.58, + "learning_rate": 4.551748486996755e-05, + "loss": 0.9743, + "step": 1050 + }, + { + "epoch": 0.58, + "learning_rate": 4.547603225285626e-05, + "loss": 1.0303, + "step": 1055 + }, + { + "epoch": 0.59, + "learning_rate": 4.543440790585417e-05, + "loss": 1.0225, + "step": 1060 + }, + { + "epoch": 0.59, + "learning_rate": 4.539261217805939e-05, + "loss": 1.0102, + "step": 1065 + }, + { + "epoch": 0.59, + "learning_rate": 4.535064542000743e-05, + "loss": 1.0087, + "step": 1070 + }, + { + "epoch": 0.59, + "learning_rate": 4.5308507983668165e-05, + "loss": 0.952, + "step": 1075 + }, + { + "epoch": 0.6, + "learning_rate": 4.526620022244293e-05, + "loss": 0.9767, + "step": 1080 + }, + { + "epoch": 0.6, + "learning_rate": 4.522372249116158e-05, + "loss": 1.0049, + "step": 1085 + }, + { + "epoch": 0.6, + "learning_rate": 4.5181075146079456e-05, + "loss": 1.0397, + "step": 1090 + }, + { + "epoch": 0.61, + "learning_rate": 4.5138258544874455e-05, + "loss": 1.0167, + "step": 1095 + }, + { + "epoch": 0.61, + "learning_rate": 4.5095273046643985e-05, + "loss": 0.9966, + "step": 1100 + }, + { + "epoch": 0.61, + "learning_rate": 4.5052119011901986e-05, + "loss": 0.983, + "step": 1105 + }, + { + "epoch": 0.61, + "learning_rate": 4.500879680257587e-05, + "loss": 1.0301, + "step": 1110 + }, + { + "epoch": 0.62, + "learning_rate": 4.4965306782003535e-05, + "loss": 1.0371, + "step": 1115 + }, + { + "epoch": 0.62, + "learning_rate": 4.492164931493028e-05, + "loss": 1.0083, + "step": 1120 + }, + { + "epoch": 0.62, + "learning_rate": 4.487782476750575e-05, + "loss": 0.9728, + "step": 1125 + }, + { + "epoch": 0.62, + "learning_rate": 4.4833833507280884e-05, + "loss": 0.9892, + "step": 1130 + }, + { + "epoch": 0.63, + "learning_rate": 4.4789675903204805e-05, + "loss": 0.9847, + "step": 1135 + }, + { + "epoch": 0.63, + "learning_rate": 4.474535232562176e-05, + "loss": 1.0545, + "step": 1140 + }, + { + "epoch": 0.63, + "learning_rate": 4.470086314626797e-05, + "loss": 1.0204, + "step": 1145 + }, + { + "epoch": 0.64, + "learning_rate": 4.465620873826856e-05, + "loss": 0.9648, + "step": 1150 + }, + { + "epoch": 0.64, + "learning_rate": 4.46113894761344e-05, + "loss": 0.9545, + "step": 1155 + }, + { + "epoch": 0.64, + "learning_rate": 4.456640573575896e-05, + "loss": 1.0256, + "step": 1160 + }, + { + "epoch": 0.64, + "learning_rate": 4.4521257894415183e-05, + "loss": 0.99, + "step": 1165 + }, + { + "epoch": 0.65, + "learning_rate": 4.44759463307523e-05, + "loss": 0.9906, + "step": 1170 + }, + { + "epoch": 0.65, + "learning_rate": 4.443047142479266e-05, + "loss": 0.9876, + "step": 1175 + }, + { + "epoch": 0.65, + "learning_rate": 4.4384833557928553e-05, + "loss": 1.0495, + "step": 1180 + }, + { + "epoch": 0.66, + "learning_rate": 4.4339033112918966e-05, + "loss": 0.9869, + "step": 1185 + }, + { + "epoch": 0.66, + "learning_rate": 4.4293070473886456e-05, + "loss": 1.0299, + "step": 1190 + }, + { + "epoch": 0.66, + "learning_rate": 4.424694602631385e-05, + "loss": 1.0073, + "step": 1195 + }, + { + "epoch": 0.66, + "learning_rate": 4.420066015704105e-05, + "loss": 1.0023, + "step": 1200 + }, + { + "epoch": 0.67, + "learning_rate": 4.41542132542618e-05, + "loss": 0.9462, + "step": 1205 + }, + { + "epoch": 0.67, + "learning_rate": 4.410760570752037e-05, + "loss": 1.0116, + "step": 1210 + }, + { + "epoch": 0.67, + "learning_rate": 4.4060837907708375e-05, + "loss": 0.9652, + "step": 1215 + }, + { + "epoch": 0.67, + "learning_rate": 4.401391024706142e-05, + "loss": 1.0411, + "step": 1220 + }, + { + "epoch": 0.68, + "learning_rate": 4.396682311915586e-05, + "loss": 0.9691, + "step": 1225 + }, + { + "epoch": 0.68, + "learning_rate": 4.3919576918905495e-05, + "loss": 0.961, + "step": 1230 + }, + { + "epoch": 0.68, + "learning_rate": 4.387217204255819e-05, + "loss": 0.9602, + "step": 1235 + }, + { + "epoch": 0.69, + "learning_rate": 4.3824608887692666e-05, + "loss": 1.0197, + "step": 1240 + }, + { + "epoch": 0.69, + "learning_rate": 4.377688785321507e-05, + "loss": 0.9982, + "step": 1245 + }, + { + "epoch": 0.69, + "learning_rate": 4.372900933935569e-05, + "loss": 0.986, + "step": 1250 + }, + { + "epoch": 0.69, + "learning_rate": 4.368097374766556e-05, + "loss": 0.9744, + "step": 1255 + }, + { + "epoch": 0.7, + "learning_rate": 4.3632781481013105e-05, + "loss": 1.0078, + "step": 1260 + }, + { + "epoch": 0.7, + "learning_rate": 4.358443294358077e-05, + "loss": 0.9865, + "step": 1265 + }, + { + "epoch": 0.7, + "learning_rate": 4.35359285408616e-05, + "loss": 0.976, + "step": 1270 + }, + { + "epoch": 0.71, + "learning_rate": 4.348726867965591e-05, + "loss": 1.0141, + "step": 1275 + }, + { + "epoch": 0.71, + "learning_rate": 4.343845376806777e-05, + "loss": 0.952, + "step": 1280 + }, + { + "epoch": 0.71, + "learning_rate": 4.338948421550169e-05, + "loss": 1.0239, + "step": 1285 + }, + { + "epoch": 0.71, + "learning_rate": 4.334036043265909e-05, + "loss": 0.994, + "step": 1290 + }, + { + "epoch": 0.72, + "learning_rate": 4.329108283153492e-05, + "loss": 1.0505, + "step": 1295 + }, + { + "epoch": 0.72, + "learning_rate": 4.3241651825414195e-05, + "loss": 0.9919, + "step": 1300 + }, + { + "epoch": 0.72, + "learning_rate": 4.31920678288685e-05, + "loss": 1.0169, + "step": 1305 + }, + { + "epoch": 0.72, + "learning_rate": 4.3142331257752546e-05, + "loss": 0.9938, + "step": 1310 + }, + { + "epoch": 0.73, + "learning_rate": 4.309244252920064e-05, + "loss": 0.92, + "step": 1315 + }, + { + "epoch": 0.73, + "learning_rate": 4.304240206162326e-05, + "loss": 0.9537, + "step": 1320 + }, + { + "epoch": 0.73, + "learning_rate": 4.299221027470345e-05, + "loss": 0.996, + "step": 1325 + }, + { + "epoch": 0.74, + "learning_rate": 4.294186758939339e-05, + "loss": 0.9652, + "step": 1330 + }, + { + "epoch": 0.74, + "learning_rate": 4.2891374427910795e-05, + "loss": 1.0138, + "step": 1335 + }, + { + "epoch": 0.74, + "learning_rate": 4.284073121373544e-05, + "loss": 0.9267, + "step": 1340 + }, + { + "epoch": 0.74, + "learning_rate": 4.278993837160553e-05, + "loss": 0.9899, + "step": 1345 + }, + { + "epoch": 0.75, + "learning_rate": 4.273899632751422e-05, + "loss": 0.963, + "step": 1350 + }, + { + "epoch": 0.75, + "learning_rate": 4.2687905508705974e-05, + "loss": 1.0066, + "step": 1355 + }, + { + "epoch": 0.75, + "learning_rate": 4.263666634367303e-05, + "loss": 0.9939, + "step": 1360 + }, + { + "epoch": 0.75, + "learning_rate": 4.258527926215178e-05, + "loss": 0.9667, + "step": 1365 + }, + { + "epoch": 0.76, + "learning_rate": 4.253374469511917e-05, + "loss": 0.9999, + "step": 1370 + }, + { + "epoch": 0.76, + "learning_rate": 4.248206307478909e-05, + "loss": 0.9738, + "step": 1375 + }, + { + "epoch": 0.76, + "learning_rate": 4.243023483460875e-05, + "loss": 0.9829, + "step": 1380 + }, + { + "epoch": 0.77, + "learning_rate": 4.237826040925503e-05, + "loss": 1.0107, + "step": 1385 + }, + { + "epoch": 0.77, + "learning_rate": 4.232614023463088e-05, + "loss": 0.9992, + "step": 1390 + }, + { + "epoch": 0.77, + "learning_rate": 4.227387474786159e-05, + "loss": 0.9566, + "step": 1395 + }, + { + "epoch": 0.77, + "learning_rate": 4.222146438729119e-05, + "loss": 0.9722, + "step": 1400 + }, + { + "epoch": 0.78, + "learning_rate": 4.216890959247873e-05, + "loss": 0.988, + "step": 1405 + }, + { + "epoch": 0.78, + "learning_rate": 4.211621080419463e-05, + "loss": 1.0103, + "step": 1410 + }, + { + "epoch": 0.78, + "learning_rate": 4.206336846441695e-05, + "loss": 0.9805, + "step": 1415 + }, + { + "epoch": 0.79, + "learning_rate": 4.201038301632772e-05, + "loss": 1.0177, + "step": 1420 + }, + { + "epoch": 0.79, + "learning_rate": 4.195725490430917e-05, + "loss": 0.9938, + "step": 1425 + }, + { + "epoch": 0.79, + "learning_rate": 4.190398457394007e-05, + "loss": 1.0276, + "step": 1430 + }, + { + "epoch": 0.79, + "learning_rate": 4.1850572471991924e-05, + "loss": 0.9909, + "step": 1435 + }, + { + "epoch": 0.8, + "learning_rate": 4.1797019046425264e-05, + "loss": 0.9794, + "step": 1440 + }, + { + "epoch": 0.8, + "learning_rate": 4.1743324746385914e-05, + "loss": 1.0073, + "step": 1445 + }, + { + "epoch": 0.8, + "learning_rate": 4.1689490022201154e-05, + "loss": 1.0106, + "step": 1450 + }, + { + "epoch": 0.8, + "learning_rate": 4.163551532537601e-05, + "loss": 1.0241, + "step": 1455 + }, + { + "epoch": 0.81, + "learning_rate": 4.1581401108589425e-05, + "loss": 1.0178, + "step": 1460 + }, + { + "epoch": 0.81, + "learning_rate": 4.1527147825690495e-05, + "loss": 0.9426, + "step": 1465 + }, + { + "epoch": 0.81, + "learning_rate": 4.1472755931694626e-05, + "loss": 0.9642, + "step": 1470 + }, + { + "epoch": 0.82, + "learning_rate": 4.141822588277976e-05, + "loss": 0.9678, + "step": 1475 + }, + { + "epoch": 0.82, + "learning_rate": 4.136355813628251e-05, + "loss": 0.9654, + "step": 1480 + }, + { + "epoch": 0.82, + "learning_rate": 4.130875315069435e-05, + "loss": 0.9748, + "step": 1485 + }, + { + "epoch": 0.82, + "learning_rate": 4.125381138565775e-05, + "loss": 0.9321, + "step": 1490 + }, + { + "epoch": 0.83, + "learning_rate": 4.1198733301962346e-05, + "loss": 0.9885, + "step": 1495 + }, + { + "epoch": 0.83, + "learning_rate": 4.114351936154105e-05, + "loss": 0.9385, + "step": 1500 + }, + { + "epoch": 0.83, + "learning_rate": 4.108817002746619e-05, + "loss": 0.9362, + "step": 1505 + }, + { + "epoch": 0.83, + "learning_rate": 4.1032685763945625e-05, + "loss": 0.9764, + "step": 1510 + }, + { + "epoch": 0.84, + "learning_rate": 4.097706703631886e-05, + "loss": 0.961, + "step": 1515 + }, + { + "epoch": 0.84, + "learning_rate": 4.092131431105312e-05, + "loss": 0.9818, + "step": 1520 + }, + { + "epoch": 0.84, + "learning_rate": 4.086542805573945e-05, + "loss": 0.9855, + "step": 1525 + }, + { + "epoch": 0.85, + "learning_rate": 4.080940873908881e-05, + "loss": 0.9618, + "step": 1530 + }, + { + "epoch": 0.85, + "learning_rate": 4.07532568309281e-05, + "loss": 1.0102, + "step": 1535 + }, + { + "epoch": 0.85, + "learning_rate": 4.069697280219628e-05, + "loss": 0.9773, + "step": 1540 + }, + { + "epoch": 0.85, + "learning_rate": 4.0640557124940376e-05, + "loss": 0.9687, + "step": 1545 + }, + { + "epoch": 0.86, + "learning_rate": 4.058401027231152e-05, + "loss": 0.9818, + "step": 1550 + }, + { + "epoch": 0.86, + "learning_rate": 4.052733271856103e-05, + "loss": 0.9911, + "step": 1555 + }, + { + "epoch": 0.86, + "learning_rate": 4.0470524939036355e-05, + "loss": 0.9694, + "step": 1560 + }, + { + "epoch": 0.87, + "learning_rate": 4.0413587410177155e-05, + "loss": 0.9374, + "step": 1565 + }, + { + "epoch": 0.87, + "learning_rate": 4.035652060951128e-05, + "loss": 0.956, + "step": 1570 + }, + { + "epoch": 0.87, + "learning_rate": 4.0299325015650774e-05, + "loss": 0.9491, + "step": 1575 + }, + { + "epoch": 0.87, + "learning_rate": 4.024200110828783e-05, + "loss": 0.9569, + "step": 1580 + }, + { + "epoch": 0.88, + "learning_rate": 4.018454936819082e-05, + "loss": 0.9627, + "step": 1585 + }, + { + "epoch": 0.88, + "learning_rate": 4.012697027720018e-05, + "loss": 0.9922, + "step": 1590 + }, + { + "epoch": 0.88, + "learning_rate": 4.0069264318224506e-05, + "loss": 0.9703, + "step": 1595 + }, + { + "epoch": 0.88, + "learning_rate": 4.0011431975236337e-05, + "loss": 0.9436, + "step": 1600 + }, + { + "epoch": 0.89, + "learning_rate": 3.995347373326822e-05, + "loss": 0.961, + "step": 1605 + }, + { + "epoch": 0.89, + "learning_rate": 3.989539007840861e-05, + "loss": 0.9247, + "step": 1610 + }, + { + "epoch": 0.89, + "learning_rate": 3.983718149779775e-05, + "loss": 0.9537, + "step": 1615 + }, + { + "epoch": 0.9, + "learning_rate": 3.9778848479623656e-05, + "loss": 0.9415, + "step": 1620 + }, + { + "epoch": 0.9, + "learning_rate": 3.972039151311795e-05, + "loss": 0.9954, + "step": 1625 + }, + { + "epoch": 0.9, + "learning_rate": 3.966181108855183e-05, + "loss": 0.9451, + "step": 1630 + }, + { + "epoch": 0.9, + "learning_rate": 3.960310769723189e-05, + "loss": 0.9977, + "step": 1635 + }, + { + "epoch": 0.91, + "learning_rate": 3.9544281831496034e-05, + "loss": 0.9806, + "step": 1640 + }, + { + "epoch": 0.91, + "learning_rate": 3.9485333984709374e-05, + "loss": 0.9851, + "step": 1645 + }, + { + "epoch": 0.91, + "learning_rate": 3.942626465126001e-05, + "loss": 1.0089, + "step": 1650 + }, + { + "epoch": 0.92, + "learning_rate": 3.9367074326555e-05, + "loss": 0.9562, + "step": 1655 + }, + { + "epoch": 0.92, + "learning_rate": 3.930776350701609e-05, + "loss": 0.9892, + "step": 1660 + }, + { + "epoch": 0.92, + "learning_rate": 3.92483326900756e-05, + "loss": 0.9875, + "step": 1665 + }, + { + "epoch": 0.92, + "learning_rate": 3.91887823741723e-05, + "loss": 1.0012, + "step": 1670 + }, + { + "epoch": 0.93, + "learning_rate": 3.9129113058747136e-05, + "loss": 0.97, + "step": 1675 + }, + { + "epoch": 0.93, + "learning_rate": 3.9069325244239095e-05, + "loss": 0.9793, + "step": 1680 + }, + { + "epoch": 0.93, + "learning_rate": 3.900941943208103e-05, + "loss": 0.9864, + "step": 1685 + }, + { + "epoch": 0.93, + "learning_rate": 3.894939612469539e-05, + "loss": 0.9596, + "step": 1690 + }, + { + "epoch": 0.94, + "learning_rate": 3.888925582549006e-05, + "loss": 0.9679, + "step": 1695 + }, + { + "epoch": 0.94, + "learning_rate": 3.882899903885412e-05, + "loss": 0.9395, + "step": 1700 + }, + { + "epoch": 0.94, + "learning_rate": 3.876862627015361e-05, + "loss": 0.9774, + "step": 1705 + }, + { + "epoch": 0.95, + "learning_rate": 3.87081380257273e-05, + "loss": 0.9686, + "step": 1710 + }, + { + "epoch": 0.95, + "learning_rate": 3.864753481288244e-05, + "loss": 0.953, + "step": 1715 + }, + { + "epoch": 0.95, + "learning_rate": 3.8586817139890515e-05, + "loss": 0.9719, + "step": 1720 + }, + { + "epoch": 0.95, + "learning_rate": 3.852598551598294e-05, + "loss": 0.9748, + "step": 1725 + }, + { + "epoch": 0.96, + "learning_rate": 3.8465040451346874e-05, + "loss": 0.9741, + "step": 1730 + }, + { + "epoch": 0.96, + "learning_rate": 3.8403982457120836e-05, + "loss": 0.9747, + "step": 1735 + }, + { + "epoch": 0.96, + "learning_rate": 3.834281204539051e-05, + "loss": 0.9791, + "step": 1740 + }, + { + "epoch": 0.96, + "learning_rate": 3.828152972918438e-05, + "loss": 0.9704, + "step": 1745 + }, + { + "epoch": 0.97, + "learning_rate": 3.82201360224695e-05, + "loss": 0.9556, + "step": 1750 + }, + { + "epoch": 0.97, + "learning_rate": 3.815863144014711e-05, + "loss": 0.9552, + "step": 1755 + }, + { + "epoch": 0.97, + "learning_rate": 3.809701649804834e-05, + "loss": 0.9594, + "step": 1760 + }, + { + "epoch": 0.98, + "learning_rate": 3.8035291712929926e-05, + "loss": 0.959, + "step": 1765 + }, + { + "epoch": 0.98, + "learning_rate": 3.797345760246982e-05, + "loss": 1.0254, + "step": 1770 + }, + { + "epoch": 0.98, + "learning_rate": 3.791151468526289e-05, + "loss": 0.9925, + "step": 1775 + }, + { + "epoch": 0.98, + "learning_rate": 3.784946348081654e-05, + "loss": 0.9516, + "step": 1780 + }, + { + "epoch": 0.99, + "learning_rate": 3.7787304509546365e-05, + "loss": 0.954, + "step": 1785 + }, + { + "epoch": 0.99, + "learning_rate": 3.7725038292771774e-05, + "loss": 0.9465, + "step": 1790 + }, + { + "epoch": 0.99, + "learning_rate": 3.766266535271167e-05, + "loss": 0.9792, + "step": 1795 + }, + { + "epoch": 1.0, + "learning_rate": 3.760018621248e-05, + "loss": 0.964, + "step": 1800 + }, + { + "epoch": 1.0, + "learning_rate": 3.75376013960814e-05, + "loss": 0.9419, + "step": 1805 + }, + { + "epoch": 1.0, + "learning_rate": 3.747491142840681e-05, + "loss": 0.9818, + "step": 1810 + }, + { + "epoch": 1.0, + "learning_rate": 3.741211683522904e-05, + "loss": 0.9153, + "step": 1815 + }, + { + "epoch": 1.01, + "learning_rate": 3.734921814319841e-05, + "loss": 0.955, + "step": 1820 + }, + { + "epoch": 1.01, + "learning_rate": 3.728621587983828e-05, + "loss": 0.9467, + "step": 1825 + }, + { + "epoch": 1.01, + "learning_rate": 3.722311057354067e-05, + "loss": 0.9816, + "step": 1830 + }, + { + "epoch": 1.01, + "learning_rate": 3.715990275356178e-05, + "loss": 0.9727, + "step": 1835 + }, + { + "epoch": 1.02, + "learning_rate": 3.7096592950017617e-05, + "loss": 0.9822, + "step": 1840 + }, + { + "epoch": 1.02, + "learning_rate": 3.703318169387947e-05, + "loss": 0.942, + "step": 1845 + }, + { + "epoch": 1.02, + "learning_rate": 3.696966951696952e-05, + "loss": 0.9306, + "step": 1850 + }, + { + "epoch": 1.03, + "learning_rate": 3.690605695195637e-05, + "loss": 0.9654, + "step": 1855 + }, + { + "epoch": 1.03, + "learning_rate": 3.684234453235054e-05, + "loss": 0.9592, + "step": 1860 + }, + { + "epoch": 1.03, + "learning_rate": 3.677853279250003e-05, + "loss": 0.988, + "step": 1865 + }, + { + "epoch": 1.03, + "learning_rate": 3.671462226758583e-05, + "loss": 0.9462, + "step": 1870 + }, + { + "epoch": 1.04, + "learning_rate": 3.665061349361742e-05, + "loss": 0.9685, + "step": 1875 + }, + { + "epoch": 1.04, + "learning_rate": 3.658650700742828e-05, + "loss": 0.9772, + "step": 1880 + }, + { + "epoch": 1.04, + "learning_rate": 3.6522303346671404e-05, + "loss": 0.9482, + "step": 1885 + }, + { + "epoch": 1.05, + "learning_rate": 3.645800304981477e-05, + "loss": 1.0069, + "step": 1890 + }, + { + "epoch": 1.05, + "learning_rate": 3.639360665613683e-05, + "loss": 0.9003, + "step": 1895 + }, + { + "epoch": 1.05, + "learning_rate": 3.632911470572197e-05, + "loss": 0.9279, + "step": 1900 + }, + { + "epoch": 1.05, + "learning_rate": 3.626452773945603e-05, + "loss": 0.9237, + "step": 1905 + }, + { + "epoch": 1.06, + "learning_rate": 3.619984629902172e-05, + "loss": 1.0086, + "step": 1910 + }, + { + "epoch": 1.06, + "learning_rate": 3.613507092689409e-05, + "loss": 0.9625, + "step": 1915 + }, + { + "epoch": 1.06, + "learning_rate": 3.607020216633599e-05, + "loss": 0.9297, + "step": 1920 + }, + { + "epoch": 1.06, + "learning_rate": 3.60052405613935e-05, + "loss": 0.9718, + "step": 1925 + }, + { + "epoch": 1.07, + "learning_rate": 3.594018665689139e-05, + "loss": 0.9512, + "step": 1930 + }, + { + "epoch": 1.07, + "learning_rate": 3.5875040998428513e-05, + "loss": 0.9923, + "step": 1935 + }, + { + "epoch": 1.07, + "learning_rate": 3.5809804132373253e-05, + "loss": 0.9518, + "step": 1940 + }, + { + "epoch": 1.08, + "learning_rate": 3.574447660585897e-05, + "loss": 0.9324, + "step": 1945 + }, + { + "epoch": 1.08, + "learning_rate": 3.5679058966779344e-05, + "loss": 0.9724, + "step": 1950 + }, + { + "epoch": 1.08, + "learning_rate": 3.561355176378384e-05, + "loss": 0.978, + "step": 1955 + }, + { + "epoch": 1.08, + "learning_rate": 3.554795554627307e-05, + "loss": 0.9893, + "step": 1960 + }, + { + "epoch": 1.09, + "learning_rate": 3.548227086439422e-05, + "loss": 0.967, + "step": 1965 + }, + { + "epoch": 1.09, + "learning_rate": 3.541649826903639e-05, + "loss": 0.9529, + "step": 1970 + }, + { + "epoch": 1.09, + "learning_rate": 3.535063831182602e-05, + "loss": 0.9506, + "step": 1975 + }, + { + "epoch": 1.09, + "learning_rate": 3.528469154512224e-05, + "loss": 0.9525, + "step": 1980 + }, + { + "epoch": 1.1, + "learning_rate": 3.521865852201223e-05, + "loss": 0.9258, + "step": 1985 + }, + { + "epoch": 1.1, + "learning_rate": 3.5152539796306596e-05, + "loss": 0.9417, + "step": 1990 + }, + { + "epoch": 1.1, + "learning_rate": 3.508633592253472e-05, + "loss": 0.9465, + "step": 1995 + }, + { + "epoch": 1.11, + "learning_rate": 3.502004745594011e-05, + "loss": 0.9494, + "step": 2000 + }, + { + "epoch": 1.11, + "learning_rate": 3.4953674952475755e-05, + "loss": 0.9709, + "step": 2005 + }, + { + "epoch": 1.11, + "learning_rate": 3.488721896879943e-05, + "loss": 0.9581, + "step": 2010 + }, + { + "epoch": 1.11, + "learning_rate": 3.4820680062269074e-05, + "loss": 0.974, + "step": 2015 + }, + { + "epoch": 1.12, + "learning_rate": 3.4754058790938046e-05, + "loss": 0.9768, + "step": 2020 + }, + { + "epoch": 1.12, + "learning_rate": 3.468735571355055e-05, + "loss": 0.982, + "step": 2025 + }, + { + "epoch": 1.12, + "learning_rate": 3.4620571389536825e-05, + "loss": 0.984, + "step": 2030 + }, + { + "epoch": 1.13, + "learning_rate": 3.455370637900856e-05, + "loss": 0.9604, + "step": 2035 + }, + { + "epoch": 1.13, + "learning_rate": 3.448676124275414e-05, + "loss": 0.897, + "step": 2040 + }, + { + "epoch": 1.13, + "learning_rate": 3.4419736542233925e-05, + "loss": 0.9968, + "step": 2045 + }, + { + "epoch": 1.13, + "learning_rate": 3.4352632839575616e-05, + "loss": 0.9479, + "step": 2050 + }, + { + "epoch": 1.14, + "learning_rate": 3.428545069756946e-05, + "loss": 0.9724, + "step": 2055 + }, + { + "epoch": 1.14, + "learning_rate": 3.42181906796636e-05, + "loss": 0.9493, + "step": 2060 + }, + { + "epoch": 1.14, + "learning_rate": 3.415085334995927e-05, + "loss": 0.9348, + "step": 2065 + }, + { + "epoch": 1.14, + "learning_rate": 3.408343927320613e-05, + "loss": 0.9702, + "step": 2070 + }, + { + "epoch": 1.15, + "learning_rate": 3.401594901479753e-05, + "loss": 0.9089, + "step": 2075 + }, + { + "epoch": 1.15, + "learning_rate": 3.394838314076572e-05, + "loss": 0.9606, + "step": 2080 + }, + { + "epoch": 1.15, + "learning_rate": 3.3880742217777115e-05, + "loss": 0.9743, + "step": 2085 + }, + { + "epoch": 1.16, + "learning_rate": 3.381302681312759e-05, + "loss": 0.9469, + "step": 2090 + }, + { + "epoch": 1.16, + "learning_rate": 3.374523749473767e-05, + "loss": 0.949, + "step": 2095 + }, + { + "epoch": 1.16, + "learning_rate": 3.367737483114779e-05, + "loss": 0.9567, + "step": 2100 + }, + { + "epoch": 1.16, + "learning_rate": 3.360943939151351e-05, + "loss": 0.9718, + "step": 2105 + }, + { + "epoch": 1.17, + "learning_rate": 3.354143174560078e-05, + "loss": 0.9626, + "step": 2110 + }, + { + "epoch": 1.17, + "learning_rate": 3.3473352463781105e-05, + "loss": 0.9346, + "step": 2115 + }, + { + "epoch": 1.17, + "learning_rate": 3.340520211702681e-05, + "loss": 0.9208, + "step": 2120 + }, + { + "epoch": 1.18, + "learning_rate": 3.333698127690623e-05, + "loss": 0.9856, + "step": 2125 + }, + { + "epoch": 1.18, + "learning_rate": 3.326869051557891e-05, + "loss": 0.9049, + "step": 2130 + }, + { + "epoch": 1.18, + "learning_rate": 3.320033040579082e-05, + "loss": 0.9222, + "step": 2135 + }, + { + "epoch": 1.18, + "learning_rate": 3.3131901520869565e-05, + "loss": 0.9648, + "step": 2140 + }, + { + "epoch": 1.19, + "learning_rate": 3.306340443471951e-05, + "loss": 0.9538, + "step": 2145 + }, + { + "epoch": 1.19, + "learning_rate": 3.299483972181708e-05, + "loss": 0.9314, + "step": 2150 + }, + { + "epoch": 1.19, + "learning_rate": 3.292620795720583e-05, + "loss": 0.9576, + "step": 2155 + }, + { + "epoch": 1.19, + "learning_rate": 3.285750971649167e-05, + "loss": 0.9499, + "step": 2160 + }, + { + "epoch": 1.2, + "learning_rate": 3.278874557583807e-05, + "loss": 0.9568, + "step": 2165 + }, + { + "epoch": 1.2, + "learning_rate": 3.271991611196117e-05, + "loss": 0.9642, + "step": 2170 + }, + { + "epoch": 1.2, + "learning_rate": 3.265102190212497e-05, + "loss": 0.9526, + "step": 2175 + }, + { + "epoch": 1.21, + "learning_rate": 3.258206352413648e-05, + "loss": 0.933, + "step": 2180 + }, + { + "epoch": 1.21, + "learning_rate": 3.2513041556340887e-05, + "loss": 0.9683, + "step": 2185 + }, + { + "epoch": 1.21, + "learning_rate": 3.244395657761671e-05, + "loss": 0.9155, + "step": 2190 + }, + { + "epoch": 1.21, + "learning_rate": 3.2374809167370924e-05, + "loss": 0.9262, + "step": 2195 + }, + { + "epoch": 1.22, + "learning_rate": 3.230559990553409e-05, + "loss": 0.9778, + "step": 2200 + }, + { + "epoch": 1.22, + "learning_rate": 3.2236329372555544e-05, + "loss": 0.9577, + "step": 2205 + }, + { + "epoch": 1.22, + "learning_rate": 3.2166998149398465e-05, + "loss": 0.9286, + "step": 2210 + }, + { + "epoch": 1.22, + "learning_rate": 3.209760681753505e-05, + "loss": 0.9634, + "step": 2215 + }, + { + "epoch": 1.23, + "learning_rate": 3.2028155958941615e-05, + "loss": 0.9467, + "step": 2220 + }, + { + "epoch": 1.23, + "learning_rate": 3.195864615609373e-05, + "loss": 0.9543, + "step": 2225 + }, + { + "epoch": 1.23, + "learning_rate": 3.1889077991961304e-05, + "loss": 0.9914, + "step": 2230 + }, + { + "epoch": 1.24, + "learning_rate": 3.181945205000373e-05, + "loss": 0.9309, + "step": 2235 + }, + { + "epoch": 1.24, + "learning_rate": 3.1749768914164955e-05, + "loss": 0.9299, + "step": 2240 + }, + { + "epoch": 1.24, + "learning_rate": 3.168002916886864e-05, + "loss": 0.9462, + "step": 2245 + }, + { + "epoch": 1.24, + "learning_rate": 3.1610233399013194e-05, + "loss": 0.948, + "step": 2250 + }, + { + "epoch": 1.25, + "learning_rate": 3.15403821899669e-05, + "loss": 0.9335, + "step": 2255 + }, + { + "epoch": 1.25, + "learning_rate": 3.147047612756302e-05, + "loss": 0.952, + "step": 2260 + }, + { + "epoch": 1.25, + "learning_rate": 3.140051579809484e-05, + "loss": 0.9532, + "step": 2265 + }, + { + "epoch": 1.26, + "learning_rate": 3.133050178831079e-05, + "loss": 0.9853, + "step": 2270 + }, + { + "epoch": 1.26, + "learning_rate": 3.12604346854095e-05, + "loss": 0.8925, + "step": 2275 + }, + { + "epoch": 1.26, + "learning_rate": 3.119031507703491e-05, + "loss": 0.942, + "step": 2280 + }, + { + "epoch": 1.26, + "learning_rate": 3.112014355127129e-05, + "loss": 0.9132, + "step": 2285 + }, + { + "epoch": 1.27, + "learning_rate": 3.104992069663835e-05, + "loss": 0.9335, + "step": 2290 + }, + { + "epoch": 1.27, + "learning_rate": 3.0979647102086273e-05, + "loss": 0.9403, + "step": 2295 + }, + { + "epoch": 1.27, + "learning_rate": 3.090932335699081e-05, + "loss": 0.9246, + "step": 2300 + }, + { + "epoch": 1.27, + "learning_rate": 3.083895005114831e-05, + "loss": 0.912, + "step": 2305 + }, + { + "epoch": 1.28, + "learning_rate": 3.076852777477079e-05, + "loss": 0.9334, + "step": 2310 + }, + { + "epoch": 1.28, + "learning_rate": 3.069805711848096e-05, + "loss": 0.933, + "step": 2315 + }, + { + "epoch": 1.28, + "learning_rate": 3.062753867330729e-05, + "loss": 0.9348, + "step": 2320 + }, + { + "epoch": 1.29, + "learning_rate": 3.055697303067905e-05, + "loss": 0.9997, + "step": 2325 + }, + { + "epoch": 1.29, + "learning_rate": 3.048636078242137e-05, + "loss": 0.9196, + "step": 2330 + }, + { + "epoch": 1.29, + "learning_rate": 3.0415702520750235e-05, + "loss": 0.9735, + "step": 2335 + }, + { + "epoch": 1.29, + "learning_rate": 3.0344998838267525e-05, + "loss": 0.9395, + "step": 2340 + }, + { + "epoch": 1.3, + "learning_rate": 3.0274250327956093e-05, + "loss": 0.9455, + "step": 2345 + }, + { + "epoch": 1.3, + "learning_rate": 3.020345758317474e-05, + "loss": 0.968, + "step": 2350 + }, + { + "epoch": 1.3, + "learning_rate": 3.0132621197653245e-05, + "loss": 0.9403, + "step": 2355 + }, + { + "epoch": 1.3, + "learning_rate": 3.0061741765487418e-05, + "loss": 0.9267, + "step": 2360 + }, + { + "epoch": 1.31, + "learning_rate": 2.9990819881134073e-05, + "loss": 0.9734, + "step": 2365 + }, + { + "epoch": 1.31, + "learning_rate": 2.9919856139406093e-05, + "loss": 0.949, + "step": 2370 + }, + { + "epoch": 1.31, + "learning_rate": 2.9848851135467386e-05, + "loss": 0.9464, + "step": 2375 + }, + { + "epoch": 1.32, + "learning_rate": 2.977780546482794e-05, + "loss": 0.9709, + "step": 2380 + }, + { + "epoch": 1.32, + "learning_rate": 2.9706719723338795e-05, + "loss": 0.9202, + "step": 2385 + }, + { + "epoch": 1.32, + "learning_rate": 2.9635594507187074e-05, + "loss": 0.9625, + "step": 2390 + }, + { + "epoch": 1.32, + "learning_rate": 2.956443041289096e-05, + "loss": 0.9307, + "step": 2395 + }, + { + "epoch": 1.33, + "learning_rate": 2.9493228037294702e-05, + "loss": 0.907, + "step": 2400 + }, + { + "epoch": 1.33, + "learning_rate": 2.9421987977563613e-05, + "loss": 0.9487, + "step": 2405 + }, + { + "epoch": 1.33, + "learning_rate": 2.935071083117907e-05, + "loss": 0.9259, + "step": 2410 + }, + { + "epoch": 1.34, + "learning_rate": 2.9279397195933457e-05, + "loss": 0.9357, + "step": 2415 + }, + { + "epoch": 1.34, + "learning_rate": 2.920804766992521e-05, + "loss": 0.9149, + "step": 2420 + }, + { + "epoch": 1.34, + "learning_rate": 2.9136662851553787e-05, + "loss": 0.9664, + "step": 2425 + }, + { + "epoch": 1.34, + "learning_rate": 2.906524333951461e-05, + "loss": 0.973, + "step": 2430 + }, + { + "epoch": 1.35, + "learning_rate": 2.899378973279409e-05, + "loss": 0.9797, + "step": 2435 + }, + { + "epoch": 1.35, + "learning_rate": 2.892230263066459e-05, + "loss": 0.9604, + "step": 2440 + }, + { + "epoch": 1.35, + "learning_rate": 2.885078263267938e-05, + "loss": 0.9147, + "step": 2445 + }, + { + "epoch": 1.35, + "learning_rate": 2.8779230338667634e-05, + "loss": 0.9827, + "step": 2450 + }, + { + "epoch": 1.36, + "learning_rate": 2.870764634872939e-05, + "loss": 0.9362, + "step": 2455 + }, + { + "epoch": 1.36, + "learning_rate": 2.86360312632305e-05, + "loss": 0.9373, + "step": 2460 + }, + { + "epoch": 1.36, + "learning_rate": 2.8564385682797622e-05, + "loss": 0.955, + "step": 2465 + }, + { + "epoch": 1.37, + "learning_rate": 2.8492710208313177e-05, + "loss": 0.9108, + "step": 2470 + }, + { + "epoch": 1.37, + "learning_rate": 2.8421005440910303e-05, + "loss": 0.9259, + "step": 2475 + }, + { + "epoch": 1.37, + "learning_rate": 2.8349271981967797e-05, + "loss": 0.9333, + "step": 2480 + }, + { + "epoch": 1.37, + "learning_rate": 2.8277510433105102e-05, + "loss": 0.9437, + "step": 2485 + }, + { + "epoch": 1.38, + "learning_rate": 2.820572139617725e-05, + "loss": 0.9189, + "step": 2490 + }, + { + "epoch": 1.38, + "learning_rate": 2.8133905473269802e-05, + "loss": 0.9522, + "step": 2495 + }, + { + "epoch": 1.38, + "learning_rate": 2.8062063266693818e-05, + "loss": 0.9352, + "step": 2500 + }, + { + "epoch": 1.39, + "learning_rate": 2.7990195378980784e-05, + "loss": 0.9232, + "step": 2505 + }, + { + "epoch": 1.39, + "learning_rate": 2.7918302412877583e-05, + "loss": 0.8807, + "step": 2510 + }, + { + "epoch": 1.39, + "learning_rate": 2.7846384971341427e-05, + "loss": 0.9231, + "step": 2515 + }, + { + "epoch": 1.39, + "learning_rate": 2.7774443657534788e-05, + "loss": 0.9253, + "step": 2520 + }, + { + "epoch": 1.4, + "learning_rate": 2.770247907482036e-05, + "loss": 0.9646, + "step": 2525 + }, + { + "epoch": 1.4, + "learning_rate": 2.763049182675599e-05, + "loss": 0.9575, + "step": 2530 + }, + { + "epoch": 1.4, + "learning_rate": 2.7558482517089617e-05, + "loss": 0.9234, + "step": 2535 + }, + { + "epoch": 1.4, + "learning_rate": 2.748645174975421e-05, + "loss": 0.9215, + "step": 2540 + }, + { + "epoch": 1.41, + "learning_rate": 2.74144001288627e-05, + "loss": 0.959, + "step": 2545 + }, + { + "epoch": 1.41, + "learning_rate": 2.7342328258702894e-05, + "loss": 0.9509, + "step": 2550 + }, + { + "epoch": 1.41, + "learning_rate": 2.727023674373246e-05, + "loss": 0.8989, + "step": 2555 + }, + { + "epoch": 1.42, + "learning_rate": 2.7198126188573807e-05, + "loss": 0.9653, + "step": 2560 + }, + { + "epoch": 1.42, + "learning_rate": 2.7125997198009028e-05, + "loss": 0.9046, + "step": 2565 + }, + { + "epoch": 1.42, + "learning_rate": 2.7053850376974848e-05, + "loss": 0.9318, + "step": 2570 + }, + { + "epoch": 1.42, + "learning_rate": 2.6981686330557516e-05, + "loss": 0.9292, + "step": 2575 + }, + { + "epoch": 1.43, + "learning_rate": 2.6909505663987756e-05, + "loss": 0.99, + "step": 2580 + }, + { + "epoch": 1.43, + "learning_rate": 2.6837308982635678e-05, + "loss": 0.9737, + "step": 2585 + }, + { + "epoch": 1.43, + "learning_rate": 2.6765096892005726e-05, + "loss": 0.9649, + "step": 2590 + }, + { + "epoch": 1.43, + "learning_rate": 2.6692869997731545e-05, + "loss": 0.9687, + "step": 2595 + }, + { + "epoch": 1.44, + "learning_rate": 2.6620628905570964e-05, + "loss": 0.9708, + "step": 2600 + }, + { + "epoch": 1.44, + "learning_rate": 2.6548374221400884e-05, + "loss": 0.9498, + "step": 2605 + }, + { + "epoch": 1.44, + "learning_rate": 2.6476106551212188e-05, + "loss": 0.9512, + "step": 2610 + }, + { + "epoch": 1.45, + "learning_rate": 2.6403826501104682e-05, + "loss": 0.964, + "step": 2615 + }, + { + "epoch": 1.45, + "learning_rate": 2.6331534677281998e-05, + "loss": 0.9321, + "step": 2620 + }, + { + "epoch": 1.45, + "learning_rate": 2.6259231686046508e-05, + "loss": 0.9032, + "step": 2625 + }, + { + "epoch": 1.45, + "learning_rate": 2.6186918133794252e-05, + "loss": 0.9543, + "step": 2630 + }, + { + "epoch": 1.46, + "learning_rate": 2.6114594627009847e-05, + "loss": 0.9355, + "step": 2635 + }, + { + "epoch": 1.46, + "learning_rate": 2.604226177226137e-05, + "loss": 0.9111, + "step": 2640 + }, + { + "epoch": 1.46, + "learning_rate": 2.596992017619534e-05, + "loss": 0.9896, + "step": 2645 + }, + { + "epoch": 1.47, + "learning_rate": 2.589757044553155e-05, + "loss": 0.9715, + "step": 2650 + }, + { + "epoch": 1.47, + "learning_rate": 2.5825213187058045e-05, + "loss": 0.8911, + "step": 2655 + }, + { + "epoch": 1.47, + "learning_rate": 2.5752849007625986e-05, + "loss": 0.9446, + "step": 2660 + }, + { + "epoch": 1.47, + "learning_rate": 2.568047851414459e-05, + "loss": 0.9453, + "step": 2665 + }, + { + "epoch": 1.48, + "learning_rate": 2.5608102313576027e-05, + "loss": 0.9244, + "step": 2670 + }, + { + "epoch": 1.48, + "learning_rate": 2.553572101293033e-05, + "loss": 0.9478, + "step": 2675 + }, + { + "epoch": 1.48, + "learning_rate": 2.546333521926031e-05, + "loss": 0.9708, + "step": 2680 + }, + { + "epoch": 1.48, + "learning_rate": 2.5390945539656445e-05, + "loss": 0.9266, + "step": 2685 + }, + { + "epoch": 1.49, + "learning_rate": 2.5318552581241822e-05, + "loss": 0.9199, + "step": 2690 + }, + { + "epoch": 1.49, + "learning_rate": 2.524615695116702e-05, + "loss": 0.9666, + "step": 2695 + }, + { + "epoch": 1.49, + "learning_rate": 2.5173759256605027e-05, + "loss": 0.9247, + "step": 2700 + }, + { + "epoch": 1.5, + "learning_rate": 2.510136010474614e-05, + "loss": 0.9534, + "step": 2705 + }, + { + "epoch": 1.5, + "learning_rate": 2.5028960102792887e-05, + "loss": 0.9502, + "step": 2710 + }, + { + "epoch": 1.5, + "learning_rate": 2.4971039897207112e-05, + "loss": 0.9331, + "step": 2715 + }, + { + "epoch": 1.5, + "learning_rate": 2.4898639895253865e-05, + "loss": 0.9806, + "step": 2720 + }, + { + "epoch": 1.51, + "learning_rate": 2.4826240743394982e-05, + "loss": 0.9525, + "step": 2725 + }, + { + "epoch": 1.51, + "learning_rate": 2.4753843048832985e-05, + "loss": 0.937, + "step": 2730 + }, + { + "epoch": 1.51, + "learning_rate": 2.4681447418758187e-05, + "loss": 0.935, + "step": 2735 + }, + { + "epoch": 1.52, + "learning_rate": 2.460905446034356e-05, + "loss": 0.9662, + "step": 2740 + }, + { + "epoch": 1.52, + "learning_rate": 2.45366647807397e-05, + "loss": 0.9249, + "step": 2745 + }, + { + "epoch": 1.52, + "learning_rate": 2.446427898706967e-05, + "loss": 0.9673, + "step": 2750 + }, + { + "epoch": 1.52, + "learning_rate": 2.439189768642398e-05, + "loss": 0.9316, + "step": 2755 + }, + { + "epoch": 1.53, + "learning_rate": 2.431952148585541e-05, + "loss": 0.9297, + "step": 2760 + }, + { + "epoch": 1.53, + "learning_rate": 2.424715099237402e-05, + "loss": 0.9278, + "step": 2765 + }, + { + "epoch": 1.53, + "learning_rate": 2.4174786812941968e-05, + "loss": 0.8954, + "step": 2770 + }, + { + "epoch": 1.53, + "learning_rate": 2.4102429554468456e-05, + "loss": 0.9586, + "step": 2775 + }, + { + "epoch": 1.54, + "learning_rate": 2.4030079823804673e-05, + "loss": 0.9119, + "step": 2780 + }, + { + "epoch": 1.54, + "learning_rate": 2.395773822773863e-05, + "loss": 0.8949, + "step": 2785 + }, + { + "epoch": 1.54, + "learning_rate": 2.3885405372990166e-05, + "loss": 0.9506, + "step": 2790 + }, + { + "epoch": 1.55, + "learning_rate": 2.3813081866205754e-05, + "loss": 0.9087, + "step": 2795 + }, + { + "epoch": 1.55, + "learning_rate": 2.3740768313953494e-05, + "loss": 0.923, + "step": 2800 + }, + { + "epoch": 1.55, + "learning_rate": 2.3668465322718004e-05, + "loss": 1.0212, + "step": 2805 + }, + { + "epoch": 1.55, + "learning_rate": 2.359617349889532e-05, + "loss": 0.9182, + "step": 2810 + }, + { + "epoch": 1.56, + "learning_rate": 2.3523893448787818e-05, + "loss": 0.9316, + "step": 2815 + }, + { + "epoch": 1.56, + "learning_rate": 2.3451625778599122e-05, + "loss": 0.94, + "step": 2820 + }, + { + "epoch": 1.56, + "learning_rate": 2.3379371094429038e-05, + "loss": 0.9181, + "step": 2825 + }, + { + "epoch": 1.56, + "learning_rate": 2.3307130002268457e-05, + "loss": 0.937, + "step": 2830 + }, + { + "epoch": 1.57, + "learning_rate": 2.3234903107994287e-05, + "loss": 0.9026, + "step": 2835 + }, + { + "epoch": 1.57, + "learning_rate": 2.3162691017364317e-05, + "loss": 0.954, + "step": 2840 + }, + { + "epoch": 1.57, + "learning_rate": 2.3090494336012253e-05, + "loss": 0.9661, + "step": 2845 + }, + { + "epoch": 1.58, + "learning_rate": 2.3018313669442483e-05, + "loss": 0.9127, + "step": 2850 + }, + { + "epoch": 1.58, + "learning_rate": 2.2946149623025158e-05, + "loss": 0.9317, + "step": 2855 + }, + { + "epoch": 1.58, + "learning_rate": 2.2874002801990978e-05, + "loss": 0.9856, + "step": 2860 + }, + { + "epoch": 1.58, + "learning_rate": 2.28018738114262e-05, + "loss": 1.0021, + "step": 2865 + }, + { + "epoch": 1.59, + "learning_rate": 2.272976325626755e-05, + "loss": 0.9655, + "step": 2870 + }, + { + "epoch": 1.59, + "learning_rate": 2.265767174129711e-05, + "loss": 0.9619, + "step": 2875 + }, + { + "epoch": 1.59, + "learning_rate": 2.2585599871137313e-05, + "loss": 0.9383, + "step": 2880 + }, + { + "epoch": 1.6, + "learning_rate": 2.251354825024579e-05, + "loss": 0.9332, + "step": 2885 + }, + { + "epoch": 1.6, + "learning_rate": 2.244151748291039e-05, + "loss": 0.9544, + "step": 2890 + }, + { + "epoch": 1.6, + "learning_rate": 2.236950817324401e-05, + "loss": 0.9343, + "step": 2895 + }, + { + "epoch": 1.6, + "learning_rate": 2.2297520925179647e-05, + "loss": 0.9189, + "step": 2900 + }, + { + "epoch": 1.61, + "learning_rate": 2.222555634246521e-05, + "loss": 0.9341, + "step": 2905 + }, + { + "epoch": 1.61, + "learning_rate": 2.215361502865858e-05, + "loss": 0.9567, + "step": 2910 + }, + { + "epoch": 1.61, + "learning_rate": 2.2081697587122423e-05, + "loss": 0.9047, + "step": 2915 + }, + { + "epoch": 1.61, + "learning_rate": 2.200980462101922e-05, + "loss": 0.9126, + "step": 2920 + }, + { + "epoch": 1.62, + "learning_rate": 2.1937936733306195e-05, + "loss": 0.9523, + "step": 2925 + }, + { + "epoch": 1.62, + "learning_rate": 2.18660945267302e-05, + "loss": 0.8802, + "step": 2930 + }, + { + "epoch": 1.62, + "learning_rate": 2.179427860382276e-05, + "loss": 0.9197, + "step": 2935 + }, + { + "epoch": 1.63, + "learning_rate": 2.1722489566894903e-05, + "loss": 0.9255, + "step": 2940 + }, + { + "epoch": 1.63, + "learning_rate": 2.1650728018032206e-05, + "loss": 0.8921, + "step": 2945 + }, + { + "epoch": 1.63, + "learning_rate": 2.15789945590897e-05, + "loss": 0.9607, + "step": 2950 + }, + { + "epoch": 1.63, + "learning_rate": 2.150728979168683e-05, + "loss": 0.9755, + "step": 2955 + }, + { + "epoch": 1.64, + "learning_rate": 2.1435614317202384e-05, + "loss": 0.9943, + "step": 2960 + }, + { + "epoch": 1.64, + "learning_rate": 2.1363968736769508e-05, + "loss": 0.9463, + "step": 2965 + }, + { + "epoch": 1.64, + "learning_rate": 2.1292353651270617e-05, + "loss": 0.9107, + "step": 2970 + }, + { + "epoch": 1.65, + "learning_rate": 2.1220769661332365e-05, + "loss": 0.9311, + "step": 2975 + }, + { + "epoch": 1.65, + "learning_rate": 2.1149217367320622e-05, + "loss": 0.9459, + "step": 2980 + }, + { + "epoch": 1.65, + "learning_rate": 2.107769736933541e-05, + "loss": 0.9439, + "step": 2985 + }, + { + "epoch": 1.65, + "learning_rate": 2.100621026720591e-05, + "loss": 0.9719, + "step": 2990 + }, + { + "epoch": 1.66, + "learning_rate": 2.093475666048539e-05, + "loss": 0.9569, + "step": 2995 + }, + { + "epoch": 1.66, + "learning_rate": 2.0863337148446222e-05, + "loss": 0.9308, + "step": 3000 + }, + { + "epoch": 1.66, + "learning_rate": 2.07919523300748e-05, + "loss": 0.9269, + "step": 3005 + }, + { + "epoch": 1.66, + "learning_rate": 2.0720602804066552e-05, + "loss": 0.9358, + "step": 3010 + }, + { + "epoch": 1.67, + "learning_rate": 2.0649289168820943e-05, + "loss": 0.9291, + "step": 3015 + }, + { + "epoch": 1.67, + "learning_rate": 2.0578012022436386e-05, + "loss": 0.969, + "step": 3020 + }, + { + "epoch": 1.67, + "learning_rate": 2.0506771962705304e-05, + "loss": 0.9319, + "step": 3025 + }, + { + "epoch": 1.68, + "learning_rate": 2.0435569587109042e-05, + "loss": 0.9574, + "step": 3030 + }, + { + "epoch": 1.68, + "learning_rate": 2.036440549281293e-05, + "loss": 0.9654, + "step": 3035 + }, + { + "epoch": 1.68, + "learning_rate": 2.0293280276661204e-05, + "loss": 0.9742, + "step": 3040 + }, + { + "epoch": 1.68, + "learning_rate": 2.0222194535172067e-05, + "loss": 0.9014, + "step": 3045 + }, + { + "epoch": 1.69, + "learning_rate": 2.0151148864532623e-05, + "loss": 0.9246, + "step": 3050 + }, + { + "epoch": 1.69, + "learning_rate": 2.0080143860593913e-05, + "loss": 0.9522, + "step": 3055 + }, + { + "epoch": 1.69, + "learning_rate": 2.0009180118865933e-05, + "loss": 0.9567, + "step": 3060 + }, + { + "epoch": 1.69, + "learning_rate": 1.9938258234512588e-05, + "loss": 0.983, + "step": 3065 + }, + { + "epoch": 1.7, + "learning_rate": 1.9867378802346764e-05, + "loss": 0.8722, + "step": 3070 + }, + { + "epoch": 1.7, + "learning_rate": 1.979654241682527e-05, + "loss": 0.9122, + "step": 3075 + }, + { + "epoch": 1.7, + "learning_rate": 1.972574967204391e-05, + "loss": 0.9362, + "step": 3080 + }, + { + "epoch": 1.71, + "learning_rate": 1.9655001161732478e-05, + "loss": 0.8944, + "step": 3085 + }, + { + "epoch": 1.71, + "learning_rate": 1.9584297479249774e-05, + "loss": 0.9329, + "step": 3090 + }, + { + "epoch": 1.71, + "learning_rate": 1.9513639217578636e-05, + "loss": 0.9707, + "step": 3095 + }, + { + "epoch": 1.71, + "learning_rate": 1.9443026969320955e-05, + "loss": 0.9367, + "step": 3100 + }, + { + "epoch": 1.72, + "learning_rate": 1.937246132669272e-05, + "loss": 0.896, + "step": 3105 + }, + { + "epoch": 1.72, + "learning_rate": 1.9301942881519047e-05, + "loss": 0.97, + "step": 3110 + }, + { + "epoch": 1.72, + "learning_rate": 1.9231472225229216e-05, + "loss": 0.9638, + "step": 3115 + }, + { + "epoch": 1.73, + "learning_rate": 1.9161049948851684e-05, + "loss": 0.9561, + "step": 3120 + }, + { + "epoch": 1.73, + "learning_rate": 1.9090676643009193e-05, + "loss": 0.9734, + "step": 3125 + }, + { + "epoch": 1.73, + "learning_rate": 1.902035289791373e-05, + "loss": 0.9651, + "step": 3130 + }, + { + "epoch": 1.73, + "learning_rate": 1.8950079303361658e-05, + "loss": 0.9489, + "step": 3135 + }, + { + "epoch": 1.74, + "learning_rate": 1.8879856448728723e-05, + "loss": 0.9893, + "step": 3140 + }, + { + "epoch": 1.74, + "learning_rate": 1.8809684922965097e-05, + "loss": 0.9549, + "step": 3145 + }, + { + "epoch": 1.74, + "learning_rate": 1.8739565314590507e-05, + "loss": 0.9196, + "step": 3150 + }, + { + "epoch": 1.74, + "learning_rate": 1.8669498211689216e-05, + "loss": 0.9568, + "step": 3155 + }, + { + "epoch": 1.75, + "learning_rate": 1.859948420190517e-05, + "loss": 0.904, + "step": 3160 + }, + { + "epoch": 1.75, + "learning_rate": 1.852952387243698e-05, + "loss": 0.9375, + "step": 3165 + }, + { + "epoch": 1.75, + "learning_rate": 1.8459617810033096e-05, + "loss": 0.9048, + "step": 3170 + }, + { + "epoch": 1.76, + "learning_rate": 1.83897666009868e-05, + "loss": 0.9514, + "step": 3175 + }, + { + "epoch": 1.76, + "learning_rate": 1.8319970831131363e-05, + "loss": 0.8855, + "step": 3180 + }, + { + "epoch": 1.76, + "learning_rate": 1.825023108583505e-05, + "loss": 0.9119, + "step": 3185 + }, + { + "epoch": 1.76, + "learning_rate": 1.818054794999628e-05, + "loss": 0.9733, + "step": 3190 + }, + { + "epoch": 1.77, + "learning_rate": 1.8110922008038705e-05, + "loss": 0.9597, + "step": 3195 + }, + { + "epoch": 1.77, + "learning_rate": 1.8041353843906275e-05, + "loss": 0.8952, + "step": 3200 + }, + { + "epoch": 1.77, + "learning_rate": 1.797184404105839e-05, + "loss": 0.8934, + "step": 3205 + }, + { + "epoch": 1.77, + "learning_rate": 1.7902393182464955e-05, + "loss": 0.9299, + "step": 3210 + }, + { + "epoch": 1.78, + "learning_rate": 1.7833001850601544e-05, + "loss": 0.9247, + "step": 3215 + }, + { + "epoch": 1.78, + "learning_rate": 1.7763670627444465e-05, + "loss": 0.9672, + "step": 3220 + }, + { + "epoch": 1.78, + "learning_rate": 1.7694400094465913e-05, + "loss": 0.9451, + "step": 3225 + }, + { + "epoch": 1.79, + "learning_rate": 1.7625190832629085e-05, + "loss": 0.9294, + "step": 3230 + }, + { + "epoch": 1.79, + "learning_rate": 1.7556043422383293e-05, + "loss": 0.9145, + "step": 3235 + }, + { + "epoch": 1.79, + "learning_rate": 1.7486958443659112e-05, + "loss": 0.9508, + "step": 3240 + }, + { + "epoch": 1.79, + "learning_rate": 1.7417936475863526e-05, + "loss": 0.8725, + "step": 3245 + }, + { + "epoch": 1.8, + "learning_rate": 1.7348978097875036e-05, + "loss": 0.9195, + "step": 3250 + }, + { + "epoch": 1.8, + "learning_rate": 1.728008388803883e-05, + "loss": 0.933, + "step": 3255 + }, + { + "epoch": 1.8, + "learning_rate": 1.7211254424161933e-05, + "loss": 0.9747, + "step": 3260 + }, + { + "epoch": 1.81, + "learning_rate": 1.7142490283508324e-05, + "loss": 0.9168, + "step": 3265 + }, + { + "epoch": 1.81, + "learning_rate": 1.707379204279418e-05, + "loss": 0.9844, + "step": 3270 + }, + { + "epoch": 1.81, + "learning_rate": 1.700516027818293e-05, + "loss": 0.9071, + "step": 3275 + }, + { + "epoch": 1.81, + "learning_rate": 1.6936595565280488e-05, + "loss": 0.9311, + "step": 3280 + }, + { + "epoch": 1.82, + "learning_rate": 1.686809847913045e-05, + "loss": 0.9419, + "step": 3285 + }, + { + "epoch": 1.82, + "learning_rate": 1.679966959420918e-05, + "loss": 0.9679, + "step": 3290 + }, + { + "epoch": 1.82, + "learning_rate": 1.67313094844211e-05, + "loss": 0.9601, + "step": 3295 + }, + { + "epoch": 1.82, + "learning_rate": 1.6663018723093774e-05, + "loss": 0.9022, + "step": 3300 + }, + { + "epoch": 1.83, + "learning_rate": 1.6594797882973196e-05, + "loss": 0.8925, + "step": 3305 + }, + { + "epoch": 1.83, + "learning_rate": 1.6526647536218894e-05, + "loss": 0.9238, + "step": 3310 + }, + { + "epoch": 1.83, + "learning_rate": 1.6458568254399225e-05, + "loss": 0.9632, + "step": 3315 + }, + { + "epoch": 1.84, + "learning_rate": 1.6390560608486496e-05, + "loss": 0.9164, + "step": 3320 + }, + { + "epoch": 1.84, + "learning_rate": 1.6322625168852217e-05, + "loss": 0.9505, + "step": 3325 + }, + { + "epoch": 1.84, + "learning_rate": 1.6254762505262338e-05, + "loss": 0.9622, + "step": 3330 + }, + { + "epoch": 1.84, + "learning_rate": 1.618697318687241e-05, + "loss": 0.9204, + "step": 3335 + }, + { + "epoch": 1.85, + "learning_rate": 1.6119257782222895e-05, + "loss": 0.9504, + "step": 3340 + }, + { + "epoch": 1.85, + "learning_rate": 1.6051616859234285e-05, + "loss": 0.9384, + "step": 3345 + }, + { + "epoch": 1.85, + "learning_rate": 1.5984050985202474e-05, + "loss": 0.9374, + "step": 3350 + }, + { + "epoch": 1.86, + "learning_rate": 1.591656072679387e-05, + "loss": 0.9937, + "step": 3355 + }, + { + "epoch": 1.86, + "learning_rate": 1.5849146650040737e-05, + "loss": 0.9587, + "step": 3360 + }, + { + "epoch": 1.86, + "learning_rate": 1.5781809320336412e-05, + "loss": 0.9312, + "step": 3365 + }, + { + "epoch": 1.86, + "learning_rate": 1.5714549302430536e-05, + "loss": 0.9343, + "step": 3370 + }, + { + "epoch": 1.87, + "learning_rate": 1.5647367160424393e-05, + "loss": 0.9419, + "step": 3375 + }, + { + "epoch": 1.87, + "learning_rate": 1.558026345776608e-05, + "loss": 0.9371, + "step": 3380 + }, + { + "epoch": 1.87, + "learning_rate": 1.551323875724587e-05, + "loss": 0.913, + "step": 3385 + }, + { + "epoch": 1.87, + "learning_rate": 1.5446293620991437e-05, + "loss": 0.9195, + "step": 3390 + }, + { + "epoch": 1.88, + "learning_rate": 1.5379428610463174e-05, + "loss": 0.9164, + "step": 3395 + }, + { + "epoch": 1.88, + "learning_rate": 1.531264428644945e-05, + "loss": 0.9256, + "step": 3400 + }, + { + "epoch": 1.88, + "learning_rate": 1.5245941209061953e-05, + "loss": 0.9658, + "step": 3405 + }, + { + "epoch": 1.89, + "learning_rate": 1.517931993773094e-05, + "loss": 0.9397, + "step": 3410 + }, + { + "epoch": 1.89, + "learning_rate": 1.5112781031200569e-05, + "loss": 0.915, + "step": 3415 + }, + { + "epoch": 1.89, + "learning_rate": 1.5046325047524251e-05, + "loss": 0.9116, + "step": 3420 + }, + { + "epoch": 1.89, + "learning_rate": 1.4979952544059888e-05, + "loss": 0.9192, + "step": 3425 + }, + { + "epoch": 1.9, + "learning_rate": 1.4913664077465289e-05, + "loss": 0.924, + "step": 3430 + }, + { + "epoch": 1.9, + "learning_rate": 1.4847460203693408e-05, + "loss": 0.9579, + "step": 3435 + }, + { + "epoch": 1.9, + "learning_rate": 1.4781341477987776e-05, + "loss": 0.9816, + "step": 3440 + }, + { + "epoch": 1.9, + "learning_rate": 1.4715308454877758e-05, + "loss": 0.9412, + "step": 3445 + }, + { + "epoch": 1.91, + "learning_rate": 1.4649361688173979e-05, + "loss": 0.9413, + "step": 3450 + }, + { + "epoch": 1.91, + "learning_rate": 1.458350173096361e-05, + "loss": 0.9425, + "step": 3455 + }, + { + "epoch": 1.91, + "learning_rate": 1.4517729135605795e-05, + "loss": 0.9132, + "step": 3460 + }, + { + "epoch": 1.92, + "learning_rate": 1.4452044453726942e-05, + "loss": 0.9455, + "step": 3465 + }, + { + "epoch": 1.92, + "learning_rate": 1.4386448236216174e-05, + "loss": 0.9543, + "step": 3470 + }, + { + "epoch": 1.92, + "learning_rate": 1.4320941033220667e-05, + "loss": 0.9118, + "step": 3475 + }, + { + "epoch": 1.92, + "learning_rate": 1.4255523394141041e-05, + "loss": 0.9425, + "step": 3480 + }, + { + "epoch": 1.93, + "learning_rate": 1.4190195867626749e-05, + "loss": 0.8958, + "step": 3485 + }, + { + "epoch": 1.93, + "learning_rate": 1.4124959001571497e-05, + "loss": 0.944, + "step": 3490 + }, + { + "epoch": 1.93, + "learning_rate": 1.4059813343108616e-05, + "loss": 0.9611, + "step": 3495 + }, + { + "epoch": 1.94, + "learning_rate": 1.3994759438606501e-05, + "loss": 0.9448, + "step": 3500 + }, + { + "epoch": 1.94, + "learning_rate": 1.3929797833664013e-05, + "loss": 0.9659, + "step": 3505 + }, + { + "epoch": 1.94, + "learning_rate": 1.3864929073105922e-05, + "loss": 0.9178, + "step": 3510 + }, + { + "epoch": 1.94, + "learning_rate": 1.3800153700978282e-05, + "loss": 0.8965, + "step": 3515 + }, + { + "epoch": 1.95, + "learning_rate": 1.373547226054398e-05, + "loss": 0.9198, + "step": 3520 + }, + { + "epoch": 1.95, + "learning_rate": 1.367088529427803e-05, + "loss": 0.9398, + "step": 3525 + }, + { + "epoch": 1.95, + "learning_rate": 1.3606393343863182e-05, + "loss": 0.9423, + "step": 3530 + }, + { + "epoch": 1.95, + "learning_rate": 1.3541996950185227e-05, + "loss": 0.9592, + "step": 3535 + }, + { + "epoch": 1.96, + "learning_rate": 1.3477696653328598e-05, + "loss": 0.9489, + "step": 3540 + }, + { + "epoch": 1.96, + "learning_rate": 1.3413492992571713e-05, + "loss": 0.963, + "step": 3545 + }, + { + "epoch": 1.96, + "learning_rate": 1.3349386506382586e-05, + "loss": 0.9449, + "step": 3550 + }, + { + "epoch": 1.97, + "learning_rate": 1.3285377732414172e-05, + "loss": 0.9043, + "step": 3555 + }, + { + "epoch": 1.97, + "learning_rate": 1.3221467207499972e-05, + "loss": 0.9362, + "step": 3560 + }, + { + "epoch": 1.97, + "learning_rate": 1.3157655467649463e-05, + "loss": 0.945, + "step": 3565 + }, + { + "epoch": 1.97, + "learning_rate": 1.3093943048043634e-05, + "loss": 0.8693, + "step": 3570 + }, + { + "epoch": 1.98, + "learning_rate": 1.3030330483030479e-05, + "loss": 0.9659, + "step": 3575 + }, + { + "epoch": 1.98, + "learning_rate": 1.2966818306120535e-05, + "loss": 0.9036, + "step": 3580 + }, + { + "epoch": 1.98, + "learning_rate": 1.2903407049982386e-05, + "loss": 0.9282, + "step": 3585 + }, + { + "epoch": 1.99, + "learning_rate": 1.2840097246438215e-05, + "loss": 0.8747, + "step": 3590 + }, + { + "epoch": 1.99, + "learning_rate": 1.277688942645934e-05, + "loss": 0.936, + "step": 3595 + }, + { + "epoch": 1.99, + "learning_rate": 1.2713784120161725e-05, + "loss": 0.8945, + "step": 3600 + }, + { + "epoch": 1.99, + "learning_rate": 1.2650781856801598e-05, + "loss": 0.9273, + "step": 3605 + }, + { + "epoch": 2.0, + "learning_rate": 1.258788316477097e-05, + "loss": 0.9632, + "step": 3610 + }, + { + "epoch": 2.0, + "learning_rate": 1.2525088571593202e-05, + "loss": 0.8641, + "step": 3615 + }, + { + "epoch": 2.0, + "learning_rate": 1.2462398603918607e-05, + "loss": 0.9314, + "step": 3620 + }, + { + "epoch": 2.0, + "learning_rate": 1.2399813787520006e-05, + "loss": 0.9169, + "step": 3625 + }, + { + "epoch": 2.01, + "learning_rate": 1.2337334647288334e-05, + "loss": 0.9089, + "step": 3630 + }, + { + "epoch": 2.01, + "learning_rate": 1.2274961707228228e-05, + "loss": 0.9039, + "step": 3635 + }, + { + "epoch": 2.01, + "learning_rate": 1.2212695490453646e-05, + "loss": 0.9378, + "step": 3640 + }, + { + "epoch": 2.02, + "learning_rate": 1.2150536519183475e-05, + "loss": 0.9297, + "step": 3645 + }, + { + "epoch": 2.02, + "learning_rate": 1.2088485314737108e-05, + "loss": 0.9488, + "step": 3650 + }, + { + "epoch": 2.02, + "learning_rate": 1.2026542397530186e-05, + "loss": 0.9625, + "step": 3655 + }, + { + "epoch": 2.02, + "learning_rate": 1.1964708287070073e-05, + "loss": 0.8874, + "step": 3660 + }, + { + "epoch": 2.03, + "learning_rate": 1.1902983501951666e-05, + "loss": 0.9224, + "step": 3665 + }, + { + "epoch": 2.03, + "learning_rate": 1.1841368559852892e-05, + "loss": 0.9442, + "step": 3670 + }, + { + "epoch": 2.03, + "learning_rate": 1.17798639775305e-05, + "loss": 0.8688, + "step": 3675 + }, + { + "epoch": 2.03, + "learning_rate": 1.1718470270815608e-05, + "loss": 0.8912, + "step": 3680 + }, + { + "epoch": 2.04, + "learning_rate": 1.1657187954609496e-05, + "loss": 0.9131, + "step": 3685 + }, + { + "epoch": 2.04, + "learning_rate": 1.1596017542879168e-05, + "loss": 0.9827, + "step": 3690 + }, + { + "epoch": 2.04, + "learning_rate": 1.1534959548653132e-05, + "loss": 0.8588, + "step": 3695 + }, + { + "epoch": 2.05, + "learning_rate": 1.147401448401706e-05, + "loss": 0.9359, + "step": 3700 + }, + { + "epoch": 2.05, + "learning_rate": 1.1413182860109491e-05, + "loss": 0.9375, + "step": 3705 + }, + { + "epoch": 2.05, + "learning_rate": 1.1352465187117562e-05, + "loss": 0.9242, + "step": 3710 + }, + { + "epoch": 2.05, + "learning_rate": 1.1291861974272703e-05, + "loss": 0.9132, + "step": 3715 + }, + { + "epoch": 2.06, + "learning_rate": 1.1231373729846393e-05, + "loss": 0.9142, + "step": 3720 + }, + { + "epoch": 2.06, + "learning_rate": 1.1171000961145883e-05, + "loss": 0.9132, + "step": 3725 + }, + { + "epoch": 2.06, + "learning_rate": 1.1110744174509952e-05, + "loss": 0.9603, + "step": 3730 + }, + { + "epoch": 2.07, + "learning_rate": 1.1050603875304622e-05, + "loss": 0.9442, + "step": 3735 + }, + { + "epoch": 2.07, + "learning_rate": 1.0990580567918979e-05, + "loss": 0.9392, + "step": 3740 + }, + { + "epoch": 2.07, + "learning_rate": 1.0930674755760908e-05, + "loss": 0.9203, + "step": 3745 + }, + { + "epoch": 2.07, + "learning_rate": 1.0870886941252872e-05, + "loss": 0.9468, + "step": 3750 + }, + { + "epoch": 2.08, + "learning_rate": 1.0811217625827705e-05, + "loss": 0.9605, + "step": 3755 + }, + { + "epoch": 2.08, + "learning_rate": 1.0751667309924399e-05, + "loss": 0.9166, + "step": 3760 + }, + { + "epoch": 2.08, + "learning_rate": 1.0692236492983918e-05, + "loss": 0.939, + "step": 3765 + }, + { + "epoch": 2.08, + "learning_rate": 1.0632925673445001e-05, + "loss": 0.9409, + "step": 3770 + }, + { + "epoch": 2.09, + "learning_rate": 1.0573735348739987e-05, + "loss": 0.9523, + "step": 3775 + }, + { + "epoch": 2.09, + "learning_rate": 1.0514666015290645e-05, + "loss": 0.8867, + "step": 3780 + }, + { + "epoch": 2.09, + "learning_rate": 1.0455718168503967e-05, + "loss": 0.9816, + "step": 3785 + }, + { + "epoch": 2.1, + "learning_rate": 1.0396892302768127e-05, + "loss": 0.9465, + "step": 3790 + }, + { + "epoch": 2.1, + "learning_rate": 1.033818891144817e-05, + "loss": 0.9352, + "step": 3795 + }, + { + "epoch": 2.1, + "learning_rate": 1.0279608486882054e-05, + "loss": 0.8877, + "step": 3800 + }, + { + "epoch": 2.1, + "learning_rate": 1.0221151520376343e-05, + "loss": 0.9098, + "step": 3805 + }, + { + "epoch": 2.11, + "learning_rate": 1.0162818502202251e-05, + "loss": 0.8687, + "step": 3810 + }, + { + "epoch": 2.11, + "learning_rate": 1.0104609921591387e-05, + "loss": 0.9256, + "step": 3815 + }, + { + "epoch": 2.11, + "learning_rate": 1.0046526266731782e-05, + "loss": 0.92, + "step": 3820 + }, + { + "epoch": 2.12, + "learning_rate": 9.988568024763673e-06, + "loss": 0.8813, + "step": 3825 + }, + { + "epoch": 2.12, + "learning_rate": 9.930735681775505e-06, + "loss": 0.891, + "step": 3830 + }, + { + "epoch": 2.12, + "learning_rate": 9.87302972279982e-06, + "loss": 0.9479, + "step": 3835 + }, + { + "epoch": 2.12, + "learning_rate": 9.815450631809191e-06, + "loss": 0.8963, + "step": 3840 + }, + { + "epoch": 2.13, + "learning_rate": 9.757998891712172e-06, + "loss": 0.9178, + "step": 3845 + }, + { + "epoch": 2.13, + "learning_rate": 9.700674984349228e-06, + "loss": 0.9072, + "step": 3850 + }, + { + "epoch": 2.13, + "learning_rate": 9.643479390488717e-06, + "loss": 0.8879, + "step": 3855 + }, + { + "epoch": 2.13, + "learning_rate": 9.586412589822846e-06, + "loss": 0.9484, + "step": 3860 + }, + { + "epoch": 2.14, + "learning_rate": 9.529475060963649e-06, + "loss": 0.9173, + "step": 3865 + }, + { + "epoch": 2.14, + "learning_rate": 9.472667281438982e-06, + "loss": 0.9318, + "step": 3870 + }, + { + "epoch": 2.14, + "learning_rate": 9.415989727688484e-06, + "loss": 0.9242, + "step": 3875 + }, + { + "epoch": 2.15, + "learning_rate": 9.359442875059631e-06, + "loss": 0.9314, + "step": 3880 + }, + { + "epoch": 2.15, + "learning_rate": 9.303027197803726e-06, + "loss": 0.9227, + "step": 3885 + }, + { + "epoch": 2.15, + "learning_rate": 9.246743169071906e-06, + "loss": 0.9253, + "step": 3890 + }, + { + "epoch": 2.15, + "learning_rate": 9.190591260911201e-06, + "loss": 0.9457, + "step": 3895 + }, + { + "epoch": 2.16, + "learning_rate": 9.134571944260554e-06, + "loss": 0.9214, + "step": 3900 + }, + { + "epoch": 2.16, + "learning_rate": 9.078685688946884e-06, + "loss": 0.9534, + "step": 3905 + }, + { + "epoch": 2.16, + "learning_rate": 9.022932963681141e-06, + "loss": 0.9236, + "step": 3910 + }, + { + "epoch": 2.16, + "learning_rate": 8.967314236054384e-06, + "loss": 0.8905, + "step": 3915 + }, + { + "epoch": 2.17, + "learning_rate": 8.911829972533817e-06, + "loss": 0.9319, + "step": 3920 + }, + { + "epoch": 2.17, + "learning_rate": 8.856480638458966e-06, + "loss": 0.8989, + "step": 3925 + }, + { + "epoch": 2.17, + "learning_rate": 8.80126669803766e-06, + "loss": 0.9234, + "step": 3930 + }, + { + "epoch": 2.18, + "learning_rate": 8.746188614342263e-06, + "loss": 0.9619, + "step": 3935 + }, + { + "epoch": 2.18, + "learning_rate": 8.691246849305653e-06, + "loss": 0.9537, + "step": 3940 + }, + { + "epoch": 2.18, + "learning_rate": 8.636441863717499e-06, + "loss": 0.9809, + "step": 3945 + }, + { + "epoch": 2.18, + "learning_rate": 8.581774117220238e-06, + "loss": 0.9219, + "step": 3950 + }, + { + "epoch": 2.19, + "learning_rate": 8.52724406830538e-06, + "loss": 0.9502, + "step": 3955 + }, + { + "epoch": 2.19, + "learning_rate": 8.472852174309514e-06, + "loss": 0.9277, + "step": 3960 + }, + { + "epoch": 2.19, + "learning_rate": 8.418598891410581e-06, + "loss": 0.8978, + "step": 3965 + }, + { + "epoch": 2.2, + "learning_rate": 8.364484674624e-06, + "loss": 0.9026, + "step": 3970 + }, + { + "epoch": 2.2, + "learning_rate": 8.31050997779885e-06, + "loss": 0.9658, + "step": 3975 + }, + { + "epoch": 2.2, + "learning_rate": 8.25667525361409e-06, + "loss": 0.9288, + "step": 3980 + }, + { + "epoch": 2.2, + "learning_rate": 8.202980953574735e-06, + "loss": 0.9009, + "step": 3985 + }, + { + "epoch": 2.21, + "learning_rate": 8.14942752800808e-06, + "loss": 0.934, + "step": 3990 + }, + { + "epoch": 2.21, + "learning_rate": 8.09601542605993e-06, + "loss": 0.9261, + "step": 3995 + }, + { + "epoch": 2.21, + "learning_rate": 8.042745095690826e-06, + "loss": 0.9086, + "step": 4000 + }, + { + "epoch": 2.21, + "learning_rate": 7.989616983672288e-06, + "loss": 0.9397, + "step": 4005 + }, + { + "epoch": 2.22, + "learning_rate": 7.936631535583055e-06, + "loss": 0.8661, + "step": 4010 + }, + { + "epoch": 2.22, + "learning_rate": 7.88378919580538e-06, + "loss": 0.9433, + "step": 4015 + }, + { + "epoch": 2.22, + "learning_rate": 7.83109040752128e-06, + "loss": 0.9473, + "step": 4020 + }, + { + "epoch": 2.23, + "learning_rate": 7.778535612708824e-06, + "loss": 0.9177, + "step": 4025 + }, + { + "epoch": 2.23, + "learning_rate": 7.726125252138417e-06, + "loss": 0.8932, + "step": 4030 + }, + { + "epoch": 2.23, + "learning_rate": 7.673859765369126e-06, + "loss": 0.9187, + "step": 4035 + }, + { + "epoch": 2.23, + "learning_rate": 7.62173959074497e-06, + "loss": 0.9009, + "step": 4040 + }, + { + "epoch": 2.24, + "learning_rate": 7.569765165391257e-06, + "loss": 0.9348, + "step": 4045 + }, + { + "epoch": 2.24, + "learning_rate": 7.517936925210917e-06, + "loss": 0.8827, + "step": 4050 + }, + { + "epoch": 2.24, + "learning_rate": 7.466255304880834e-06, + "loss": 0.9668, + "step": 4055 + }, + { + "epoch": 2.24, + "learning_rate": 7.4147207378482295e-06, + "loss": 0.934, + "step": 4060 + }, + { + "epoch": 2.25, + "learning_rate": 7.36333365632697e-06, + "loss": 0.9251, + "step": 4065 + }, + { + "epoch": 2.25, + "learning_rate": 7.312094491294033e-06, + "loss": 0.9225, + "step": 4070 + }, + { + "epoch": 2.25, + "learning_rate": 7.261003672485783e-06, + "loss": 0.9198, + "step": 4075 + }, + { + "epoch": 2.26, + "learning_rate": 7.210061628394477e-06, + "loss": 0.9023, + "step": 4080 + }, + { + "epoch": 2.26, + "learning_rate": 7.159268786264564e-06, + "loss": 0.9378, + "step": 4085 + }, + { + "epoch": 2.26, + "learning_rate": 7.108625572089209e-06, + "loss": 0.9148, + "step": 4090 + }, + { + "epoch": 2.26, + "learning_rate": 7.05813241060661e-06, + "loss": 0.9308, + "step": 4095 + }, + { + "epoch": 2.27, + "learning_rate": 7.007789725296557e-06, + "loss": 0.8979, + "step": 4100 + }, + { + "epoch": 2.27, + "learning_rate": 6.957597938376748e-06, + "loss": 0.9329, + "step": 4105 + }, + { + "epoch": 2.27, + "learning_rate": 6.907557470799358e-06, + "loss": 0.8849, + "step": 4110 + }, + { + "epoch": 2.28, + "learning_rate": 6.857668742247458e-06, + "loss": 0.8879, + "step": 4115 + }, + { + "epoch": 2.28, + "learning_rate": 6.807932171131498e-06, + "loss": 0.9132, + "step": 4120 + }, + { + "epoch": 2.28, + "learning_rate": 6.758348174585804e-06, + "loss": 0.9273, + "step": 4125 + }, + { + "epoch": 2.28, + "learning_rate": 6.7089171684650785e-06, + "loss": 0.9115, + "step": 4130 + }, + { + "epoch": 2.29, + "learning_rate": 6.659639567340914e-06, + "loss": 0.9431, + "step": 4135 + }, + { + "epoch": 2.29, + "learning_rate": 6.610515784498314e-06, + "loss": 0.9515, + "step": 4140 + }, + { + "epoch": 2.29, + "learning_rate": 6.561546231932228e-06, + "loss": 0.9069, + "step": 4145 + }, + { + "epoch": 2.29, + "learning_rate": 6.512731320344101e-06, + "loss": 0.9178, + "step": 4150 + }, + { + "epoch": 2.3, + "learning_rate": 6.464071459138405e-06, + "loss": 0.9156, + "step": 4155 + }, + { + "epoch": 2.3, + "learning_rate": 6.415567056419244e-06, + "loss": 0.9637, + "step": 4160 + }, + { + "epoch": 2.3, + "learning_rate": 6.3672185189869e-06, + "loss": 0.9446, + "step": 4165 + }, + { + "epoch": 2.31, + "learning_rate": 6.319026252334445e-06, + "loss": 0.8726, + "step": 4170 + }, + { + "epoch": 2.31, + "learning_rate": 6.270990660644313e-06, + "loss": 0.9324, + "step": 4175 + }, + { + "epoch": 2.31, + "learning_rate": 6.223112146784935e-06, + "loss": 0.9252, + "step": 4180 + }, + { + "epoch": 2.31, + "learning_rate": 6.1753911123073435e-06, + "loss": 0.8924, + "step": 4185 + }, + { + "epoch": 2.32, + "learning_rate": 6.127827957441817e-06, + "loss": 0.9344, + "step": 4190 + }, + { + "epoch": 2.32, + "learning_rate": 6.080423081094522e-06, + "loss": 0.9341, + "step": 4195 + }, + { + "epoch": 2.32, + "learning_rate": 6.033176880844133e-06, + "loss": 0.905, + "step": 4200 + }, + { + "epoch": 2.33, + "learning_rate": 5.986089752938584e-06, + "loss": 0.9231, + "step": 4205 + }, + { + "epoch": 2.33, + "learning_rate": 5.939162092291622e-06, + "loss": 0.906, + "step": 4210 + }, + { + "epoch": 2.33, + "learning_rate": 5.892394292479633e-06, + "loss": 0.8957, + "step": 4215 + }, + { + "epoch": 2.33, + "learning_rate": 5.8457867457382024e-06, + "loss": 0.867, + "step": 4220 + }, + { + "epoch": 2.34, + "learning_rate": 5.7993398429589506e-06, + "loss": 0.9022, + "step": 4225 + }, + { + "epoch": 2.34, + "learning_rate": 5.753053973686148e-06, + "loss": 0.9064, + "step": 4230 + }, + { + "epoch": 2.34, + "learning_rate": 5.7069295261135525e-06, + "loss": 0.9019, + "step": 4235 + }, + { + "epoch": 2.34, + "learning_rate": 5.66096688708104e-06, + "loss": 0.9397, + "step": 4240 + }, + { + "epoch": 2.35, + "learning_rate": 5.615166442071457e-06, + "loss": 0.8971, + "step": 4245 + }, + { + "epoch": 2.35, + "learning_rate": 5.569528575207339e-06, + "loss": 0.8902, + "step": 4250 + }, + { + "epoch": 2.35, + "learning_rate": 5.5240536692477e-06, + "loss": 0.8982, + "step": 4255 + }, + { + "epoch": 2.36, + "learning_rate": 5.4787421055848164e-06, + "loss": 0.9135, + "step": 4260 + }, + { + "epoch": 2.36, + "learning_rate": 5.433594264241043e-06, + "loss": 0.8969, + "step": 4265 + }, + { + "epoch": 2.36, + "learning_rate": 5.3886105238656055e-06, + "loss": 0.9036, + "step": 4270 + }, + { + "epoch": 2.36, + "learning_rate": 5.3437912617314425e-06, + "loss": 0.9313, + "step": 4275 + }, + { + "epoch": 2.37, + "learning_rate": 5.299136853732034e-06, + "loss": 0.9034, + "step": 4280 + }, + { + "epoch": 2.37, + "learning_rate": 5.254647674378252e-06, + "loss": 0.9673, + "step": 4285 + }, + { + "epoch": 2.37, + "learning_rate": 5.210324096795202e-06, + "loss": 0.9044, + "step": 4290 + }, + { + "epoch": 2.37, + "learning_rate": 5.166166492719124e-06, + "loss": 0.937, + "step": 4295 + }, + { + "epoch": 2.38, + "learning_rate": 5.122175232494255e-06, + "loss": 0.9095, + "step": 4300 + }, + { + "epoch": 2.38, + "learning_rate": 5.078350685069727e-06, + "loss": 0.8787, + "step": 4305 + }, + { + "epoch": 2.38, + "learning_rate": 5.03469321799647e-06, + "loss": 0.8945, + "step": 4310 + }, + { + "epoch": 2.39, + "learning_rate": 4.9912031974241376e-06, + "loss": 0.9285, + "step": 4315 + }, + { + "epoch": 2.39, + "learning_rate": 4.947880988098025e-06, + "loss": 0.9318, + "step": 4320 + }, + { + "epoch": 2.39, + "learning_rate": 4.90472695335602e-06, + "loss": 0.9171, + "step": 4325 + }, + { + "epoch": 2.39, + "learning_rate": 4.8617414551255545e-06, + "loss": 0.923, + "step": 4330 + }, + { + "epoch": 2.4, + "learning_rate": 4.818924853920545e-06, + "loss": 0.9153, + "step": 4335 + }, + { + "epoch": 2.4, + "learning_rate": 4.776277508838428e-06, + "loss": 0.907, + "step": 4340 + }, + { + "epoch": 2.4, + "learning_rate": 4.733799777557069e-06, + "loss": 0.8869, + "step": 4345 + }, + { + "epoch": 2.41, + "learning_rate": 4.691492016331842e-06, + "loss": 0.95, + "step": 4350 + }, + { + "epoch": 2.41, + "learning_rate": 4.6493545799925655e-06, + "loss": 0.9145, + "step": 4355 + }, + { + "epoch": 2.41, + "learning_rate": 4.607387821940609e-06, + "loss": 0.9175, + "step": 4360 + }, + { + "epoch": 2.41, + "learning_rate": 4.565592094145835e-06, + "loss": 0.9125, + "step": 4365 + }, + { + "epoch": 2.42, + "learning_rate": 4.523967747143745e-06, + "loss": 0.8972, + "step": 4370 + }, + { + "epoch": 2.42, + "learning_rate": 4.482515130032453e-06, + "loss": 0.9113, + "step": 4375 + }, + { + "epoch": 2.42, + "learning_rate": 4.441234590469817e-06, + "loss": 0.9447, + "step": 4380 + }, + { + "epoch": 2.42, + "learning_rate": 4.400126474670499e-06, + "loss": 0.9472, + "step": 4385 + }, + { + "epoch": 2.43, + "learning_rate": 4.359191127403059e-06, + "loss": 0.9391, + "step": 4390 + }, + { + "epoch": 2.43, + "learning_rate": 4.318428891987078e-06, + "loss": 0.9171, + "step": 4395 + }, + { + "epoch": 2.43, + "learning_rate": 4.2778401102902595e-06, + "loss": 0.9468, + "step": 4400 + }, + { + "epoch": 2.44, + "learning_rate": 4.237425122725586e-06, + "loss": 0.9034, + "step": 4405 + }, + { + "epoch": 2.44, + "learning_rate": 4.197184268248436e-06, + "loss": 0.8667, + "step": 4410 + }, + { + "epoch": 2.44, + "learning_rate": 4.157117884353765e-06, + "loss": 0.9553, + "step": 4415 + }, + { + "epoch": 2.44, + "learning_rate": 4.117226307073268e-06, + "loss": 0.931, + "step": 4420 + }, + { + "epoch": 2.45, + "learning_rate": 4.07750987097254e-06, + "loss": 0.9287, + "step": 4425 + }, + { + "epoch": 2.45, + "learning_rate": 4.037968909148326e-06, + "loss": 0.8737, + "step": 4430 + }, + { + "epoch": 2.45, + "learning_rate": 3.998603753225647e-06, + "loss": 0.9677, + "step": 4435 + }, + { + "epoch": 2.46, + "learning_rate": 3.959414733355094e-06, + "loss": 0.9194, + "step": 4440 + }, + { + "epoch": 2.46, + "learning_rate": 3.9204021782100115e-06, + "loss": 0.9424, + "step": 4445 + }, + { + "epoch": 2.46, + "learning_rate": 3.8815664149837675e-06, + "loss": 0.9402, + "step": 4450 + }, + { + "epoch": 2.46, + "learning_rate": 3.8429077693869854e-06, + "loss": 0.9196, + "step": 4455 + }, + { + "epoch": 2.47, + "learning_rate": 3.804426565644839e-06, + "loss": 0.9357, + "step": 4460 + }, + { + "epoch": 2.47, + "learning_rate": 3.7661231264943086e-06, + "loss": 0.9234, + "step": 4465 + }, + { + "epoch": 2.47, + "learning_rate": 3.7279977731814963e-06, + "loss": 0.9135, + "step": 4470 + }, + { + "epoch": 2.47, + "learning_rate": 3.690050825458913e-06, + "loss": 0.9074, + "step": 4475 + }, + { + "epoch": 2.48, + "learning_rate": 3.652282601582793e-06, + "loss": 0.8658, + "step": 4480 + }, + { + "epoch": 2.48, + "learning_rate": 3.6146934183104748e-06, + "loss": 0.9522, + "step": 4485 + }, + { + "epoch": 2.48, + "learning_rate": 3.5772835908976538e-06, + "loss": 0.937, + "step": 4490 + }, + { + "epoch": 2.49, + "learning_rate": 3.540053433095841e-06, + "loss": 0.9336, + "step": 4495 + }, + { + "epoch": 2.49, + "learning_rate": 3.503003257149637e-06, + "loss": 0.9437, + "step": 4500 + }, + { + "epoch": 2.49, + "learning_rate": 3.4661333737941976e-06, + "loss": 0.9084, + "step": 4505 + }, + { + "epoch": 2.49, + "learning_rate": 3.429444092252554e-06, + "loss": 0.9378, + "step": 4510 + }, + { + "epoch": 2.5, + "learning_rate": 3.39293572023307e-06, + "loss": 0.8999, + "step": 4515 + }, + { + "epoch": 2.5, + "learning_rate": 3.3566085639268413e-06, + "loss": 0.9417, + "step": 4520 + }, + { + "epoch": 2.5, + "learning_rate": 3.32046292800513e-06, + "loss": 0.9288, + "step": 4525 + }, + { + "epoch": 2.5, + "learning_rate": 3.2844991156168097e-06, + "loss": 0.8957, + "step": 4530 + }, + { + "epoch": 2.51, + "learning_rate": 3.2487174283858223e-06, + "loss": 0.9002, + "step": 4535 + }, + { + "epoch": 2.51, + "learning_rate": 3.2131181664086517e-06, + "loss": 0.9315, + "step": 4540 + }, + { + "epoch": 2.51, + "learning_rate": 3.1777016282517975e-06, + "loss": 0.8948, + "step": 4545 + }, + { + "epoch": 2.52, + "learning_rate": 3.142468110949287e-06, + "loss": 0.9015, + "step": 4550 + }, + { + "epoch": 2.52, + "learning_rate": 3.1074179100001737e-06, + "loss": 0.9273, + "step": 4555 + }, + { + "epoch": 2.52, + "learning_rate": 3.0725513193660404e-06, + "loss": 0.9307, + "step": 4560 + }, + { + "epoch": 2.52, + "learning_rate": 3.0378686314685934e-06, + "loss": 0.9075, + "step": 4565 + }, + { + "epoch": 2.53, + "learning_rate": 3.003370137187128e-06, + "loss": 0.8821, + "step": 4570 + }, + { + "epoch": 2.53, + "learning_rate": 2.969056125856154e-06, + "loss": 0.9245, + "step": 4575 + }, + { + "epoch": 2.53, + "learning_rate": 2.93492688526294e-06, + "loss": 0.9346, + "step": 4580 + }, + { + "epoch": 2.54, + "learning_rate": 2.900982701645111e-06, + "loss": 0.9226, + "step": 4585 + }, + { + "epoch": 2.54, + "learning_rate": 2.867223859688237e-06, + "loss": 0.9215, + "step": 4590 + }, + { + "epoch": 2.54, + "learning_rate": 2.83365064252345e-06, + "loss": 0.878, + "step": 4595 + }, + { + "epoch": 2.54, + "learning_rate": 2.800263331725078e-06, + "loss": 0.9207, + "step": 4600 + }, + { + "epoch": 2.55, + "learning_rate": 2.7670622073082657e-06, + "loss": 0.9156, + "step": 4605 + }, + { + "epoch": 2.55, + "learning_rate": 2.7340475477266507e-06, + "loss": 0.9296, + "step": 4610 + }, + { + "epoch": 2.55, + "learning_rate": 2.701219629869986e-06, + "loss": 0.9346, + "step": 4615 + }, + { + "epoch": 2.55, + "learning_rate": 2.6685787290618825e-06, + "loss": 0.9237, + "step": 4620 + }, + { + "epoch": 2.56, + "learning_rate": 2.636125119057428e-06, + "loss": 0.9162, + "step": 4625 + }, + { + "epoch": 2.56, + "learning_rate": 2.6038590720409565e-06, + "loss": 0.9145, + "step": 4630 + }, + { + "epoch": 2.56, + "learning_rate": 2.5717808586237067e-06, + "loss": 0.9776, + "step": 4635 + }, + { + "epoch": 2.57, + "learning_rate": 2.539890747841611e-06, + "loss": 0.9519, + "step": 4640 + }, + { + "epoch": 2.57, + "learning_rate": 2.5081890071529695e-06, + "loss": 0.9116, + "step": 4645 + }, + { + "epoch": 2.57, + "learning_rate": 2.4766759024362927e-06, + "loss": 0.9217, + "step": 4650 + }, + { + "epoch": 2.57, + "learning_rate": 2.445351697987988e-06, + "loss": 0.9113, + "step": 4655 + }, + { + "epoch": 2.58, + "learning_rate": 2.414216656520191e-06, + "loss": 0.9089, + "step": 4660 + }, + { + "epoch": 2.58, + "learning_rate": 2.3832710391585605e-06, + "loss": 0.9367, + "step": 4665 + }, + { + "epoch": 2.58, + "learning_rate": 2.3525151054400675e-06, + "loss": 0.9453, + "step": 4670 + }, + { + "epoch": 2.59, + "learning_rate": 2.3219491133108394e-06, + "loss": 0.9187, + "step": 4675 + }, + { + "epoch": 2.59, + "learning_rate": 2.2915733191239824e-06, + "loss": 0.911, + "step": 4680 + }, + { + "epoch": 2.59, + "learning_rate": 2.261387977637436e-06, + "loss": 0.9309, + "step": 4685 + }, + { + "epoch": 2.59, + "learning_rate": 2.2313933420118395e-06, + "loss": 0.8921, + "step": 4690 + }, + { + "epoch": 2.6, + "learning_rate": 2.2015896638084037e-06, + "loss": 0.9332, + "step": 4695 + }, + { + "epoch": 2.6, + "learning_rate": 2.171977192986813e-06, + "loss": 0.9316, + "step": 4700 + }, + { + "epoch": 2.6, + "learning_rate": 2.142556177903096e-06, + "loss": 0.9453, + "step": 4705 + }, + { + "epoch": 2.6, + "learning_rate": 2.1133268653076022e-06, + "loss": 0.937, + "step": 4710 + }, + { + "epoch": 2.61, + "learning_rate": 2.084289500342862e-06, + "loss": 0.9067, + "step": 4715 + }, + { + "epoch": 2.61, + "learning_rate": 2.0554443265415864e-06, + "loss": 0.9694, + "step": 4720 + }, + { + "epoch": 2.61, + "learning_rate": 2.0267915858245943e-06, + "loss": 0.9217, + "step": 4725 + }, + { + "epoch": 2.62, + "learning_rate": 1.998331518498797e-06, + "loss": 0.9399, + "step": 4730 + }, + { + "epoch": 2.62, + "learning_rate": 1.970064363255175e-06, + "loss": 0.9164, + "step": 4735 + }, + { + "epoch": 2.62, + "learning_rate": 1.941990357166784e-06, + "loss": 0.9336, + "step": 4740 + }, + { + "epoch": 2.62, + "learning_rate": 1.9141097356867644e-06, + "loss": 0.926, + "step": 4745 + }, + { + "epoch": 2.63, + "learning_rate": 1.8864227326463452e-06, + "loss": 0.9109, + "step": 4750 + }, + { + "epoch": 2.63, + "learning_rate": 1.8589295802529328e-06, + "loss": 0.9528, + "step": 4755 + }, + { + "epoch": 2.63, + "learning_rate": 1.8316305090881003e-06, + "loss": 0.9142, + "step": 4760 + }, + { + "epoch": 2.63, + "learning_rate": 1.8045257481057204e-06, + "loss": 0.9247, + "step": 4765 + }, + { + "epoch": 2.64, + "learning_rate": 1.7776155246299747e-06, + "loss": 0.9162, + "step": 4770 + }, + { + "epoch": 2.64, + "learning_rate": 1.7509000643535167e-06, + "loss": 0.9084, + "step": 4775 + }, + { + "epoch": 2.64, + "learning_rate": 1.7243795913355148e-06, + "loss": 0.8427, + "step": 4780 + }, + { + "epoch": 2.65, + "learning_rate": 1.6980543279998401e-06, + "loss": 0.8851, + "step": 4785 + }, + { + "epoch": 2.65, + "learning_rate": 1.671924495133126e-06, + "loss": 0.9155, + "step": 4790 + }, + { + "epoch": 2.65, + "learning_rate": 1.6459903118829777e-06, + "loss": 0.9049, + "step": 4795 + }, + { + "epoch": 2.65, + "learning_rate": 1.6202519957561114e-06, + "loss": 0.8831, + "step": 4800 + }, + { + "epoch": 2.66, + "learning_rate": 1.5947097626165252e-06, + "loss": 0.9286, + "step": 4805 + }, + { + "epoch": 2.66, + "learning_rate": 1.5693638266836952e-06, + "loss": 0.9299, + "step": 4810 + }, + { + "epoch": 2.66, + "learning_rate": 1.5442144005307774e-06, + "loss": 0.8739, + "step": 4815 + }, + { + "epoch": 2.67, + "learning_rate": 1.519261695082827e-06, + "loss": 0.9201, + "step": 4820 + }, + { + "epoch": 2.67, + "learning_rate": 1.4945059196150247e-06, + "loss": 0.9062, + "step": 4825 + }, + { + "epoch": 2.67, + "learning_rate": 1.4699472817509248e-06, + "loss": 0.9304, + "step": 4830 + }, + { + "epoch": 2.67, + "learning_rate": 1.4455859874607235e-06, + "loss": 0.9253, + "step": 4835 + }, + { + "epoch": 2.68, + "learning_rate": 1.4214222410594947e-06, + "loss": 0.9261, + "step": 4840 + }, + { + "epoch": 2.68, + "learning_rate": 1.3974562452055418e-06, + "loss": 0.8764, + "step": 4845 + }, + { + "epoch": 2.68, + "learning_rate": 1.3736882008986262e-06, + "loss": 0.8906, + "step": 4850 + }, + { + "epoch": 2.68, + "learning_rate": 1.3501183074783263e-06, + "loss": 0.9671, + "step": 4855 + }, + { + "epoch": 2.69, + "learning_rate": 1.3267467626223606e-06, + "loss": 0.8979, + "step": 4860 + }, + { + "epoch": 2.69, + "learning_rate": 1.3035737623449146e-06, + "loss": 0.9284, + "step": 4865 + }, + { + "epoch": 2.69, + "learning_rate": 1.2805995009950083e-06, + "loss": 0.9338, + "step": 4870 + }, + { + "epoch": 2.7, + "learning_rate": 1.257824171254865e-06, + "loss": 0.9474, + "step": 4875 + }, + { + "epoch": 2.7, + "learning_rate": 1.2352479641382919e-06, + "loss": 0.9307, + "step": 4880 + }, + { + "epoch": 2.7, + "learning_rate": 1.2128710689890826e-06, + "loss": 0.9578, + "step": 4885 + }, + { + "epoch": 2.7, + "learning_rate": 1.1906936734794233e-06, + "loss": 0.9123, + "step": 4890 + }, + { + "epoch": 2.71, + "learning_rate": 1.1687159636083161e-06, + "loss": 0.9157, + "step": 4895 + }, + { + "epoch": 2.71, + "learning_rate": 1.1469381237000476e-06, + "loss": 0.9135, + "step": 4900 + }, + { + "epoch": 2.71, + "learning_rate": 1.1253603364025867e-06, + "loss": 0.9073, + "step": 4905 + }, + { + "epoch": 2.71, + "learning_rate": 1.1039827826861193e-06, + "loss": 0.9198, + "step": 4910 + }, + { + "epoch": 2.72, + "learning_rate": 1.0828056418414695e-06, + "loss": 0.911, + "step": 4915 + }, + { + "epoch": 2.72, + "learning_rate": 1.06182909147865e-06, + "loss": 0.9124, + "step": 4920 + }, + { + "epoch": 2.72, + "learning_rate": 1.0410533075253248e-06, + "loss": 0.9308, + "step": 4925 + }, + { + "epoch": 2.73, + "learning_rate": 1.020478464225369e-06, + "loss": 0.9005, + "step": 4930 + }, + { + "epoch": 2.73, + "learning_rate": 1.0001047341373832e-06, + "loss": 0.9326, + "step": 4935 + }, + { + "epoch": 2.73, + "learning_rate": 9.7993228813327e-07, + "loss": 0.8905, + "step": 4940 + }, + { + "epoch": 2.73, + "learning_rate": 9.599612953967746e-07, + "loss": 0.9238, + "step": 4945 + }, + { + "epoch": 2.74, + "learning_rate": 9.401919234220902e-07, + "loss": 0.8964, + "step": 4950 + }, + { + "epoch": 2.74, + "learning_rate": 9.206243380124352e-07, + "loss": 0.9183, + "step": 4955 + }, + { + "epoch": 2.74, + "learning_rate": 9.012587032786706e-07, + "loss": 0.9074, + "step": 4960 + }, + { + "epoch": 2.75, + "learning_rate": 8.820951816379263e-07, + "loss": 0.9255, + "step": 4965 + }, + { + "epoch": 2.75, + "learning_rate": 8.631339338122324e-07, + "loss": 0.9133, + "step": 4970 + }, + { + "epoch": 2.75, + "learning_rate": 8.443751188271703e-07, + "loss": 0.9056, + "step": 4975 + }, + { + "epoch": 2.75, + "learning_rate": 8.258188940105549e-07, + "loss": 0.8905, + "step": 4980 + }, + { + "epoch": 2.76, + "learning_rate": 8.074654149910821e-07, + "loss": 0.9088, + "step": 4985 + }, + { + "epoch": 2.76, + "learning_rate": 7.893148356970748e-07, + "loss": 0.8994, + "step": 4990 + }, + { + "epoch": 2.76, + "learning_rate": 7.713673083551281e-07, + "loss": 0.8946, + "step": 4995 + }, + { + "epoch": 2.76, + "learning_rate": 7.536229834888913e-07, + "loss": 0.924, + "step": 5000 + }, + { + "epoch": 2.77, + "learning_rate": 7.360820099177712e-07, + "loss": 0.9102, + "step": 5005 + }, + { + "epoch": 2.77, + "learning_rate": 7.187445347556859e-07, + "loss": 0.9198, + "step": 5010 + }, + { + "epoch": 2.77, + "learning_rate": 7.016107034098524e-07, + "loss": 0.9004, + "step": 5015 + }, + { + "epoch": 2.78, + "learning_rate": 6.846806595795424e-07, + "loss": 0.8849, + "step": 5020 + }, + { + "epoch": 2.78, + "learning_rate": 6.679545452548924e-07, + "loss": 0.9473, + "step": 5025 + }, + { + "epoch": 2.78, + "learning_rate": 6.514325007157013e-07, + "loss": 0.9339, + "step": 5030 + }, + { + "epoch": 2.78, + "learning_rate": 6.35114664530273e-07, + "loss": 0.9172, + "step": 5035 + }, + { + "epoch": 2.79, + "learning_rate": 6.190011735542262e-07, + "loss": 0.9796, + "step": 5040 + }, + { + "epoch": 2.79, + "learning_rate": 6.030921629293778e-07, + "loss": 0.9367, + "step": 5045 + }, + { + "epoch": 2.79, + "learning_rate": 5.873877660825783e-07, + "loss": 0.9254, + "step": 5050 + }, + { + "epoch": 2.8, + "learning_rate": 5.718881147246252e-07, + "loss": 0.8976, + "step": 5055 + }, + { + "epoch": 2.8, + "learning_rate": 5.565933388491263e-07, + "loss": 0.9099, + "step": 5060 + }, + { + "epoch": 2.8, + "learning_rate": 5.415035667314328e-07, + "loss": 0.9057, + "step": 5065 + }, + { + "epoch": 2.8, + "learning_rate": 5.266189249275521e-07, + "loss": 0.8978, + "step": 5070 + }, + { + "epoch": 2.81, + "learning_rate": 5.119395382730929e-07, + "loss": 0.9172, + "step": 5075 + }, + { + "epoch": 2.81, + "learning_rate": 4.974655298822129e-07, + "loss": 0.9393, + "step": 5080 + }, + { + "epoch": 2.81, + "learning_rate": 4.831970211465892e-07, + "loss": 0.9137, + "step": 5085 + }, + { + "epoch": 2.81, + "learning_rate": 4.6913413173439723e-07, + "loss": 0.8573, + "step": 5090 + }, + { + "epoch": 2.82, + "learning_rate": 4.552769795893086e-07, + "loss": 0.9533, + "step": 5095 + }, + { + "epoch": 2.82, + "learning_rate": 4.416256809295083e-07, + "loss": 0.8693, + "step": 5100 + } + ], + "max_steps": 5424, + "num_train_epochs": 3, + "total_flos": 1.6573079691146035e+18, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-5100/training_args.bin b/checkpoint-5100/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..77433459daca6603950afc463ff989e817f81ac3 --- /dev/null +++ b/checkpoint-5100/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0f6a8853599a52b1f15984d75ce4b6434c335fdc67d4509d0b2ca0d8c613590a +size 3362 diff --git a/checkpoint-5200/README.md b/checkpoint-5200/README.md new file mode 100644 index 0000000000000000000000000000000000000000..53c0fe6b436905ad7a2c060e98fe1ebdeac1a2fe --- /dev/null +++ b/checkpoint-5200/README.md @@ -0,0 +1,15 @@ +--- +library_name: peft +--- +## Training procedure + + +The following `bitsandbytes` quantization config was used during training: +- load_in_8bit: True +- llm_int8_threshold: 6.0 +- llm_int8_skip_modules: None +- llm_int8_enable_fp32_cpu_offload: False +### Framework versions + + +- PEFT 0.5.0 diff --git a/checkpoint-5200/adapter_config.json b/checkpoint-5200/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..514ae99ac7471cc26016f7575e44dd8304f29b07 --- /dev/null +++ b/checkpoint-5200/adapter_config.json @@ -0,0 +1,21 @@ +{ + "auto_mapping": null, + "base_model_name_or_path": "meta-llama/Llama-2-7b-chat-hf", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "q_proj", + "v_proj" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-5200/adapter_model.bin b/checkpoint-5200/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..f13874cc906e86b2adece2b967e557782926e0ef --- /dev/null +++ b/checkpoint-5200/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7beeb49a38c1a40e860b3b18a8dbd9d914dce3e209cd7bb968f5d5e00eb5b1c4 +size 16821197 diff --git a/checkpoint-5200/finetuning_args.json b/checkpoint-5200/finetuning_args.json new file mode 100644 index 0000000000000000000000000000000000000000..d01efc206b59c6f88548e8f3940579f2ed2af33b --- /dev/null +++ b/checkpoint-5200/finetuning_args.json @@ -0,0 +1,16 @@ +{ + "dpo_beta": 0.1, + "finetuning_type": "lora", + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "lora_rank": 8, + "lora_target": [ + "q_proj", + "v_proj" + ], + "name_module_trainable": "mlp", + "num_hidden_layers": 32, + "num_layer_trainable": 3, + "ppo_score_norm": false, + "resume_lora_training": true +} diff --git a/checkpoint-5200/optimizer.pt b/checkpoint-5200/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..0c3d38ef9a86988e62c2c43d9ab943cb2f14b132 --- /dev/null +++ b/checkpoint-5200/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:927e5f172e03b535159b8d1c587e4f8caf73496f4662db7f4f7fcf4fe8e620d4 +size 33629893 diff --git a/checkpoint-5200/rng_state.pth b/checkpoint-5200/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..32b08fd7592bd557aeef407c6c8c8d637ffe0a9f --- /dev/null +++ b/checkpoint-5200/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8848c311c1375a66cf75f12881aebb211210e38626d37c1dba34dc7426d5b6a9 +size 14575 diff --git a/checkpoint-5200/scaler.pt b/checkpoint-5200/scaler.pt new file mode 100644 index 0000000000000000000000000000000000000000..76c6972e736916f782241cd2041d08b76be6672e --- /dev/null +++ b/checkpoint-5200/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f583bfd6a7ab7bd01064943d5a5c9d62ecc2cad7ccfe023b751519471faa787b +size 557 diff --git a/checkpoint-5200/scheduler.pt b/checkpoint-5200/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..fd280afb719fdade7a8e3442565d18f5061594de --- /dev/null +++ b/checkpoint-5200/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1c20167db31ac9a567cbf1bd255f2e0e01cf54ef586bf1aca9f9bb52615408d5 +size 627 diff --git a/checkpoint-5200/trainer_state.json b/checkpoint-5200/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..37d398f9d2405377da3a0c76e908a9a845194fca --- /dev/null +++ b/checkpoint-5200/trainer_state.json @@ -0,0 +1,6256 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 2.87531103124136, + "global_step": 5200, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.9999895164082156e-05, + "loss": 1.6349, + "step": 5 + }, + { + "epoch": 0.01, + "learning_rate": 4.999958065720787e-05, + "loss": 1.6199, + "step": 10 + }, + { + "epoch": 0.01, + "learning_rate": 4.999905648201487e-05, + "loss": 1.4834, + "step": 15 + }, + { + "epoch": 0.01, + "learning_rate": 4.999832264289934e-05, + "loss": 1.3882, + "step": 20 + }, + { + "epoch": 0.01, + "learning_rate": 4.999737914601591e-05, + "loss": 1.3679, + "step": 25 + }, + { + "epoch": 0.02, + "learning_rate": 4.999622599927756e-05, + "loss": 1.2396, + "step": 30 + }, + { + "epoch": 0.02, + "learning_rate": 4.999486321235559e-05, + "loss": 1.321, + "step": 35 + }, + { + "epoch": 0.02, + "learning_rate": 4.9993290796679516e-05, + "loss": 1.2874, + "step": 40 + }, + { + "epoch": 0.02, + "learning_rate": 4.999150876543699e-05, + "loss": 1.2607, + "step": 45 + }, + { + "epoch": 0.03, + "learning_rate": 4.9989517133573694e-05, + "loss": 1.2454, + "step": 50 + }, + { + "epoch": 0.03, + "learning_rate": 4.9987315917793174e-05, + "loss": 1.2799, + "step": 55 + }, + { + "epoch": 0.03, + "learning_rate": 4.998490513655676e-05, + "loss": 1.2575, + "step": 60 + }, + { + "epoch": 0.04, + "learning_rate": 4.998228481008337e-05, + "loss": 1.2404, + "step": 65 + }, + { + "epoch": 0.04, + "learning_rate": 4.997945496034934e-05, + "loss": 1.2219, + "step": 70 + }, + { + "epoch": 0.04, + "learning_rate": 4.9976415611088267e-05, + "loss": 1.2241, + "step": 75 + }, + { + "epoch": 0.04, + "learning_rate": 4.997316678779079e-05, + "loss": 1.1716, + "step": 80 + }, + { + "epoch": 0.05, + "learning_rate": 4.996970851770438e-05, + "loss": 1.1883, + "step": 85 + }, + { + "epoch": 0.05, + "learning_rate": 4.9966040829833115e-05, + "loss": 1.205, + "step": 90 + }, + { + "epoch": 0.05, + "learning_rate": 4.9962163754937426e-05, + "loss": 1.1246, + "step": 95 + }, + { + "epoch": 0.06, + "learning_rate": 4.995807732553384e-05, + "loss": 1.1636, + "step": 100 + }, + { + "epoch": 0.06, + "learning_rate": 4.9953781575894723e-05, + "loss": 1.158, + "step": 105 + }, + { + "epoch": 0.06, + "learning_rate": 4.9949276542048e-05, + "loss": 1.1477, + "step": 110 + }, + { + "epoch": 0.06, + "learning_rate": 4.9944562261776805e-05, + "loss": 1.1678, + "step": 115 + }, + { + "epoch": 0.07, + "learning_rate": 4.9939638774619216e-05, + "loss": 1.1501, + "step": 120 + }, + { + "epoch": 0.07, + "learning_rate": 4.99345061218679e-05, + "loss": 1.1955, + "step": 125 + }, + { + "epoch": 0.07, + "learning_rate": 4.9929164346569756e-05, + "loss": 1.1724, + "step": 130 + }, + { + "epoch": 0.07, + "learning_rate": 4.9923613493525576e-05, + "loss": 1.177, + "step": 135 + }, + { + "epoch": 0.08, + "learning_rate": 4.991785360928968e-05, + "loss": 1.1418, + "step": 140 + }, + { + "epoch": 0.08, + "learning_rate": 4.991188474216947e-05, + "loss": 1.1898, + "step": 145 + }, + { + "epoch": 0.08, + "learning_rate": 4.9905706942225094e-05, + "loss": 1.1479, + "step": 150 + }, + { + "epoch": 0.09, + "learning_rate": 4.9899320261268966e-05, + "loss": 1.1356, + "step": 155 + }, + { + "epoch": 0.09, + "learning_rate": 4.989272475286537e-05, + "loss": 1.1397, + "step": 160 + }, + { + "epoch": 0.09, + "learning_rate": 4.9885920472330004e-05, + "loss": 1.1215, + "step": 165 + }, + { + "epoch": 0.09, + "learning_rate": 4.9878907476729516e-05, + "loss": 1.167, + "step": 170 + }, + { + "epoch": 0.1, + "learning_rate": 4.9871685824881e-05, + "loss": 1.1219, + "step": 175 + }, + { + "epoch": 0.1, + "learning_rate": 4.9864255577351534e-05, + "loss": 1.0835, + "step": 180 + }, + { + "epoch": 0.1, + "learning_rate": 4.985661679645769e-05, + "loss": 1.0721, + "step": 185 + }, + { + "epoch": 0.11, + "learning_rate": 4.9848769546264915e-05, + "loss": 1.0692, + "step": 190 + }, + { + "epoch": 0.11, + "learning_rate": 4.9840713892587146e-05, + "loss": 1.0488, + "step": 195 + }, + { + "epoch": 0.11, + "learning_rate": 4.983244990298609e-05, + "loss": 1.1285, + "step": 200 + }, + { + "epoch": 0.11, + "learning_rate": 4.982397764677081e-05, + "loss": 1.0832, + "step": 205 + }, + { + "epoch": 0.12, + "learning_rate": 4.981529719499704e-05, + "loss": 1.0652, + "step": 210 + }, + { + "epoch": 0.12, + "learning_rate": 4.980640862046663e-05, + "loss": 1.1043, + "step": 215 + }, + { + "epoch": 0.12, + "learning_rate": 4.979731199772693e-05, + "loss": 1.112, + "step": 220 + }, + { + "epoch": 0.12, + "learning_rate": 4.9788007403070146e-05, + "loss": 1.1029, + "step": 225 + }, + { + "epoch": 0.13, + "learning_rate": 4.977849491453277e-05, + "loss": 1.0869, + "step": 230 + }, + { + "epoch": 0.13, + "learning_rate": 4.976877461189481e-05, + "loss": 1.0843, + "step": 235 + }, + { + "epoch": 0.13, + "learning_rate": 4.975884657667922e-05, + "loss": 1.0789, + "step": 240 + }, + { + "epoch": 0.14, + "learning_rate": 4.974871089215118e-05, + "loss": 1.0449, + "step": 245 + }, + { + "epoch": 0.14, + "learning_rate": 4.9738367643317405e-05, + "loss": 1.1053, + "step": 250 + }, + { + "epoch": 0.14, + "learning_rate": 4.9727816916925395e-05, + "loss": 1.0651, + "step": 255 + }, + { + "epoch": 0.14, + "learning_rate": 4.971705880146276e-05, + "loss": 1.0828, + "step": 260 + }, + { + "epoch": 0.15, + "learning_rate": 4.970609338715646e-05, + "loss": 1.0932, + "step": 265 + }, + { + "epoch": 0.15, + "learning_rate": 4.969492076597203e-05, + "loss": 1.0648, + "step": 270 + }, + { + "epoch": 0.15, + "learning_rate": 4.968354103161283e-05, + "loss": 1.0948, + "step": 275 + }, + { + "epoch": 0.15, + "learning_rate": 4.967195427951926e-05, + "loss": 1.0721, + "step": 280 + }, + { + "epoch": 0.16, + "learning_rate": 4.9660160606867936e-05, + "loss": 1.124, + "step": 285 + }, + { + "epoch": 0.16, + "learning_rate": 4.9648160112570896e-05, + "loss": 1.0963, + "step": 290 + }, + { + "epoch": 0.16, + "learning_rate": 4.9635952897274773e-05, + "loss": 1.1078, + "step": 295 + }, + { + "epoch": 0.17, + "learning_rate": 4.9623539063359925e-05, + "loss": 1.1059, + "step": 300 + }, + { + "epoch": 0.17, + "learning_rate": 4.961091871493962e-05, + "loss": 1.1032, + "step": 305 + }, + { + "epoch": 0.17, + "learning_rate": 4.959809195785912e-05, + "loss": 1.0595, + "step": 310 + }, + { + "epoch": 0.17, + "learning_rate": 4.958505889969481e-05, + "loss": 1.1096, + "step": 315 + }, + { + "epoch": 0.18, + "learning_rate": 4.957181964975329e-05, + "loss": 1.0589, + "step": 320 + }, + { + "epoch": 0.18, + "learning_rate": 4.955837431907049e-05, + "loss": 1.0608, + "step": 325 + }, + { + "epoch": 0.18, + "learning_rate": 4.954472302041069e-05, + "loss": 1.0819, + "step": 330 + }, + { + "epoch": 0.19, + "learning_rate": 4.9530865868265605e-05, + "loss": 1.0759, + "step": 335 + }, + { + "epoch": 0.19, + "learning_rate": 4.951680297885342e-05, + "loss": 1.0515, + "step": 340 + }, + { + "epoch": 0.19, + "learning_rate": 4.950253447011779e-05, + "loss": 1.0371, + "step": 345 + }, + { + "epoch": 0.19, + "learning_rate": 4.948806046172691e-05, + "loss": 1.0619, + "step": 350 + }, + { + "epoch": 0.2, + "learning_rate": 4.947338107507245e-05, + "loss": 1.0757, + "step": 355 + }, + { + "epoch": 0.2, + "learning_rate": 4.945849643326857e-05, + "loss": 1.0686, + "step": 360 + }, + { + "epoch": 0.2, + "learning_rate": 4.9443406661150874e-05, + "loss": 1.0809, + "step": 365 + }, + { + "epoch": 0.2, + "learning_rate": 4.942811188527537e-05, + "loss": 1.0704, + "step": 370 + }, + { + "epoch": 0.21, + "learning_rate": 4.941261223391742e-05, + "loss": 1.0655, + "step": 375 + }, + { + "epoch": 0.21, + "learning_rate": 4.939690783707063e-05, + "loss": 1.1182, + "step": 380 + }, + { + "epoch": 0.21, + "learning_rate": 4.938099882644578e-05, + "loss": 1.081, + "step": 385 + }, + { + "epoch": 0.22, + "learning_rate": 4.9364885335469734e-05, + "loss": 1.0792, + "step": 390 + }, + { + "epoch": 0.22, + "learning_rate": 4.93485674992843e-05, + "loss": 1.0244, + "step": 395 + }, + { + "epoch": 0.22, + "learning_rate": 4.933204545474511e-05, + "loss": 1.0531, + "step": 400 + }, + { + "epoch": 0.22, + "learning_rate": 4.9315319340420465e-05, + "loss": 1.048, + "step": 405 + }, + { + "epoch": 0.23, + "learning_rate": 4.929838929659015e-05, + "loss": 1.04, + "step": 410 + }, + { + "epoch": 0.23, + "learning_rate": 4.9281255465244314e-05, + "loss": 1.0686, + "step": 415 + }, + { + "epoch": 0.23, + "learning_rate": 4.926391799008223e-05, + "loss": 1.0933, + "step": 420 + }, + { + "epoch": 0.24, + "learning_rate": 4.924637701651111e-05, + "loss": 1.0313, + "step": 425 + }, + { + "epoch": 0.24, + "learning_rate": 4.9228632691644874e-05, + "loss": 1.0811, + "step": 430 + }, + { + "epoch": 0.24, + "learning_rate": 4.921068516430293e-05, + "loss": 1.0617, + "step": 435 + }, + { + "epoch": 0.24, + "learning_rate": 4.919253458500892e-05, + "loss": 1.0482, + "step": 440 + }, + { + "epoch": 0.25, + "learning_rate": 4.9174181105989445e-05, + "loss": 1.0681, + "step": 445 + }, + { + "epoch": 0.25, + "learning_rate": 4.9155624881172834e-05, + "loss": 1.0476, + "step": 450 + }, + { + "epoch": 0.25, + "learning_rate": 4.913686606618777e-05, + "loss": 1.0463, + "step": 455 + }, + { + "epoch": 0.25, + "learning_rate": 4.911790481836208e-05, + "loss": 1.0213, + "step": 460 + }, + { + "epoch": 0.26, + "learning_rate": 4.909874129672133e-05, + "loss": 0.9855, + "step": 465 + }, + { + "epoch": 0.26, + "learning_rate": 4.907937566198757e-05, + "loss": 1.045, + "step": 470 + }, + { + "epoch": 0.26, + "learning_rate": 4.9059808076577914e-05, + "loss": 1.0442, + "step": 475 + }, + { + "epoch": 0.27, + "learning_rate": 4.904003870460323e-05, + "loss": 1.0354, + "step": 480 + }, + { + "epoch": 0.27, + "learning_rate": 4.9020067711866735e-05, + "loss": 1.0331, + "step": 485 + }, + { + "epoch": 0.27, + "learning_rate": 4.899989526586261e-05, + "loss": 1.0585, + "step": 490 + }, + { + "epoch": 0.27, + "learning_rate": 4.8979521535774636e-05, + "loss": 1.0223, + "step": 495 + }, + { + "epoch": 0.28, + "learning_rate": 4.895894669247468e-05, + "loss": 1.0118, + "step": 500 + }, + { + "epoch": 0.28, + "learning_rate": 4.8938170908521356e-05, + "loss": 1.0508, + "step": 505 + }, + { + "epoch": 0.28, + "learning_rate": 4.8917194358158534e-05, + "loss": 1.0656, + "step": 510 + }, + { + "epoch": 0.28, + "learning_rate": 4.8896017217313886e-05, + "loss": 1.0655, + "step": 515 + }, + { + "epoch": 0.29, + "learning_rate": 4.887463966359741e-05, + "loss": 1.0833, + "step": 520 + }, + { + "epoch": 0.29, + "learning_rate": 4.8853061876299956e-05, + "loss": 1.068, + "step": 525 + }, + { + "epoch": 0.29, + "learning_rate": 4.8831284036391684e-05, + "loss": 1.0487, + "step": 530 + }, + { + "epoch": 0.3, + "learning_rate": 4.880930632652058e-05, + "loss": 1.059, + "step": 535 + }, + { + "epoch": 0.3, + "learning_rate": 4.878712893101092e-05, + "loss": 1.0408, + "step": 540 + }, + { + "epoch": 0.3, + "learning_rate": 4.876475203586171e-05, + "loss": 0.9976, + "step": 545 + }, + { + "epoch": 0.3, + "learning_rate": 4.874217582874514e-05, + "loss": 1.0358, + "step": 550 + }, + { + "epoch": 0.31, + "learning_rate": 4.8719400499005e-05, + "loss": 1.0073, + "step": 555 + }, + { + "epoch": 0.31, + "learning_rate": 4.869642623765509e-05, + "loss": 1.0742, + "step": 560 + }, + { + "epoch": 0.31, + "learning_rate": 4.867325323737765e-05, + "loss": 1.0463, + "step": 565 + }, + { + "epoch": 0.32, + "learning_rate": 4.864988169252168e-05, + "loss": 1.0242, + "step": 570 + }, + { + "epoch": 0.32, + "learning_rate": 4.8626311799101375e-05, + "loss": 1.0773, + "step": 575 + }, + { + "epoch": 0.32, + "learning_rate": 4.860254375479446e-05, + "loss": 1.0371, + "step": 580 + }, + { + "epoch": 0.32, + "learning_rate": 4.8578577758940504e-05, + "loss": 1.0399, + "step": 585 + }, + { + "epoch": 0.33, + "learning_rate": 4.855441401253928e-05, + "loss": 1.0411, + "step": 590 + }, + { + "epoch": 0.33, + "learning_rate": 4.8530052718249076e-05, + "loss": 1.0476, + "step": 595 + }, + { + "epoch": 0.33, + "learning_rate": 4.850549408038498e-05, + "loss": 1.0075, + "step": 600 + }, + { + "epoch": 0.33, + "learning_rate": 4.848073830491717e-05, + "loss": 1.0045, + "step": 605 + }, + { + "epoch": 0.34, + "learning_rate": 4.845578559946923e-05, + "loss": 1.0368, + "step": 610 + }, + { + "epoch": 0.34, + "learning_rate": 4.8430636173316306e-05, + "loss": 1.0156, + "step": 615 + }, + { + "epoch": 0.34, + "learning_rate": 4.840529023738348e-05, + "loss": 1.0334, + "step": 620 + }, + { + "epoch": 0.35, + "learning_rate": 4.837974800424389e-05, + "loss": 0.9994, + "step": 625 + }, + { + "epoch": 0.35, + "learning_rate": 4.8354009688117026e-05, + "loss": 1.0467, + "step": 630 + }, + { + "epoch": 0.35, + "learning_rate": 4.8328075504866874e-05, + "loss": 0.9968, + "step": 635 + }, + { + "epoch": 0.35, + "learning_rate": 4.8301945672000164e-05, + "loss": 1.041, + "step": 640 + }, + { + "epoch": 0.36, + "learning_rate": 4.8275620408664487e-05, + "loss": 0.9811, + "step": 645 + }, + { + "epoch": 0.36, + "learning_rate": 4.8249099935646494e-05, + "loss": 1.0235, + "step": 650 + }, + { + "epoch": 0.36, + "learning_rate": 4.822238447537003e-05, + "loss": 1.0152, + "step": 655 + }, + { + "epoch": 0.36, + "learning_rate": 4.819547425189429e-05, + "loss": 1.038, + "step": 660 + }, + { + "epoch": 0.37, + "learning_rate": 4.81683694909119e-05, + "loss": 1.0387, + "step": 665 + }, + { + "epoch": 0.37, + "learning_rate": 4.814107041974707e-05, + "loss": 1.019, + "step": 670 + }, + { + "epoch": 0.37, + "learning_rate": 4.811357726735366e-05, + "loss": 1.0403, + "step": 675 + }, + { + "epoch": 0.38, + "learning_rate": 4.808589026431324e-05, + "loss": 1.0828, + "step": 680 + }, + { + "epoch": 0.38, + "learning_rate": 4.805800964283322e-05, + "loss": 1.0128, + "step": 685 + }, + { + "epoch": 0.38, + "learning_rate": 4.802993563674483e-05, + "loss": 1.0502, + "step": 690 + }, + { + "epoch": 0.38, + "learning_rate": 4.80016684815012e-05, + "loss": 1.0169, + "step": 695 + }, + { + "epoch": 0.39, + "learning_rate": 4.7973208414175406e-05, + "loss": 1.0316, + "step": 700 + }, + { + "epoch": 0.39, + "learning_rate": 4.794455567345842e-05, + "loss": 1.0263, + "step": 705 + }, + { + "epoch": 0.39, + "learning_rate": 4.791571049965714e-05, + "loss": 1.0801, + "step": 710 + }, + { + "epoch": 0.4, + "learning_rate": 4.7886673134692404e-05, + "loss": 1.0575, + "step": 715 + }, + { + "epoch": 0.4, + "learning_rate": 4.7857443822096905e-05, + "loss": 0.9767, + "step": 720 + }, + { + "epoch": 0.4, + "learning_rate": 4.782802280701319e-05, + "loss": 0.9666, + "step": 725 + }, + { + "epoch": 0.4, + "learning_rate": 4.77984103361916e-05, + "loss": 0.9983, + "step": 730 + }, + { + "epoch": 0.41, + "learning_rate": 4.776860665798816e-05, + "loss": 1.0219, + "step": 735 + }, + { + "epoch": 0.41, + "learning_rate": 4.773861202236257e-05, + "loss": 0.9963, + "step": 740 + }, + { + "epoch": 0.41, + "learning_rate": 4.770842668087602e-05, + "loss": 1.0219, + "step": 745 + }, + { + "epoch": 0.41, + "learning_rate": 4.767805088668916e-05, + "loss": 1.0029, + "step": 750 + }, + { + "epoch": 0.42, + "learning_rate": 4.7647484894559936e-05, + "loss": 1.0077, + "step": 755 + }, + { + "epoch": 0.42, + "learning_rate": 4.7616728960841444e-05, + "loss": 0.9908, + "step": 760 + }, + { + "epoch": 0.42, + "learning_rate": 4.758578334347981e-05, + "loss": 1.0092, + "step": 765 + }, + { + "epoch": 0.43, + "learning_rate": 4.7554648302012015e-05, + "loss": 0.9874, + "step": 770 + }, + { + "epoch": 0.43, + "learning_rate": 4.7523324097563706e-05, + "loss": 1.0185, + "step": 775 + }, + { + "epoch": 0.43, + "learning_rate": 4.749181099284703e-05, + "loss": 1.0533, + "step": 780 + }, + { + "epoch": 0.43, + "learning_rate": 4.746010925215839e-05, + "loss": 1.0083, + "step": 785 + }, + { + "epoch": 0.44, + "learning_rate": 4.74282191413763e-05, + "loss": 1.0191, + "step": 790 + }, + { + "epoch": 0.44, + "learning_rate": 4.7396140927959045e-05, + "loss": 0.9786, + "step": 795 + }, + { + "epoch": 0.44, + "learning_rate": 4.7363874880942574e-05, + "loss": 1.0197, + "step": 800 + }, + { + "epoch": 0.45, + "learning_rate": 4.733142127093813e-05, + "loss": 1.0471, + "step": 805 + }, + { + "epoch": 0.45, + "learning_rate": 4.7298780370130014e-05, + "loss": 1.0134, + "step": 810 + }, + { + "epoch": 0.45, + "learning_rate": 4.726595245227336e-05, + "loss": 0.9801, + "step": 815 + }, + { + "epoch": 0.45, + "learning_rate": 4.723293779269173e-05, + "loss": 0.9944, + "step": 820 + }, + { + "epoch": 0.46, + "learning_rate": 4.7199736668274924e-05, + "loss": 1.0358, + "step": 825 + }, + { + "epoch": 0.46, + "learning_rate": 4.716634935747655e-05, + "loss": 0.9907, + "step": 830 + }, + { + "epoch": 0.46, + "learning_rate": 4.713277614031177e-05, + "loss": 1.0698, + "step": 835 + }, + { + "epoch": 0.46, + "learning_rate": 4.70990172983549e-05, + "loss": 1.0466, + "step": 840 + }, + { + "epoch": 0.47, + "learning_rate": 4.706507311473707e-05, + "loss": 0.9489, + "step": 845 + }, + { + "epoch": 0.47, + "learning_rate": 4.703094387414385e-05, + "loss": 0.9936, + "step": 850 + }, + { + "epoch": 0.47, + "learning_rate": 4.699662986281288e-05, + "loss": 0.9672, + "step": 855 + }, + { + "epoch": 0.48, + "learning_rate": 4.696213136853141e-05, + "loss": 1.0164, + "step": 860 + }, + { + "epoch": 0.48, + "learning_rate": 4.6927448680633954e-05, + "loss": 0.9817, + "step": 865 + }, + { + "epoch": 0.48, + "learning_rate": 4.689258208999983e-05, + "loss": 1.028, + "step": 870 + }, + { + "epoch": 0.48, + "learning_rate": 4.6857531889050716e-05, + "loss": 1.0239, + "step": 875 + }, + { + "epoch": 0.49, + "learning_rate": 4.682229837174821e-05, + "loss": 1.0378, + "step": 880 + }, + { + "epoch": 0.49, + "learning_rate": 4.678688183359135e-05, + "loss": 0.9908, + "step": 885 + }, + { + "epoch": 0.49, + "learning_rate": 4.675128257161418e-05, + "loss": 0.9384, + "step": 890 + }, + { + "epoch": 0.49, + "learning_rate": 4.671550088438319e-05, + "loss": 0.9481, + "step": 895 + }, + { + "epoch": 0.5, + "learning_rate": 4.6679537071994874e-05, + "loss": 0.9541, + "step": 900 + }, + { + "epoch": 0.5, + "learning_rate": 4.6643391436073165e-05, + "loss": 0.9787, + "step": 905 + }, + { + "epoch": 0.5, + "learning_rate": 4.660706427976693e-05, + "loss": 0.9897, + "step": 910 + }, + { + "epoch": 0.51, + "learning_rate": 4.657055590774745e-05, + "loss": 0.9947, + "step": 915 + }, + { + "epoch": 0.51, + "learning_rate": 4.6533866626205805e-05, + "loss": 0.9614, + "step": 920 + }, + { + "epoch": 0.51, + "learning_rate": 4.649699674285036e-05, + "loss": 1.0323, + "step": 925 + }, + { + "epoch": 0.51, + "learning_rate": 4.645994656690417e-05, + "loss": 1.0345, + "step": 930 + }, + { + "epoch": 0.52, + "learning_rate": 4.642271640910235e-05, + "loss": 1.0432, + "step": 935 + }, + { + "epoch": 0.52, + "learning_rate": 4.638530658168954e-05, + "loss": 1.0102, + "step": 940 + }, + { + "epoch": 0.52, + "learning_rate": 4.6347717398417203e-05, + "loss": 1.0361, + "step": 945 + }, + { + "epoch": 0.53, + "learning_rate": 4.6309949174541096e-05, + "loss": 0.9818, + "step": 950 + }, + { + "epoch": 0.53, + "learning_rate": 4.627200222681851e-05, + "loss": 1.0293, + "step": 955 + }, + { + "epoch": 0.53, + "learning_rate": 4.6233876873505694e-05, + "loss": 1.0125, + "step": 960 + }, + { + "epoch": 0.53, + "learning_rate": 4.619557343435516e-05, + "loss": 1.0039, + "step": 965 + }, + { + "epoch": 0.54, + "learning_rate": 4.615709223061302e-05, + "loss": 1.0146, + "step": 970 + }, + { + "epoch": 0.54, + "learning_rate": 4.611843358501624e-05, + "loss": 1.0352, + "step": 975 + }, + { + "epoch": 0.54, + "learning_rate": 4.6079597821789993e-05, + "loss": 1.0405, + "step": 980 + }, + { + "epoch": 0.54, + "learning_rate": 4.604058526664491e-05, + "loss": 0.9834, + "step": 985 + }, + { + "epoch": 0.55, + "learning_rate": 4.600139624677436e-05, + "loss": 1.0252, + "step": 990 + }, + { + "epoch": 0.55, + "learning_rate": 4.596203109085168e-05, + "loss": 1.0516, + "step": 995 + }, + { + "epoch": 0.55, + "learning_rate": 4.5922490129027464e-05, + "loss": 0.9762, + "step": 1000 + }, + { + "epoch": 0.56, + "learning_rate": 4.588277369292674e-05, + "loss": 0.9801, + "step": 1005 + }, + { + "epoch": 0.56, + "learning_rate": 4.5842882115646234e-05, + "loss": 1.0127, + "step": 1010 + }, + { + "epoch": 0.56, + "learning_rate": 4.580281573175157e-05, + "loss": 1.0415, + "step": 1015 + }, + { + "epoch": 0.56, + "learning_rate": 4.576257487727442e-05, + "loss": 0.9974, + "step": 1020 + }, + { + "epoch": 0.57, + "learning_rate": 4.572215988970974e-05, + "loss": 1.0097, + "step": 1025 + }, + { + "epoch": 0.57, + "learning_rate": 4.568157110801293e-05, + "loss": 1.0004, + "step": 1030 + }, + { + "epoch": 0.57, + "learning_rate": 4.5640808872596944e-05, + "loss": 0.9932, + "step": 1035 + }, + { + "epoch": 0.58, + "learning_rate": 4.5599873525329505e-05, + "loss": 0.9437, + "step": 1040 + }, + { + "epoch": 0.58, + "learning_rate": 4.555876540953019e-05, + "loss": 1.0004, + "step": 1045 + }, + { + "epoch": 0.58, + "learning_rate": 4.551748486996755e-05, + "loss": 0.9743, + "step": 1050 + }, + { + "epoch": 0.58, + "learning_rate": 4.547603225285626e-05, + "loss": 1.0303, + "step": 1055 + }, + { + "epoch": 0.59, + "learning_rate": 4.543440790585417e-05, + "loss": 1.0225, + "step": 1060 + }, + { + "epoch": 0.59, + "learning_rate": 4.539261217805939e-05, + "loss": 1.0102, + "step": 1065 + }, + { + "epoch": 0.59, + "learning_rate": 4.535064542000743e-05, + "loss": 1.0087, + "step": 1070 + }, + { + "epoch": 0.59, + "learning_rate": 4.5308507983668165e-05, + "loss": 0.952, + "step": 1075 + }, + { + "epoch": 0.6, + "learning_rate": 4.526620022244293e-05, + "loss": 0.9767, + "step": 1080 + }, + { + "epoch": 0.6, + "learning_rate": 4.522372249116158e-05, + "loss": 1.0049, + "step": 1085 + }, + { + "epoch": 0.6, + "learning_rate": 4.5181075146079456e-05, + "loss": 1.0397, + "step": 1090 + }, + { + "epoch": 0.61, + "learning_rate": 4.5138258544874455e-05, + "loss": 1.0167, + "step": 1095 + }, + { + "epoch": 0.61, + "learning_rate": 4.5095273046643985e-05, + "loss": 0.9966, + "step": 1100 + }, + { + "epoch": 0.61, + "learning_rate": 4.5052119011901986e-05, + "loss": 0.983, + "step": 1105 + }, + { + "epoch": 0.61, + "learning_rate": 4.500879680257587e-05, + "loss": 1.0301, + "step": 1110 + }, + { + "epoch": 0.62, + "learning_rate": 4.4965306782003535e-05, + "loss": 1.0371, + "step": 1115 + }, + { + "epoch": 0.62, + "learning_rate": 4.492164931493028e-05, + "loss": 1.0083, + "step": 1120 + }, + { + "epoch": 0.62, + "learning_rate": 4.487782476750575e-05, + "loss": 0.9728, + "step": 1125 + }, + { + "epoch": 0.62, + "learning_rate": 4.4833833507280884e-05, + "loss": 0.9892, + "step": 1130 + }, + { + "epoch": 0.63, + "learning_rate": 4.4789675903204805e-05, + "loss": 0.9847, + "step": 1135 + }, + { + "epoch": 0.63, + "learning_rate": 4.474535232562176e-05, + "loss": 1.0545, + "step": 1140 + }, + { + "epoch": 0.63, + "learning_rate": 4.470086314626797e-05, + "loss": 1.0204, + "step": 1145 + }, + { + "epoch": 0.64, + "learning_rate": 4.465620873826856e-05, + "loss": 0.9648, + "step": 1150 + }, + { + "epoch": 0.64, + "learning_rate": 4.46113894761344e-05, + "loss": 0.9545, + "step": 1155 + }, + { + "epoch": 0.64, + "learning_rate": 4.456640573575896e-05, + "loss": 1.0256, + "step": 1160 + }, + { + "epoch": 0.64, + "learning_rate": 4.4521257894415183e-05, + "loss": 0.99, + "step": 1165 + }, + { + "epoch": 0.65, + "learning_rate": 4.44759463307523e-05, + "loss": 0.9906, + "step": 1170 + }, + { + "epoch": 0.65, + "learning_rate": 4.443047142479266e-05, + "loss": 0.9876, + "step": 1175 + }, + { + "epoch": 0.65, + "learning_rate": 4.4384833557928553e-05, + "loss": 1.0495, + "step": 1180 + }, + { + "epoch": 0.66, + "learning_rate": 4.4339033112918966e-05, + "loss": 0.9869, + "step": 1185 + }, + { + "epoch": 0.66, + "learning_rate": 4.4293070473886456e-05, + "loss": 1.0299, + "step": 1190 + }, + { + "epoch": 0.66, + "learning_rate": 4.424694602631385e-05, + "loss": 1.0073, + "step": 1195 + }, + { + "epoch": 0.66, + "learning_rate": 4.420066015704105e-05, + "loss": 1.0023, + "step": 1200 + }, + { + "epoch": 0.67, + "learning_rate": 4.41542132542618e-05, + "loss": 0.9462, + "step": 1205 + }, + { + "epoch": 0.67, + "learning_rate": 4.410760570752037e-05, + "loss": 1.0116, + "step": 1210 + }, + { + "epoch": 0.67, + "learning_rate": 4.4060837907708375e-05, + "loss": 0.9652, + "step": 1215 + }, + { + "epoch": 0.67, + "learning_rate": 4.401391024706142e-05, + "loss": 1.0411, + "step": 1220 + }, + { + "epoch": 0.68, + "learning_rate": 4.396682311915586e-05, + "loss": 0.9691, + "step": 1225 + }, + { + "epoch": 0.68, + "learning_rate": 4.3919576918905495e-05, + "loss": 0.961, + "step": 1230 + }, + { + "epoch": 0.68, + "learning_rate": 4.387217204255819e-05, + "loss": 0.9602, + "step": 1235 + }, + { + "epoch": 0.69, + "learning_rate": 4.3824608887692666e-05, + "loss": 1.0197, + "step": 1240 + }, + { + "epoch": 0.69, + "learning_rate": 4.377688785321507e-05, + "loss": 0.9982, + "step": 1245 + }, + { + "epoch": 0.69, + "learning_rate": 4.372900933935569e-05, + "loss": 0.986, + "step": 1250 + }, + { + "epoch": 0.69, + "learning_rate": 4.368097374766556e-05, + "loss": 0.9744, + "step": 1255 + }, + { + "epoch": 0.7, + "learning_rate": 4.3632781481013105e-05, + "loss": 1.0078, + "step": 1260 + }, + { + "epoch": 0.7, + "learning_rate": 4.358443294358077e-05, + "loss": 0.9865, + "step": 1265 + }, + { + "epoch": 0.7, + "learning_rate": 4.35359285408616e-05, + "loss": 0.976, + "step": 1270 + }, + { + "epoch": 0.71, + "learning_rate": 4.348726867965591e-05, + "loss": 1.0141, + "step": 1275 + }, + { + "epoch": 0.71, + "learning_rate": 4.343845376806777e-05, + "loss": 0.952, + "step": 1280 + }, + { + "epoch": 0.71, + "learning_rate": 4.338948421550169e-05, + "loss": 1.0239, + "step": 1285 + }, + { + "epoch": 0.71, + "learning_rate": 4.334036043265909e-05, + "loss": 0.994, + "step": 1290 + }, + { + "epoch": 0.72, + "learning_rate": 4.329108283153492e-05, + "loss": 1.0505, + "step": 1295 + }, + { + "epoch": 0.72, + "learning_rate": 4.3241651825414195e-05, + "loss": 0.9919, + "step": 1300 + }, + { + "epoch": 0.72, + "learning_rate": 4.31920678288685e-05, + "loss": 1.0169, + "step": 1305 + }, + { + "epoch": 0.72, + "learning_rate": 4.3142331257752546e-05, + "loss": 0.9938, + "step": 1310 + }, + { + "epoch": 0.73, + "learning_rate": 4.309244252920064e-05, + "loss": 0.92, + "step": 1315 + }, + { + "epoch": 0.73, + "learning_rate": 4.304240206162326e-05, + "loss": 0.9537, + "step": 1320 + }, + { + "epoch": 0.73, + "learning_rate": 4.299221027470345e-05, + "loss": 0.996, + "step": 1325 + }, + { + "epoch": 0.74, + "learning_rate": 4.294186758939339e-05, + "loss": 0.9652, + "step": 1330 + }, + { + "epoch": 0.74, + "learning_rate": 4.2891374427910795e-05, + "loss": 1.0138, + "step": 1335 + }, + { + "epoch": 0.74, + "learning_rate": 4.284073121373544e-05, + "loss": 0.9267, + "step": 1340 + }, + { + "epoch": 0.74, + "learning_rate": 4.278993837160553e-05, + "loss": 0.9899, + "step": 1345 + }, + { + "epoch": 0.75, + "learning_rate": 4.273899632751422e-05, + "loss": 0.963, + "step": 1350 + }, + { + "epoch": 0.75, + "learning_rate": 4.2687905508705974e-05, + "loss": 1.0066, + "step": 1355 + }, + { + "epoch": 0.75, + "learning_rate": 4.263666634367303e-05, + "loss": 0.9939, + "step": 1360 + }, + { + "epoch": 0.75, + "learning_rate": 4.258527926215178e-05, + "loss": 0.9667, + "step": 1365 + }, + { + "epoch": 0.76, + "learning_rate": 4.253374469511917e-05, + "loss": 0.9999, + "step": 1370 + }, + { + "epoch": 0.76, + "learning_rate": 4.248206307478909e-05, + "loss": 0.9738, + "step": 1375 + }, + { + "epoch": 0.76, + "learning_rate": 4.243023483460875e-05, + "loss": 0.9829, + "step": 1380 + }, + { + "epoch": 0.77, + "learning_rate": 4.237826040925503e-05, + "loss": 1.0107, + "step": 1385 + }, + { + "epoch": 0.77, + "learning_rate": 4.232614023463088e-05, + "loss": 0.9992, + "step": 1390 + }, + { + "epoch": 0.77, + "learning_rate": 4.227387474786159e-05, + "loss": 0.9566, + "step": 1395 + }, + { + "epoch": 0.77, + "learning_rate": 4.222146438729119e-05, + "loss": 0.9722, + "step": 1400 + }, + { + "epoch": 0.78, + "learning_rate": 4.216890959247873e-05, + "loss": 0.988, + "step": 1405 + }, + { + "epoch": 0.78, + "learning_rate": 4.211621080419463e-05, + "loss": 1.0103, + "step": 1410 + }, + { + "epoch": 0.78, + "learning_rate": 4.206336846441695e-05, + "loss": 0.9805, + "step": 1415 + }, + { + "epoch": 0.79, + "learning_rate": 4.201038301632772e-05, + "loss": 1.0177, + "step": 1420 + }, + { + "epoch": 0.79, + "learning_rate": 4.195725490430917e-05, + "loss": 0.9938, + "step": 1425 + }, + { + "epoch": 0.79, + "learning_rate": 4.190398457394007e-05, + "loss": 1.0276, + "step": 1430 + }, + { + "epoch": 0.79, + "learning_rate": 4.1850572471991924e-05, + "loss": 0.9909, + "step": 1435 + }, + { + "epoch": 0.8, + "learning_rate": 4.1797019046425264e-05, + "loss": 0.9794, + "step": 1440 + }, + { + "epoch": 0.8, + "learning_rate": 4.1743324746385914e-05, + "loss": 1.0073, + "step": 1445 + }, + { + "epoch": 0.8, + "learning_rate": 4.1689490022201154e-05, + "loss": 1.0106, + "step": 1450 + }, + { + "epoch": 0.8, + "learning_rate": 4.163551532537601e-05, + "loss": 1.0241, + "step": 1455 + }, + { + "epoch": 0.81, + "learning_rate": 4.1581401108589425e-05, + "loss": 1.0178, + "step": 1460 + }, + { + "epoch": 0.81, + "learning_rate": 4.1527147825690495e-05, + "loss": 0.9426, + "step": 1465 + }, + { + "epoch": 0.81, + "learning_rate": 4.1472755931694626e-05, + "loss": 0.9642, + "step": 1470 + }, + { + "epoch": 0.82, + "learning_rate": 4.141822588277976e-05, + "loss": 0.9678, + "step": 1475 + }, + { + "epoch": 0.82, + "learning_rate": 4.136355813628251e-05, + "loss": 0.9654, + "step": 1480 + }, + { + "epoch": 0.82, + "learning_rate": 4.130875315069435e-05, + "loss": 0.9748, + "step": 1485 + }, + { + "epoch": 0.82, + "learning_rate": 4.125381138565775e-05, + "loss": 0.9321, + "step": 1490 + }, + { + "epoch": 0.83, + "learning_rate": 4.1198733301962346e-05, + "loss": 0.9885, + "step": 1495 + }, + { + "epoch": 0.83, + "learning_rate": 4.114351936154105e-05, + "loss": 0.9385, + "step": 1500 + }, + { + "epoch": 0.83, + "learning_rate": 4.108817002746619e-05, + "loss": 0.9362, + "step": 1505 + }, + { + "epoch": 0.83, + "learning_rate": 4.1032685763945625e-05, + "loss": 0.9764, + "step": 1510 + }, + { + "epoch": 0.84, + "learning_rate": 4.097706703631886e-05, + "loss": 0.961, + "step": 1515 + }, + { + "epoch": 0.84, + "learning_rate": 4.092131431105312e-05, + "loss": 0.9818, + "step": 1520 + }, + { + "epoch": 0.84, + "learning_rate": 4.086542805573945e-05, + "loss": 0.9855, + "step": 1525 + }, + { + "epoch": 0.85, + "learning_rate": 4.080940873908881e-05, + "loss": 0.9618, + "step": 1530 + }, + { + "epoch": 0.85, + "learning_rate": 4.07532568309281e-05, + "loss": 1.0102, + "step": 1535 + }, + { + "epoch": 0.85, + "learning_rate": 4.069697280219628e-05, + "loss": 0.9773, + "step": 1540 + }, + { + "epoch": 0.85, + "learning_rate": 4.0640557124940376e-05, + "loss": 0.9687, + "step": 1545 + }, + { + "epoch": 0.86, + "learning_rate": 4.058401027231152e-05, + "loss": 0.9818, + "step": 1550 + }, + { + "epoch": 0.86, + "learning_rate": 4.052733271856103e-05, + "loss": 0.9911, + "step": 1555 + }, + { + "epoch": 0.86, + "learning_rate": 4.0470524939036355e-05, + "loss": 0.9694, + "step": 1560 + }, + { + "epoch": 0.87, + "learning_rate": 4.0413587410177155e-05, + "loss": 0.9374, + "step": 1565 + }, + { + "epoch": 0.87, + "learning_rate": 4.035652060951128e-05, + "loss": 0.956, + "step": 1570 + }, + { + "epoch": 0.87, + "learning_rate": 4.0299325015650774e-05, + "loss": 0.9491, + "step": 1575 + }, + { + "epoch": 0.87, + "learning_rate": 4.024200110828783e-05, + "loss": 0.9569, + "step": 1580 + }, + { + "epoch": 0.88, + "learning_rate": 4.018454936819082e-05, + "loss": 0.9627, + "step": 1585 + }, + { + "epoch": 0.88, + "learning_rate": 4.012697027720018e-05, + "loss": 0.9922, + "step": 1590 + }, + { + "epoch": 0.88, + "learning_rate": 4.0069264318224506e-05, + "loss": 0.9703, + "step": 1595 + }, + { + "epoch": 0.88, + "learning_rate": 4.0011431975236337e-05, + "loss": 0.9436, + "step": 1600 + }, + { + "epoch": 0.89, + "learning_rate": 3.995347373326822e-05, + "loss": 0.961, + "step": 1605 + }, + { + "epoch": 0.89, + "learning_rate": 3.989539007840861e-05, + "loss": 0.9247, + "step": 1610 + }, + { + "epoch": 0.89, + "learning_rate": 3.983718149779775e-05, + "loss": 0.9537, + "step": 1615 + }, + { + "epoch": 0.9, + "learning_rate": 3.9778848479623656e-05, + "loss": 0.9415, + "step": 1620 + }, + { + "epoch": 0.9, + "learning_rate": 3.972039151311795e-05, + "loss": 0.9954, + "step": 1625 + }, + { + "epoch": 0.9, + "learning_rate": 3.966181108855183e-05, + "loss": 0.9451, + "step": 1630 + }, + { + "epoch": 0.9, + "learning_rate": 3.960310769723189e-05, + "loss": 0.9977, + "step": 1635 + }, + { + "epoch": 0.91, + "learning_rate": 3.9544281831496034e-05, + "loss": 0.9806, + "step": 1640 + }, + { + "epoch": 0.91, + "learning_rate": 3.9485333984709374e-05, + "loss": 0.9851, + "step": 1645 + }, + { + "epoch": 0.91, + "learning_rate": 3.942626465126001e-05, + "loss": 1.0089, + "step": 1650 + }, + { + "epoch": 0.92, + "learning_rate": 3.9367074326555e-05, + "loss": 0.9562, + "step": 1655 + }, + { + "epoch": 0.92, + "learning_rate": 3.930776350701609e-05, + "loss": 0.9892, + "step": 1660 + }, + { + "epoch": 0.92, + "learning_rate": 3.92483326900756e-05, + "loss": 0.9875, + "step": 1665 + }, + { + "epoch": 0.92, + "learning_rate": 3.91887823741723e-05, + "loss": 1.0012, + "step": 1670 + }, + { + "epoch": 0.93, + "learning_rate": 3.9129113058747136e-05, + "loss": 0.97, + "step": 1675 + }, + { + "epoch": 0.93, + "learning_rate": 3.9069325244239095e-05, + "loss": 0.9793, + "step": 1680 + }, + { + "epoch": 0.93, + "learning_rate": 3.900941943208103e-05, + "loss": 0.9864, + "step": 1685 + }, + { + "epoch": 0.93, + "learning_rate": 3.894939612469539e-05, + "loss": 0.9596, + "step": 1690 + }, + { + "epoch": 0.94, + "learning_rate": 3.888925582549006e-05, + "loss": 0.9679, + "step": 1695 + }, + { + "epoch": 0.94, + "learning_rate": 3.882899903885412e-05, + "loss": 0.9395, + "step": 1700 + }, + { + "epoch": 0.94, + "learning_rate": 3.876862627015361e-05, + "loss": 0.9774, + "step": 1705 + }, + { + "epoch": 0.95, + "learning_rate": 3.87081380257273e-05, + "loss": 0.9686, + "step": 1710 + }, + { + "epoch": 0.95, + "learning_rate": 3.864753481288244e-05, + "loss": 0.953, + "step": 1715 + }, + { + "epoch": 0.95, + "learning_rate": 3.8586817139890515e-05, + "loss": 0.9719, + "step": 1720 + }, + { + "epoch": 0.95, + "learning_rate": 3.852598551598294e-05, + "loss": 0.9748, + "step": 1725 + }, + { + "epoch": 0.96, + "learning_rate": 3.8465040451346874e-05, + "loss": 0.9741, + "step": 1730 + }, + { + "epoch": 0.96, + "learning_rate": 3.8403982457120836e-05, + "loss": 0.9747, + "step": 1735 + }, + { + "epoch": 0.96, + "learning_rate": 3.834281204539051e-05, + "loss": 0.9791, + "step": 1740 + }, + { + "epoch": 0.96, + "learning_rate": 3.828152972918438e-05, + "loss": 0.9704, + "step": 1745 + }, + { + "epoch": 0.97, + "learning_rate": 3.82201360224695e-05, + "loss": 0.9556, + "step": 1750 + }, + { + "epoch": 0.97, + "learning_rate": 3.815863144014711e-05, + "loss": 0.9552, + "step": 1755 + }, + { + "epoch": 0.97, + "learning_rate": 3.809701649804834e-05, + "loss": 0.9594, + "step": 1760 + }, + { + "epoch": 0.98, + "learning_rate": 3.8035291712929926e-05, + "loss": 0.959, + "step": 1765 + }, + { + "epoch": 0.98, + "learning_rate": 3.797345760246982e-05, + "loss": 1.0254, + "step": 1770 + }, + { + "epoch": 0.98, + "learning_rate": 3.791151468526289e-05, + "loss": 0.9925, + "step": 1775 + }, + { + "epoch": 0.98, + "learning_rate": 3.784946348081654e-05, + "loss": 0.9516, + "step": 1780 + }, + { + "epoch": 0.99, + "learning_rate": 3.7787304509546365e-05, + "loss": 0.954, + "step": 1785 + }, + { + "epoch": 0.99, + "learning_rate": 3.7725038292771774e-05, + "loss": 0.9465, + "step": 1790 + }, + { + "epoch": 0.99, + "learning_rate": 3.766266535271167e-05, + "loss": 0.9792, + "step": 1795 + }, + { + "epoch": 1.0, + "learning_rate": 3.760018621248e-05, + "loss": 0.964, + "step": 1800 + }, + { + "epoch": 1.0, + "learning_rate": 3.75376013960814e-05, + "loss": 0.9419, + "step": 1805 + }, + { + "epoch": 1.0, + "learning_rate": 3.747491142840681e-05, + "loss": 0.9818, + "step": 1810 + }, + { + "epoch": 1.0, + "learning_rate": 3.741211683522904e-05, + "loss": 0.9153, + "step": 1815 + }, + { + "epoch": 1.01, + "learning_rate": 3.734921814319841e-05, + "loss": 0.955, + "step": 1820 + }, + { + "epoch": 1.01, + "learning_rate": 3.728621587983828e-05, + "loss": 0.9467, + "step": 1825 + }, + { + "epoch": 1.01, + "learning_rate": 3.722311057354067e-05, + "loss": 0.9816, + "step": 1830 + }, + { + "epoch": 1.01, + "learning_rate": 3.715990275356178e-05, + "loss": 0.9727, + "step": 1835 + }, + { + "epoch": 1.02, + "learning_rate": 3.7096592950017617e-05, + "loss": 0.9822, + "step": 1840 + }, + { + "epoch": 1.02, + "learning_rate": 3.703318169387947e-05, + "loss": 0.942, + "step": 1845 + }, + { + "epoch": 1.02, + "learning_rate": 3.696966951696952e-05, + "loss": 0.9306, + "step": 1850 + }, + { + "epoch": 1.03, + "learning_rate": 3.690605695195637e-05, + "loss": 0.9654, + "step": 1855 + }, + { + "epoch": 1.03, + "learning_rate": 3.684234453235054e-05, + "loss": 0.9592, + "step": 1860 + }, + { + "epoch": 1.03, + "learning_rate": 3.677853279250003e-05, + "loss": 0.988, + "step": 1865 + }, + { + "epoch": 1.03, + "learning_rate": 3.671462226758583e-05, + "loss": 0.9462, + "step": 1870 + }, + { + "epoch": 1.04, + "learning_rate": 3.665061349361742e-05, + "loss": 0.9685, + "step": 1875 + }, + { + "epoch": 1.04, + "learning_rate": 3.658650700742828e-05, + "loss": 0.9772, + "step": 1880 + }, + { + "epoch": 1.04, + "learning_rate": 3.6522303346671404e-05, + "loss": 0.9482, + "step": 1885 + }, + { + "epoch": 1.05, + "learning_rate": 3.645800304981477e-05, + "loss": 1.0069, + "step": 1890 + }, + { + "epoch": 1.05, + "learning_rate": 3.639360665613683e-05, + "loss": 0.9003, + "step": 1895 + }, + { + "epoch": 1.05, + "learning_rate": 3.632911470572197e-05, + "loss": 0.9279, + "step": 1900 + }, + { + "epoch": 1.05, + "learning_rate": 3.626452773945603e-05, + "loss": 0.9237, + "step": 1905 + }, + { + "epoch": 1.06, + "learning_rate": 3.619984629902172e-05, + "loss": 1.0086, + "step": 1910 + }, + { + "epoch": 1.06, + "learning_rate": 3.613507092689409e-05, + "loss": 0.9625, + "step": 1915 + }, + { + "epoch": 1.06, + "learning_rate": 3.607020216633599e-05, + "loss": 0.9297, + "step": 1920 + }, + { + "epoch": 1.06, + "learning_rate": 3.60052405613935e-05, + "loss": 0.9718, + "step": 1925 + }, + { + "epoch": 1.07, + "learning_rate": 3.594018665689139e-05, + "loss": 0.9512, + "step": 1930 + }, + { + "epoch": 1.07, + "learning_rate": 3.5875040998428513e-05, + "loss": 0.9923, + "step": 1935 + }, + { + "epoch": 1.07, + "learning_rate": 3.5809804132373253e-05, + "loss": 0.9518, + "step": 1940 + }, + { + "epoch": 1.08, + "learning_rate": 3.574447660585897e-05, + "loss": 0.9324, + "step": 1945 + }, + { + "epoch": 1.08, + "learning_rate": 3.5679058966779344e-05, + "loss": 0.9724, + "step": 1950 + }, + { + "epoch": 1.08, + "learning_rate": 3.561355176378384e-05, + "loss": 0.978, + "step": 1955 + }, + { + "epoch": 1.08, + "learning_rate": 3.554795554627307e-05, + "loss": 0.9893, + "step": 1960 + }, + { + "epoch": 1.09, + "learning_rate": 3.548227086439422e-05, + "loss": 0.967, + "step": 1965 + }, + { + "epoch": 1.09, + "learning_rate": 3.541649826903639e-05, + "loss": 0.9529, + "step": 1970 + }, + { + "epoch": 1.09, + "learning_rate": 3.535063831182602e-05, + "loss": 0.9506, + "step": 1975 + }, + { + "epoch": 1.09, + "learning_rate": 3.528469154512224e-05, + "loss": 0.9525, + "step": 1980 + }, + { + "epoch": 1.1, + "learning_rate": 3.521865852201223e-05, + "loss": 0.9258, + "step": 1985 + }, + { + "epoch": 1.1, + "learning_rate": 3.5152539796306596e-05, + "loss": 0.9417, + "step": 1990 + }, + { + "epoch": 1.1, + "learning_rate": 3.508633592253472e-05, + "loss": 0.9465, + "step": 1995 + }, + { + "epoch": 1.11, + "learning_rate": 3.502004745594011e-05, + "loss": 0.9494, + "step": 2000 + }, + { + "epoch": 1.11, + "learning_rate": 3.4953674952475755e-05, + "loss": 0.9709, + "step": 2005 + }, + { + "epoch": 1.11, + "learning_rate": 3.488721896879943e-05, + "loss": 0.9581, + "step": 2010 + }, + { + "epoch": 1.11, + "learning_rate": 3.4820680062269074e-05, + "loss": 0.974, + "step": 2015 + }, + { + "epoch": 1.12, + "learning_rate": 3.4754058790938046e-05, + "loss": 0.9768, + "step": 2020 + }, + { + "epoch": 1.12, + "learning_rate": 3.468735571355055e-05, + "loss": 0.982, + "step": 2025 + }, + { + "epoch": 1.12, + "learning_rate": 3.4620571389536825e-05, + "loss": 0.984, + "step": 2030 + }, + { + "epoch": 1.13, + "learning_rate": 3.455370637900856e-05, + "loss": 0.9604, + "step": 2035 + }, + { + "epoch": 1.13, + "learning_rate": 3.448676124275414e-05, + "loss": 0.897, + "step": 2040 + }, + { + "epoch": 1.13, + "learning_rate": 3.4419736542233925e-05, + "loss": 0.9968, + "step": 2045 + }, + { + "epoch": 1.13, + "learning_rate": 3.4352632839575616e-05, + "loss": 0.9479, + "step": 2050 + }, + { + "epoch": 1.14, + "learning_rate": 3.428545069756946e-05, + "loss": 0.9724, + "step": 2055 + }, + { + "epoch": 1.14, + "learning_rate": 3.42181906796636e-05, + "loss": 0.9493, + "step": 2060 + }, + { + "epoch": 1.14, + "learning_rate": 3.415085334995927e-05, + "loss": 0.9348, + "step": 2065 + }, + { + "epoch": 1.14, + "learning_rate": 3.408343927320613e-05, + "loss": 0.9702, + "step": 2070 + }, + { + "epoch": 1.15, + "learning_rate": 3.401594901479753e-05, + "loss": 0.9089, + "step": 2075 + }, + { + "epoch": 1.15, + "learning_rate": 3.394838314076572e-05, + "loss": 0.9606, + "step": 2080 + }, + { + "epoch": 1.15, + "learning_rate": 3.3880742217777115e-05, + "loss": 0.9743, + "step": 2085 + }, + { + "epoch": 1.16, + "learning_rate": 3.381302681312759e-05, + "loss": 0.9469, + "step": 2090 + }, + { + "epoch": 1.16, + "learning_rate": 3.374523749473767e-05, + "loss": 0.949, + "step": 2095 + }, + { + "epoch": 1.16, + "learning_rate": 3.367737483114779e-05, + "loss": 0.9567, + "step": 2100 + }, + { + "epoch": 1.16, + "learning_rate": 3.360943939151351e-05, + "loss": 0.9718, + "step": 2105 + }, + { + "epoch": 1.17, + "learning_rate": 3.354143174560078e-05, + "loss": 0.9626, + "step": 2110 + }, + { + "epoch": 1.17, + "learning_rate": 3.3473352463781105e-05, + "loss": 0.9346, + "step": 2115 + }, + { + "epoch": 1.17, + "learning_rate": 3.340520211702681e-05, + "loss": 0.9208, + "step": 2120 + }, + { + "epoch": 1.18, + "learning_rate": 3.333698127690623e-05, + "loss": 0.9856, + "step": 2125 + }, + { + "epoch": 1.18, + "learning_rate": 3.326869051557891e-05, + "loss": 0.9049, + "step": 2130 + }, + { + "epoch": 1.18, + "learning_rate": 3.320033040579082e-05, + "loss": 0.9222, + "step": 2135 + }, + { + "epoch": 1.18, + "learning_rate": 3.3131901520869565e-05, + "loss": 0.9648, + "step": 2140 + }, + { + "epoch": 1.19, + "learning_rate": 3.306340443471951e-05, + "loss": 0.9538, + "step": 2145 + }, + { + "epoch": 1.19, + "learning_rate": 3.299483972181708e-05, + "loss": 0.9314, + "step": 2150 + }, + { + "epoch": 1.19, + "learning_rate": 3.292620795720583e-05, + "loss": 0.9576, + "step": 2155 + }, + { + "epoch": 1.19, + "learning_rate": 3.285750971649167e-05, + "loss": 0.9499, + "step": 2160 + }, + { + "epoch": 1.2, + "learning_rate": 3.278874557583807e-05, + "loss": 0.9568, + "step": 2165 + }, + { + "epoch": 1.2, + "learning_rate": 3.271991611196117e-05, + "loss": 0.9642, + "step": 2170 + }, + { + "epoch": 1.2, + "learning_rate": 3.265102190212497e-05, + "loss": 0.9526, + "step": 2175 + }, + { + "epoch": 1.21, + "learning_rate": 3.258206352413648e-05, + "loss": 0.933, + "step": 2180 + }, + { + "epoch": 1.21, + "learning_rate": 3.2513041556340887e-05, + "loss": 0.9683, + "step": 2185 + }, + { + "epoch": 1.21, + "learning_rate": 3.244395657761671e-05, + "loss": 0.9155, + "step": 2190 + }, + { + "epoch": 1.21, + "learning_rate": 3.2374809167370924e-05, + "loss": 0.9262, + "step": 2195 + }, + { + "epoch": 1.22, + "learning_rate": 3.230559990553409e-05, + "loss": 0.9778, + "step": 2200 + }, + { + "epoch": 1.22, + "learning_rate": 3.2236329372555544e-05, + "loss": 0.9577, + "step": 2205 + }, + { + "epoch": 1.22, + "learning_rate": 3.2166998149398465e-05, + "loss": 0.9286, + "step": 2210 + }, + { + "epoch": 1.22, + "learning_rate": 3.209760681753505e-05, + "loss": 0.9634, + "step": 2215 + }, + { + "epoch": 1.23, + "learning_rate": 3.2028155958941615e-05, + "loss": 0.9467, + "step": 2220 + }, + { + "epoch": 1.23, + "learning_rate": 3.195864615609373e-05, + "loss": 0.9543, + "step": 2225 + }, + { + "epoch": 1.23, + "learning_rate": 3.1889077991961304e-05, + "loss": 0.9914, + "step": 2230 + }, + { + "epoch": 1.24, + "learning_rate": 3.181945205000373e-05, + "loss": 0.9309, + "step": 2235 + }, + { + "epoch": 1.24, + "learning_rate": 3.1749768914164955e-05, + "loss": 0.9299, + "step": 2240 + }, + { + "epoch": 1.24, + "learning_rate": 3.168002916886864e-05, + "loss": 0.9462, + "step": 2245 + }, + { + "epoch": 1.24, + "learning_rate": 3.1610233399013194e-05, + "loss": 0.948, + "step": 2250 + }, + { + "epoch": 1.25, + "learning_rate": 3.15403821899669e-05, + "loss": 0.9335, + "step": 2255 + }, + { + "epoch": 1.25, + "learning_rate": 3.147047612756302e-05, + "loss": 0.952, + "step": 2260 + }, + { + "epoch": 1.25, + "learning_rate": 3.140051579809484e-05, + "loss": 0.9532, + "step": 2265 + }, + { + "epoch": 1.26, + "learning_rate": 3.133050178831079e-05, + "loss": 0.9853, + "step": 2270 + }, + { + "epoch": 1.26, + "learning_rate": 3.12604346854095e-05, + "loss": 0.8925, + "step": 2275 + }, + { + "epoch": 1.26, + "learning_rate": 3.119031507703491e-05, + "loss": 0.942, + "step": 2280 + }, + { + "epoch": 1.26, + "learning_rate": 3.112014355127129e-05, + "loss": 0.9132, + "step": 2285 + }, + { + "epoch": 1.27, + "learning_rate": 3.104992069663835e-05, + "loss": 0.9335, + "step": 2290 + }, + { + "epoch": 1.27, + "learning_rate": 3.0979647102086273e-05, + "loss": 0.9403, + "step": 2295 + }, + { + "epoch": 1.27, + "learning_rate": 3.090932335699081e-05, + "loss": 0.9246, + "step": 2300 + }, + { + "epoch": 1.27, + "learning_rate": 3.083895005114831e-05, + "loss": 0.912, + "step": 2305 + }, + { + "epoch": 1.28, + "learning_rate": 3.076852777477079e-05, + "loss": 0.9334, + "step": 2310 + }, + { + "epoch": 1.28, + "learning_rate": 3.069805711848096e-05, + "loss": 0.933, + "step": 2315 + }, + { + "epoch": 1.28, + "learning_rate": 3.062753867330729e-05, + "loss": 0.9348, + "step": 2320 + }, + { + "epoch": 1.29, + "learning_rate": 3.055697303067905e-05, + "loss": 0.9997, + "step": 2325 + }, + { + "epoch": 1.29, + "learning_rate": 3.048636078242137e-05, + "loss": 0.9196, + "step": 2330 + }, + { + "epoch": 1.29, + "learning_rate": 3.0415702520750235e-05, + "loss": 0.9735, + "step": 2335 + }, + { + "epoch": 1.29, + "learning_rate": 3.0344998838267525e-05, + "loss": 0.9395, + "step": 2340 + }, + { + "epoch": 1.3, + "learning_rate": 3.0274250327956093e-05, + "loss": 0.9455, + "step": 2345 + }, + { + "epoch": 1.3, + "learning_rate": 3.020345758317474e-05, + "loss": 0.968, + "step": 2350 + }, + { + "epoch": 1.3, + "learning_rate": 3.0132621197653245e-05, + "loss": 0.9403, + "step": 2355 + }, + { + "epoch": 1.3, + "learning_rate": 3.0061741765487418e-05, + "loss": 0.9267, + "step": 2360 + }, + { + "epoch": 1.31, + "learning_rate": 2.9990819881134073e-05, + "loss": 0.9734, + "step": 2365 + }, + { + "epoch": 1.31, + "learning_rate": 2.9919856139406093e-05, + "loss": 0.949, + "step": 2370 + }, + { + "epoch": 1.31, + "learning_rate": 2.9848851135467386e-05, + "loss": 0.9464, + "step": 2375 + }, + { + "epoch": 1.32, + "learning_rate": 2.977780546482794e-05, + "loss": 0.9709, + "step": 2380 + }, + { + "epoch": 1.32, + "learning_rate": 2.9706719723338795e-05, + "loss": 0.9202, + "step": 2385 + }, + { + "epoch": 1.32, + "learning_rate": 2.9635594507187074e-05, + "loss": 0.9625, + "step": 2390 + }, + { + "epoch": 1.32, + "learning_rate": 2.956443041289096e-05, + "loss": 0.9307, + "step": 2395 + }, + { + "epoch": 1.33, + "learning_rate": 2.9493228037294702e-05, + "loss": 0.907, + "step": 2400 + }, + { + "epoch": 1.33, + "learning_rate": 2.9421987977563613e-05, + "loss": 0.9487, + "step": 2405 + }, + { + "epoch": 1.33, + "learning_rate": 2.935071083117907e-05, + "loss": 0.9259, + "step": 2410 + }, + { + "epoch": 1.34, + "learning_rate": 2.9279397195933457e-05, + "loss": 0.9357, + "step": 2415 + }, + { + "epoch": 1.34, + "learning_rate": 2.920804766992521e-05, + "loss": 0.9149, + "step": 2420 + }, + { + "epoch": 1.34, + "learning_rate": 2.9136662851553787e-05, + "loss": 0.9664, + "step": 2425 + }, + { + "epoch": 1.34, + "learning_rate": 2.906524333951461e-05, + "loss": 0.973, + "step": 2430 + }, + { + "epoch": 1.35, + "learning_rate": 2.899378973279409e-05, + "loss": 0.9797, + "step": 2435 + }, + { + "epoch": 1.35, + "learning_rate": 2.892230263066459e-05, + "loss": 0.9604, + "step": 2440 + }, + { + "epoch": 1.35, + "learning_rate": 2.885078263267938e-05, + "loss": 0.9147, + "step": 2445 + }, + { + "epoch": 1.35, + "learning_rate": 2.8779230338667634e-05, + "loss": 0.9827, + "step": 2450 + }, + { + "epoch": 1.36, + "learning_rate": 2.870764634872939e-05, + "loss": 0.9362, + "step": 2455 + }, + { + "epoch": 1.36, + "learning_rate": 2.86360312632305e-05, + "loss": 0.9373, + "step": 2460 + }, + { + "epoch": 1.36, + "learning_rate": 2.8564385682797622e-05, + "loss": 0.955, + "step": 2465 + }, + { + "epoch": 1.37, + "learning_rate": 2.8492710208313177e-05, + "loss": 0.9108, + "step": 2470 + }, + { + "epoch": 1.37, + "learning_rate": 2.8421005440910303e-05, + "loss": 0.9259, + "step": 2475 + }, + { + "epoch": 1.37, + "learning_rate": 2.8349271981967797e-05, + "loss": 0.9333, + "step": 2480 + }, + { + "epoch": 1.37, + "learning_rate": 2.8277510433105102e-05, + "loss": 0.9437, + "step": 2485 + }, + { + "epoch": 1.38, + "learning_rate": 2.820572139617725e-05, + "loss": 0.9189, + "step": 2490 + }, + { + "epoch": 1.38, + "learning_rate": 2.8133905473269802e-05, + "loss": 0.9522, + "step": 2495 + }, + { + "epoch": 1.38, + "learning_rate": 2.8062063266693818e-05, + "loss": 0.9352, + "step": 2500 + }, + { + "epoch": 1.39, + "learning_rate": 2.7990195378980784e-05, + "loss": 0.9232, + "step": 2505 + }, + { + "epoch": 1.39, + "learning_rate": 2.7918302412877583e-05, + "loss": 0.8807, + "step": 2510 + }, + { + "epoch": 1.39, + "learning_rate": 2.7846384971341427e-05, + "loss": 0.9231, + "step": 2515 + }, + { + "epoch": 1.39, + "learning_rate": 2.7774443657534788e-05, + "loss": 0.9253, + "step": 2520 + }, + { + "epoch": 1.4, + "learning_rate": 2.770247907482036e-05, + "loss": 0.9646, + "step": 2525 + }, + { + "epoch": 1.4, + "learning_rate": 2.763049182675599e-05, + "loss": 0.9575, + "step": 2530 + }, + { + "epoch": 1.4, + "learning_rate": 2.7558482517089617e-05, + "loss": 0.9234, + "step": 2535 + }, + { + "epoch": 1.4, + "learning_rate": 2.748645174975421e-05, + "loss": 0.9215, + "step": 2540 + }, + { + "epoch": 1.41, + "learning_rate": 2.74144001288627e-05, + "loss": 0.959, + "step": 2545 + }, + { + "epoch": 1.41, + "learning_rate": 2.7342328258702894e-05, + "loss": 0.9509, + "step": 2550 + }, + { + "epoch": 1.41, + "learning_rate": 2.727023674373246e-05, + "loss": 0.8989, + "step": 2555 + }, + { + "epoch": 1.42, + "learning_rate": 2.7198126188573807e-05, + "loss": 0.9653, + "step": 2560 + }, + { + "epoch": 1.42, + "learning_rate": 2.7125997198009028e-05, + "loss": 0.9046, + "step": 2565 + }, + { + "epoch": 1.42, + "learning_rate": 2.7053850376974848e-05, + "loss": 0.9318, + "step": 2570 + }, + { + "epoch": 1.42, + "learning_rate": 2.6981686330557516e-05, + "loss": 0.9292, + "step": 2575 + }, + { + "epoch": 1.43, + "learning_rate": 2.6909505663987756e-05, + "loss": 0.99, + "step": 2580 + }, + { + "epoch": 1.43, + "learning_rate": 2.6837308982635678e-05, + "loss": 0.9737, + "step": 2585 + }, + { + "epoch": 1.43, + "learning_rate": 2.6765096892005726e-05, + "loss": 0.9649, + "step": 2590 + }, + { + "epoch": 1.43, + "learning_rate": 2.6692869997731545e-05, + "loss": 0.9687, + "step": 2595 + }, + { + "epoch": 1.44, + "learning_rate": 2.6620628905570964e-05, + "loss": 0.9708, + "step": 2600 + }, + { + "epoch": 1.44, + "learning_rate": 2.6548374221400884e-05, + "loss": 0.9498, + "step": 2605 + }, + { + "epoch": 1.44, + "learning_rate": 2.6476106551212188e-05, + "loss": 0.9512, + "step": 2610 + }, + { + "epoch": 1.45, + "learning_rate": 2.6403826501104682e-05, + "loss": 0.964, + "step": 2615 + }, + { + "epoch": 1.45, + "learning_rate": 2.6331534677281998e-05, + "loss": 0.9321, + "step": 2620 + }, + { + "epoch": 1.45, + "learning_rate": 2.6259231686046508e-05, + "loss": 0.9032, + "step": 2625 + }, + { + "epoch": 1.45, + "learning_rate": 2.6186918133794252e-05, + "loss": 0.9543, + "step": 2630 + }, + { + "epoch": 1.46, + "learning_rate": 2.6114594627009847e-05, + "loss": 0.9355, + "step": 2635 + }, + { + "epoch": 1.46, + "learning_rate": 2.604226177226137e-05, + "loss": 0.9111, + "step": 2640 + }, + { + "epoch": 1.46, + "learning_rate": 2.596992017619534e-05, + "loss": 0.9896, + "step": 2645 + }, + { + "epoch": 1.47, + "learning_rate": 2.589757044553155e-05, + "loss": 0.9715, + "step": 2650 + }, + { + "epoch": 1.47, + "learning_rate": 2.5825213187058045e-05, + "loss": 0.8911, + "step": 2655 + }, + { + "epoch": 1.47, + "learning_rate": 2.5752849007625986e-05, + "loss": 0.9446, + "step": 2660 + }, + { + "epoch": 1.47, + "learning_rate": 2.568047851414459e-05, + "loss": 0.9453, + "step": 2665 + }, + { + "epoch": 1.48, + "learning_rate": 2.5608102313576027e-05, + "loss": 0.9244, + "step": 2670 + }, + { + "epoch": 1.48, + "learning_rate": 2.553572101293033e-05, + "loss": 0.9478, + "step": 2675 + }, + { + "epoch": 1.48, + "learning_rate": 2.546333521926031e-05, + "loss": 0.9708, + "step": 2680 + }, + { + "epoch": 1.48, + "learning_rate": 2.5390945539656445e-05, + "loss": 0.9266, + "step": 2685 + }, + { + "epoch": 1.49, + "learning_rate": 2.5318552581241822e-05, + "loss": 0.9199, + "step": 2690 + }, + { + "epoch": 1.49, + "learning_rate": 2.524615695116702e-05, + "loss": 0.9666, + "step": 2695 + }, + { + "epoch": 1.49, + "learning_rate": 2.5173759256605027e-05, + "loss": 0.9247, + "step": 2700 + }, + { + "epoch": 1.5, + "learning_rate": 2.510136010474614e-05, + "loss": 0.9534, + "step": 2705 + }, + { + "epoch": 1.5, + "learning_rate": 2.5028960102792887e-05, + "loss": 0.9502, + "step": 2710 + }, + { + "epoch": 1.5, + "learning_rate": 2.4971039897207112e-05, + "loss": 0.9331, + "step": 2715 + }, + { + "epoch": 1.5, + "learning_rate": 2.4898639895253865e-05, + "loss": 0.9806, + "step": 2720 + }, + { + "epoch": 1.51, + "learning_rate": 2.4826240743394982e-05, + "loss": 0.9525, + "step": 2725 + }, + { + "epoch": 1.51, + "learning_rate": 2.4753843048832985e-05, + "loss": 0.937, + "step": 2730 + }, + { + "epoch": 1.51, + "learning_rate": 2.4681447418758187e-05, + "loss": 0.935, + "step": 2735 + }, + { + "epoch": 1.52, + "learning_rate": 2.460905446034356e-05, + "loss": 0.9662, + "step": 2740 + }, + { + "epoch": 1.52, + "learning_rate": 2.45366647807397e-05, + "loss": 0.9249, + "step": 2745 + }, + { + "epoch": 1.52, + "learning_rate": 2.446427898706967e-05, + "loss": 0.9673, + "step": 2750 + }, + { + "epoch": 1.52, + "learning_rate": 2.439189768642398e-05, + "loss": 0.9316, + "step": 2755 + }, + { + "epoch": 1.53, + "learning_rate": 2.431952148585541e-05, + "loss": 0.9297, + "step": 2760 + }, + { + "epoch": 1.53, + "learning_rate": 2.424715099237402e-05, + "loss": 0.9278, + "step": 2765 + }, + { + "epoch": 1.53, + "learning_rate": 2.4174786812941968e-05, + "loss": 0.8954, + "step": 2770 + }, + { + "epoch": 1.53, + "learning_rate": 2.4102429554468456e-05, + "loss": 0.9586, + "step": 2775 + }, + { + "epoch": 1.54, + "learning_rate": 2.4030079823804673e-05, + "loss": 0.9119, + "step": 2780 + }, + { + "epoch": 1.54, + "learning_rate": 2.395773822773863e-05, + "loss": 0.8949, + "step": 2785 + }, + { + "epoch": 1.54, + "learning_rate": 2.3885405372990166e-05, + "loss": 0.9506, + "step": 2790 + }, + { + "epoch": 1.55, + "learning_rate": 2.3813081866205754e-05, + "loss": 0.9087, + "step": 2795 + }, + { + "epoch": 1.55, + "learning_rate": 2.3740768313953494e-05, + "loss": 0.923, + "step": 2800 + }, + { + "epoch": 1.55, + "learning_rate": 2.3668465322718004e-05, + "loss": 1.0212, + "step": 2805 + }, + { + "epoch": 1.55, + "learning_rate": 2.359617349889532e-05, + "loss": 0.9182, + "step": 2810 + }, + { + "epoch": 1.56, + "learning_rate": 2.3523893448787818e-05, + "loss": 0.9316, + "step": 2815 + }, + { + "epoch": 1.56, + "learning_rate": 2.3451625778599122e-05, + "loss": 0.94, + "step": 2820 + }, + { + "epoch": 1.56, + "learning_rate": 2.3379371094429038e-05, + "loss": 0.9181, + "step": 2825 + }, + { + "epoch": 1.56, + "learning_rate": 2.3307130002268457e-05, + "loss": 0.937, + "step": 2830 + }, + { + "epoch": 1.57, + "learning_rate": 2.3234903107994287e-05, + "loss": 0.9026, + "step": 2835 + }, + { + "epoch": 1.57, + "learning_rate": 2.3162691017364317e-05, + "loss": 0.954, + "step": 2840 + }, + { + "epoch": 1.57, + "learning_rate": 2.3090494336012253e-05, + "loss": 0.9661, + "step": 2845 + }, + { + "epoch": 1.58, + "learning_rate": 2.3018313669442483e-05, + "loss": 0.9127, + "step": 2850 + }, + { + "epoch": 1.58, + "learning_rate": 2.2946149623025158e-05, + "loss": 0.9317, + "step": 2855 + }, + { + "epoch": 1.58, + "learning_rate": 2.2874002801990978e-05, + "loss": 0.9856, + "step": 2860 + }, + { + "epoch": 1.58, + "learning_rate": 2.28018738114262e-05, + "loss": 1.0021, + "step": 2865 + }, + { + "epoch": 1.59, + "learning_rate": 2.272976325626755e-05, + "loss": 0.9655, + "step": 2870 + }, + { + "epoch": 1.59, + "learning_rate": 2.265767174129711e-05, + "loss": 0.9619, + "step": 2875 + }, + { + "epoch": 1.59, + "learning_rate": 2.2585599871137313e-05, + "loss": 0.9383, + "step": 2880 + }, + { + "epoch": 1.6, + "learning_rate": 2.251354825024579e-05, + "loss": 0.9332, + "step": 2885 + }, + { + "epoch": 1.6, + "learning_rate": 2.244151748291039e-05, + "loss": 0.9544, + "step": 2890 + }, + { + "epoch": 1.6, + "learning_rate": 2.236950817324401e-05, + "loss": 0.9343, + "step": 2895 + }, + { + "epoch": 1.6, + "learning_rate": 2.2297520925179647e-05, + "loss": 0.9189, + "step": 2900 + }, + { + "epoch": 1.61, + "learning_rate": 2.222555634246521e-05, + "loss": 0.9341, + "step": 2905 + }, + { + "epoch": 1.61, + "learning_rate": 2.215361502865858e-05, + "loss": 0.9567, + "step": 2910 + }, + { + "epoch": 1.61, + "learning_rate": 2.2081697587122423e-05, + "loss": 0.9047, + "step": 2915 + }, + { + "epoch": 1.61, + "learning_rate": 2.200980462101922e-05, + "loss": 0.9126, + "step": 2920 + }, + { + "epoch": 1.62, + "learning_rate": 2.1937936733306195e-05, + "loss": 0.9523, + "step": 2925 + }, + { + "epoch": 1.62, + "learning_rate": 2.18660945267302e-05, + "loss": 0.8802, + "step": 2930 + }, + { + "epoch": 1.62, + "learning_rate": 2.179427860382276e-05, + "loss": 0.9197, + "step": 2935 + }, + { + "epoch": 1.63, + "learning_rate": 2.1722489566894903e-05, + "loss": 0.9255, + "step": 2940 + }, + { + "epoch": 1.63, + "learning_rate": 2.1650728018032206e-05, + "loss": 0.8921, + "step": 2945 + }, + { + "epoch": 1.63, + "learning_rate": 2.15789945590897e-05, + "loss": 0.9607, + "step": 2950 + }, + { + "epoch": 1.63, + "learning_rate": 2.150728979168683e-05, + "loss": 0.9755, + "step": 2955 + }, + { + "epoch": 1.64, + "learning_rate": 2.1435614317202384e-05, + "loss": 0.9943, + "step": 2960 + }, + { + "epoch": 1.64, + "learning_rate": 2.1363968736769508e-05, + "loss": 0.9463, + "step": 2965 + }, + { + "epoch": 1.64, + "learning_rate": 2.1292353651270617e-05, + "loss": 0.9107, + "step": 2970 + }, + { + "epoch": 1.65, + "learning_rate": 2.1220769661332365e-05, + "loss": 0.9311, + "step": 2975 + }, + { + "epoch": 1.65, + "learning_rate": 2.1149217367320622e-05, + "loss": 0.9459, + "step": 2980 + }, + { + "epoch": 1.65, + "learning_rate": 2.107769736933541e-05, + "loss": 0.9439, + "step": 2985 + }, + { + "epoch": 1.65, + "learning_rate": 2.100621026720591e-05, + "loss": 0.9719, + "step": 2990 + }, + { + "epoch": 1.66, + "learning_rate": 2.093475666048539e-05, + "loss": 0.9569, + "step": 2995 + }, + { + "epoch": 1.66, + "learning_rate": 2.0863337148446222e-05, + "loss": 0.9308, + "step": 3000 + }, + { + "epoch": 1.66, + "learning_rate": 2.07919523300748e-05, + "loss": 0.9269, + "step": 3005 + }, + { + "epoch": 1.66, + "learning_rate": 2.0720602804066552e-05, + "loss": 0.9358, + "step": 3010 + }, + { + "epoch": 1.67, + "learning_rate": 2.0649289168820943e-05, + "loss": 0.9291, + "step": 3015 + }, + { + "epoch": 1.67, + "learning_rate": 2.0578012022436386e-05, + "loss": 0.969, + "step": 3020 + }, + { + "epoch": 1.67, + "learning_rate": 2.0506771962705304e-05, + "loss": 0.9319, + "step": 3025 + }, + { + "epoch": 1.68, + "learning_rate": 2.0435569587109042e-05, + "loss": 0.9574, + "step": 3030 + }, + { + "epoch": 1.68, + "learning_rate": 2.036440549281293e-05, + "loss": 0.9654, + "step": 3035 + }, + { + "epoch": 1.68, + "learning_rate": 2.0293280276661204e-05, + "loss": 0.9742, + "step": 3040 + }, + { + "epoch": 1.68, + "learning_rate": 2.0222194535172067e-05, + "loss": 0.9014, + "step": 3045 + }, + { + "epoch": 1.69, + "learning_rate": 2.0151148864532623e-05, + "loss": 0.9246, + "step": 3050 + }, + { + "epoch": 1.69, + "learning_rate": 2.0080143860593913e-05, + "loss": 0.9522, + "step": 3055 + }, + { + "epoch": 1.69, + "learning_rate": 2.0009180118865933e-05, + "loss": 0.9567, + "step": 3060 + }, + { + "epoch": 1.69, + "learning_rate": 1.9938258234512588e-05, + "loss": 0.983, + "step": 3065 + }, + { + "epoch": 1.7, + "learning_rate": 1.9867378802346764e-05, + "loss": 0.8722, + "step": 3070 + }, + { + "epoch": 1.7, + "learning_rate": 1.979654241682527e-05, + "loss": 0.9122, + "step": 3075 + }, + { + "epoch": 1.7, + "learning_rate": 1.972574967204391e-05, + "loss": 0.9362, + "step": 3080 + }, + { + "epoch": 1.71, + "learning_rate": 1.9655001161732478e-05, + "loss": 0.8944, + "step": 3085 + }, + { + "epoch": 1.71, + "learning_rate": 1.9584297479249774e-05, + "loss": 0.9329, + "step": 3090 + }, + { + "epoch": 1.71, + "learning_rate": 1.9513639217578636e-05, + "loss": 0.9707, + "step": 3095 + }, + { + "epoch": 1.71, + "learning_rate": 1.9443026969320955e-05, + "loss": 0.9367, + "step": 3100 + }, + { + "epoch": 1.72, + "learning_rate": 1.937246132669272e-05, + "loss": 0.896, + "step": 3105 + }, + { + "epoch": 1.72, + "learning_rate": 1.9301942881519047e-05, + "loss": 0.97, + "step": 3110 + }, + { + "epoch": 1.72, + "learning_rate": 1.9231472225229216e-05, + "loss": 0.9638, + "step": 3115 + }, + { + "epoch": 1.73, + "learning_rate": 1.9161049948851684e-05, + "loss": 0.9561, + "step": 3120 + }, + { + "epoch": 1.73, + "learning_rate": 1.9090676643009193e-05, + "loss": 0.9734, + "step": 3125 + }, + { + "epoch": 1.73, + "learning_rate": 1.902035289791373e-05, + "loss": 0.9651, + "step": 3130 + }, + { + "epoch": 1.73, + "learning_rate": 1.8950079303361658e-05, + "loss": 0.9489, + "step": 3135 + }, + { + "epoch": 1.74, + "learning_rate": 1.8879856448728723e-05, + "loss": 0.9893, + "step": 3140 + }, + { + "epoch": 1.74, + "learning_rate": 1.8809684922965097e-05, + "loss": 0.9549, + "step": 3145 + }, + { + "epoch": 1.74, + "learning_rate": 1.8739565314590507e-05, + "loss": 0.9196, + "step": 3150 + }, + { + "epoch": 1.74, + "learning_rate": 1.8669498211689216e-05, + "loss": 0.9568, + "step": 3155 + }, + { + "epoch": 1.75, + "learning_rate": 1.859948420190517e-05, + "loss": 0.904, + "step": 3160 + }, + { + "epoch": 1.75, + "learning_rate": 1.852952387243698e-05, + "loss": 0.9375, + "step": 3165 + }, + { + "epoch": 1.75, + "learning_rate": 1.8459617810033096e-05, + "loss": 0.9048, + "step": 3170 + }, + { + "epoch": 1.76, + "learning_rate": 1.83897666009868e-05, + "loss": 0.9514, + "step": 3175 + }, + { + "epoch": 1.76, + "learning_rate": 1.8319970831131363e-05, + "loss": 0.8855, + "step": 3180 + }, + { + "epoch": 1.76, + "learning_rate": 1.825023108583505e-05, + "loss": 0.9119, + "step": 3185 + }, + { + "epoch": 1.76, + "learning_rate": 1.818054794999628e-05, + "loss": 0.9733, + "step": 3190 + }, + { + "epoch": 1.77, + "learning_rate": 1.8110922008038705e-05, + "loss": 0.9597, + "step": 3195 + }, + { + "epoch": 1.77, + "learning_rate": 1.8041353843906275e-05, + "loss": 0.8952, + "step": 3200 + }, + { + "epoch": 1.77, + "learning_rate": 1.797184404105839e-05, + "loss": 0.8934, + "step": 3205 + }, + { + "epoch": 1.77, + "learning_rate": 1.7902393182464955e-05, + "loss": 0.9299, + "step": 3210 + }, + { + "epoch": 1.78, + "learning_rate": 1.7833001850601544e-05, + "loss": 0.9247, + "step": 3215 + }, + { + "epoch": 1.78, + "learning_rate": 1.7763670627444465e-05, + "loss": 0.9672, + "step": 3220 + }, + { + "epoch": 1.78, + "learning_rate": 1.7694400094465913e-05, + "loss": 0.9451, + "step": 3225 + }, + { + "epoch": 1.79, + "learning_rate": 1.7625190832629085e-05, + "loss": 0.9294, + "step": 3230 + }, + { + "epoch": 1.79, + "learning_rate": 1.7556043422383293e-05, + "loss": 0.9145, + "step": 3235 + }, + { + "epoch": 1.79, + "learning_rate": 1.7486958443659112e-05, + "loss": 0.9508, + "step": 3240 + }, + { + "epoch": 1.79, + "learning_rate": 1.7417936475863526e-05, + "loss": 0.8725, + "step": 3245 + }, + { + "epoch": 1.8, + "learning_rate": 1.7348978097875036e-05, + "loss": 0.9195, + "step": 3250 + }, + { + "epoch": 1.8, + "learning_rate": 1.728008388803883e-05, + "loss": 0.933, + "step": 3255 + }, + { + "epoch": 1.8, + "learning_rate": 1.7211254424161933e-05, + "loss": 0.9747, + "step": 3260 + }, + { + "epoch": 1.81, + "learning_rate": 1.7142490283508324e-05, + "loss": 0.9168, + "step": 3265 + }, + { + "epoch": 1.81, + "learning_rate": 1.707379204279418e-05, + "loss": 0.9844, + "step": 3270 + }, + { + "epoch": 1.81, + "learning_rate": 1.700516027818293e-05, + "loss": 0.9071, + "step": 3275 + }, + { + "epoch": 1.81, + "learning_rate": 1.6936595565280488e-05, + "loss": 0.9311, + "step": 3280 + }, + { + "epoch": 1.82, + "learning_rate": 1.686809847913045e-05, + "loss": 0.9419, + "step": 3285 + }, + { + "epoch": 1.82, + "learning_rate": 1.679966959420918e-05, + "loss": 0.9679, + "step": 3290 + }, + { + "epoch": 1.82, + "learning_rate": 1.67313094844211e-05, + "loss": 0.9601, + "step": 3295 + }, + { + "epoch": 1.82, + "learning_rate": 1.6663018723093774e-05, + "loss": 0.9022, + "step": 3300 + }, + { + "epoch": 1.83, + "learning_rate": 1.6594797882973196e-05, + "loss": 0.8925, + "step": 3305 + }, + { + "epoch": 1.83, + "learning_rate": 1.6526647536218894e-05, + "loss": 0.9238, + "step": 3310 + }, + { + "epoch": 1.83, + "learning_rate": 1.6458568254399225e-05, + "loss": 0.9632, + "step": 3315 + }, + { + "epoch": 1.84, + "learning_rate": 1.6390560608486496e-05, + "loss": 0.9164, + "step": 3320 + }, + { + "epoch": 1.84, + "learning_rate": 1.6322625168852217e-05, + "loss": 0.9505, + "step": 3325 + }, + { + "epoch": 1.84, + "learning_rate": 1.6254762505262338e-05, + "loss": 0.9622, + "step": 3330 + }, + { + "epoch": 1.84, + "learning_rate": 1.618697318687241e-05, + "loss": 0.9204, + "step": 3335 + }, + { + "epoch": 1.85, + "learning_rate": 1.6119257782222895e-05, + "loss": 0.9504, + "step": 3340 + }, + { + "epoch": 1.85, + "learning_rate": 1.6051616859234285e-05, + "loss": 0.9384, + "step": 3345 + }, + { + "epoch": 1.85, + "learning_rate": 1.5984050985202474e-05, + "loss": 0.9374, + "step": 3350 + }, + { + "epoch": 1.86, + "learning_rate": 1.591656072679387e-05, + "loss": 0.9937, + "step": 3355 + }, + { + "epoch": 1.86, + "learning_rate": 1.5849146650040737e-05, + "loss": 0.9587, + "step": 3360 + }, + { + "epoch": 1.86, + "learning_rate": 1.5781809320336412e-05, + "loss": 0.9312, + "step": 3365 + }, + { + "epoch": 1.86, + "learning_rate": 1.5714549302430536e-05, + "loss": 0.9343, + "step": 3370 + }, + { + "epoch": 1.87, + "learning_rate": 1.5647367160424393e-05, + "loss": 0.9419, + "step": 3375 + }, + { + "epoch": 1.87, + "learning_rate": 1.558026345776608e-05, + "loss": 0.9371, + "step": 3380 + }, + { + "epoch": 1.87, + "learning_rate": 1.551323875724587e-05, + "loss": 0.913, + "step": 3385 + }, + { + "epoch": 1.87, + "learning_rate": 1.5446293620991437e-05, + "loss": 0.9195, + "step": 3390 + }, + { + "epoch": 1.88, + "learning_rate": 1.5379428610463174e-05, + "loss": 0.9164, + "step": 3395 + }, + { + "epoch": 1.88, + "learning_rate": 1.531264428644945e-05, + "loss": 0.9256, + "step": 3400 + }, + { + "epoch": 1.88, + "learning_rate": 1.5245941209061953e-05, + "loss": 0.9658, + "step": 3405 + }, + { + "epoch": 1.89, + "learning_rate": 1.517931993773094e-05, + "loss": 0.9397, + "step": 3410 + }, + { + "epoch": 1.89, + "learning_rate": 1.5112781031200569e-05, + "loss": 0.915, + "step": 3415 + }, + { + "epoch": 1.89, + "learning_rate": 1.5046325047524251e-05, + "loss": 0.9116, + "step": 3420 + }, + { + "epoch": 1.89, + "learning_rate": 1.4979952544059888e-05, + "loss": 0.9192, + "step": 3425 + }, + { + "epoch": 1.9, + "learning_rate": 1.4913664077465289e-05, + "loss": 0.924, + "step": 3430 + }, + { + "epoch": 1.9, + "learning_rate": 1.4847460203693408e-05, + "loss": 0.9579, + "step": 3435 + }, + { + "epoch": 1.9, + "learning_rate": 1.4781341477987776e-05, + "loss": 0.9816, + "step": 3440 + }, + { + "epoch": 1.9, + "learning_rate": 1.4715308454877758e-05, + "loss": 0.9412, + "step": 3445 + }, + { + "epoch": 1.91, + "learning_rate": 1.4649361688173979e-05, + "loss": 0.9413, + "step": 3450 + }, + { + "epoch": 1.91, + "learning_rate": 1.458350173096361e-05, + "loss": 0.9425, + "step": 3455 + }, + { + "epoch": 1.91, + "learning_rate": 1.4517729135605795e-05, + "loss": 0.9132, + "step": 3460 + }, + { + "epoch": 1.92, + "learning_rate": 1.4452044453726942e-05, + "loss": 0.9455, + "step": 3465 + }, + { + "epoch": 1.92, + "learning_rate": 1.4386448236216174e-05, + "loss": 0.9543, + "step": 3470 + }, + { + "epoch": 1.92, + "learning_rate": 1.4320941033220667e-05, + "loss": 0.9118, + "step": 3475 + }, + { + "epoch": 1.92, + "learning_rate": 1.4255523394141041e-05, + "loss": 0.9425, + "step": 3480 + }, + { + "epoch": 1.93, + "learning_rate": 1.4190195867626749e-05, + "loss": 0.8958, + "step": 3485 + }, + { + "epoch": 1.93, + "learning_rate": 1.4124959001571497e-05, + "loss": 0.944, + "step": 3490 + }, + { + "epoch": 1.93, + "learning_rate": 1.4059813343108616e-05, + "loss": 0.9611, + "step": 3495 + }, + { + "epoch": 1.94, + "learning_rate": 1.3994759438606501e-05, + "loss": 0.9448, + "step": 3500 + }, + { + "epoch": 1.94, + "learning_rate": 1.3929797833664013e-05, + "loss": 0.9659, + "step": 3505 + }, + { + "epoch": 1.94, + "learning_rate": 1.3864929073105922e-05, + "loss": 0.9178, + "step": 3510 + }, + { + "epoch": 1.94, + "learning_rate": 1.3800153700978282e-05, + "loss": 0.8965, + "step": 3515 + }, + { + "epoch": 1.95, + "learning_rate": 1.373547226054398e-05, + "loss": 0.9198, + "step": 3520 + }, + { + "epoch": 1.95, + "learning_rate": 1.367088529427803e-05, + "loss": 0.9398, + "step": 3525 + }, + { + "epoch": 1.95, + "learning_rate": 1.3606393343863182e-05, + "loss": 0.9423, + "step": 3530 + }, + { + "epoch": 1.95, + "learning_rate": 1.3541996950185227e-05, + "loss": 0.9592, + "step": 3535 + }, + { + "epoch": 1.96, + "learning_rate": 1.3477696653328598e-05, + "loss": 0.9489, + "step": 3540 + }, + { + "epoch": 1.96, + "learning_rate": 1.3413492992571713e-05, + "loss": 0.963, + "step": 3545 + }, + { + "epoch": 1.96, + "learning_rate": 1.3349386506382586e-05, + "loss": 0.9449, + "step": 3550 + }, + { + "epoch": 1.97, + "learning_rate": 1.3285377732414172e-05, + "loss": 0.9043, + "step": 3555 + }, + { + "epoch": 1.97, + "learning_rate": 1.3221467207499972e-05, + "loss": 0.9362, + "step": 3560 + }, + { + "epoch": 1.97, + "learning_rate": 1.3157655467649463e-05, + "loss": 0.945, + "step": 3565 + }, + { + "epoch": 1.97, + "learning_rate": 1.3093943048043634e-05, + "loss": 0.8693, + "step": 3570 + }, + { + "epoch": 1.98, + "learning_rate": 1.3030330483030479e-05, + "loss": 0.9659, + "step": 3575 + }, + { + "epoch": 1.98, + "learning_rate": 1.2966818306120535e-05, + "loss": 0.9036, + "step": 3580 + }, + { + "epoch": 1.98, + "learning_rate": 1.2903407049982386e-05, + "loss": 0.9282, + "step": 3585 + }, + { + "epoch": 1.99, + "learning_rate": 1.2840097246438215e-05, + "loss": 0.8747, + "step": 3590 + }, + { + "epoch": 1.99, + "learning_rate": 1.277688942645934e-05, + "loss": 0.936, + "step": 3595 + }, + { + "epoch": 1.99, + "learning_rate": 1.2713784120161725e-05, + "loss": 0.8945, + "step": 3600 + }, + { + "epoch": 1.99, + "learning_rate": 1.2650781856801598e-05, + "loss": 0.9273, + "step": 3605 + }, + { + "epoch": 2.0, + "learning_rate": 1.258788316477097e-05, + "loss": 0.9632, + "step": 3610 + }, + { + "epoch": 2.0, + "learning_rate": 1.2525088571593202e-05, + "loss": 0.8641, + "step": 3615 + }, + { + "epoch": 2.0, + "learning_rate": 1.2462398603918607e-05, + "loss": 0.9314, + "step": 3620 + }, + { + "epoch": 2.0, + "learning_rate": 1.2399813787520006e-05, + "loss": 0.9169, + "step": 3625 + }, + { + "epoch": 2.01, + "learning_rate": 1.2337334647288334e-05, + "loss": 0.9089, + "step": 3630 + }, + { + "epoch": 2.01, + "learning_rate": 1.2274961707228228e-05, + "loss": 0.9039, + "step": 3635 + }, + { + "epoch": 2.01, + "learning_rate": 1.2212695490453646e-05, + "loss": 0.9378, + "step": 3640 + }, + { + "epoch": 2.02, + "learning_rate": 1.2150536519183475e-05, + "loss": 0.9297, + "step": 3645 + }, + { + "epoch": 2.02, + "learning_rate": 1.2088485314737108e-05, + "loss": 0.9488, + "step": 3650 + }, + { + "epoch": 2.02, + "learning_rate": 1.2026542397530186e-05, + "loss": 0.9625, + "step": 3655 + }, + { + "epoch": 2.02, + "learning_rate": 1.1964708287070073e-05, + "loss": 0.8874, + "step": 3660 + }, + { + "epoch": 2.03, + "learning_rate": 1.1902983501951666e-05, + "loss": 0.9224, + "step": 3665 + }, + { + "epoch": 2.03, + "learning_rate": 1.1841368559852892e-05, + "loss": 0.9442, + "step": 3670 + }, + { + "epoch": 2.03, + "learning_rate": 1.17798639775305e-05, + "loss": 0.8688, + "step": 3675 + }, + { + "epoch": 2.03, + "learning_rate": 1.1718470270815608e-05, + "loss": 0.8912, + "step": 3680 + }, + { + "epoch": 2.04, + "learning_rate": 1.1657187954609496e-05, + "loss": 0.9131, + "step": 3685 + }, + { + "epoch": 2.04, + "learning_rate": 1.1596017542879168e-05, + "loss": 0.9827, + "step": 3690 + }, + { + "epoch": 2.04, + "learning_rate": 1.1534959548653132e-05, + "loss": 0.8588, + "step": 3695 + }, + { + "epoch": 2.05, + "learning_rate": 1.147401448401706e-05, + "loss": 0.9359, + "step": 3700 + }, + { + "epoch": 2.05, + "learning_rate": 1.1413182860109491e-05, + "loss": 0.9375, + "step": 3705 + }, + { + "epoch": 2.05, + "learning_rate": 1.1352465187117562e-05, + "loss": 0.9242, + "step": 3710 + }, + { + "epoch": 2.05, + "learning_rate": 1.1291861974272703e-05, + "loss": 0.9132, + "step": 3715 + }, + { + "epoch": 2.06, + "learning_rate": 1.1231373729846393e-05, + "loss": 0.9142, + "step": 3720 + }, + { + "epoch": 2.06, + "learning_rate": 1.1171000961145883e-05, + "loss": 0.9132, + "step": 3725 + }, + { + "epoch": 2.06, + "learning_rate": 1.1110744174509952e-05, + "loss": 0.9603, + "step": 3730 + }, + { + "epoch": 2.07, + "learning_rate": 1.1050603875304622e-05, + "loss": 0.9442, + "step": 3735 + }, + { + "epoch": 2.07, + "learning_rate": 1.0990580567918979e-05, + "loss": 0.9392, + "step": 3740 + }, + { + "epoch": 2.07, + "learning_rate": 1.0930674755760908e-05, + "loss": 0.9203, + "step": 3745 + }, + { + "epoch": 2.07, + "learning_rate": 1.0870886941252872e-05, + "loss": 0.9468, + "step": 3750 + }, + { + "epoch": 2.08, + "learning_rate": 1.0811217625827705e-05, + "loss": 0.9605, + "step": 3755 + }, + { + "epoch": 2.08, + "learning_rate": 1.0751667309924399e-05, + "loss": 0.9166, + "step": 3760 + }, + { + "epoch": 2.08, + "learning_rate": 1.0692236492983918e-05, + "loss": 0.939, + "step": 3765 + }, + { + "epoch": 2.08, + "learning_rate": 1.0632925673445001e-05, + "loss": 0.9409, + "step": 3770 + }, + { + "epoch": 2.09, + "learning_rate": 1.0573735348739987e-05, + "loss": 0.9523, + "step": 3775 + }, + { + "epoch": 2.09, + "learning_rate": 1.0514666015290645e-05, + "loss": 0.8867, + "step": 3780 + }, + { + "epoch": 2.09, + "learning_rate": 1.0455718168503967e-05, + "loss": 0.9816, + "step": 3785 + }, + { + "epoch": 2.1, + "learning_rate": 1.0396892302768127e-05, + "loss": 0.9465, + "step": 3790 + }, + { + "epoch": 2.1, + "learning_rate": 1.033818891144817e-05, + "loss": 0.9352, + "step": 3795 + }, + { + "epoch": 2.1, + "learning_rate": 1.0279608486882054e-05, + "loss": 0.8877, + "step": 3800 + }, + { + "epoch": 2.1, + "learning_rate": 1.0221151520376343e-05, + "loss": 0.9098, + "step": 3805 + }, + { + "epoch": 2.11, + "learning_rate": 1.0162818502202251e-05, + "loss": 0.8687, + "step": 3810 + }, + { + "epoch": 2.11, + "learning_rate": 1.0104609921591387e-05, + "loss": 0.9256, + "step": 3815 + }, + { + "epoch": 2.11, + "learning_rate": 1.0046526266731782e-05, + "loss": 0.92, + "step": 3820 + }, + { + "epoch": 2.12, + "learning_rate": 9.988568024763673e-06, + "loss": 0.8813, + "step": 3825 + }, + { + "epoch": 2.12, + "learning_rate": 9.930735681775505e-06, + "loss": 0.891, + "step": 3830 + }, + { + "epoch": 2.12, + "learning_rate": 9.87302972279982e-06, + "loss": 0.9479, + "step": 3835 + }, + { + "epoch": 2.12, + "learning_rate": 9.815450631809191e-06, + "loss": 0.8963, + "step": 3840 + }, + { + "epoch": 2.13, + "learning_rate": 9.757998891712172e-06, + "loss": 0.9178, + "step": 3845 + }, + { + "epoch": 2.13, + "learning_rate": 9.700674984349228e-06, + "loss": 0.9072, + "step": 3850 + }, + { + "epoch": 2.13, + "learning_rate": 9.643479390488717e-06, + "loss": 0.8879, + "step": 3855 + }, + { + "epoch": 2.13, + "learning_rate": 9.586412589822846e-06, + "loss": 0.9484, + "step": 3860 + }, + { + "epoch": 2.14, + "learning_rate": 9.529475060963649e-06, + "loss": 0.9173, + "step": 3865 + }, + { + "epoch": 2.14, + "learning_rate": 9.472667281438982e-06, + "loss": 0.9318, + "step": 3870 + }, + { + "epoch": 2.14, + "learning_rate": 9.415989727688484e-06, + "loss": 0.9242, + "step": 3875 + }, + { + "epoch": 2.15, + "learning_rate": 9.359442875059631e-06, + "loss": 0.9314, + "step": 3880 + }, + { + "epoch": 2.15, + "learning_rate": 9.303027197803726e-06, + "loss": 0.9227, + "step": 3885 + }, + { + "epoch": 2.15, + "learning_rate": 9.246743169071906e-06, + "loss": 0.9253, + "step": 3890 + }, + { + "epoch": 2.15, + "learning_rate": 9.190591260911201e-06, + "loss": 0.9457, + "step": 3895 + }, + { + "epoch": 2.16, + "learning_rate": 9.134571944260554e-06, + "loss": 0.9214, + "step": 3900 + }, + { + "epoch": 2.16, + "learning_rate": 9.078685688946884e-06, + "loss": 0.9534, + "step": 3905 + }, + { + "epoch": 2.16, + "learning_rate": 9.022932963681141e-06, + "loss": 0.9236, + "step": 3910 + }, + { + "epoch": 2.16, + "learning_rate": 8.967314236054384e-06, + "loss": 0.8905, + "step": 3915 + }, + { + "epoch": 2.17, + "learning_rate": 8.911829972533817e-06, + "loss": 0.9319, + "step": 3920 + }, + { + "epoch": 2.17, + "learning_rate": 8.856480638458966e-06, + "loss": 0.8989, + "step": 3925 + }, + { + "epoch": 2.17, + "learning_rate": 8.80126669803766e-06, + "loss": 0.9234, + "step": 3930 + }, + { + "epoch": 2.18, + "learning_rate": 8.746188614342263e-06, + "loss": 0.9619, + "step": 3935 + }, + { + "epoch": 2.18, + "learning_rate": 8.691246849305653e-06, + "loss": 0.9537, + "step": 3940 + }, + { + "epoch": 2.18, + "learning_rate": 8.636441863717499e-06, + "loss": 0.9809, + "step": 3945 + }, + { + "epoch": 2.18, + "learning_rate": 8.581774117220238e-06, + "loss": 0.9219, + "step": 3950 + }, + { + "epoch": 2.19, + "learning_rate": 8.52724406830538e-06, + "loss": 0.9502, + "step": 3955 + }, + { + "epoch": 2.19, + "learning_rate": 8.472852174309514e-06, + "loss": 0.9277, + "step": 3960 + }, + { + "epoch": 2.19, + "learning_rate": 8.418598891410581e-06, + "loss": 0.8978, + "step": 3965 + }, + { + "epoch": 2.2, + "learning_rate": 8.364484674624e-06, + "loss": 0.9026, + "step": 3970 + }, + { + "epoch": 2.2, + "learning_rate": 8.31050997779885e-06, + "loss": 0.9658, + "step": 3975 + }, + { + "epoch": 2.2, + "learning_rate": 8.25667525361409e-06, + "loss": 0.9288, + "step": 3980 + }, + { + "epoch": 2.2, + "learning_rate": 8.202980953574735e-06, + "loss": 0.9009, + "step": 3985 + }, + { + "epoch": 2.21, + "learning_rate": 8.14942752800808e-06, + "loss": 0.934, + "step": 3990 + }, + { + "epoch": 2.21, + "learning_rate": 8.09601542605993e-06, + "loss": 0.9261, + "step": 3995 + }, + { + "epoch": 2.21, + "learning_rate": 8.042745095690826e-06, + "loss": 0.9086, + "step": 4000 + }, + { + "epoch": 2.21, + "learning_rate": 7.989616983672288e-06, + "loss": 0.9397, + "step": 4005 + }, + { + "epoch": 2.22, + "learning_rate": 7.936631535583055e-06, + "loss": 0.8661, + "step": 4010 + }, + { + "epoch": 2.22, + "learning_rate": 7.88378919580538e-06, + "loss": 0.9433, + "step": 4015 + }, + { + "epoch": 2.22, + "learning_rate": 7.83109040752128e-06, + "loss": 0.9473, + "step": 4020 + }, + { + "epoch": 2.23, + "learning_rate": 7.778535612708824e-06, + "loss": 0.9177, + "step": 4025 + }, + { + "epoch": 2.23, + "learning_rate": 7.726125252138417e-06, + "loss": 0.8932, + "step": 4030 + }, + { + "epoch": 2.23, + "learning_rate": 7.673859765369126e-06, + "loss": 0.9187, + "step": 4035 + }, + { + "epoch": 2.23, + "learning_rate": 7.62173959074497e-06, + "loss": 0.9009, + "step": 4040 + }, + { + "epoch": 2.24, + "learning_rate": 7.569765165391257e-06, + "loss": 0.9348, + "step": 4045 + }, + { + "epoch": 2.24, + "learning_rate": 7.517936925210917e-06, + "loss": 0.8827, + "step": 4050 + }, + { + "epoch": 2.24, + "learning_rate": 7.466255304880834e-06, + "loss": 0.9668, + "step": 4055 + }, + { + "epoch": 2.24, + "learning_rate": 7.4147207378482295e-06, + "loss": 0.934, + "step": 4060 + }, + { + "epoch": 2.25, + "learning_rate": 7.36333365632697e-06, + "loss": 0.9251, + "step": 4065 + }, + { + "epoch": 2.25, + "learning_rate": 7.312094491294033e-06, + "loss": 0.9225, + "step": 4070 + }, + { + "epoch": 2.25, + "learning_rate": 7.261003672485783e-06, + "loss": 0.9198, + "step": 4075 + }, + { + "epoch": 2.26, + "learning_rate": 7.210061628394477e-06, + "loss": 0.9023, + "step": 4080 + }, + { + "epoch": 2.26, + "learning_rate": 7.159268786264564e-06, + "loss": 0.9378, + "step": 4085 + }, + { + "epoch": 2.26, + "learning_rate": 7.108625572089209e-06, + "loss": 0.9148, + "step": 4090 + }, + { + "epoch": 2.26, + "learning_rate": 7.05813241060661e-06, + "loss": 0.9308, + "step": 4095 + }, + { + "epoch": 2.27, + "learning_rate": 7.007789725296557e-06, + "loss": 0.8979, + "step": 4100 + }, + { + "epoch": 2.27, + "learning_rate": 6.957597938376748e-06, + "loss": 0.9329, + "step": 4105 + }, + { + "epoch": 2.27, + "learning_rate": 6.907557470799358e-06, + "loss": 0.8849, + "step": 4110 + }, + { + "epoch": 2.28, + "learning_rate": 6.857668742247458e-06, + "loss": 0.8879, + "step": 4115 + }, + { + "epoch": 2.28, + "learning_rate": 6.807932171131498e-06, + "loss": 0.9132, + "step": 4120 + }, + { + "epoch": 2.28, + "learning_rate": 6.758348174585804e-06, + "loss": 0.9273, + "step": 4125 + }, + { + "epoch": 2.28, + "learning_rate": 6.7089171684650785e-06, + "loss": 0.9115, + "step": 4130 + }, + { + "epoch": 2.29, + "learning_rate": 6.659639567340914e-06, + "loss": 0.9431, + "step": 4135 + }, + { + "epoch": 2.29, + "learning_rate": 6.610515784498314e-06, + "loss": 0.9515, + "step": 4140 + }, + { + "epoch": 2.29, + "learning_rate": 6.561546231932228e-06, + "loss": 0.9069, + "step": 4145 + }, + { + "epoch": 2.29, + "learning_rate": 6.512731320344101e-06, + "loss": 0.9178, + "step": 4150 + }, + { + "epoch": 2.3, + "learning_rate": 6.464071459138405e-06, + "loss": 0.9156, + "step": 4155 + }, + { + "epoch": 2.3, + "learning_rate": 6.415567056419244e-06, + "loss": 0.9637, + "step": 4160 + }, + { + "epoch": 2.3, + "learning_rate": 6.3672185189869e-06, + "loss": 0.9446, + "step": 4165 + }, + { + "epoch": 2.31, + "learning_rate": 6.319026252334445e-06, + "loss": 0.8726, + "step": 4170 + }, + { + "epoch": 2.31, + "learning_rate": 6.270990660644313e-06, + "loss": 0.9324, + "step": 4175 + }, + { + "epoch": 2.31, + "learning_rate": 6.223112146784935e-06, + "loss": 0.9252, + "step": 4180 + }, + { + "epoch": 2.31, + "learning_rate": 6.1753911123073435e-06, + "loss": 0.8924, + "step": 4185 + }, + { + "epoch": 2.32, + "learning_rate": 6.127827957441817e-06, + "loss": 0.9344, + "step": 4190 + }, + { + "epoch": 2.32, + "learning_rate": 6.080423081094522e-06, + "loss": 0.9341, + "step": 4195 + }, + { + "epoch": 2.32, + "learning_rate": 6.033176880844133e-06, + "loss": 0.905, + "step": 4200 + }, + { + "epoch": 2.33, + "learning_rate": 5.986089752938584e-06, + "loss": 0.9231, + "step": 4205 + }, + { + "epoch": 2.33, + "learning_rate": 5.939162092291622e-06, + "loss": 0.906, + "step": 4210 + }, + { + "epoch": 2.33, + "learning_rate": 5.892394292479633e-06, + "loss": 0.8957, + "step": 4215 + }, + { + "epoch": 2.33, + "learning_rate": 5.8457867457382024e-06, + "loss": 0.867, + "step": 4220 + }, + { + "epoch": 2.34, + "learning_rate": 5.7993398429589506e-06, + "loss": 0.9022, + "step": 4225 + }, + { + "epoch": 2.34, + "learning_rate": 5.753053973686148e-06, + "loss": 0.9064, + "step": 4230 + }, + { + "epoch": 2.34, + "learning_rate": 5.7069295261135525e-06, + "loss": 0.9019, + "step": 4235 + }, + { + "epoch": 2.34, + "learning_rate": 5.66096688708104e-06, + "loss": 0.9397, + "step": 4240 + }, + { + "epoch": 2.35, + "learning_rate": 5.615166442071457e-06, + "loss": 0.8971, + "step": 4245 + }, + { + "epoch": 2.35, + "learning_rate": 5.569528575207339e-06, + "loss": 0.8902, + "step": 4250 + }, + { + "epoch": 2.35, + "learning_rate": 5.5240536692477e-06, + "loss": 0.8982, + "step": 4255 + }, + { + "epoch": 2.36, + "learning_rate": 5.4787421055848164e-06, + "loss": 0.9135, + "step": 4260 + }, + { + "epoch": 2.36, + "learning_rate": 5.433594264241043e-06, + "loss": 0.8969, + "step": 4265 + }, + { + "epoch": 2.36, + "learning_rate": 5.3886105238656055e-06, + "loss": 0.9036, + "step": 4270 + }, + { + "epoch": 2.36, + "learning_rate": 5.3437912617314425e-06, + "loss": 0.9313, + "step": 4275 + }, + { + "epoch": 2.37, + "learning_rate": 5.299136853732034e-06, + "loss": 0.9034, + "step": 4280 + }, + { + "epoch": 2.37, + "learning_rate": 5.254647674378252e-06, + "loss": 0.9673, + "step": 4285 + }, + { + "epoch": 2.37, + "learning_rate": 5.210324096795202e-06, + "loss": 0.9044, + "step": 4290 + }, + { + "epoch": 2.37, + "learning_rate": 5.166166492719124e-06, + "loss": 0.937, + "step": 4295 + }, + { + "epoch": 2.38, + "learning_rate": 5.122175232494255e-06, + "loss": 0.9095, + "step": 4300 + }, + { + "epoch": 2.38, + "learning_rate": 5.078350685069727e-06, + "loss": 0.8787, + "step": 4305 + }, + { + "epoch": 2.38, + "learning_rate": 5.03469321799647e-06, + "loss": 0.8945, + "step": 4310 + }, + { + "epoch": 2.39, + "learning_rate": 4.9912031974241376e-06, + "loss": 0.9285, + "step": 4315 + }, + { + "epoch": 2.39, + "learning_rate": 4.947880988098025e-06, + "loss": 0.9318, + "step": 4320 + }, + { + "epoch": 2.39, + "learning_rate": 4.90472695335602e-06, + "loss": 0.9171, + "step": 4325 + }, + { + "epoch": 2.39, + "learning_rate": 4.8617414551255545e-06, + "loss": 0.923, + "step": 4330 + }, + { + "epoch": 2.4, + "learning_rate": 4.818924853920545e-06, + "loss": 0.9153, + "step": 4335 + }, + { + "epoch": 2.4, + "learning_rate": 4.776277508838428e-06, + "loss": 0.907, + "step": 4340 + }, + { + "epoch": 2.4, + "learning_rate": 4.733799777557069e-06, + "loss": 0.8869, + "step": 4345 + }, + { + "epoch": 2.41, + "learning_rate": 4.691492016331842e-06, + "loss": 0.95, + "step": 4350 + }, + { + "epoch": 2.41, + "learning_rate": 4.6493545799925655e-06, + "loss": 0.9145, + "step": 4355 + }, + { + "epoch": 2.41, + "learning_rate": 4.607387821940609e-06, + "loss": 0.9175, + "step": 4360 + }, + { + "epoch": 2.41, + "learning_rate": 4.565592094145835e-06, + "loss": 0.9125, + "step": 4365 + }, + { + "epoch": 2.42, + "learning_rate": 4.523967747143745e-06, + "loss": 0.8972, + "step": 4370 + }, + { + "epoch": 2.42, + "learning_rate": 4.482515130032453e-06, + "loss": 0.9113, + "step": 4375 + }, + { + "epoch": 2.42, + "learning_rate": 4.441234590469817e-06, + "loss": 0.9447, + "step": 4380 + }, + { + "epoch": 2.42, + "learning_rate": 4.400126474670499e-06, + "loss": 0.9472, + "step": 4385 + }, + { + "epoch": 2.43, + "learning_rate": 4.359191127403059e-06, + "loss": 0.9391, + "step": 4390 + }, + { + "epoch": 2.43, + "learning_rate": 4.318428891987078e-06, + "loss": 0.9171, + "step": 4395 + }, + { + "epoch": 2.43, + "learning_rate": 4.2778401102902595e-06, + "loss": 0.9468, + "step": 4400 + }, + { + "epoch": 2.44, + "learning_rate": 4.237425122725586e-06, + "loss": 0.9034, + "step": 4405 + }, + { + "epoch": 2.44, + "learning_rate": 4.197184268248436e-06, + "loss": 0.8667, + "step": 4410 + }, + { + "epoch": 2.44, + "learning_rate": 4.157117884353765e-06, + "loss": 0.9553, + "step": 4415 + }, + { + "epoch": 2.44, + "learning_rate": 4.117226307073268e-06, + "loss": 0.931, + "step": 4420 + }, + { + "epoch": 2.45, + "learning_rate": 4.07750987097254e-06, + "loss": 0.9287, + "step": 4425 + }, + { + "epoch": 2.45, + "learning_rate": 4.037968909148326e-06, + "loss": 0.8737, + "step": 4430 + }, + { + "epoch": 2.45, + "learning_rate": 3.998603753225647e-06, + "loss": 0.9677, + "step": 4435 + }, + { + "epoch": 2.46, + "learning_rate": 3.959414733355094e-06, + "loss": 0.9194, + "step": 4440 + }, + { + "epoch": 2.46, + "learning_rate": 3.9204021782100115e-06, + "loss": 0.9424, + "step": 4445 + }, + { + "epoch": 2.46, + "learning_rate": 3.8815664149837675e-06, + "loss": 0.9402, + "step": 4450 + }, + { + "epoch": 2.46, + "learning_rate": 3.8429077693869854e-06, + "loss": 0.9196, + "step": 4455 + }, + { + "epoch": 2.47, + "learning_rate": 3.804426565644839e-06, + "loss": 0.9357, + "step": 4460 + }, + { + "epoch": 2.47, + "learning_rate": 3.7661231264943086e-06, + "loss": 0.9234, + "step": 4465 + }, + { + "epoch": 2.47, + "learning_rate": 3.7279977731814963e-06, + "loss": 0.9135, + "step": 4470 + }, + { + "epoch": 2.47, + "learning_rate": 3.690050825458913e-06, + "loss": 0.9074, + "step": 4475 + }, + { + "epoch": 2.48, + "learning_rate": 3.652282601582793e-06, + "loss": 0.8658, + "step": 4480 + }, + { + "epoch": 2.48, + "learning_rate": 3.6146934183104748e-06, + "loss": 0.9522, + "step": 4485 + }, + { + "epoch": 2.48, + "learning_rate": 3.5772835908976538e-06, + "loss": 0.937, + "step": 4490 + }, + { + "epoch": 2.49, + "learning_rate": 3.540053433095841e-06, + "loss": 0.9336, + "step": 4495 + }, + { + "epoch": 2.49, + "learning_rate": 3.503003257149637e-06, + "loss": 0.9437, + "step": 4500 + }, + { + "epoch": 2.49, + "learning_rate": 3.4661333737941976e-06, + "loss": 0.9084, + "step": 4505 + }, + { + "epoch": 2.49, + "learning_rate": 3.429444092252554e-06, + "loss": 0.9378, + "step": 4510 + }, + { + "epoch": 2.5, + "learning_rate": 3.39293572023307e-06, + "loss": 0.8999, + "step": 4515 + }, + { + "epoch": 2.5, + "learning_rate": 3.3566085639268413e-06, + "loss": 0.9417, + "step": 4520 + }, + { + "epoch": 2.5, + "learning_rate": 3.32046292800513e-06, + "loss": 0.9288, + "step": 4525 + }, + { + "epoch": 2.5, + "learning_rate": 3.2844991156168097e-06, + "loss": 0.8957, + "step": 4530 + }, + { + "epoch": 2.51, + "learning_rate": 3.2487174283858223e-06, + "loss": 0.9002, + "step": 4535 + }, + { + "epoch": 2.51, + "learning_rate": 3.2131181664086517e-06, + "loss": 0.9315, + "step": 4540 + }, + { + "epoch": 2.51, + "learning_rate": 3.1777016282517975e-06, + "loss": 0.8948, + "step": 4545 + }, + { + "epoch": 2.52, + "learning_rate": 3.142468110949287e-06, + "loss": 0.9015, + "step": 4550 + }, + { + "epoch": 2.52, + "learning_rate": 3.1074179100001737e-06, + "loss": 0.9273, + "step": 4555 + }, + { + "epoch": 2.52, + "learning_rate": 3.0725513193660404e-06, + "loss": 0.9307, + "step": 4560 + }, + { + "epoch": 2.52, + "learning_rate": 3.0378686314685934e-06, + "loss": 0.9075, + "step": 4565 + }, + { + "epoch": 2.53, + "learning_rate": 3.003370137187128e-06, + "loss": 0.8821, + "step": 4570 + }, + { + "epoch": 2.53, + "learning_rate": 2.969056125856154e-06, + "loss": 0.9245, + "step": 4575 + }, + { + "epoch": 2.53, + "learning_rate": 2.93492688526294e-06, + "loss": 0.9346, + "step": 4580 + }, + { + "epoch": 2.54, + "learning_rate": 2.900982701645111e-06, + "loss": 0.9226, + "step": 4585 + }, + { + "epoch": 2.54, + "learning_rate": 2.867223859688237e-06, + "loss": 0.9215, + "step": 4590 + }, + { + "epoch": 2.54, + "learning_rate": 2.83365064252345e-06, + "loss": 0.878, + "step": 4595 + }, + { + "epoch": 2.54, + "learning_rate": 2.800263331725078e-06, + "loss": 0.9207, + "step": 4600 + }, + { + "epoch": 2.55, + "learning_rate": 2.7670622073082657e-06, + "loss": 0.9156, + "step": 4605 + }, + { + "epoch": 2.55, + "learning_rate": 2.7340475477266507e-06, + "loss": 0.9296, + "step": 4610 + }, + { + "epoch": 2.55, + "learning_rate": 2.701219629869986e-06, + "loss": 0.9346, + "step": 4615 + }, + { + "epoch": 2.55, + "learning_rate": 2.6685787290618825e-06, + "loss": 0.9237, + "step": 4620 + }, + { + "epoch": 2.56, + "learning_rate": 2.636125119057428e-06, + "loss": 0.9162, + "step": 4625 + }, + { + "epoch": 2.56, + "learning_rate": 2.6038590720409565e-06, + "loss": 0.9145, + "step": 4630 + }, + { + "epoch": 2.56, + "learning_rate": 2.5717808586237067e-06, + "loss": 0.9776, + "step": 4635 + }, + { + "epoch": 2.57, + "learning_rate": 2.539890747841611e-06, + "loss": 0.9519, + "step": 4640 + }, + { + "epoch": 2.57, + "learning_rate": 2.5081890071529695e-06, + "loss": 0.9116, + "step": 4645 + }, + { + "epoch": 2.57, + "learning_rate": 2.4766759024362927e-06, + "loss": 0.9217, + "step": 4650 + }, + { + "epoch": 2.57, + "learning_rate": 2.445351697987988e-06, + "loss": 0.9113, + "step": 4655 + }, + { + "epoch": 2.58, + "learning_rate": 2.414216656520191e-06, + "loss": 0.9089, + "step": 4660 + }, + { + "epoch": 2.58, + "learning_rate": 2.3832710391585605e-06, + "loss": 0.9367, + "step": 4665 + }, + { + "epoch": 2.58, + "learning_rate": 2.3525151054400675e-06, + "loss": 0.9453, + "step": 4670 + }, + { + "epoch": 2.59, + "learning_rate": 2.3219491133108394e-06, + "loss": 0.9187, + "step": 4675 + }, + { + "epoch": 2.59, + "learning_rate": 2.2915733191239824e-06, + "loss": 0.911, + "step": 4680 + }, + { + "epoch": 2.59, + "learning_rate": 2.261387977637436e-06, + "loss": 0.9309, + "step": 4685 + }, + { + "epoch": 2.59, + "learning_rate": 2.2313933420118395e-06, + "loss": 0.8921, + "step": 4690 + }, + { + "epoch": 2.6, + "learning_rate": 2.2015896638084037e-06, + "loss": 0.9332, + "step": 4695 + }, + { + "epoch": 2.6, + "learning_rate": 2.171977192986813e-06, + "loss": 0.9316, + "step": 4700 + }, + { + "epoch": 2.6, + "learning_rate": 2.142556177903096e-06, + "loss": 0.9453, + "step": 4705 + }, + { + "epoch": 2.6, + "learning_rate": 2.1133268653076022e-06, + "loss": 0.937, + "step": 4710 + }, + { + "epoch": 2.61, + "learning_rate": 2.084289500342862e-06, + "loss": 0.9067, + "step": 4715 + }, + { + "epoch": 2.61, + "learning_rate": 2.0554443265415864e-06, + "loss": 0.9694, + "step": 4720 + }, + { + "epoch": 2.61, + "learning_rate": 2.0267915858245943e-06, + "loss": 0.9217, + "step": 4725 + }, + { + "epoch": 2.62, + "learning_rate": 1.998331518498797e-06, + "loss": 0.9399, + "step": 4730 + }, + { + "epoch": 2.62, + "learning_rate": 1.970064363255175e-06, + "loss": 0.9164, + "step": 4735 + }, + { + "epoch": 2.62, + "learning_rate": 1.941990357166784e-06, + "loss": 0.9336, + "step": 4740 + }, + { + "epoch": 2.62, + "learning_rate": 1.9141097356867644e-06, + "loss": 0.926, + "step": 4745 + }, + { + "epoch": 2.63, + "learning_rate": 1.8864227326463452e-06, + "loss": 0.9109, + "step": 4750 + }, + { + "epoch": 2.63, + "learning_rate": 1.8589295802529328e-06, + "loss": 0.9528, + "step": 4755 + }, + { + "epoch": 2.63, + "learning_rate": 1.8316305090881003e-06, + "loss": 0.9142, + "step": 4760 + }, + { + "epoch": 2.63, + "learning_rate": 1.8045257481057204e-06, + "loss": 0.9247, + "step": 4765 + }, + { + "epoch": 2.64, + "learning_rate": 1.7776155246299747e-06, + "loss": 0.9162, + "step": 4770 + }, + { + "epoch": 2.64, + "learning_rate": 1.7509000643535167e-06, + "loss": 0.9084, + "step": 4775 + }, + { + "epoch": 2.64, + "learning_rate": 1.7243795913355148e-06, + "loss": 0.8427, + "step": 4780 + }, + { + "epoch": 2.65, + "learning_rate": 1.6980543279998401e-06, + "loss": 0.8851, + "step": 4785 + }, + { + "epoch": 2.65, + "learning_rate": 1.671924495133126e-06, + "loss": 0.9155, + "step": 4790 + }, + { + "epoch": 2.65, + "learning_rate": 1.6459903118829777e-06, + "loss": 0.9049, + "step": 4795 + }, + { + "epoch": 2.65, + "learning_rate": 1.6202519957561114e-06, + "loss": 0.8831, + "step": 4800 + }, + { + "epoch": 2.66, + "learning_rate": 1.5947097626165252e-06, + "loss": 0.9286, + "step": 4805 + }, + { + "epoch": 2.66, + "learning_rate": 1.5693638266836952e-06, + "loss": 0.9299, + "step": 4810 + }, + { + "epoch": 2.66, + "learning_rate": 1.5442144005307774e-06, + "loss": 0.8739, + "step": 4815 + }, + { + "epoch": 2.67, + "learning_rate": 1.519261695082827e-06, + "loss": 0.9201, + "step": 4820 + }, + { + "epoch": 2.67, + "learning_rate": 1.4945059196150247e-06, + "loss": 0.9062, + "step": 4825 + }, + { + "epoch": 2.67, + "learning_rate": 1.4699472817509248e-06, + "loss": 0.9304, + "step": 4830 + }, + { + "epoch": 2.67, + "learning_rate": 1.4455859874607235e-06, + "loss": 0.9253, + "step": 4835 + }, + { + "epoch": 2.68, + "learning_rate": 1.4214222410594947e-06, + "loss": 0.9261, + "step": 4840 + }, + { + "epoch": 2.68, + "learning_rate": 1.3974562452055418e-06, + "loss": 0.8764, + "step": 4845 + }, + { + "epoch": 2.68, + "learning_rate": 1.3736882008986262e-06, + "loss": 0.8906, + "step": 4850 + }, + { + "epoch": 2.68, + "learning_rate": 1.3501183074783263e-06, + "loss": 0.9671, + "step": 4855 + }, + { + "epoch": 2.69, + "learning_rate": 1.3267467626223606e-06, + "loss": 0.8979, + "step": 4860 + }, + { + "epoch": 2.69, + "learning_rate": 1.3035737623449146e-06, + "loss": 0.9284, + "step": 4865 + }, + { + "epoch": 2.69, + "learning_rate": 1.2805995009950083e-06, + "loss": 0.9338, + "step": 4870 + }, + { + "epoch": 2.7, + "learning_rate": 1.257824171254865e-06, + "loss": 0.9474, + "step": 4875 + }, + { + "epoch": 2.7, + "learning_rate": 1.2352479641382919e-06, + "loss": 0.9307, + "step": 4880 + }, + { + "epoch": 2.7, + "learning_rate": 1.2128710689890826e-06, + "loss": 0.9578, + "step": 4885 + }, + { + "epoch": 2.7, + "learning_rate": 1.1906936734794233e-06, + "loss": 0.9123, + "step": 4890 + }, + { + "epoch": 2.71, + "learning_rate": 1.1687159636083161e-06, + "loss": 0.9157, + "step": 4895 + }, + { + "epoch": 2.71, + "learning_rate": 1.1469381237000476e-06, + "loss": 0.9135, + "step": 4900 + }, + { + "epoch": 2.71, + "learning_rate": 1.1253603364025867e-06, + "loss": 0.9073, + "step": 4905 + }, + { + "epoch": 2.71, + "learning_rate": 1.1039827826861193e-06, + "loss": 0.9198, + "step": 4910 + }, + { + "epoch": 2.72, + "learning_rate": 1.0828056418414695e-06, + "loss": 0.911, + "step": 4915 + }, + { + "epoch": 2.72, + "learning_rate": 1.06182909147865e-06, + "loss": 0.9124, + "step": 4920 + }, + { + "epoch": 2.72, + "learning_rate": 1.0410533075253248e-06, + "loss": 0.9308, + "step": 4925 + }, + { + "epoch": 2.73, + "learning_rate": 1.020478464225369e-06, + "loss": 0.9005, + "step": 4930 + }, + { + "epoch": 2.73, + "learning_rate": 1.0001047341373832e-06, + "loss": 0.9326, + "step": 4935 + }, + { + "epoch": 2.73, + "learning_rate": 9.7993228813327e-07, + "loss": 0.8905, + "step": 4940 + }, + { + "epoch": 2.73, + "learning_rate": 9.599612953967746e-07, + "loss": 0.9238, + "step": 4945 + }, + { + "epoch": 2.74, + "learning_rate": 9.401919234220902e-07, + "loss": 0.8964, + "step": 4950 + }, + { + "epoch": 2.74, + "learning_rate": 9.206243380124352e-07, + "loss": 0.9183, + "step": 4955 + }, + { + "epoch": 2.74, + "learning_rate": 9.012587032786706e-07, + "loss": 0.9074, + "step": 4960 + }, + { + "epoch": 2.75, + "learning_rate": 8.820951816379263e-07, + "loss": 0.9255, + "step": 4965 + }, + { + "epoch": 2.75, + "learning_rate": 8.631339338122324e-07, + "loss": 0.9133, + "step": 4970 + }, + { + "epoch": 2.75, + "learning_rate": 8.443751188271703e-07, + "loss": 0.9056, + "step": 4975 + }, + { + "epoch": 2.75, + "learning_rate": 8.258188940105549e-07, + "loss": 0.8905, + "step": 4980 + }, + { + "epoch": 2.76, + "learning_rate": 8.074654149910821e-07, + "loss": 0.9088, + "step": 4985 + }, + { + "epoch": 2.76, + "learning_rate": 7.893148356970748e-07, + "loss": 0.8994, + "step": 4990 + }, + { + "epoch": 2.76, + "learning_rate": 7.713673083551281e-07, + "loss": 0.8946, + "step": 4995 + }, + { + "epoch": 2.76, + "learning_rate": 7.536229834888913e-07, + "loss": 0.924, + "step": 5000 + }, + { + "epoch": 2.77, + "learning_rate": 7.360820099177712e-07, + "loss": 0.9102, + "step": 5005 + }, + { + "epoch": 2.77, + "learning_rate": 7.187445347556859e-07, + "loss": 0.9198, + "step": 5010 + }, + { + "epoch": 2.77, + "learning_rate": 7.016107034098524e-07, + "loss": 0.9004, + "step": 5015 + }, + { + "epoch": 2.78, + "learning_rate": 6.846806595795424e-07, + "loss": 0.8849, + "step": 5020 + }, + { + "epoch": 2.78, + "learning_rate": 6.679545452548924e-07, + "loss": 0.9473, + "step": 5025 + }, + { + "epoch": 2.78, + "learning_rate": 6.514325007157013e-07, + "loss": 0.9339, + "step": 5030 + }, + { + "epoch": 2.78, + "learning_rate": 6.35114664530273e-07, + "loss": 0.9172, + "step": 5035 + }, + { + "epoch": 2.79, + "learning_rate": 6.190011735542262e-07, + "loss": 0.9796, + "step": 5040 + }, + { + "epoch": 2.79, + "learning_rate": 6.030921629293778e-07, + "loss": 0.9367, + "step": 5045 + }, + { + "epoch": 2.79, + "learning_rate": 5.873877660825783e-07, + "loss": 0.9254, + "step": 5050 + }, + { + "epoch": 2.8, + "learning_rate": 5.718881147246252e-07, + "loss": 0.8976, + "step": 5055 + }, + { + "epoch": 2.8, + "learning_rate": 5.565933388491263e-07, + "loss": 0.9099, + "step": 5060 + }, + { + "epoch": 2.8, + "learning_rate": 5.415035667314328e-07, + "loss": 0.9057, + "step": 5065 + }, + { + "epoch": 2.8, + "learning_rate": 5.266189249275521e-07, + "loss": 0.8978, + "step": 5070 + }, + { + "epoch": 2.81, + "learning_rate": 5.119395382730929e-07, + "loss": 0.9172, + "step": 5075 + }, + { + "epoch": 2.81, + "learning_rate": 4.974655298822129e-07, + "loss": 0.9393, + "step": 5080 + }, + { + "epoch": 2.81, + "learning_rate": 4.831970211465892e-07, + "loss": 0.9137, + "step": 5085 + }, + { + "epoch": 2.81, + "learning_rate": 4.6913413173439723e-07, + "loss": 0.8573, + "step": 5090 + }, + { + "epoch": 2.82, + "learning_rate": 4.552769795893086e-07, + "loss": 0.9533, + "step": 5095 + }, + { + "epoch": 2.82, + "learning_rate": 4.416256809295083e-07, + "loss": 0.8693, + "step": 5100 + }, + { + "epoch": 2.82, + "learning_rate": 4.2818035024670963e-07, + "loss": 0.9531, + "step": 5105 + }, + { + "epoch": 2.83, + "learning_rate": 4.1494110030519397e-07, + "loss": 0.9077, + "step": 5110 + }, + { + "epoch": 2.83, + "learning_rate": 4.019080421408833e-07, + "loss": 0.9061, + "step": 5115 + }, + { + "epoch": 2.83, + "learning_rate": 3.8908128506037756e-07, + "loss": 0.9606, + "step": 5120 + }, + { + "epoch": 2.83, + "learning_rate": 3.7646093664007456e-07, + "loss": 0.9335, + "step": 5125 + }, + { + "epoch": 2.84, + "learning_rate": 3.640471027252346e-07, + "loss": 0.9054, + "step": 5130 + }, + { + "epoch": 2.84, + "learning_rate": 3.5183988742910903e-07, + "loss": 0.8801, + "step": 5135 + }, + { + "epoch": 2.84, + "learning_rate": 3.398393931320687e-07, + "loss": 0.913, + "step": 5140 + }, + { + "epoch": 2.84, + "learning_rate": 3.2804572048074357e-07, + "loss": 0.904, + "step": 5145 + }, + { + "epoch": 2.85, + "learning_rate": 3.164589683871705e-07, + "loss": 0.9144, + "step": 5150 + }, + { + "epoch": 2.85, + "learning_rate": 3.050792340279718e-07, + "loss": 0.9145, + "step": 5155 + }, + { + "epoch": 2.85, + "learning_rate": 2.939066128435419e-07, + "loss": 0.957, + "step": 5160 + }, + { + "epoch": 2.86, + "learning_rate": 2.829411985372399e-07, + "loss": 0.9196, + "step": 5165 + }, + { + "epoch": 2.86, + "learning_rate": 2.7218308307460916e-07, + "loss": 0.8893, + "step": 5170 + }, + { + "epoch": 2.86, + "learning_rate": 2.616323566825979e-07, + "loss": 0.9049, + "step": 5175 + }, + { + "epoch": 2.86, + "learning_rate": 2.51289107848815e-07, + "loss": 0.8977, + "step": 5180 + }, + { + "epoch": 2.87, + "learning_rate": 2.4115342332078074e-07, + "loss": 0.9187, + "step": 5185 + }, + { + "epoch": 2.87, + "learning_rate": 2.312253881051968e-07, + "loss": 0.9355, + "step": 5190 + }, + { + "epoch": 2.87, + "learning_rate": 2.2150508546723848e-07, + "loss": 0.9337, + "step": 5195 + }, + { + "epoch": 2.88, + "learning_rate": 2.119925969298553e-07, + "loss": 0.9011, + "step": 5200 + } + ], + "max_steps": 5424, + "num_train_epochs": 3, + "total_flos": 1.6898050002989875e+18, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-5200/training_args.bin b/checkpoint-5200/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..77433459daca6603950afc463ff989e817f81ac3 --- /dev/null +++ b/checkpoint-5200/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0f6a8853599a52b1f15984d75ce4b6434c335fdc67d4509d0b2ca0d8c613590a +size 3362 diff --git a/checkpoint-5300/README.md b/checkpoint-5300/README.md new file mode 100644 index 0000000000000000000000000000000000000000..53c0fe6b436905ad7a2c060e98fe1ebdeac1a2fe --- /dev/null +++ b/checkpoint-5300/README.md @@ -0,0 +1,15 @@ +--- +library_name: peft +--- +## Training procedure + + +The following `bitsandbytes` quantization config was used during training: +- load_in_8bit: True +- llm_int8_threshold: 6.0 +- llm_int8_skip_modules: None +- llm_int8_enable_fp32_cpu_offload: False +### Framework versions + + +- PEFT 0.5.0 diff --git a/checkpoint-5300/adapter_config.json b/checkpoint-5300/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..514ae99ac7471cc26016f7575e44dd8304f29b07 --- /dev/null +++ b/checkpoint-5300/adapter_config.json @@ -0,0 +1,21 @@ +{ + "auto_mapping": null, + "base_model_name_or_path": "meta-llama/Llama-2-7b-chat-hf", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "q_proj", + "v_proj" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-5300/adapter_model.bin b/checkpoint-5300/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..e7583600eaab4a9649618f80a76fbfe95ea7c52f --- /dev/null +++ b/checkpoint-5300/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e239e135b9e795d47e41f90aa35ebdfe89952dd2e5dde4a73b109d27e3ea816c +size 16821197 diff --git a/checkpoint-5300/finetuning_args.json b/checkpoint-5300/finetuning_args.json new file mode 100644 index 0000000000000000000000000000000000000000..d01efc206b59c6f88548e8f3940579f2ed2af33b --- /dev/null +++ b/checkpoint-5300/finetuning_args.json @@ -0,0 +1,16 @@ +{ + "dpo_beta": 0.1, + "finetuning_type": "lora", + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "lora_rank": 8, + "lora_target": [ + "q_proj", + "v_proj" + ], + "name_module_trainable": "mlp", + "num_hidden_layers": 32, + "num_layer_trainable": 3, + "ppo_score_norm": false, + "resume_lora_training": true +} diff --git a/checkpoint-5300/optimizer.pt b/checkpoint-5300/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..cc885912d5b23b437a88543c85f233f56917da48 --- /dev/null +++ b/checkpoint-5300/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:080678af6a77bd161dc08b90a7b4850071fef6a2f3bc546d146f914a48a7dc69 +size 33629893 diff --git a/checkpoint-5300/rng_state.pth b/checkpoint-5300/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..dd05e559f13d84a2b81f2edd60d55cce3ae7fbdd --- /dev/null +++ b/checkpoint-5300/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6198a7f69d8792204737f6938eb0c775e16c4b7cc32c80554d0b512fc6d6de52 +size 14575 diff --git a/checkpoint-5300/scaler.pt b/checkpoint-5300/scaler.pt new file mode 100644 index 0000000000000000000000000000000000000000..085782aaa7dc5fbe56c9b4bf65db6b31fa3027ab --- /dev/null +++ b/checkpoint-5300/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:94399f6da671640eb20b067dbd57bbe501b21514b9d06907a77a0330920ed0f6 +size 557 diff --git a/checkpoint-5300/scheduler.pt b/checkpoint-5300/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..3ae3b7033dd6f47d3cd81191607f6bb7750e5d85 --- /dev/null +++ b/checkpoint-5300/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:807ed65db53378f2db9e9df07fe48a8fb94655ba05d1384d2930d3e3bf5ae93d +size 627 diff --git a/checkpoint-5300/trainer_state.json b/checkpoint-5300/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..dd68ddddfc4f2115d983f4fe24945cd3ea593ce6 --- /dev/null +++ b/checkpoint-5300/trainer_state.json @@ -0,0 +1,6376 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 2.930605474149848, + "global_step": 5300, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.9999895164082156e-05, + "loss": 1.6349, + "step": 5 + }, + { + "epoch": 0.01, + "learning_rate": 4.999958065720787e-05, + "loss": 1.6199, + "step": 10 + }, + { + "epoch": 0.01, + "learning_rate": 4.999905648201487e-05, + "loss": 1.4834, + "step": 15 + }, + { + "epoch": 0.01, + "learning_rate": 4.999832264289934e-05, + "loss": 1.3882, + "step": 20 + }, + { + "epoch": 0.01, + "learning_rate": 4.999737914601591e-05, + "loss": 1.3679, + "step": 25 + }, + { + "epoch": 0.02, + "learning_rate": 4.999622599927756e-05, + "loss": 1.2396, + "step": 30 + }, + { + "epoch": 0.02, + "learning_rate": 4.999486321235559e-05, + "loss": 1.321, + "step": 35 + }, + { + "epoch": 0.02, + "learning_rate": 4.9993290796679516e-05, + "loss": 1.2874, + "step": 40 + }, + { + "epoch": 0.02, + "learning_rate": 4.999150876543699e-05, + "loss": 1.2607, + "step": 45 + }, + { + "epoch": 0.03, + "learning_rate": 4.9989517133573694e-05, + "loss": 1.2454, + "step": 50 + }, + { + "epoch": 0.03, + "learning_rate": 4.9987315917793174e-05, + "loss": 1.2799, + "step": 55 + }, + { + "epoch": 0.03, + "learning_rate": 4.998490513655676e-05, + "loss": 1.2575, + "step": 60 + }, + { + "epoch": 0.04, + "learning_rate": 4.998228481008337e-05, + "loss": 1.2404, + "step": 65 + }, + { + "epoch": 0.04, + "learning_rate": 4.997945496034934e-05, + "loss": 1.2219, + "step": 70 + }, + { + "epoch": 0.04, + "learning_rate": 4.9976415611088267e-05, + "loss": 1.2241, + "step": 75 + }, + { + "epoch": 0.04, + "learning_rate": 4.997316678779079e-05, + "loss": 1.1716, + "step": 80 + }, + { + "epoch": 0.05, + "learning_rate": 4.996970851770438e-05, + "loss": 1.1883, + "step": 85 + }, + { + "epoch": 0.05, + "learning_rate": 4.9966040829833115e-05, + "loss": 1.205, + "step": 90 + }, + { + "epoch": 0.05, + "learning_rate": 4.9962163754937426e-05, + "loss": 1.1246, + "step": 95 + }, + { + "epoch": 0.06, + "learning_rate": 4.995807732553384e-05, + "loss": 1.1636, + "step": 100 + }, + { + "epoch": 0.06, + "learning_rate": 4.9953781575894723e-05, + "loss": 1.158, + "step": 105 + }, + { + "epoch": 0.06, + "learning_rate": 4.9949276542048e-05, + "loss": 1.1477, + "step": 110 + }, + { + "epoch": 0.06, + "learning_rate": 4.9944562261776805e-05, + "loss": 1.1678, + "step": 115 + }, + { + "epoch": 0.07, + "learning_rate": 4.9939638774619216e-05, + "loss": 1.1501, + "step": 120 + }, + { + "epoch": 0.07, + "learning_rate": 4.99345061218679e-05, + "loss": 1.1955, + "step": 125 + }, + { + "epoch": 0.07, + "learning_rate": 4.9929164346569756e-05, + "loss": 1.1724, + "step": 130 + }, + { + "epoch": 0.07, + "learning_rate": 4.9923613493525576e-05, + "loss": 1.177, + "step": 135 + }, + { + "epoch": 0.08, + "learning_rate": 4.991785360928968e-05, + "loss": 1.1418, + "step": 140 + }, + { + "epoch": 0.08, + "learning_rate": 4.991188474216947e-05, + "loss": 1.1898, + "step": 145 + }, + { + "epoch": 0.08, + "learning_rate": 4.9905706942225094e-05, + "loss": 1.1479, + "step": 150 + }, + { + "epoch": 0.09, + "learning_rate": 4.9899320261268966e-05, + "loss": 1.1356, + "step": 155 + }, + { + "epoch": 0.09, + "learning_rate": 4.989272475286537e-05, + "loss": 1.1397, + "step": 160 + }, + { + "epoch": 0.09, + "learning_rate": 4.9885920472330004e-05, + "loss": 1.1215, + "step": 165 + }, + { + "epoch": 0.09, + "learning_rate": 4.9878907476729516e-05, + "loss": 1.167, + "step": 170 + }, + { + "epoch": 0.1, + "learning_rate": 4.9871685824881e-05, + "loss": 1.1219, + "step": 175 + }, + { + "epoch": 0.1, + "learning_rate": 4.9864255577351534e-05, + "loss": 1.0835, + "step": 180 + }, + { + "epoch": 0.1, + "learning_rate": 4.985661679645769e-05, + "loss": 1.0721, + "step": 185 + }, + { + "epoch": 0.11, + "learning_rate": 4.9848769546264915e-05, + "loss": 1.0692, + "step": 190 + }, + { + "epoch": 0.11, + "learning_rate": 4.9840713892587146e-05, + "loss": 1.0488, + "step": 195 + }, + { + "epoch": 0.11, + "learning_rate": 4.983244990298609e-05, + "loss": 1.1285, + "step": 200 + }, + { + "epoch": 0.11, + "learning_rate": 4.982397764677081e-05, + "loss": 1.0832, + "step": 205 + }, + { + "epoch": 0.12, + "learning_rate": 4.981529719499704e-05, + "loss": 1.0652, + "step": 210 + }, + { + "epoch": 0.12, + "learning_rate": 4.980640862046663e-05, + "loss": 1.1043, + "step": 215 + }, + { + "epoch": 0.12, + "learning_rate": 4.979731199772693e-05, + "loss": 1.112, + "step": 220 + }, + { + "epoch": 0.12, + "learning_rate": 4.9788007403070146e-05, + "loss": 1.1029, + "step": 225 + }, + { + "epoch": 0.13, + "learning_rate": 4.977849491453277e-05, + "loss": 1.0869, + "step": 230 + }, + { + "epoch": 0.13, + "learning_rate": 4.976877461189481e-05, + "loss": 1.0843, + "step": 235 + }, + { + "epoch": 0.13, + "learning_rate": 4.975884657667922e-05, + "loss": 1.0789, + "step": 240 + }, + { + "epoch": 0.14, + "learning_rate": 4.974871089215118e-05, + "loss": 1.0449, + "step": 245 + }, + { + "epoch": 0.14, + "learning_rate": 4.9738367643317405e-05, + "loss": 1.1053, + "step": 250 + }, + { + "epoch": 0.14, + "learning_rate": 4.9727816916925395e-05, + "loss": 1.0651, + "step": 255 + }, + { + "epoch": 0.14, + "learning_rate": 4.971705880146276e-05, + "loss": 1.0828, + "step": 260 + }, + { + "epoch": 0.15, + "learning_rate": 4.970609338715646e-05, + "loss": 1.0932, + "step": 265 + }, + { + "epoch": 0.15, + "learning_rate": 4.969492076597203e-05, + "loss": 1.0648, + "step": 270 + }, + { + "epoch": 0.15, + "learning_rate": 4.968354103161283e-05, + "loss": 1.0948, + "step": 275 + }, + { + "epoch": 0.15, + "learning_rate": 4.967195427951926e-05, + "loss": 1.0721, + "step": 280 + }, + { + "epoch": 0.16, + "learning_rate": 4.9660160606867936e-05, + "loss": 1.124, + "step": 285 + }, + { + "epoch": 0.16, + "learning_rate": 4.9648160112570896e-05, + "loss": 1.0963, + "step": 290 + }, + { + "epoch": 0.16, + "learning_rate": 4.9635952897274773e-05, + "loss": 1.1078, + "step": 295 + }, + { + "epoch": 0.17, + "learning_rate": 4.9623539063359925e-05, + "loss": 1.1059, + "step": 300 + }, + { + "epoch": 0.17, + "learning_rate": 4.961091871493962e-05, + "loss": 1.1032, + "step": 305 + }, + { + "epoch": 0.17, + "learning_rate": 4.959809195785912e-05, + "loss": 1.0595, + "step": 310 + }, + { + "epoch": 0.17, + "learning_rate": 4.958505889969481e-05, + "loss": 1.1096, + "step": 315 + }, + { + "epoch": 0.18, + "learning_rate": 4.957181964975329e-05, + "loss": 1.0589, + "step": 320 + }, + { + "epoch": 0.18, + "learning_rate": 4.955837431907049e-05, + "loss": 1.0608, + "step": 325 + }, + { + "epoch": 0.18, + "learning_rate": 4.954472302041069e-05, + "loss": 1.0819, + "step": 330 + }, + { + "epoch": 0.19, + "learning_rate": 4.9530865868265605e-05, + "loss": 1.0759, + "step": 335 + }, + { + "epoch": 0.19, + "learning_rate": 4.951680297885342e-05, + "loss": 1.0515, + "step": 340 + }, + { + "epoch": 0.19, + "learning_rate": 4.950253447011779e-05, + "loss": 1.0371, + "step": 345 + }, + { + "epoch": 0.19, + "learning_rate": 4.948806046172691e-05, + "loss": 1.0619, + "step": 350 + }, + { + "epoch": 0.2, + "learning_rate": 4.947338107507245e-05, + "loss": 1.0757, + "step": 355 + }, + { + "epoch": 0.2, + "learning_rate": 4.945849643326857e-05, + "loss": 1.0686, + "step": 360 + }, + { + "epoch": 0.2, + "learning_rate": 4.9443406661150874e-05, + "loss": 1.0809, + "step": 365 + }, + { + "epoch": 0.2, + "learning_rate": 4.942811188527537e-05, + "loss": 1.0704, + "step": 370 + }, + { + "epoch": 0.21, + "learning_rate": 4.941261223391742e-05, + "loss": 1.0655, + "step": 375 + }, + { + "epoch": 0.21, + "learning_rate": 4.939690783707063e-05, + "loss": 1.1182, + "step": 380 + }, + { + "epoch": 0.21, + "learning_rate": 4.938099882644578e-05, + "loss": 1.081, + "step": 385 + }, + { + "epoch": 0.22, + "learning_rate": 4.9364885335469734e-05, + "loss": 1.0792, + "step": 390 + }, + { + "epoch": 0.22, + "learning_rate": 4.93485674992843e-05, + "loss": 1.0244, + "step": 395 + }, + { + "epoch": 0.22, + "learning_rate": 4.933204545474511e-05, + "loss": 1.0531, + "step": 400 + }, + { + "epoch": 0.22, + "learning_rate": 4.9315319340420465e-05, + "loss": 1.048, + "step": 405 + }, + { + "epoch": 0.23, + "learning_rate": 4.929838929659015e-05, + "loss": 1.04, + "step": 410 + }, + { + "epoch": 0.23, + "learning_rate": 4.9281255465244314e-05, + "loss": 1.0686, + "step": 415 + }, + { + "epoch": 0.23, + "learning_rate": 4.926391799008223e-05, + "loss": 1.0933, + "step": 420 + }, + { + "epoch": 0.24, + "learning_rate": 4.924637701651111e-05, + "loss": 1.0313, + "step": 425 + }, + { + "epoch": 0.24, + "learning_rate": 4.9228632691644874e-05, + "loss": 1.0811, + "step": 430 + }, + { + "epoch": 0.24, + "learning_rate": 4.921068516430293e-05, + "loss": 1.0617, + "step": 435 + }, + { + "epoch": 0.24, + "learning_rate": 4.919253458500892e-05, + "loss": 1.0482, + "step": 440 + }, + { + "epoch": 0.25, + "learning_rate": 4.9174181105989445e-05, + "loss": 1.0681, + "step": 445 + }, + { + "epoch": 0.25, + "learning_rate": 4.9155624881172834e-05, + "loss": 1.0476, + "step": 450 + }, + { + "epoch": 0.25, + "learning_rate": 4.913686606618777e-05, + "loss": 1.0463, + "step": 455 + }, + { + "epoch": 0.25, + "learning_rate": 4.911790481836208e-05, + "loss": 1.0213, + "step": 460 + }, + { + "epoch": 0.26, + "learning_rate": 4.909874129672133e-05, + "loss": 0.9855, + "step": 465 + }, + { + "epoch": 0.26, + "learning_rate": 4.907937566198757e-05, + "loss": 1.045, + "step": 470 + }, + { + "epoch": 0.26, + "learning_rate": 4.9059808076577914e-05, + "loss": 1.0442, + "step": 475 + }, + { + "epoch": 0.27, + "learning_rate": 4.904003870460323e-05, + "loss": 1.0354, + "step": 480 + }, + { + "epoch": 0.27, + "learning_rate": 4.9020067711866735e-05, + "loss": 1.0331, + "step": 485 + }, + { + "epoch": 0.27, + "learning_rate": 4.899989526586261e-05, + "loss": 1.0585, + "step": 490 + }, + { + "epoch": 0.27, + "learning_rate": 4.8979521535774636e-05, + "loss": 1.0223, + "step": 495 + }, + { + "epoch": 0.28, + "learning_rate": 4.895894669247468e-05, + "loss": 1.0118, + "step": 500 + }, + { + "epoch": 0.28, + "learning_rate": 4.8938170908521356e-05, + "loss": 1.0508, + "step": 505 + }, + { + "epoch": 0.28, + "learning_rate": 4.8917194358158534e-05, + "loss": 1.0656, + "step": 510 + }, + { + "epoch": 0.28, + "learning_rate": 4.8896017217313886e-05, + "loss": 1.0655, + "step": 515 + }, + { + "epoch": 0.29, + "learning_rate": 4.887463966359741e-05, + "loss": 1.0833, + "step": 520 + }, + { + "epoch": 0.29, + "learning_rate": 4.8853061876299956e-05, + "loss": 1.068, + "step": 525 + }, + { + "epoch": 0.29, + "learning_rate": 4.8831284036391684e-05, + "loss": 1.0487, + "step": 530 + }, + { + "epoch": 0.3, + "learning_rate": 4.880930632652058e-05, + "loss": 1.059, + "step": 535 + }, + { + "epoch": 0.3, + "learning_rate": 4.878712893101092e-05, + "loss": 1.0408, + "step": 540 + }, + { + "epoch": 0.3, + "learning_rate": 4.876475203586171e-05, + "loss": 0.9976, + "step": 545 + }, + { + "epoch": 0.3, + "learning_rate": 4.874217582874514e-05, + "loss": 1.0358, + "step": 550 + }, + { + "epoch": 0.31, + "learning_rate": 4.8719400499005e-05, + "loss": 1.0073, + "step": 555 + }, + { + "epoch": 0.31, + "learning_rate": 4.869642623765509e-05, + "loss": 1.0742, + "step": 560 + }, + { + "epoch": 0.31, + "learning_rate": 4.867325323737765e-05, + "loss": 1.0463, + "step": 565 + }, + { + "epoch": 0.32, + "learning_rate": 4.864988169252168e-05, + "loss": 1.0242, + "step": 570 + }, + { + "epoch": 0.32, + "learning_rate": 4.8626311799101375e-05, + "loss": 1.0773, + "step": 575 + }, + { + "epoch": 0.32, + "learning_rate": 4.860254375479446e-05, + "loss": 1.0371, + "step": 580 + }, + { + "epoch": 0.32, + "learning_rate": 4.8578577758940504e-05, + "loss": 1.0399, + "step": 585 + }, + { + "epoch": 0.33, + "learning_rate": 4.855441401253928e-05, + "loss": 1.0411, + "step": 590 + }, + { + "epoch": 0.33, + "learning_rate": 4.8530052718249076e-05, + "loss": 1.0476, + "step": 595 + }, + { + "epoch": 0.33, + "learning_rate": 4.850549408038498e-05, + "loss": 1.0075, + "step": 600 + }, + { + "epoch": 0.33, + "learning_rate": 4.848073830491717e-05, + "loss": 1.0045, + "step": 605 + }, + { + "epoch": 0.34, + "learning_rate": 4.845578559946923e-05, + "loss": 1.0368, + "step": 610 + }, + { + "epoch": 0.34, + "learning_rate": 4.8430636173316306e-05, + "loss": 1.0156, + "step": 615 + }, + { + "epoch": 0.34, + "learning_rate": 4.840529023738348e-05, + "loss": 1.0334, + "step": 620 + }, + { + "epoch": 0.35, + "learning_rate": 4.837974800424389e-05, + "loss": 0.9994, + "step": 625 + }, + { + "epoch": 0.35, + "learning_rate": 4.8354009688117026e-05, + "loss": 1.0467, + "step": 630 + }, + { + "epoch": 0.35, + "learning_rate": 4.8328075504866874e-05, + "loss": 0.9968, + "step": 635 + }, + { + "epoch": 0.35, + "learning_rate": 4.8301945672000164e-05, + "loss": 1.041, + "step": 640 + }, + { + "epoch": 0.36, + "learning_rate": 4.8275620408664487e-05, + "loss": 0.9811, + "step": 645 + }, + { + "epoch": 0.36, + "learning_rate": 4.8249099935646494e-05, + "loss": 1.0235, + "step": 650 + }, + { + "epoch": 0.36, + "learning_rate": 4.822238447537003e-05, + "loss": 1.0152, + "step": 655 + }, + { + "epoch": 0.36, + "learning_rate": 4.819547425189429e-05, + "loss": 1.038, + "step": 660 + }, + { + "epoch": 0.37, + "learning_rate": 4.81683694909119e-05, + "loss": 1.0387, + "step": 665 + }, + { + "epoch": 0.37, + "learning_rate": 4.814107041974707e-05, + "loss": 1.019, + "step": 670 + }, + { + "epoch": 0.37, + "learning_rate": 4.811357726735366e-05, + "loss": 1.0403, + "step": 675 + }, + { + "epoch": 0.38, + "learning_rate": 4.808589026431324e-05, + "loss": 1.0828, + "step": 680 + }, + { + "epoch": 0.38, + "learning_rate": 4.805800964283322e-05, + "loss": 1.0128, + "step": 685 + }, + { + "epoch": 0.38, + "learning_rate": 4.802993563674483e-05, + "loss": 1.0502, + "step": 690 + }, + { + "epoch": 0.38, + "learning_rate": 4.80016684815012e-05, + "loss": 1.0169, + "step": 695 + }, + { + "epoch": 0.39, + "learning_rate": 4.7973208414175406e-05, + "loss": 1.0316, + "step": 700 + }, + { + "epoch": 0.39, + "learning_rate": 4.794455567345842e-05, + "loss": 1.0263, + "step": 705 + }, + { + "epoch": 0.39, + "learning_rate": 4.791571049965714e-05, + "loss": 1.0801, + "step": 710 + }, + { + "epoch": 0.4, + "learning_rate": 4.7886673134692404e-05, + "loss": 1.0575, + "step": 715 + }, + { + "epoch": 0.4, + "learning_rate": 4.7857443822096905e-05, + "loss": 0.9767, + "step": 720 + }, + { + "epoch": 0.4, + "learning_rate": 4.782802280701319e-05, + "loss": 0.9666, + "step": 725 + }, + { + "epoch": 0.4, + "learning_rate": 4.77984103361916e-05, + "loss": 0.9983, + "step": 730 + }, + { + "epoch": 0.41, + "learning_rate": 4.776860665798816e-05, + "loss": 1.0219, + "step": 735 + }, + { + "epoch": 0.41, + "learning_rate": 4.773861202236257e-05, + "loss": 0.9963, + "step": 740 + }, + { + "epoch": 0.41, + "learning_rate": 4.770842668087602e-05, + "loss": 1.0219, + "step": 745 + }, + { + "epoch": 0.41, + "learning_rate": 4.767805088668916e-05, + "loss": 1.0029, + "step": 750 + }, + { + "epoch": 0.42, + "learning_rate": 4.7647484894559936e-05, + "loss": 1.0077, + "step": 755 + }, + { + "epoch": 0.42, + "learning_rate": 4.7616728960841444e-05, + "loss": 0.9908, + "step": 760 + }, + { + "epoch": 0.42, + "learning_rate": 4.758578334347981e-05, + "loss": 1.0092, + "step": 765 + }, + { + "epoch": 0.43, + "learning_rate": 4.7554648302012015e-05, + "loss": 0.9874, + "step": 770 + }, + { + "epoch": 0.43, + "learning_rate": 4.7523324097563706e-05, + "loss": 1.0185, + "step": 775 + }, + { + "epoch": 0.43, + "learning_rate": 4.749181099284703e-05, + "loss": 1.0533, + "step": 780 + }, + { + "epoch": 0.43, + "learning_rate": 4.746010925215839e-05, + "loss": 1.0083, + "step": 785 + }, + { + "epoch": 0.44, + "learning_rate": 4.74282191413763e-05, + "loss": 1.0191, + "step": 790 + }, + { + "epoch": 0.44, + "learning_rate": 4.7396140927959045e-05, + "loss": 0.9786, + "step": 795 + }, + { + "epoch": 0.44, + "learning_rate": 4.7363874880942574e-05, + "loss": 1.0197, + "step": 800 + }, + { + "epoch": 0.45, + "learning_rate": 4.733142127093813e-05, + "loss": 1.0471, + "step": 805 + }, + { + "epoch": 0.45, + "learning_rate": 4.7298780370130014e-05, + "loss": 1.0134, + "step": 810 + }, + { + "epoch": 0.45, + "learning_rate": 4.726595245227336e-05, + "loss": 0.9801, + "step": 815 + }, + { + "epoch": 0.45, + "learning_rate": 4.723293779269173e-05, + "loss": 0.9944, + "step": 820 + }, + { + "epoch": 0.46, + "learning_rate": 4.7199736668274924e-05, + "loss": 1.0358, + "step": 825 + }, + { + "epoch": 0.46, + "learning_rate": 4.716634935747655e-05, + "loss": 0.9907, + "step": 830 + }, + { + "epoch": 0.46, + "learning_rate": 4.713277614031177e-05, + "loss": 1.0698, + "step": 835 + }, + { + "epoch": 0.46, + "learning_rate": 4.70990172983549e-05, + "loss": 1.0466, + "step": 840 + }, + { + "epoch": 0.47, + "learning_rate": 4.706507311473707e-05, + "loss": 0.9489, + "step": 845 + }, + { + "epoch": 0.47, + "learning_rate": 4.703094387414385e-05, + "loss": 0.9936, + "step": 850 + }, + { + "epoch": 0.47, + "learning_rate": 4.699662986281288e-05, + "loss": 0.9672, + "step": 855 + }, + { + "epoch": 0.48, + "learning_rate": 4.696213136853141e-05, + "loss": 1.0164, + "step": 860 + }, + { + "epoch": 0.48, + "learning_rate": 4.6927448680633954e-05, + "loss": 0.9817, + "step": 865 + }, + { + "epoch": 0.48, + "learning_rate": 4.689258208999983e-05, + "loss": 1.028, + "step": 870 + }, + { + "epoch": 0.48, + "learning_rate": 4.6857531889050716e-05, + "loss": 1.0239, + "step": 875 + }, + { + "epoch": 0.49, + "learning_rate": 4.682229837174821e-05, + "loss": 1.0378, + "step": 880 + }, + { + "epoch": 0.49, + "learning_rate": 4.678688183359135e-05, + "loss": 0.9908, + "step": 885 + }, + { + "epoch": 0.49, + "learning_rate": 4.675128257161418e-05, + "loss": 0.9384, + "step": 890 + }, + { + "epoch": 0.49, + "learning_rate": 4.671550088438319e-05, + "loss": 0.9481, + "step": 895 + }, + { + "epoch": 0.5, + "learning_rate": 4.6679537071994874e-05, + "loss": 0.9541, + "step": 900 + }, + { + "epoch": 0.5, + "learning_rate": 4.6643391436073165e-05, + "loss": 0.9787, + "step": 905 + }, + { + "epoch": 0.5, + "learning_rate": 4.660706427976693e-05, + "loss": 0.9897, + "step": 910 + }, + { + "epoch": 0.51, + "learning_rate": 4.657055590774745e-05, + "loss": 0.9947, + "step": 915 + }, + { + "epoch": 0.51, + "learning_rate": 4.6533866626205805e-05, + "loss": 0.9614, + "step": 920 + }, + { + "epoch": 0.51, + "learning_rate": 4.649699674285036e-05, + "loss": 1.0323, + "step": 925 + }, + { + "epoch": 0.51, + "learning_rate": 4.645994656690417e-05, + "loss": 1.0345, + "step": 930 + }, + { + "epoch": 0.52, + "learning_rate": 4.642271640910235e-05, + "loss": 1.0432, + "step": 935 + }, + { + "epoch": 0.52, + "learning_rate": 4.638530658168954e-05, + "loss": 1.0102, + "step": 940 + }, + { + "epoch": 0.52, + "learning_rate": 4.6347717398417203e-05, + "loss": 1.0361, + "step": 945 + }, + { + "epoch": 0.53, + "learning_rate": 4.6309949174541096e-05, + "loss": 0.9818, + "step": 950 + }, + { + "epoch": 0.53, + "learning_rate": 4.627200222681851e-05, + "loss": 1.0293, + "step": 955 + }, + { + "epoch": 0.53, + "learning_rate": 4.6233876873505694e-05, + "loss": 1.0125, + "step": 960 + }, + { + "epoch": 0.53, + "learning_rate": 4.619557343435516e-05, + "loss": 1.0039, + "step": 965 + }, + { + "epoch": 0.54, + "learning_rate": 4.615709223061302e-05, + "loss": 1.0146, + "step": 970 + }, + { + "epoch": 0.54, + "learning_rate": 4.611843358501624e-05, + "loss": 1.0352, + "step": 975 + }, + { + "epoch": 0.54, + "learning_rate": 4.6079597821789993e-05, + "loss": 1.0405, + "step": 980 + }, + { + "epoch": 0.54, + "learning_rate": 4.604058526664491e-05, + "loss": 0.9834, + "step": 985 + }, + { + "epoch": 0.55, + "learning_rate": 4.600139624677436e-05, + "loss": 1.0252, + "step": 990 + }, + { + "epoch": 0.55, + "learning_rate": 4.596203109085168e-05, + "loss": 1.0516, + "step": 995 + }, + { + "epoch": 0.55, + "learning_rate": 4.5922490129027464e-05, + "loss": 0.9762, + "step": 1000 + }, + { + "epoch": 0.56, + "learning_rate": 4.588277369292674e-05, + "loss": 0.9801, + "step": 1005 + }, + { + "epoch": 0.56, + "learning_rate": 4.5842882115646234e-05, + "loss": 1.0127, + "step": 1010 + }, + { + "epoch": 0.56, + "learning_rate": 4.580281573175157e-05, + "loss": 1.0415, + "step": 1015 + }, + { + "epoch": 0.56, + "learning_rate": 4.576257487727442e-05, + "loss": 0.9974, + "step": 1020 + }, + { + "epoch": 0.57, + "learning_rate": 4.572215988970974e-05, + "loss": 1.0097, + "step": 1025 + }, + { + "epoch": 0.57, + "learning_rate": 4.568157110801293e-05, + "loss": 1.0004, + "step": 1030 + }, + { + "epoch": 0.57, + "learning_rate": 4.5640808872596944e-05, + "loss": 0.9932, + "step": 1035 + }, + { + "epoch": 0.58, + "learning_rate": 4.5599873525329505e-05, + "loss": 0.9437, + "step": 1040 + }, + { + "epoch": 0.58, + "learning_rate": 4.555876540953019e-05, + "loss": 1.0004, + "step": 1045 + }, + { + "epoch": 0.58, + "learning_rate": 4.551748486996755e-05, + "loss": 0.9743, + "step": 1050 + }, + { + "epoch": 0.58, + "learning_rate": 4.547603225285626e-05, + "loss": 1.0303, + "step": 1055 + }, + { + "epoch": 0.59, + "learning_rate": 4.543440790585417e-05, + "loss": 1.0225, + "step": 1060 + }, + { + "epoch": 0.59, + "learning_rate": 4.539261217805939e-05, + "loss": 1.0102, + "step": 1065 + }, + { + "epoch": 0.59, + "learning_rate": 4.535064542000743e-05, + "loss": 1.0087, + "step": 1070 + }, + { + "epoch": 0.59, + "learning_rate": 4.5308507983668165e-05, + "loss": 0.952, + "step": 1075 + }, + { + "epoch": 0.6, + "learning_rate": 4.526620022244293e-05, + "loss": 0.9767, + "step": 1080 + }, + { + "epoch": 0.6, + "learning_rate": 4.522372249116158e-05, + "loss": 1.0049, + "step": 1085 + }, + { + "epoch": 0.6, + "learning_rate": 4.5181075146079456e-05, + "loss": 1.0397, + "step": 1090 + }, + { + "epoch": 0.61, + "learning_rate": 4.5138258544874455e-05, + "loss": 1.0167, + "step": 1095 + }, + { + "epoch": 0.61, + "learning_rate": 4.5095273046643985e-05, + "loss": 0.9966, + "step": 1100 + }, + { + "epoch": 0.61, + "learning_rate": 4.5052119011901986e-05, + "loss": 0.983, + "step": 1105 + }, + { + "epoch": 0.61, + "learning_rate": 4.500879680257587e-05, + "loss": 1.0301, + "step": 1110 + }, + { + "epoch": 0.62, + "learning_rate": 4.4965306782003535e-05, + "loss": 1.0371, + "step": 1115 + }, + { + "epoch": 0.62, + "learning_rate": 4.492164931493028e-05, + "loss": 1.0083, + "step": 1120 + }, + { + "epoch": 0.62, + "learning_rate": 4.487782476750575e-05, + "loss": 0.9728, + "step": 1125 + }, + { + "epoch": 0.62, + "learning_rate": 4.4833833507280884e-05, + "loss": 0.9892, + "step": 1130 + }, + { + "epoch": 0.63, + "learning_rate": 4.4789675903204805e-05, + "loss": 0.9847, + "step": 1135 + }, + { + "epoch": 0.63, + "learning_rate": 4.474535232562176e-05, + "loss": 1.0545, + "step": 1140 + }, + { + "epoch": 0.63, + "learning_rate": 4.470086314626797e-05, + "loss": 1.0204, + "step": 1145 + }, + { + "epoch": 0.64, + "learning_rate": 4.465620873826856e-05, + "loss": 0.9648, + "step": 1150 + }, + { + "epoch": 0.64, + "learning_rate": 4.46113894761344e-05, + "loss": 0.9545, + "step": 1155 + }, + { + "epoch": 0.64, + "learning_rate": 4.456640573575896e-05, + "loss": 1.0256, + "step": 1160 + }, + { + "epoch": 0.64, + "learning_rate": 4.4521257894415183e-05, + "loss": 0.99, + "step": 1165 + }, + { + "epoch": 0.65, + "learning_rate": 4.44759463307523e-05, + "loss": 0.9906, + "step": 1170 + }, + { + "epoch": 0.65, + "learning_rate": 4.443047142479266e-05, + "loss": 0.9876, + "step": 1175 + }, + { + "epoch": 0.65, + "learning_rate": 4.4384833557928553e-05, + "loss": 1.0495, + "step": 1180 + }, + { + "epoch": 0.66, + "learning_rate": 4.4339033112918966e-05, + "loss": 0.9869, + "step": 1185 + }, + { + "epoch": 0.66, + "learning_rate": 4.4293070473886456e-05, + "loss": 1.0299, + "step": 1190 + }, + { + "epoch": 0.66, + "learning_rate": 4.424694602631385e-05, + "loss": 1.0073, + "step": 1195 + }, + { + "epoch": 0.66, + "learning_rate": 4.420066015704105e-05, + "loss": 1.0023, + "step": 1200 + }, + { + "epoch": 0.67, + "learning_rate": 4.41542132542618e-05, + "loss": 0.9462, + "step": 1205 + }, + { + "epoch": 0.67, + "learning_rate": 4.410760570752037e-05, + "loss": 1.0116, + "step": 1210 + }, + { + "epoch": 0.67, + "learning_rate": 4.4060837907708375e-05, + "loss": 0.9652, + "step": 1215 + }, + { + "epoch": 0.67, + "learning_rate": 4.401391024706142e-05, + "loss": 1.0411, + "step": 1220 + }, + { + "epoch": 0.68, + "learning_rate": 4.396682311915586e-05, + "loss": 0.9691, + "step": 1225 + }, + { + "epoch": 0.68, + "learning_rate": 4.3919576918905495e-05, + "loss": 0.961, + "step": 1230 + }, + { + "epoch": 0.68, + "learning_rate": 4.387217204255819e-05, + "loss": 0.9602, + "step": 1235 + }, + { + "epoch": 0.69, + "learning_rate": 4.3824608887692666e-05, + "loss": 1.0197, + "step": 1240 + }, + { + "epoch": 0.69, + "learning_rate": 4.377688785321507e-05, + "loss": 0.9982, + "step": 1245 + }, + { + "epoch": 0.69, + "learning_rate": 4.372900933935569e-05, + "loss": 0.986, + "step": 1250 + }, + { + "epoch": 0.69, + "learning_rate": 4.368097374766556e-05, + "loss": 0.9744, + "step": 1255 + }, + { + "epoch": 0.7, + "learning_rate": 4.3632781481013105e-05, + "loss": 1.0078, + "step": 1260 + }, + { + "epoch": 0.7, + "learning_rate": 4.358443294358077e-05, + "loss": 0.9865, + "step": 1265 + }, + { + "epoch": 0.7, + "learning_rate": 4.35359285408616e-05, + "loss": 0.976, + "step": 1270 + }, + { + "epoch": 0.71, + "learning_rate": 4.348726867965591e-05, + "loss": 1.0141, + "step": 1275 + }, + { + "epoch": 0.71, + "learning_rate": 4.343845376806777e-05, + "loss": 0.952, + "step": 1280 + }, + { + "epoch": 0.71, + "learning_rate": 4.338948421550169e-05, + "loss": 1.0239, + "step": 1285 + }, + { + "epoch": 0.71, + "learning_rate": 4.334036043265909e-05, + "loss": 0.994, + "step": 1290 + }, + { + "epoch": 0.72, + "learning_rate": 4.329108283153492e-05, + "loss": 1.0505, + "step": 1295 + }, + { + "epoch": 0.72, + "learning_rate": 4.3241651825414195e-05, + "loss": 0.9919, + "step": 1300 + }, + { + "epoch": 0.72, + "learning_rate": 4.31920678288685e-05, + "loss": 1.0169, + "step": 1305 + }, + { + "epoch": 0.72, + "learning_rate": 4.3142331257752546e-05, + "loss": 0.9938, + "step": 1310 + }, + { + "epoch": 0.73, + "learning_rate": 4.309244252920064e-05, + "loss": 0.92, + "step": 1315 + }, + { + "epoch": 0.73, + "learning_rate": 4.304240206162326e-05, + "loss": 0.9537, + "step": 1320 + }, + { + "epoch": 0.73, + "learning_rate": 4.299221027470345e-05, + "loss": 0.996, + "step": 1325 + }, + { + "epoch": 0.74, + "learning_rate": 4.294186758939339e-05, + "loss": 0.9652, + "step": 1330 + }, + { + "epoch": 0.74, + "learning_rate": 4.2891374427910795e-05, + "loss": 1.0138, + "step": 1335 + }, + { + "epoch": 0.74, + "learning_rate": 4.284073121373544e-05, + "loss": 0.9267, + "step": 1340 + }, + { + "epoch": 0.74, + "learning_rate": 4.278993837160553e-05, + "loss": 0.9899, + "step": 1345 + }, + { + "epoch": 0.75, + "learning_rate": 4.273899632751422e-05, + "loss": 0.963, + "step": 1350 + }, + { + "epoch": 0.75, + "learning_rate": 4.2687905508705974e-05, + "loss": 1.0066, + "step": 1355 + }, + { + "epoch": 0.75, + "learning_rate": 4.263666634367303e-05, + "loss": 0.9939, + "step": 1360 + }, + { + "epoch": 0.75, + "learning_rate": 4.258527926215178e-05, + "loss": 0.9667, + "step": 1365 + }, + { + "epoch": 0.76, + "learning_rate": 4.253374469511917e-05, + "loss": 0.9999, + "step": 1370 + }, + { + "epoch": 0.76, + "learning_rate": 4.248206307478909e-05, + "loss": 0.9738, + "step": 1375 + }, + { + "epoch": 0.76, + "learning_rate": 4.243023483460875e-05, + "loss": 0.9829, + "step": 1380 + }, + { + "epoch": 0.77, + "learning_rate": 4.237826040925503e-05, + "loss": 1.0107, + "step": 1385 + }, + { + "epoch": 0.77, + "learning_rate": 4.232614023463088e-05, + "loss": 0.9992, + "step": 1390 + }, + { + "epoch": 0.77, + "learning_rate": 4.227387474786159e-05, + "loss": 0.9566, + "step": 1395 + }, + { + "epoch": 0.77, + "learning_rate": 4.222146438729119e-05, + "loss": 0.9722, + "step": 1400 + }, + { + "epoch": 0.78, + "learning_rate": 4.216890959247873e-05, + "loss": 0.988, + "step": 1405 + }, + { + "epoch": 0.78, + "learning_rate": 4.211621080419463e-05, + "loss": 1.0103, + "step": 1410 + }, + { + "epoch": 0.78, + "learning_rate": 4.206336846441695e-05, + "loss": 0.9805, + "step": 1415 + }, + { + "epoch": 0.79, + "learning_rate": 4.201038301632772e-05, + "loss": 1.0177, + "step": 1420 + }, + { + "epoch": 0.79, + "learning_rate": 4.195725490430917e-05, + "loss": 0.9938, + "step": 1425 + }, + { + "epoch": 0.79, + "learning_rate": 4.190398457394007e-05, + "loss": 1.0276, + "step": 1430 + }, + { + "epoch": 0.79, + "learning_rate": 4.1850572471991924e-05, + "loss": 0.9909, + "step": 1435 + }, + { + "epoch": 0.8, + "learning_rate": 4.1797019046425264e-05, + "loss": 0.9794, + "step": 1440 + }, + { + "epoch": 0.8, + "learning_rate": 4.1743324746385914e-05, + "loss": 1.0073, + "step": 1445 + }, + { + "epoch": 0.8, + "learning_rate": 4.1689490022201154e-05, + "loss": 1.0106, + "step": 1450 + }, + { + "epoch": 0.8, + "learning_rate": 4.163551532537601e-05, + "loss": 1.0241, + "step": 1455 + }, + { + "epoch": 0.81, + "learning_rate": 4.1581401108589425e-05, + "loss": 1.0178, + "step": 1460 + }, + { + "epoch": 0.81, + "learning_rate": 4.1527147825690495e-05, + "loss": 0.9426, + "step": 1465 + }, + { + "epoch": 0.81, + "learning_rate": 4.1472755931694626e-05, + "loss": 0.9642, + "step": 1470 + }, + { + "epoch": 0.82, + "learning_rate": 4.141822588277976e-05, + "loss": 0.9678, + "step": 1475 + }, + { + "epoch": 0.82, + "learning_rate": 4.136355813628251e-05, + "loss": 0.9654, + "step": 1480 + }, + { + "epoch": 0.82, + "learning_rate": 4.130875315069435e-05, + "loss": 0.9748, + "step": 1485 + }, + { + "epoch": 0.82, + "learning_rate": 4.125381138565775e-05, + "loss": 0.9321, + "step": 1490 + }, + { + "epoch": 0.83, + "learning_rate": 4.1198733301962346e-05, + "loss": 0.9885, + "step": 1495 + }, + { + "epoch": 0.83, + "learning_rate": 4.114351936154105e-05, + "loss": 0.9385, + "step": 1500 + }, + { + "epoch": 0.83, + "learning_rate": 4.108817002746619e-05, + "loss": 0.9362, + "step": 1505 + }, + { + "epoch": 0.83, + "learning_rate": 4.1032685763945625e-05, + "loss": 0.9764, + "step": 1510 + }, + { + "epoch": 0.84, + "learning_rate": 4.097706703631886e-05, + "loss": 0.961, + "step": 1515 + }, + { + "epoch": 0.84, + "learning_rate": 4.092131431105312e-05, + "loss": 0.9818, + "step": 1520 + }, + { + "epoch": 0.84, + "learning_rate": 4.086542805573945e-05, + "loss": 0.9855, + "step": 1525 + }, + { + "epoch": 0.85, + "learning_rate": 4.080940873908881e-05, + "loss": 0.9618, + "step": 1530 + }, + { + "epoch": 0.85, + "learning_rate": 4.07532568309281e-05, + "loss": 1.0102, + "step": 1535 + }, + { + "epoch": 0.85, + "learning_rate": 4.069697280219628e-05, + "loss": 0.9773, + "step": 1540 + }, + { + "epoch": 0.85, + "learning_rate": 4.0640557124940376e-05, + "loss": 0.9687, + "step": 1545 + }, + { + "epoch": 0.86, + "learning_rate": 4.058401027231152e-05, + "loss": 0.9818, + "step": 1550 + }, + { + "epoch": 0.86, + "learning_rate": 4.052733271856103e-05, + "loss": 0.9911, + "step": 1555 + }, + { + "epoch": 0.86, + "learning_rate": 4.0470524939036355e-05, + "loss": 0.9694, + "step": 1560 + }, + { + "epoch": 0.87, + "learning_rate": 4.0413587410177155e-05, + "loss": 0.9374, + "step": 1565 + }, + { + "epoch": 0.87, + "learning_rate": 4.035652060951128e-05, + "loss": 0.956, + "step": 1570 + }, + { + "epoch": 0.87, + "learning_rate": 4.0299325015650774e-05, + "loss": 0.9491, + "step": 1575 + }, + { + "epoch": 0.87, + "learning_rate": 4.024200110828783e-05, + "loss": 0.9569, + "step": 1580 + }, + { + "epoch": 0.88, + "learning_rate": 4.018454936819082e-05, + "loss": 0.9627, + "step": 1585 + }, + { + "epoch": 0.88, + "learning_rate": 4.012697027720018e-05, + "loss": 0.9922, + "step": 1590 + }, + { + "epoch": 0.88, + "learning_rate": 4.0069264318224506e-05, + "loss": 0.9703, + "step": 1595 + }, + { + "epoch": 0.88, + "learning_rate": 4.0011431975236337e-05, + "loss": 0.9436, + "step": 1600 + }, + { + "epoch": 0.89, + "learning_rate": 3.995347373326822e-05, + "loss": 0.961, + "step": 1605 + }, + { + "epoch": 0.89, + "learning_rate": 3.989539007840861e-05, + "loss": 0.9247, + "step": 1610 + }, + { + "epoch": 0.89, + "learning_rate": 3.983718149779775e-05, + "loss": 0.9537, + "step": 1615 + }, + { + "epoch": 0.9, + "learning_rate": 3.9778848479623656e-05, + "loss": 0.9415, + "step": 1620 + }, + { + "epoch": 0.9, + "learning_rate": 3.972039151311795e-05, + "loss": 0.9954, + "step": 1625 + }, + { + "epoch": 0.9, + "learning_rate": 3.966181108855183e-05, + "loss": 0.9451, + "step": 1630 + }, + { + "epoch": 0.9, + "learning_rate": 3.960310769723189e-05, + "loss": 0.9977, + "step": 1635 + }, + { + "epoch": 0.91, + "learning_rate": 3.9544281831496034e-05, + "loss": 0.9806, + "step": 1640 + }, + { + "epoch": 0.91, + "learning_rate": 3.9485333984709374e-05, + "loss": 0.9851, + "step": 1645 + }, + { + "epoch": 0.91, + "learning_rate": 3.942626465126001e-05, + "loss": 1.0089, + "step": 1650 + }, + { + "epoch": 0.92, + "learning_rate": 3.9367074326555e-05, + "loss": 0.9562, + "step": 1655 + }, + { + "epoch": 0.92, + "learning_rate": 3.930776350701609e-05, + "loss": 0.9892, + "step": 1660 + }, + { + "epoch": 0.92, + "learning_rate": 3.92483326900756e-05, + "loss": 0.9875, + "step": 1665 + }, + { + "epoch": 0.92, + "learning_rate": 3.91887823741723e-05, + "loss": 1.0012, + "step": 1670 + }, + { + "epoch": 0.93, + "learning_rate": 3.9129113058747136e-05, + "loss": 0.97, + "step": 1675 + }, + { + "epoch": 0.93, + "learning_rate": 3.9069325244239095e-05, + "loss": 0.9793, + "step": 1680 + }, + { + "epoch": 0.93, + "learning_rate": 3.900941943208103e-05, + "loss": 0.9864, + "step": 1685 + }, + { + "epoch": 0.93, + "learning_rate": 3.894939612469539e-05, + "loss": 0.9596, + "step": 1690 + }, + { + "epoch": 0.94, + "learning_rate": 3.888925582549006e-05, + "loss": 0.9679, + "step": 1695 + }, + { + "epoch": 0.94, + "learning_rate": 3.882899903885412e-05, + "loss": 0.9395, + "step": 1700 + }, + { + "epoch": 0.94, + "learning_rate": 3.876862627015361e-05, + "loss": 0.9774, + "step": 1705 + }, + { + "epoch": 0.95, + "learning_rate": 3.87081380257273e-05, + "loss": 0.9686, + "step": 1710 + }, + { + "epoch": 0.95, + "learning_rate": 3.864753481288244e-05, + "loss": 0.953, + "step": 1715 + }, + { + "epoch": 0.95, + "learning_rate": 3.8586817139890515e-05, + "loss": 0.9719, + "step": 1720 + }, + { + "epoch": 0.95, + "learning_rate": 3.852598551598294e-05, + "loss": 0.9748, + "step": 1725 + }, + { + "epoch": 0.96, + "learning_rate": 3.8465040451346874e-05, + "loss": 0.9741, + "step": 1730 + }, + { + "epoch": 0.96, + "learning_rate": 3.8403982457120836e-05, + "loss": 0.9747, + "step": 1735 + }, + { + "epoch": 0.96, + "learning_rate": 3.834281204539051e-05, + "loss": 0.9791, + "step": 1740 + }, + { + "epoch": 0.96, + "learning_rate": 3.828152972918438e-05, + "loss": 0.9704, + "step": 1745 + }, + { + "epoch": 0.97, + "learning_rate": 3.82201360224695e-05, + "loss": 0.9556, + "step": 1750 + }, + { + "epoch": 0.97, + "learning_rate": 3.815863144014711e-05, + "loss": 0.9552, + "step": 1755 + }, + { + "epoch": 0.97, + "learning_rate": 3.809701649804834e-05, + "loss": 0.9594, + "step": 1760 + }, + { + "epoch": 0.98, + "learning_rate": 3.8035291712929926e-05, + "loss": 0.959, + "step": 1765 + }, + { + "epoch": 0.98, + "learning_rate": 3.797345760246982e-05, + "loss": 1.0254, + "step": 1770 + }, + { + "epoch": 0.98, + "learning_rate": 3.791151468526289e-05, + "loss": 0.9925, + "step": 1775 + }, + { + "epoch": 0.98, + "learning_rate": 3.784946348081654e-05, + "loss": 0.9516, + "step": 1780 + }, + { + "epoch": 0.99, + "learning_rate": 3.7787304509546365e-05, + "loss": 0.954, + "step": 1785 + }, + { + "epoch": 0.99, + "learning_rate": 3.7725038292771774e-05, + "loss": 0.9465, + "step": 1790 + }, + { + "epoch": 0.99, + "learning_rate": 3.766266535271167e-05, + "loss": 0.9792, + "step": 1795 + }, + { + "epoch": 1.0, + "learning_rate": 3.760018621248e-05, + "loss": 0.964, + "step": 1800 + }, + { + "epoch": 1.0, + "learning_rate": 3.75376013960814e-05, + "loss": 0.9419, + "step": 1805 + }, + { + "epoch": 1.0, + "learning_rate": 3.747491142840681e-05, + "loss": 0.9818, + "step": 1810 + }, + { + "epoch": 1.0, + "learning_rate": 3.741211683522904e-05, + "loss": 0.9153, + "step": 1815 + }, + { + "epoch": 1.01, + "learning_rate": 3.734921814319841e-05, + "loss": 0.955, + "step": 1820 + }, + { + "epoch": 1.01, + "learning_rate": 3.728621587983828e-05, + "loss": 0.9467, + "step": 1825 + }, + { + "epoch": 1.01, + "learning_rate": 3.722311057354067e-05, + "loss": 0.9816, + "step": 1830 + }, + { + "epoch": 1.01, + "learning_rate": 3.715990275356178e-05, + "loss": 0.9727, + "step": 1835 + }, + { + "epoch": 1.02, + "learning_rate": 3.7096592950017617e-05, + "loss": 0.9822, + "step": 1840 + }, + { + "epoch": 1.02, + "learning_rate": 3.703318169387947e-05, + "loss": 0.942, + "step": 1845 + }, + { + "epoch": 1.02, + "learning_rate": 3.696966951696952e-05, + "loss": 0.9306, + "step": 1850 + }, + { + "epoch": 1.03, + "learning_rate": 3.690605695195637e-05, + "loss": 0.9654, + "step": 1855 + }, + { + "epoch": 1.03, + "learning_rate": 3.684234453235054e-05, + "loss": 0.9592, + "step": 1860 + }, + { + "epoch": 1.03, + "learning_rate": 3.677853279250003e-05, + "loss": 0.988, + "step": 1865 + }, + { + "epoch": 1.03, + "learning_rate": 3.671462226758583e-05, + "loss": 0.9462, + "step": 1870 + }, + { + "epoch": 1.04, + "learning_rate": 3.665061349361742e-05, + "loss": 0.9685, + "step": 1875 + }, + { + "epoch": 1.04, + "learning_rate": 3.658650700742828e-05, + "loss": 0.9772, + "step": 1880 + }, + { + "epoch": 1.04, + "learning_rate": 3.6522303346671404e-05, + "loss": 0.9482, + "step": 1885 + }, + { + "epoch": 1.05, + "learning_rate": 3.645800304981477e-05, + "loss": 1.0069, + "step": 1890 + }, + { + "epoch": 1.05, + "learning_rate": 3.639360665613683e-05, + "loss": 0.9003, + "step": 1895 + }, + { + "epoch": 1.05, + "learning_rate": 3.632911470572197e-05, + "loss": 0.9279, + "step": 1900 + }, + { + "epoch": 1.05, + "learning_rate": 3.626452773945603e-05, + "loss": 0.9237, + "step": 1905 + }, + { + "epoch": 1.06, + "learning_rate": 3.619984629902172e-05, + "loss": 1.0086, + "step": 1910 + }, + { + "epoch": 1.06, + "learning_rate": 3.613507092689409e-05, + "loss": 0.9625, + "step": 1915 + }, + { + "epoch": 1.06, + "learning_rate": 3.607020216633599e-05, + "loss": 0.9297, + "step": 1920 + }, + { + "epoch": 1.06, + "learning_rate": 3.60052405613935e-05, + "loss": 0.9718, + "step": 1925 + }, + { + "epoch": 1.07, + "learning_rate": 3.594018665689139e-05, + "loss": 0.9512, + "step": 1930 + }, + { + "epoch": 1.07, + "learning_rate": 3.5875040998428513e-05, + "loss": 0.9923, + "step": 1935 + }, + { + "epoch": 1.07, + "learning_rate": 3.5809804132373253e-05, + "loss": 0.9518, + "step": 1940 + }, + { + "epoch": 1.08, + "learning_rate": 3.574447660585897e-05, + "loss": 0.9324, + "step": 1945 + }, + { + "epoch": 1.08, + "learning_rate": 3.5679058966779344e-05, + "loss": 0.9724, + "step": 1950 + }, + { + "epoch": 1.08, + "learning_rate": 3.561355176378384e-05, + "loss": 0.978, + "step": 1955 + }, + { + "epoch": 1.08, + "learning_rate": 3.554795554627307e-05, + "loss": 0.9893, + "step": 1960 + }, + { + "epoch": 1.09, + "learning_rate": 3.548227086439422e-05, + "loss": 0.967, + "step": 1965 + }, + { + "epoch": 1.09, + "learning_rate": 3.541649826903639e-05, + "loss": 0.9529, + "step": 1970 + }, + { + "epoch": 1.09, + "learning_rate": 3.535063831182602e-05, + "loss": 0.9506, + "step": 1975 + }, + { + "epoch": 1.09, + "learning_rate": 3.528469154512224e-05, + "loss": 0.9525, + "step": 1980 + }, + { + "epoch": 1.1, + "learning_rate": 3.521865852201223e-05, + "loss": 0.9258, + "step": 1985 + }, + { + "epoch": 1.1, + "learning_rate": 3.5152539796306596e-05, + "loss": 0.9417, + "step": 1990 + }, + { + "epoch": 1.1, + "learning_rate": 3.508633592253472e-05, + "loss": 0.9465, + "step": 1995 + }, + { + "epoch": 1.11, + "learning_rate": 3.502004745594011e-05, + "loss": 0.9494, + "step": 2000 + }, + { + "epoch": 1.11, + "learning_rate": 3.4953674952475755e-05, + "loss": 0.9709, + "step": 2005 + }, + { + "epoch": 1.11, + "learning_rate": 3.488721896879943e-05, + "loss": 0.9581, + "step": 2010 + }, + { + "epoch": 1.11, + "learning_rate": 3.4820680062269074e-05, + "loss": 0.974, + "step": 2015 + }, + { + "epoch": 1.12, + "learning_rate": 3.4754058790938046e-05, + "loss": 0.9768, + "step": 2020 + }, + { + "epoch": 1.12, + "learning_rate": 3.468735571355055e-05, + "loss": 0.982, + "step": 2025 + }, + { + "epoch": 1.12, + "learning_rate": 3.4620571389536825e-05, + "loss": 0.984, + "step": 2030 + }, + { + "epoch": 1.13, + "learning_rate": 3.455370637900856e-05, + "loss": 0.9604, + "step": 2035 + }, + { + "epoch": 1.13, + "learning_rate": 3.448676124275414e-05, + "loss": 0.897, + "step": 2040 + }, + { + "epoch": 1.13, + "learning_rate": 3.4419736542233925e-05, + "loss": 0.9968, + "step": 2045 + }, + { + "epoch": 1.13, + "learning_rate": 3.4352632839575616e-05, + "loss": 0.9479, + "step": 2050 + }, + { + "epoch": 1.14, + "learning_rate": 3.428545069756946e-05, + "loss": 0.9724, + "step": 2055 + }, + { + "epoch": 1.14, + "learning_rate": 3.42181906796636e-05, + "loss": 0.9493, + "step": 2060 + }, + { + "epoch": 1.14, + "learning_rate": 3.415085334995927e-05, + "loss": 0.9348, + "step": 2065 + }, + { + "epoch": 1.14, + "learning_rate": 3.408343927320613e-05, + "loss": 0.9702, + "step": 2070 + }, + { + "epoch": 1.15, + "learning_rate": 3.401594901479753e-05, + "loss": 0.9089, + "step": 2075 + }, + { + "epoch": 1.15, + "learning_rate": 3.394838314076572e-05, + "loss": 0.9606, + "step": 2080 + }, + { + "epoch": 1.15, + "learning_rate": 3.3880742217777115e-05, + "loss": 0.9743, + "step": 2085 + }, + { + "epoch": 1.16, + "learning_rate": 3.381302681312759e-05, + "loss": 0.9469, + "step": 2090 + }, + { + "epoch": 1.16, + "learning_rate": 3.374523749473767e-05, + "loss": 0.949, + "step": 2095 + }, + { + "epoch": 1.16, + "learning_rate": 3.367737483114779e-05, + "loss": 0.9567, + "step": 2100 + }, + { + "epoch": 1.16, + "learning_rate": 3.360943939151351e-05, + "loss": 0.9718, + "step": 2105 + }, + { + "epoch": 1.17, + "learning_rate": 3.354143174560078e-05, + "loss": 0.9626, + "step": 2110 + }, + { + "epoch": 1.17, + "learning_rate": 3.3473352463781105e-05, + "loss": 0.9346, + "step": 2115 + }, + { + "epoch": 1.17, + "learning_rate": 3.340520211702681e-05, + "loss": 0.9208, + "step": 2120 + }, + { + "epoch": 1.18, + "learning_rate": 3.333698127690623e-05, + "loss": 0.9856, + "step": 2125 + }, + { + "epoch": 1.18, + "learning_rate": 3.326869051557891e-05, + "loss": 0.9049, + "step": 2130 + }, + { + "epoch": 1.18, + "learning_rate": 3.320033040579082e-05, + "loss": 0.9222, + "step": 2135 + }, + { + "epoch": 1.18, + "learning_rate": 3.3131901520869565e-05, + "loss": 0.9648, + "step": 2140 + }, + { + "epoch": 1.19, + "learning_rate": 3.306340443471951e-05, + "loss": 0.9538, + "step": 2145 + }, + { + "epoch": 1.19, + "learning_rate": 3.299483972181708e-05, + "loss": 0.9314, + "step": 2150 + }, + { + "epoch": 1.19, + "learning_rate": 3.292620795720583e-05, + "loss": 0.9576, + "step": 2155 + }, + { + "epoch": 1.19, + "learning_rate": 3.285750971649167e-05, + "loss": 0.9499, + "step": 2160 + }, + { + "epoch": 1.2, + "learning_rate": 3.278874557583807e-05, + "loss": 0.9568, + "step": 2165 + }, + { + "epoch": 1.2, + "learning_rate": 3.271991611196117e-05, + "loss": 0.9642, + "step": 2170 + }, + { + "epoch": 1.2, + "learning_rate": 3.265102190212497e-05, + "loss": 0.9526, + "step": 2175 + }, + { + "epoch": 1.21, + "learning_rate": 3.258206352413648e-05, + "loss": 0.933, + "step": 2180 + }, + { + "epoch": 1.21, + "learning_rate": 3.2513041556340887e-05, + "loss": 0.9683, + "step": 2185 + }, + { + "epoch": 1.21, + "learning_rate": 3.244395657761671e-05, + "loss": 0.9155, + "step": 2190 + }, + { + "epoch": 1.21, + "learning_rate": 3.2374809167370924e-05, + "loss": 0.9262, + "step": 2195 + }, + { + "epoch": 1.22, + "learning_rate": 3.230559990553409e-05, + "loss": 0.9778, + "step": 2200 + }, + { + "epoch": 1.22, + "learning_rate": 3.2236329372555544e-05, + "loss": 0.9577, + "step": 2205 + }, + { + "epoch": 1.22, + "learning_rate": 3.2166998149398465e-05, + "loss": 0.9286, + "step": 2210 + }, + { + "epoch": 1.22, + "learning_rate": 3.209760681753505e-05, + "loss": 0.9634, + "step": 2215 + }, + { + "epoch": 1.23, + "learning_rate": 3.2028155958941615e-05, + "loss": 0.9467, + "step": 2220 + }, + { + "epoch": 1.23, + "learning_rate": 3.195864615609373e-05, + "loss": 0.9543, + "step": 2225 + }, + { + "epoch": 1.23, + "learning_rate": 3.1889077991961304e-05, + "loss": 0.9914, + "step": 2230 + }, + { + "epoch": 1.24, + "learning_rate": 3.181945205000373e-05, + "loss": 0.9309, + "step": 2235 + }, + { + "epoch": 1.24, + "learning_rate": 3.1749768914164955e-05, + "loss": 0.9299, + "step": 2240 + }, + { + "epoch": 1.24, + "learning_rate": 3.168002916886864e-05, + "loss": 0.9462, + "step": 2245 + }, + { + "epoch": 1.24, + "learning_rate": 3.1610233399013194e-05, + "loss": 0.948, + "step": 2250 + }, + { + "epoch": 1.25, + "learning_rate": 3.15403821899669e-05, + "loss": 0.9335, + "step": 2255 + }, + { + "epoch": 1.25, + "learning_rate": 3.147047612756302e-05, + "loss": 0.952, + "step": 2260 + }, + { + "epoch": 1.25, + "learning_rate": 3.140051579809484e-05, + "loss": 0.9532, + "step": 2265 + }, + { + "epoch": 1.26, + "learning_rate": 3.133050178831079e-05, + "loss": 0.9853, + "step": 2270 + }, + { + "epoch": 1.26, + "learning_rate": 3.12604346854095e-05, + "loss": 0.8925, + "step": 2275 + }, + { + "epoch": 1.26, + "learning_rate": 3.119031507703491e-05, + "loss": 0.942, + "step": 2280 + }, + { + "epoch": 1.26, + "learning_rate": 3.112014355127129e-05, + "loss": 0.9132, + "step": 2285 + }, + { + "epoch": 1.27, + "learning_rate": 3.104992069663835e-05, + "loss": 0.9335, + "step": 2290 + }, + { + "epoch": 1.27, + "learning_rate": 3.0979647102086273e-05, + "loss": 0.9403, + "step": 2295 + }, + { + "epoch": 1.27, + "learning_rate": 3.090932335699081e-05, + "loss": 0.9246, + "step": 2300 + }, + { + "epoch": 1.27, + "learning_rate": 3.083895005114831e-05, + "loss": 0.912, + "step": 2305 + }, + { + "epoch": 1.28, + "learning_rate": 3.076852777477079e-05, + "loss": 0.9334, + "step": 2310 + }, + { + "epoch": 1.28, + "learning_rate": 3.069805711848096e-05, + "loss": 0.933, + "step": 2315 + }, + { + "epoch": 1.28, + "learning_rate": 3.062753867330729e-05, + "loss": 0.9348, + "step": 2320 + }, + { + "epoch": 1.29, + "learning_rate": 3.055697303067905e-05, + "loss": 0.9997, + "step": 2325 + }, + { + "epoch": 1.29, + "learning_rate": 3.048636078242137e-05, + "loss": 0.9196, + "step": 2330 + }, + { + "epoch": 1.29, + "learning_rate": 3.0415702520750235e-05, + "loss": 0.9735, + "step": 2335 + }, + { + "epoch": 1.29, + "learning_rate": 3.0344998838267525e-05, + "loss": 0.9395, + "step": 2340 + }, + { + "epoch": 1.3, + "learning_rate": 3.0274250327956093e-05, + "loss": 0.9455, + "step": 2345 + }, + { + "epoch": 1.3, + "learning_rate": 3.020345758317474e-05, + "loss": 0.968, + "step": 2350 + }, + { + "epoch": 1.3, + "learning_rate": 3.0132621197653245e-05, + "loss": 0.9403, + "step": 2355 + }, + { + "epoch": 1.3, + "learning_rate": 3.0061741765487418e-05, + "loss": 0.9267, + "step": 2360 + }, + { + "epoch": 1.31, + "learning_rate": 2.9990819881134073e-05, + "loss": 0.9734, + "step": 2365 + }, + { + "epoch": 1.31, + "learning_rate": 2.9919856139406093e-05, + "loss": 0.949, + "step": 2370 + }, + { + "epoch": 1.31, + "learning_rate": 2.9848851135467386e-05, + "loss": 0.9464, + "step": 2375 + }, + { + "epoch": 1.32, + "learning_rate": 2.977780546482794e-05, + "loss": 0.9709, + "step": 2380 + }, + { + "epoch": 1.32, + "learning_rate": 2.9706719723338795e-05, + "loss": 0.9202, + "step": 2385 + }, + { + "epoch": 1.32, + "learning_rate": 2.9635594507187074e-05, + "loss": 0.9625, + "step": 2390 + }, + { + "epoch": 1.32, + "learning_rate": 2.956443041289096e-05, + "loss": 0.9307, + "step": 2395 + }, + { + "epoch": 1.33, + "learning_rate": 2.9493228037294702e-05, + "loss": 0.907, + "step": 2400 + }, + { + "epoch": 1.33, + "learning_rate": 2.9421987977563613e-05, + "loss": 0.9487, + "step": 2405 + }, + { + "epoch": 1.33, + "learning_rate": 2.935071083117907e-05, + "loss": 0.9259, + "step": 2410 + }, + { + "epoch": 1.34, + "learning_rate": 2.9279397195933457e-05, + "loss": 0.9357, + "step": 2415 + }, + { + "epoch": 1.34, + "learning_rate": 2.920804766992521e-05, + "loss": 0.9149, + "step": 2420 + }, + { + "epoch": 1.34, + "learning_rate": 2.9136662851553787e-05, + "loss": 0.9664, + "step": 2425 + }, + { + "epoch": 1.34, + "learning_rate": 2.906524333951461e-05, + "loss": 0.973, + "step": 2430 + }, + { + "epoch": 1.35, + "learning_rate": 2.899378973279409e-05, + "loss": 0.9797, + "step": 2435 + }, + { + "epoch": 1.35, + "learning_rate": 2.892230263066459e-05, + "loss": 0.9604, + "step": 2440 + }, + { + "epoch": 1.35, + "learning_rate": 2.885078263267938e-05, + "loss": 0.9147, + "step": 2445 + }, + { + "epoch": 1.35, + "learning_rate": 2.8779230338667634e-05, + "loss": 0.9827, + "step": 2450 + }, + { + "epoch": 1.36, + "learning_rate": 2.870764634872939e-05, + "loss": 0.9362, + "step": 2455 + }, + { + "epoch": 1.36, + "learning_rate": 2.86360312632305e-05, + "loss": 0.9373, + "step": 2460 + }, + { + "epoch": 1.36, + "learning_rate": 2.8564385682797622e-05, + "loss": 0.955, + "step": 2465 + }, + { + "epoch": 1.37, + "learning_rate": 2.8492710208313177e-05, + "loss": 0.9108, + "step": 2470 + }, + { + "epoch": 1.37, + "learning_rate": 2.8421005440910303e-05, + "loss": 0.9259, + "step": 2475 + }, + { + "epoch": 1.37, + "learning_rate": 2.8349271981967797e-05, + "loss": 0.9333, + "step": 2480 + }, + { + "epoch": 1.37, + "learning_rate": 2.8277510433105102e-05, + "loss": 0.9437, + "step": 2485 + }, + { + "epoch": 1.38, + "learning_rate": 2.820572139617725e-05, + "loss": 0.9189, + "step": 2490 + }, + { + "epoch": 1.38, + "learning_rate": 2.8133905473269802e-05, + "loss": 0.9522, + "step": 2495 + }, + { + "epoch": 1.38, + "learning_rate": 2.8062063266693818e-05, + "loss": 0.9352, + "step": 2500 + }, + { + "epoch": 1.39, + "learning_rate": 2.7990195378980784e-05, + "loss": 0.9232, + "step": 2505 + }, + { + "epoch": 1.39, + "learning_rate": 2.7918302412877583e-05, + "loss": 0.8807, + "step": 2510 + }, + { + "epoch": 1.39, + "learning_rate": 2.7846384971341427e-05, + "loss": 0.9231, + "step": 2515 + }, + { + "epoch": 1.39, + "learning_rate": 2.7774443657534788e-05, + "loss": 0.9253, + "step": 2520 + }, + { + "epoch": 1.4, + "learning_rate": 2.770247907482036e-05, + "loss": 0.9646, + "step": 2525 + }, + { + "epoch": 1.4, + "learning_rate": 2.763049182675599e-05, + "loss": 0.9575, + "step": 2530 + }, + { + "epoch": 1.4, + "learning_rate": 2.7558482517089617e-05, + "loss": 0.9234, + "step": 2535 + }, + { + "epoch": 1.4, + "learning_rate": 2.748645174975421e-05, + "loss": 0.9215, + "step": 2540 + }, + { + "epoch": 1.41, + "learning_rate": 2.74144001288627e-05, + "loss": 0.959, + "step": 2545 + }, + { + "epoch": 1.41, + "learning_rate": 2.7342328258702894e-05, + "loss": 0.9509, + "step": 2550 + }, + { + "epoch": 1.41, + "learning_rate": 2.727023674373246e-05, + "loss": 0.8989, + "step": 2555 + }, + { + "epoch": 1.42, + "learning_rate": 2.7198126188573807e-05, + "loss": 0.9653, + "step": 2560 + }, + { + "epoch": 1.42, + "learning_rate": 2.7125997198009028e-05, + "loss": 0.9046, + "step": 2565 + }, + { + "epoch": 1.42, + "learning_rate": 2.7053850376974848e-05, + "loss": 0.9318, + "step": 2570 + }, + { + "epoch": 1.42, + "learning_rate": 2.6981686330557516e-05, + "loss": 0.9292, + "step": 2575 + }, + { + "epoch": 1.43, + "learning_rate": 2.6909505663987756e-05, + "loss": 0.99, + "step": 2580 + }, + { + "epoch": 1.43, + "learning_rate": 2.6837308982635678e-05, + "loss": 0.9737, + "step": 2585 + }, + { + "epoch": 1.43, + "learning_rate": 2.6765096892005726e-05, + "loss": 0.9649, + "step": 2590 + }, + { + "epoch": 1.43, + "learning_rate": 2.6692869997731545e-05, + "loss": 0.9687, + "step": 2595 + }, + { + "epoch": 1.44, + "learning_rate": 2.6620628905570964e-05, + "loss": 0.9708, + "step": 2600 + }, + { + "epoch": 1.44, + "learning_rate": 2.6548374221400884e-05, + "loss": 0.9498, + "step": 2605 + }, + { + "epoch": 1.44, + "learning_rate": 2.6476106551212188e-05, + "loss": 0.9512, + "step": 2610 + }, + { + "epoch": 1.45, + "learning_rate": 2.6403826501104682e-05, + "loss": 0.964, + "step": 2615 + }, + { + "epoch": 1.45, + "learning_rate": 2.6331534677281998e-05, + "loss": 0.9321, + "step": 2620 + }, + { + "epoch": 1.45, + "learning_rate": 2.6259231686046508e-05, + "loss": 0.9032, + "step": 2625 + }, + { + "epoch": 1.45, + "learning_rate": 2.6186918133794252e-05, + "loss": 0.9543, + "step": 2630 + }, + { + "epoch": 1.46, + "learning_rate": 2.6114594627009847e-05, + "loss": 0.9355, + "step": 2635 + }, + { + "epoch": 1.46, + "learning_rate": 2.604226177226137e-05, + "loss": 0.9111, + "step": 2640 + }, + { + "epoch": 1.46, + "learning_rate": 2.596992017619534e-05, + "loss": 0.9896, + "step": 2645 + }, + { + "epoch": 1.47, + "learning_rate": 2.589757044553155e-05, + "loss": 0.9715, + "step": 2650 + }, + { + "epoch": 1.47, + "learning_rate": 2.5825213187058045e-05, + "loss": 0.8911, + "step": 2655 + }, + { + "epoch": 1.47, + "learning_rate": 2.5752849007625986e-05, + "loss": 0.9446, + "step": 2660 + }, + { + "epoch": 1.47, + "learning_rate": 2.568047851414459e-05, + "loss": 0.9453, + "step": 2665 + }, + { + "epoch": 1.48, + "learning_rate": 2.5608102313576027e-05, + "loss": 0.9244, + "step": 2670 + }, + { + "epoch": 1.48, + "learning_rate": 2.553572101293033e-05, + "loss": 0.9478, + "step": 2675 + }, + { + "epoch": 1.48, + "learning_rate": 2.546333521926031e-05, + "loss": 0.9708, + "step": 2680 + }, + { + "epoch": 1.48, + "learning_rate": 2.5390945539656445e-05, + "loss": 0.9266, + "step": 2685 + }, + { + "epoch": 1.49, + "learning_rate": 2.5318552581241822e-05, + "loss": 0.9199, + "step": 2690 + }, + { + "epoch": 1.49, + "learning_rate": 2.524615695116702e-05, + "loss": 0.9666, + "step": 2695 + }, + { + "epoch": 1.49, + "learning_rate": 2.5173759256605027e-05, + "loss": 0.9247, + "step": 2700 + }, + { + "epoch": 1.5, + "learning_rate": 2.510136010474614e-05, + "loss": 0.9534, + "step": 2705 + }, + { + "epoch": 1.5, + "learning_rate": 2.5028960102792887e-05, + "loss": 0.9502, + "step": 2710 + }, + { + "epoch": 1.5, + "learning_rate": 2.4971039897207112e-05, + "loss": 0.9331, + "step": 2715 + }, + { + "epoch": 1.5, + "learning_rate": 2.4898639895253865e-05, + "loss": 0.9806, + "step": 2720 + }, + { + "epoch": 1.51, + "learning_rate": 2.4826240743394982e-05, + "loss": 0.9525, + "step": 2725 + }, + { + "epoch": 1.51, + "learning_rate": 2.4753843048832985e-05, + "loss": 0.937, + "step": 2730 + }, + { + "epoch": 1.51, + "learning_rate": 2.4681447418758187e-05, + "loss": 0.935, + "step": 2735 + }, + { + "epoch": 1.52, + "learning_rate": 2.460905446034356e-05, + "loss": 0.9662, + "step": 2740 + }, + { + "epoch": 1.52, + "learning_rate": 2.45366647807397e-05, + "loss": 0.9249, + "step": 2745 + }, + { + "epoch": 1.52, + "learning_rate": 2.446427898706967e-05, + "loss": 0.9673, + "step": 2750 + }, + { + "epoch": 1.52, + "learning_rate": 2.439189768642398e-05, + "loss": 0.9316, + "step": 2755 + }, + { + "epoch": 1.53, + "learning_rate": 2.431952148585541e-05, + "loss": 0.9297, + "step": 2760 + }, + { + "epoch": 1.53, + "learning_rate": 2.424715099237402e-05, + "loss": 0.9278, + "step": 2765 + }, + { + "epoch": 1.53, + "learning_rate": 2.4174786812941968e-05, + "loss": 0.8954, + "step": 2770 + }, + { + "epoch": 1.53, + "learning_rate": 2.4102429554468456e-05, + "loss": 0.9586, + "step": 2775 + }, + { + "epoch": 1.54, + "learning_rate": 2.4030079823804673e-05, + "loss": 0.9119, + "step": 2780 + }, + { + "epoch": 1.54, + "learning_rate": 2.395773822773863e-05, + "loss": 0.8949, + "step": 2785 + }, + { + "epoch": 1.54, + "learning_rate": 2.3885405372990166e-05, + "loss": 0.9506, + "step": 2790 + }, + { + "epoch": 1.55, + "learning_rate": 2.3813081866205754e-05, + "loss": 0.9087, + "step": 2795 + }, + { + "epoch": 1.55, + "learning_rate": 2.3740768313953494e-05, + "loss": 0.923, + "step": 2800 + }, + { + "epoch": 1.55, + "learning_rate": 2.3668465322718004e-05, + "loss": 1.0212, + "step": 2805 + }, + { + "epoch": 1.55, + "learning_rate": 2.359617349889532e-05, + "loss": 0.9182, + "step": 2810 + }, + { + "epoch": 1.56, + "learning_rate": 2.3523893448787818e-05, + "loss": 0.9316, + "step": 2815 + }, + { + "epoch": 1.56, + "learning_rate": 2.3451625778599122e-05, + "loss": 0.94, + "step": 2820 + }, + { + "epoch": 1.56, + "learning_rate": 2.3379371094429038e-05, + "loss": 0.9181, + "step": 2825 + }, + { + "epoch": 1.56, + "learning_rate": 2.3307130002268457e-05, + "loss": 0.937, + "step": 2830 + }, + { + "epoch": 1.57, + "learning_rate": 2.3234903107994287e-05, + "loss": 0.9026, + "step": 2835 + }, + { + "epoch": 1.57, + "learning_rate": 2.3162691017364317e-05, + "loss": 0.954, + "step": 2840 + }, + { + "epoch": 1.57, + "learning_rate": 2.3090494336012253e-05, + "loss": 0.9661, + "step": 2845 + }, + { + "epoch": 1.58, + "learning_rate": 2.3018313669442483e-05, + "loss": 0.9127, + "step": 2850 + }, + { + "epoch": 1.58, + "learning_rate": 2.2946149623025158e-05, + "loss": 0.9317, + "step": 2855 + }, + { + "epoch": 1.58, + "learning_rate": 2.2874002801990978e-05, + "loss": 0.9856, + "step": 2860 + }, + { + "epoch": 1.58, + "learning_rate": 2.28018738114262e-05, + "loss": 1.0021, + "step": 2865 + }, + { + "epoch": 1.59, + "learning_rate": 2.272976325626755e-05, + "loss": 0.9655, + "step": 2870 + }, + { + "epoch": 1.59, + "learning_rate": 2.265767174129711e-05, + "loss": 0.9619, + "step": 2875 + }, + { + "epoch": 1.59, + "learning_rate": 2.2585599871137313e-05, + "loss": 0.9383, + "step": 2880 + }, + { + "epoch": 1.6, + "learning_rate": 2.251354825024579e-05, + "loss": 0.9332, + "step": 2885 + }, + { + "epoch": 1.6, + "learning_rate": 2.244151748291039e-05, + "loss": 0.9544, + "step": 2890 + }, + { + "epoch": 1.6, + "learning_rate": 2.236950817324401e-05, + "loss": 0.9343, + "step": 2895 + }, + { + "epoch": 1.6, + "learning_rate": 2.2297520925179647e-05, + "loss": 0.9189, + "step": 2900 + }, + { + "epoch": 1.61, + "learning_rate": 2.222555634246521e-05, + "loss": 0.9341, + "step": 2905 + }, + { + "epoch": 1.61, + "learning_rate": 2.215361502865858e-05, + "loss": 0.9567, + "step": 2910 + }, + { + "epoch": 1.61, + "learning_rate": 2.2081697587122423e-05, + "loss": 0.9047, + "step": 2915 + }, + { + "epoch": 1.61, + "learning_rate": 2.200980462101922e-05, + "loss": 0.9126, + "step": 2920 + }, + { + "epoch": 1.62, + "learning_rate": 2.1937936733306195e-05, + "loss": 0.9523, + "step": 2925 + }, + { + "epoch": 1.62, + "learning_rate": 2.18660945267302e-05, + "loss": 0.8802, + "step": 2930 + }, + { + "epoch": 1.62, + "learning_rate": 2.179427860382276e-05, + "loss": 0.9197, + "step": 2935 + }, + { + "epoch": 1.63, + "learning_rate": 2.1722489566894903e-05, + "loss": 0.9255, + "step": 2940 + }, + { + "epoch": 1.63, + "learning_rate": 2.1650728018032206e-05, + "loss": 0.8921, + "step": 2945 + }, + { + "epoch": 1.63, + "learning_rate": 2.15789945590897e-05, + "loss": 0.9607, + "step": 2950 + }, + { + "epoch": 1.63, + "learning_rate": 2.150728979168683e-05, + "loss": 0.9755, + "step": 2955 + }, + { + "epoch": 1.64, + "learning_rate": 2.1435614317202384e-05, + "loss": 0.9943, + "step": 2960 + }, + { + "epoch": 1.64, + "learning_rate": 2.1363968736769508e-05, + "loss": 0.9463, + "step": 2965 + }, + { + "epoch": 1.64, + "learning_rate": 2.1292353651270617e-05, + "loss": 0.9107, + "step": 2970 + }, + { + "epoch": 1.65, + "learning_rate": 2.1220769661332365e-05, + "loss": 0.9311, + "step": 2975 + }, + { + "epoch": 1.65, + "learning_rate": 2.1149217367320622e-05, + "loss": 0.9459, + "step": 2980 + }, + { + "epoch": 1.65, + "learning_rate": 2.107769736933541e-05, + "loss": 0.9439, + "step": 2985 + }, + { + "epoch": 1.65, + "learning_rate": 2.100621026720591e-05, + "loss": 0.9719, + "step": 2990 + }, + { + "epoch": 1.66, + "learning_rate": 2.093475666048539e-05, + "loss": 0.9569, + "step": 2995 + }, + { + "epoch": 1.66, + "learning_rate": 2.0863337148446222e-05, + "loss": 0.9308, + "step": 3000 + }, + { + "epoch": 1.66, + "learning_rate": 2.07919523300748e-05, + "loss": 0.9269, + "step": 3005 + }, + { + "epoch": 1.66, + "learning_rate": 2.0720602804066552e-05, + "loss": 0.9358, + "step": 3010 + }, + { + "epoch": 1.67, + "learning_rate": 2.0649289168820943e-05, + "loss": 0.9291, + "step": 3015 + }, + { + "epoch": 1.67, + "learning_rate": 2.0578012022436386e-05, + "loss": 0.969, + "step": 3020 + }, + { + "epoch": 1.67, + "learning_rate": 2.0506771962705304e-05, + "loss": 0.9319, + "step": 3025 + }, + { + "epoch": 1.68, + "learning_rate": 2.0435569587109042e-05, + "loss": 0.9574, + "step": 3030 + }, + { + "epoch": 1.68, + "learning_rate": 2.036440549281293e-05, + "loss": 0.9654, + "step": 3035 + }, + { + "epoch": 1.68, + "learning_rate": 2.0293280276661204e-05, + "loss": 0.9742, + "step": 3040 + }, + { + "epoch": 1.68, + "learning_rate": 2.0222194535172067e-05, + "loss": 0.9014, + "step": 3045 + }, + { + "epoch": 1.69, + "learning_rate": 2.0151148864532623e-05, + "loss": 0.9246, + "step": 3050 + }, + { + "epoch": 1.69, + "learning_rate": 2.0080143860593913e-05, + "loss": 0.9522, + "step": 3055 + }, + { + "epoch": 1.69, + "learning_rate": 2.0009180118865933e-05, + "loss": 0.9567, + "step": 3060 + }, + { + "epoch": 1.69, + "learning_rate": 1.9938258234512588e-05, + "loss": 0.983, + "step": 3065 + }, + { + "epoch": 1.7, + "learning_rate": 1.9867378802346764e-05, + "loss": 0.8722, + "step": 3070 + }, + { + "epoch": 1.7, + "learning_rate": 1.979654241682527e-05, + "loss": 0.9122, + "step": 3075 + }, + { + "epoch": 1.7, + "learning_rate": 1.972574967204391e-05, + "loss": 0.9362, + "step": 3080 + }, + { + "epoch": 1.71, + "learning_rate": 1.9655001161732478e-05, + "loss": 0.8944, + "step": 3085 + }, + { + "epoch": 1.71, + "learning_rate": 1.9584297479249774e-05, + "loss": 0.9329, + "step": 3090 + }, + { + "epoch": 1.71, + "learning_rate": 1.9513639217578636e-05, + "loss": 0.9707, + "step": 3095 + }, + { + "epoch": 1.71, + "learning_rate": 1.9443026969320955e-05, + "loss": 0.9367, + "step": 3100 + }, + { + "epoch": 1.72, + "learning_rate": 1.937246132669272e-05, + "loss": 0.896, + "step": 3105 + }, + { + "epoch": 1.72, + "learning_rate": 1.9301942881519047e-05, + "loss": 0.97, + "step": 3110 + }, + { + "epoch": 1.72, + "learning_rate": 1.9231472225229216e-05, + "loss": 0.9638, + "step": 3115 + }, + { + "epoch": 1.73, + "learning_rate": 1.9161049948851684e-05, + "loss": 0.9561, + "step": 3120 + }, + { + "epoch": 1.73, + "learning_rate": 1.9090676643009193e-05, + "loss": 0.9734, + "step": 3125 + }, + { + "epoch": 1.73, + "learning_rate": 1.902035289791373e-05, + "loss": 0.9651, + "step": 3130 + }, + { + "epoch": 1.73, + "learning_rate": 1.8950079303361658e-05, + "loss": 0.9489, + "step": 3135 + }, + { + "epoch": 1.74, + "learning_rate": 1.8879856448728723e-05, + "loss": 0.9893, + "step": 3140 + }, + { + "epoch": 1.74, + "learning_rate": 1.8809684922965097e-05, + "loss": 0.9549, + "step": 3145 + }, + { + "epoch": 1.74, + "learning_rate": 1.8739565314590507e-05, + "loss": 0.9196, + "step": 3150 + }, + { + "epoch": 1.74, + "learning_rate": 1.8669498211689216e-05, + "loss": 0.9568, + "step": 3155 + }, + { + "epoch": 1.75, + "learning_rate": 1.859948420190517e-05, + "loss": 0.904, + "step": 3160 + }, + { + "epoch": 1.75, + "learning_rate": 1.852952387243698e-05, + "loss": 0.9375, + "step": 3165 + }, + { + "epoch": 1.75, + "learning_rate": 1.8459617810033096e-05, + "loss": 0.9048, + "step": 3170 + }, + { + "epoch": 1.76, + "learning_rate": 1.83897666009868e-05, + "loss": 0.9514, + "step": 3175 + }, + { + "epoch": 1.76, + "learning_rate": 1.8319970831131363e-05, + "loss": 0.8855, + "step": 3180 + }, + { + "epoch": 1.76, + "learning_rate": 1.825023108583505e-05, + "loss": 0.9119, + "step": 3185 + }, + { + "epoch": 1.76, + "learning_rate": 1.818054794999628e-05, + "loss": 0.9733, + "step": 3190 + }, + { + "epoch": 1.77, + "learning_rate": 1.8110922008038705e-05, + "loss": 0.9597, + "step": 3195 + }, + { + "epoch": 1.77, + "learning_rate": 1.8041353843906275e-05, + "loss": 0.8952, + "step": 3200 + }, + { + "epoch": 1.77, + "learning_rate": 1.797184404105839e-05, + "loss": 0.8934, + "step": 3205 + }, + { + "epoch": 1.77, + "learning_rate": 1.7902393182464955e-05, + "loss": 0.9299, + "step": 3210 + }, + { + "epoch": 1.78, + "learning_rate": 1.7833001850601544e-05, + "loss": 0.9247, + "step": 3215 + }, + { + "epoch": 1.78, + "learning_rate": 1.7763670627444465e-05, + "loss": 0.9672, + "step": 3220 + }, + { + "epoch": 1.78, + "learning_rate": 1.7694400094465913e-05, + "loss": 0.9451, + "step": 3225 + }, + { + "epoch": 1.79, + "learning_rate": 1.7625190832629085e-05, + "loss": 0.9294, + "step": 3230 + }, + { + "epoch": 1.79, + "learning_rate": 1.7556043422383293e-05, + "loss": 0.9145, + "step": 3235 + }, + { + "epoch": 1.79, + "learning_rate": 1.7486958443659112e-05, + "loss": 0.9508, + "step": 3240 + }, + { + "epoch": 1.79, + "learning_rate": 1.7417936475863526e-05, + "loss": 0.8725, + "step": 3245 + }, + { + "epoch": 1.8, + "learning_rate": 1.7348978097875036e-05, + "loss": 0.9195, + "step": 3250 + }, + { + "epoch": 1.8, + "learning_rate": 1.728008388803883e-05, + "loss": 0.933, + "step": 3255 + }, + { + "epoch": 1.8, + "learning_rate": 1.7211254424161933e-05, + "loss": 0.9747, + "step": 3260 + }, + { + "epoch": 1.81, + "learning_rate": 1.7142490283508324e-05, + "loss": 0.9168, + "step": 3265 + }, + { + "epoch": 1.81, + "learning_rate": 1.707379204279418e-05, + "loss": 0.9844, + "step": 3270 + }, + { + "epoch": 1.81, + "learning_rate": 1.700516027818293e-05, + "loss": 0.9071, + "step": 3275 + }, + { + "epoch": 1.81, + "learning_rate": 1.6936595565280488e-05, + "loss": 0.9311, + "step": 3280 + }, + { + "epoch": 1.82, + "learning_rate": 1.686809847913045e-05, + "loss": 0.9419, + "step": 3285 + }, + { + "epoch": 1.82, + "learning_rate": 1.679966959420918e-05, + "loss": 0.9679, + "step": 3290 + }, + { + "epoch": 1.82, + "learning_rate": 1.67313094844211e-05, + "loss": 0.9601, + "step": 3295 + }, + { + "epoch": 1.82, + "learning_rate": 1.6663018723093774e-05, + "loss": 0.9022, + "step": 3300 + }, + { + "epoch": 1.83, + "learning_rate": 1.6594797882973196e-05, + "loss": 0.8925, + "step": 3305 + }, + { + "epoch": 1.83, + "learning_rate": 1.6526647536218894e-05, + "loss": 0.9238, + "step": 3310 + }, + { + "epoch": 1.83, + "learning_rate": 1.6458568254399225e-05, + "loss": 0.9632, + "step": 3315 + }, + { + "epoch": 1.84, + "learning_rate": 1.6390560608486496e-05, + "loss": 0.9164, + "step": 3320 + }, + { + "epoch": 1.84, + "learning_rate": 1.6322625168852217e-05, + "loss": 0.9505, + "step": 3325 + }, + { + "epoch": 1.84, + "learning_rate": 1.6254762505262338e-05, + "loss": 0.9622, + "step": 3330 + }, + { + "epoch": 1.84, + "learning_rate": 1.618697318687241e-05, + "loss": 0.9204, + "step": 3335 + }, + { + "epoch": 1.85, + "learning_rate": 1.6119257782222895e-05, + "loss": 0.9504, + "step": 3340 + }, + { + "epoch": 1.85, + "learning_rate": 1.6051616859234285e-05, + "loss": 0.9384, + "step": 3345 + }, + { + "epoch": 1.85, + "learning_rate": 1.5984050985202474e-05, + "loss": 0.9374, + "step": 3350 + }, + { + "epoch": 1.86, + "learning_rate": 1.591656072679387e-05, + "loss": 0.9937, + "step": 3355 + }, + { + "epoch": 1.86, + "learning_rate": 1.5849146650040737e-05, + "loss": 0.9587, + "step": 3360 + }, + { + "epoch": 1.86, + "learning_rate": 1.5781809320336412e-05, + "loss": 0.9312, + "step": 3365 + }, + { + "epoch": 1.86, + "learning_rate": 1.5714549302430536e-05, + "loss": 0.9343, + "step": 3370 + }, + { + "epoch": 1.87, + "learning_rate": 1.5647367160424393e-05, + "loss": 0.9419, + "step": 3375 + }, + { + "epoch": 1.87, + "learning_rate": 1.558026345776608e-05, + "loss": 0.9371, + "step": 3380 + }, + { + "epoch": 1.87, + "learning_rate": 1.551323875724587e-05, + "loss": 0.913, + "step": 3385 + }, + { + "epoch": 1.87, + "learning_rate": 1.5446293620991437e-05, + "loss": 0.9195, + "step": 3390 + }, + { + "epoch": 1.88, + "learning_rate": 1.5379428610463174e-05, + "loss": 0.9164, + "step": 3395 + }, + { + "epoch": 1.88, + "learning_rate": 1.531264428644945e-05, + "loss": 0.9256, + "step": 3400 + }, + { + "epoch": 1.88, + "learning_rate": 1.5245941209061953e-05, + "loss": 0.9658, + "step": 3405 + }, + { + "epoch": 1.89, + "learning_rate": 1.517931993773094e-05, + "loss": 0.9397, + "step": 3410 + }, + { + "epoch": 1.89, + "learning_rate": 1.5112781031200569e-05, + "loss": 0.915, + "step": 3415 + }, + { + "epoch": 1.89, + "learning_rate": 1.5046325047524251e-05, + "loss": 0.9116, + "step": 3420 + }, + { + "epoch": 1.89, + "learning_rate": 1.4979952544059888e-05, + "loss": 0.9192, + "step": 3425 + }, + { + "epoch": 1.9, + "learning_rate": 1.4913664077465289e-05, + "loss": 0.924, + "step": 3430 + }, + { + "epoch": 1.9, + "learning_rate": 1.4847460203693408e-05, + "loss": 0.9579, + "step": 3435 + }, + { + "epoch": 1.9, + "learning_rate": 1.4781341477987776e-05, + "loss": 0.9816, + "step": 3440 + }, + { + "epoch": 1.9, + "learning_rate": 1.4715308454877758e-05, + "loss": 0.9412, + "step": 3445 + }, + { + "epoch": 1.91, + "learning_rate": 1.4649361688173979e-05, + "loss": 0.9413, + "step": 3450 + }, + { + "epoch": 1.91, + "learning_rate": 1.458350173096361e-05, + "loss": 0.9425, + "step": 3455 + }, + { + "epoch": 1.91, + "learning_rate": 1.4517729135605795e-05, + "loss": 0.9132, + "step": 3460 + }, + { + "epoch": 1.92, + "learning_rate": 1.4452044453726942e-05, + "loss": 0.9455, + "step": 3465 + }, + { + "epoch": 1.92, + "learning_rate": 1.4386448236216174e-05, + "loss": 0.9543, + "step": 3470 + }, + { + "epoch": 1.92, + "learning_rate": 1.4320941033220667e-05, + "loss": 0.9118, + "step": 3475 + }, + { + "epoch": 1.92, + "learning_rate": 1.4255523394141041e-05, + "loss": 0.9425, + "step": 3480 + }, + { + "epoch": 1.93, + "learning_rate": 1.4190195867626749e-05, + "loss": 0.8958, + "step": 3485 + }, + { + "epoch": 1.93, + "learning_rate": 1.4124959001571497e-05, + "loss": 0.944, + "step": 3490 + }, + { + "epoch": 1.93, + "learning_rate": 1.4059813343108616e-05, + "loss": 0.9611, + "step": 3495 + }, + { + "epoch": 1.94, + "learning_rate": 1.3994759438606501e-05, + "loss": 0.9448, + "step": 3500 + }, + { + "epoch": 1.94, + "learning_rate": 1.3929797833664013e-05, + "loss": 0.9659, + "step": 3505 + }, + { + "epoch": 1.94, + "learning_rate": 1.3864929073105922e-05, + "loss": 0.9178, + "step": 3510 + }, + { + "epoch": 1.94, + "learning_rate": 1.3800153700978282e-05, + "loss": 0.8965, + "step": 3515 + }, + { + "epoch": 1.95, + "learning_rate": 1.373547226054398e-05, + "loss": 0.9198, + "step": 3520 + }, + { + "epoch": 1.95, + "learning_rate": 1.367088529427803e-05, + "loss": 0.9398, + "step": 3525 + }, + { + "epoch": 1.95, + "learning_rate": 1.3606393343863182e-05, + "loss": 0.9423, + "step": 3530 + }, + { + "epoch": 1.95, + "learning_rate": 1.3541996950185227e-05, + "loss": 0.9592, + "step": 3535 + }, + { + "epoch": 1.96, + "learning_rate": 1.3477696653328598e-05, + "loss": 0.9489, + "step": 3540 + }, + { + "epoch": 1.96, + "learning_rate": 1.3413492992571713e-05, + "loss": 0.963, + "step": 3545 + }, + { + "epoch": 1.96, + "learning_rate": 1.3349386506382586e-05, + "loss": 0.9449, + "step": 3550 + }, + { + "epoch": 1.97, + "learning_rate": 1.3285377732414172e-05, + "loss": 0.9043, + "step": 3555 + }, + { + "epoch": 1.97, + "learning_rate": 1.3221467207499972e-05, + "loss": 0.9362, + "step": 3560 + }, + { + "epoch": 1.97, + "learning_rate": 1.3157655467649463e-05, + "loss": 0.945, + "step": 3565 + }, + { + "epoch": 1.97, + "learning_rate": 1.3093943048043634e-05, + "loss": 0.8693, + "step": 3570 + }, + { + "epoch": 1.98, + "learning_rate": 1.3030330483030479e-05, + "loss": 0.9659, + "step": 3575 + }, + { + "epoch": 1.98, + "learning_rate": 1.2966818306120535e-05, + "loss": 0.9036, + "step": 3580 + }, + { + "epoch": 1.98, + "learning_rate": 1.2903407049982386e-05, + "loss": 0.9282, + "step": 3585 + }, + { + "epoch": 1.99, + "learning_rate": 1.2840097246438215e-05, + "loss": 0.8747, + "step": 3590 + }, + { + "epoch": 1.99, + "learning_rate": 1.277688942645934e-05, + "loss": 0.936, + "step": 3595 + }, + { + "epoch": 1.99, + "learning_rate": 1.2713784120161725e-05, + "loss": 0.8945, + "step": 3600 + }, + { + "epoch": 1.99, + "learning_rate": 1.2650781856801598e-05, + "loss": 0.9273, + "step": 3605 + }, + { + "epoch": 2.0, + "learning_rate": 1.258788316477097e-05, + "loss": 0.9632, + "step": 3610 + }, + { + "epoch": 2.0, + "learning_rate": 1.2525088571593202e-05, + "loss": 0.8641, + "step": 3615 + }, + { + "epoch": 2.0, + "learning_rate": 1.2462398603918607e-05, + "loss": 0.9314, + "step": 3620 + }, + { + "epoch": 2.0, + "learning_rate": 1.2399813787520006e-05, + "loss": 0.9169, + "step": 3625 + }, + { + "epoch": 2.01, + "learning_rate": 1.2337334647288334e-05, + "loss": 0.9089, + "step": 3630 + }, + { + "epoch": 2.01, + "learning_rate": 1.2274961707228228e-05, + "loss": 0.9039, + "step": 3635 + }, + { + "epoch": 2.01, + "learning_rate": 1.2212695490453646e-05, + "loss": 0.9378, + "step": 3640 + }, + { + "epoch": 2.02, + "learning_rate": 1.2150536519183475e-05, + "loss": 0.9297, + "step": 3645 + }, + { + "epoch": 2.02, + "learning_rate": 1.2088485314737108e-05, + "loss": 0.9488, + "step": 3650 + }, + { + "epoch": 2.02, + "learning_rate": 1.2026542397530186e-05, + "loss": 0.9625, + "step": 3655 + }, + { + "epoch": 2.02, + "learning_rate": 1.1964708287070073e-05, + "loss": 0.8874, + "step": 3660 + }, + { + "epoch": 2.03, + "learning_rate": 1.1902983501951666e-05, + "loss": 0.9224, + "step": 3665 + }, + { + "epoch": 2.03, + "learning_rate": 1.1841368559852892e-05, + "loss": 0.9442, + "step": 3670 + }, + { + "epoch": 2.03, + "learning_rate": 1.17798639775305e-05, + "loss": 0.8688, + "step": 3675 + }, + { + "epoch": 2.03, + "learning_rate": 1.1718470270815608e-05, + "loss": 0.8912, + "step": 3680 + }, + { + "epoch": 2.04, + "learning_rate": 1.1657187954609496e-05, + "loss": 0.9131, + "step": 3685 + }, + { + "epoch": 2.04, + "learning_rate": 1.1596017542879168e-05, + "loss": 0.9827, + "step": 3690 + }, + { + "epoch": 2.04, + "learning_rate": 1.1534959548653132e-05, + "loss": 0.8588, + "step": 3695 + }, + { + "epoch": 2.05, + "learning_rate": 1.147401448401706e-05, + "loss": 0.9359, + "step": 3700 + }, + { + "epoch": 2.05, + "learning_rate": 1.1413182860109491e-05, + "loss": 0.9375, + "step": 3705 + }, + { + "epoch": 2.05, + "learning_rate": 1.1352465187117562e-05, + "loss": 0.9242, + "step": 3710 + }, + { + "epoch": 2.05, + "learning_rate": 1.1291861974272703e-05, + "loss": 0.9132, + "step": 3715 + }, + { + "epoch": 2.06, + "learning_rate": 1.1231373729846393e-05, + "loss": 0.9142, + "step": 3720 + }, + { + "epoch": 2.06, + "learning_rate": 1.1171000961145883e-05, + "loss": 0.9132, + "step": 3725 + }, + { + "epoch": 2.06, + "learning_rate": 1.1110744174509952e-05, + "loss": 0.9603, + "step": 3730 + }, + { + "epoch": 2.07, + "learning_rate": 1.1050603875304622e-05, + "loss": 0.9442, + "step": 3735 + }, + { + "epoch": 2.07, + "learning_rate": 1.0990580567918979e-05, + "loss": 0.9392, + "step": 3740 + }, + { + "epoch": 2.07, + "learning_rate": 1.0930674755760908e-05, + "loss": 0.9203, + "step": 3745 + }, + { + "epoch": 2.07, + "learning_rate": 1.0870886941252872e-05, + "loss": 0.9468, + "step": 3750 + }, + { + "epoch": 2.08, + "learning_rate": 1.0811217625827705e-05, + "loss": 0.9605, + "step": 3755 + }, + { + "epoch": 2.08, + "learning_rate": 1.0751667309924399e-05, + "loss": 0.9166, + "step": 3760 + }, + { + "epoch": 2.08, + "learning_rate": 1.0692236492983918e-05, + "loss": 0.939, + "step": 3765 + }, + { + "epoch": 2.08, + "learning_rate": 1.0632925673445001e-05, + "loss": 0.9409, + "step": 3770 + }, + { + "epoch": 2.09, + "learning_rate": 1.0573735348739987e-05, + "loss": 0.9523, + "step": 3775 + }, + { + "epoch": 2.09, + "learning_rate": 1.0514666015290645e-05, + "loss": 0.8867, + "step": 3780 + }, + { + "epoch": 2.09, + "learning_rate": 1.0455718168503967e-05, + "loss": 0.9816, + "step": 3785 + }, + { + "epoch": 2.1, + "learning_rate": 1.0396892302768127e-05, + "loss": 0.9465, + "step": 3790 + }, + { + "epoch": 2.1, + "learning_rate": 1.033818891144817e-05, + "loss": 0.9352, + "step": 3795 + }, + { + "epoch": 2.1, + "learning_rate": 1.0279608486882054e-05, + "loss": 0.8877, + "step": 3800 + }, + { + "epoch": 2.1, + "learning_rate": 1.0221151520376343e-05, + "loss": 0.9098, + "step": 3805 + }, + { + "epoch": 2.11, + "learning_rate": 1.0162818502202251e-05, + "loss": 0.8687, + "step": 3810 + }, + { + "epoch": 2.11, + "learning_rate": 1.0104609921591387e-05, + "loss": 0.9256, + "step": 3815 + }, + { + "epoch": 2.11, + "learning_rate": 1.0046526266731782e-05, + "loss": 0.92, + "step": 3820 + }, + { + "epoch": 2.12, + "learning_rate": 9.988568024763673e-06, + "loss": 0.8813, + "step": 3825 + }, + { + "epoch": 2.12, + "learning_rate": 9.930735681775505e-06, + "loss": 0.891, + "step": 3830 + }, + { + "epoch": 2.12, + "learning_rate": 9.87302972279982e-06, + "loss": 0.9479, + "step": 3835 + }, + { + "epoch": 2.12, + "learning_rate": 9.815450631809191e-06, + "loss": 0.8963, + "step": 3840 + }, + { + "epoch": 2.13, + "learning_rate": 9.757998891712172e-06, + "loss": 0.9178, + "step": 3845 + }, + { + "epoch": 2.13, + "learning_rate": 9.700674984349228e-06, + "loss": 0.9072, + "step": 3850 + }, + { + "epoch": 2.13, + "learning_rate": 9.643479390488717e-06, + "loss": 0.8879, + "step": 3855 + }, + { + "epoch": 2.13, + "learning_rate": 9.586412589822846e-06, + "loss": 0.9484, + "step": 3860 + }, + { + "epoch": 2.14, + "learning_rate": 9.529475060963649e-06, + "loss": 0.9173, + "step": 3865 + }, + { + "epoch": 2.14, + "learning_rate": 9.472667281438982e-06, + "loss": 0.9318, + "step": 3870 + }, + { + "epoch": 2.14, + "learning_rate": 9.415989727688484e-06, + "loss": 0.9242, + "step": 3875 + }, + { + "epoch": 2.15, + "learning_rate": 9.359442875059631e-06, + "loss": 0.9314, + "step": 3880 + }, + { + "epoch": 2.15, + "learning_rate": 9.303027197803726e-06, + "loss": 0.9227, + "step": 3885 + }, + { + "epoch": 2.15, + "learning_rate": 9.246743169071906e-06, + "loss": 0.9253, + "step": 3890 + }, + { + "epoch": 2.15, + "learning_rate": 9.190591260911201e-06, + "loss": 0.9457, + "step": 3895 + }, + { + "epoch": 2.16, + "learning_rate": 9.134571944260554e-06, + "loss": 0.9214, + "step": 3900 + }, + { + "epoch": 2.16, + "learning_rate": 9.078685688946884e-06, + "loss": 0.9534, + "step": 3905 + }, + { + "epoch": 2.16, + "learning_rate": 9.022932963681141e-06, + "loss": 0.9236, + "step": 3910 + }, + { + "epoch": 2.16, + "learning_rate": 8.967314236054384e-06, + "loss": 0.8905, + "step": 3915 + }, + { + "epoch": 2.17, + "learning_rate": 8.911829972533817e-06, + "loss": 0.9319, + "step": 3920 + }, + { + "epoch": 2.17, + "learning_rate": 8.856480638458966e-06, + "loss": 0.8989, + "step": 3925 + }, + { + "epoch": 2.17, + "learning_rate": 8.80126669803766e-06, + "loss": 0.9234, + "step": 3930 + }, + { + "epoch": 2.18, + "learning_rate": 8.746188614342263e-06, + "loss": 0.9619, + "step": 3935 + }, + { + "epoch": 2.18, + "learning_rate": 8.691246849305653e-06, + "loss": 0.9537, + "step": 3940 + }, + { + "epoch": 2.18, + "learning_rate": 8.636441863717499e-06, + "loss": 0.9809, + "step": 3945 + }, + { + "epoch": 2.18, + "learning_rate": 8.581774117220238e-06, + "loss": 0.9219, + "step": 3950 + }, + { + "epoch": 2.19, + "learning_rate": 8.52724406830538e-06, + "loss": 0.9502, + "step": 3955 + }, + { + "epoch": 2.19, + "learning_rate": 8.472852174309514e-06, + "loss": 0.9277, + "step": 3960 + }, + { + "epoch": 2.19, + "learning_rate": 8.418598891410581e-06, + "loss": 0.8978, + "step": 3965 + }, + { + "epoch": 2.2, + "learning_rate": 8.364484674624e-06, + "loss": 0.9026, + "step": 3970 + }, + { + "epoch": 2.2, + "learning_rate": 8.31050997779885e-06, + "loss": 0.9658, + "step": 3975 + }, + { + "epoch": 2.2, + "learning_rate": 8.25667525361409e-06, + "loss": 0.9288, + "step": 3980 + }, + { + "epoch": 2.2, + "learning_rate": 8.202980953574735e-06, + "loss": 0.9009, + "step": 3985 + }, + { + "epoch": 2.21, + "learning_rate": 8.14942752800808e-06, + "loss": 0.934, + "step": 3990 + }, + { + "epoch": 2.21, + "learning_rate": 8.09601542605993e-06, + "loss": 0.9261, + "step": 3995 + }, + { + "epoch": 2.21, + "learning_rate": 8.042745095690826e-06, + "loss": 0.9086, + "step": 4000 + }, + { + "epoch": 2.21, + "learning_rate": 7.989616983672288e-06, + "loss": 0.9397, + "step": 4005 + }, + { + "epoch": 2.22, + "learning_rate": 7.936631535583055e-06, + "loss": 0.8661, + "step": 4010 + }, + { + "epoch": 2.22, + "learning_rate": 7.88378919580538e-06, + "loss": 0.9433, + "step": 4015 + }, + { + "epoch": 2.22, + "learning_rate": 7.83109040752128e-06, + "loss": 0.9473, + "step": 4020 + }, + { + "epoch": 2.23, + "learning_rate": 7.778535612708824e-06, + "loss": 0.9177, + "step": 4025 + }, + { + "epoch": 2.23, + "learning_rate": 7.726125252138417e-06, + "loss": 0.8932, + "step": 4030 + }, + { + "epoch": 2.23, + "learning_rate": 7.673859765369126e-06, + "loss": 0.9187, + "step": 4035 + }, + { + "epoch": 2.23, + "learning_rate": 7.62173959074497e-06, + "loss": 0.9009, + "step": 4040 + }, + { + "epoch": 2.24, + "learning_rate": 7.569765165391257e-06, + "loss": 0.9348, + "step": 4045 + }, + { + "epoch": 2.24, + "learning_rate": 7.517936925210917e-06, + "loss": 0.8827, + "step": 4050 + }, + { + "epoch": 2.24, + "learning_rate": 7.466255304880834e-06, + "loss": 0.9668, + "step": 4055 + }, + { + "epoch": 2.24, + "learning_rate": 7.4147207378482295e-06, + "loss": 0.934, + "step": 4060 + }, + { + "epoch": 2.25, + "learning_rate": 7.36333365632697e-06, + "loss": 0.9251, + "step": 4065 + }, + { + "epoch": 2.25, + "learning_rate": 7.312094491294033e-06, + "loss": 0.9225, + "step": 4070 + }, + { + "epoch": 2.25, + "learning_rate": 7.261003672485783e-06, + "loss": 0.9198, + "step": 4075 + }, + { + "epoch": 2.26, + "learning_rate": 7.210061628394477e-06, + "loss": 0.9023, + "step": 4080 + }, + { + "epoch": 2.26, + "learning_rate": 7.159268786264564e-06, + "loss": 0.9378, + "step": 4085 + }, + { + "epoch": 2.26, + "learning_rate": 7.108625572089209e-06, + "loss": 0.9148, + "step": 4090 + }, + { + "epoch": 2.26, + "learning_rate": 7.05813241060661e-06, + "loss": 0.9308, + "step": 4095 + }, + { + "epoch": 2.27, + "learning_rate": 7.007789725296557e-06, + "loss": 0.8979, + "step": 4100 + }, + { + "epoch": 2.27, + "learning_rate": 6.957597938376748e-06, + "loss": 0.9329, + "step": 4105 + }, + { + "epoch": 2.27, + "learning_rate": 6.907557470799358e-06, + "loss": 0.8849, + "step": 4110 + }, + { + "epoch": 2.28, + "learning_rate": 6.857668742247458e-06, + "loss": 0.8879, + "step": 4115 + }, + { + "epoch": 2.28, + "learning_rate": 6.807932171131498e-06, + "loss": 0.9132, + "step": 4120 + }, + { + "epoch": 2.28, + "learning_rate": 6.758348174585804e-06, + "loss": 0.9273, + "step": 4125 + }, + { + "epoch": 2.28, + "learning_rate": 6.7089171684650785e-06, + "loss": 0.9115, + "step": 4130 + }, + { + "epoch": 2.29, + "learning_rate": 6.659639567340914e-06, + "loss": 0.9431, + "step": 4135 + }, + { + "epoch": 2.29, + "learning_rate": 6.610515784498314e-06, + "loss": 0.9515, + "step": 4140 + }, + { + "epoch": 2.29, + "learning_rate": 6.561546231932228e-06, + "loss": 0.9069, + "step": 4145 + }, + { + "epoch": 2.29, + "learning_rate": 6.512731320344101e-06, + "loss": 0.9178, + "step": 4150 + }, + { + "epoch": 2.3, + "learning_rate": 6.464071459138405e-06, + "loss": 0.9156, + "step": 4155 + }, + { + "epoch": 2.3, + "learning_rate": 6.415567056419244e-06, + "loss": 0.9637, + "step": 4160 + }, + { + "epoch": 2.3, + "learning_rate": 6.3672185189869e-06, + "loss": 0.9446, + "step": 4165 + }, + { + "epoch": 2.31, + "learning_rate": 6.319026252334445e-06, + "loss": 0.8726, + "step": 4170 + }, + { + "epoch": 2.31, + "learning_rate": 6.270990660644313e-06, + "loss": 0.9324, + "step": 4175 + }, + { + "epoch": 2.31, + "learning_rate": 6.223112146784935e-06, + "loss": 0.9252, + "step": 4180 + }, + { + "epoch": 2.31, + "learning_rate": 6.1753911123073435e-06, + "loss": 0.8924, + "step": 4185 + }, + { + "epoch": 2.32, + "learning_rate": 6.127827957441817e-06, + "loss": 0.9344, + "step": 4190 + }, + { + "epoch": 2.32, + "learning_rate": 6.080423081094522e-06, + "loss": 0.9341, + "step": 4195 + }, + { + "epoch": 2.32, + "learning_rate": 6.033176880844133e-06, + "loss": 0.905, + "step": 4200 + }, + { + "epoch": 2.33, + "learning_rate": 5.986089752938584e-06, + "loss": 0.9231, + "step": 4205 + }, + { + "epoch": 2.33, + "learning_rate": 5.939162092291622e-06, + "loss": 0.906, + "step": 4210 + }, + { + "epoch": 2.33, + "learning_rate": 5.892394292479633e-06, + "loss": 0.8957, + "step": 4215 + }, + { + "epoch": 2.33, + "learning_rate": 5.8457867457382024e-06, + "loss": 0.867, + "step": 4220 + }, + { + "epoch": 2.34, + "learning_rate": 5.7993398429589506e-06, + "loss": 0.9022, + "step": 4225 + }, + { + "epoch": 2.34, + "learning_rate": 5.753053973686148e-06, + "loss": 0.9064, + "step": 4230 + }, + { + "epoch": 2.34, + "learning_rate": 5.7069295261135525e-06, + "loss": 0.9019, + "step": 4235 + }, + { + "epoch": 2.34, + "learning_rate": 5.66096688708104e-06, + "loss": 0.9397, + "step": 4240 + }, + { + "epoch": 2.35, + "learning_rate": 5.615166442071457e-06, + "loss": 0.8971, + "step": 4245 + }, + { + "epoch": 2.35, + "learning_rate": 5.569528575207339e-06, + "loss": 0.8902, + "step": 4250 + }, + { + "epoch": 2.35, + "learning_rate": 5.5240536692477e-06, + "loss": 0.8982, + "step": 4255 + }, + { + "epoch": 2.36, + "learning_rate": 5.4787421055848164e-06, + "loss": 0.9135, + "step": 4260 + }, + { + "epoch": 2.36, + "learning_rate": 5.433594264241043e-06, + "loss": 0.8969, + "step": 4265 + }, + { + "epoch": 2.36, + "learning_rate": 5.3886105238656055e-06, + "loss": 0.9036, + "step": 4270 + }, + { + "epoch": 2.36, + "learning_rate": 5.3437912617314425e-06, + "loss": 0.9313, + "step": 4275 + }, + { + "epoch": 2.37, + "learning_rate": 5.299136853732034e-06, + "loss": 0.9034, + "step": 4280 + }, + { + "epoch": 2.37, + "learning_rate": 5.254647674378252e-06, + "loss": 0.9673, + "step": 4285 + }, + { + "epoch": 2.37, + "learning_rate": 5.210324096795202e-06, + "loss": 0.9044, + "step": 4290 + }, + { + "epoch": 2.37, + "learning_rate": 5.166166492719124e-06, + "loss": 0.937, + "step": 4295 + }, + { + "epoch": 2.38, + "learning_rate": 5.122175232494255e-06, + "loss": 0.9095, + "step": 4300 + }, + { + "epoch": 2.38, + "learning_rate": 5.078350685069727e-06, + "loss": 0.8787, + "step": 4305 + }, + { + "epoch": 2.38, + "learning_rate": 5.03469321799647e-06, + "loss": 0.8945, + "step": 4310 + }, + { + "epoch": 2.39, + "learning_rate": 4.9912031974241376e-06, + "loss": 0.9285, + "step": 4315 + }, + { + "epoch": 2.39, + "learning_rate": 4.947880988098025e-06, + "loss": 0.9318, + "step": 4320 + }, + { + "epoch": 2.39, + "learning_rate": 4.90472695335602e-06, + "loss": 0.9171, + "step": 4325 + }, + { + "epoch": 2.39, + "learning_rate": 4.8617414551255545e-06, + "loss": 0.923, + "step": 4330 + }, + { + "epoch": 2.4, + "learning_rate": 4.818924853920545e-06, + "loss": 0.9153, + "step": 4335 + }, + { + "epoch": 2.4, + "learning_rate": 4.776277508838428e-06, + "loss": 0.907, + "step": 4340 + }, + { + "epoch": 2.4, + "learning_rate": 4.733799777557069e-06, + "loss": 0.8869, + "step": 4345 + }, + { + "epoch": 2.41, + "learning_rate": 4.691492016331842e-06, + "loss": 0.95, + "step": 4350 + }, + { + "epoch": 2.41, + "learning_rate": 4.6493545799925655e-06, + "loss": 0.9145, + "step": 4355 + }, + { + "epoch": 2.41, + "learning_rate": 4.607387821940609e-06, + "loss": 0.9175, + "step": 4360 + }, + { + "epoch": 2.41, + "learning_rate": 4.565592094145835e-06, + "loss": 0.9125, + "step": 4365 + }, + { + "epoch": 2.42, + "learning_rate": 4.523967747143745e-06, + "loss": 0.8972, + "step": 4370 + }, + { + "epoch": 2.42, + "learning_rate": 4.482515130032453e-06, + "loss": 0.9113, + "step": 4375 + }, + { + "epoch": 2.42, + "learning_rate": 4.441234590469817e-06, + "loss": 0.9447, + "step": 4380 + }, + { + "epoch": 2.42, + "learning_rate": 4.400126474670499e-06, + "loss": 0.9472, + "step": 4385 + }, + { + "epoch": 2.43, + "learning_rate": 4.359191127403059e-06, + "loss": 0.9391, + "step": 4390 + }, + { + "epoch": 2.43, + "learning_rate": 4.318428891987078e-06, + "loss": 0.9171, + "step": 4395 + }, + { + "epoch": 2.43, + "learning_rate": 4.2778401102902595e-06, + "loss": 0.9468, + "step": 4400 + }, + { + "epoch": 2.44, + "learning_rate": 4.237425122725586e-06, + "loss": 0.9034, + "step": 4405 + }, + { + "epoch": 2.44, + "learning_rate": 4.197184268248436e-06, + "loss": 0.8667, + "step": 4410 + }, + { + "epoch": 2.44, + "learning_rate": 4.157117884353765e-06, + "loss": 0.9553, + "step": 4415 + }, + { + "epoch": 2.44, + "learning_rate": 4.117226307073268e-06, + "loss": 0.931, + "step": 4420 + }, + { + "epoch": 2.45, + "learning_rate": 4.07750987097254e-06, + "loss": 0.9287, + "step": 4425 + }, + { + "epoch": 2.45, + "learning_rate": 4.037968909148326e-06, + "loss": 0.8737, + "step": 4430 + }, + { + "epoch": 2.45, + "learning_rate": 3.998603753225647e-06, + "loss": 0.9677, + "step": 4435 + }, + { + "epoch": 2.46, + "learning_rate": 3.959414733355094e-06, + "loss": 0.9194, + "step": 4440 + }, + { + "epoch": 2.46, + "learning_rate": 3.9204021782100115e-06, + "loss": 0.9424, + "step": 4445 + }, + { + "epoch": 2.46, + "learning_rate": 3.8815664149837675e-06, + "loss": 0.9402, + "step": 4450 + }, + { + "epoch": 2.46, + "learning_rate": 3.8429077693869854e-06, + "loss": 0.9196, + "step": 4455 + }, + { + "epoch": 2.47, + "learning_rate": 3.804426565644839e-06, + "loss": 0.9357, + "step": 4460 + }, + { + "epoch": 2.47, + "learning_rate": 3.7661231264943086e-06, + "loss": 0.9234, + "step": 4465 + }, + { + "epoch": 2.47, + "learning_rate": 3.7279977731814963e-06, + "loss": 0.9135, + "step": 4470 + }, + { + "epoch": 2.47, + "learning_rate": 3.690050825458913e-06, + "loss": 0.9074, + "step": 4475 + }, + { + "epoch": 2.48, + "learning_rate": 3.652282601582793e-06, + "loss": 0.8658, + "step": 4480 + }, + { + "epoch": 2.48, + "learning_rate": 3.6146934183104748e-06, + "loss": 0.9522, + "step": 4485 + }, + { + "epoch": 2.48, + "learning_rate": 3.5772835908976538e-06, + "loss": 0.937, + "step": 4490 + }, + { + "epoch": 2.49, + "learning_rate": 3.540053433095841e-06, + "loss": 0.9336, + "step": 4495 + }, + { + "epoch": 2.49, + "learning_rate": 3.503003257149637e-06, + "loss": 0.9437, + "step": 4500 + }, + { + "epoch": 2.49, + "learning_rate": 3.4661333737941976e-06, + "loss": 0.9084, + "step": 4505 + }, + { + "epoch": 2.49, + "learning_rate": 3.429444092252554e-06, + "loss": 0.9378, + "step": 4510 + }, + { + "epoch": 2.5, + "learning_rate": 3.39293572023307e-06, + "loss": 0.8999, + "step": 4515 + }, + { + "epoch": 2.5, + "learning_rate": 3.3566085639268413e-06, + "loss": 0.9417, + "step": 4520 + }, + { + "epoch": 2.5, + "learning_rate": 3.32046292800513e-06, + "loss": 0.9288, + "step": 4525 + }, + { + "epoch": 2.5, + "learning_rate": 3.2844991156168097e-06, + "loss": 0.8957, + "step": 4530 + }, + { + "epoch": 2.51, + "learning_rate": 3.2487174283858223e-06, + "loss": 0.9002, + "step": 4535 + }, + { + "epoch": 2.51, + "learning_rate": 3.2131181664086517e-06, + "loss": 0.9315, + "step": 4540 + }, + { + "epoch": 2.51, + "learning_rate": 3.1777016282517975e-06, + "loss": 0.8948, + "step": 4545 + }, + { + "epoch": 2.52, + "learning_rate": 3.142468110949287e-06, + "loss": 0.9015, + "step": 4550 + }, + { + "epoch": 2.52, + "learning_rate": 3.1074179100001737e-06, + "loss": 0.9273, + "step": 4555 + }, + { + "epoch": 2.52, + "learning_rate": 3.0725513193660404e-06, + "loss": 0.9307, + "step": 4560 + }, + { + "epoch": 2.52, + "learning_rate": 3.0378686314685934e-06, + "loss": 0.9075, + "step": 4565 + }, + { + "epoch": 2.53, + "learning_rate": 3.003370137187128e-06, + "loss": 0.8821, + "step": 4570 + }, + { + "epoch": 2.53, + "learning_rate": 2.969056125856154e-06, + "loss": 0.9245, + "step": 4575 + }, + { + "epoch": 2.53, + "learning_rate": 2.93492688526294e-06, + "loss": 0.9346, + "step": 4580 + }, + { + "epoch": 2.54, + "learning_rate": 2.900982701645111e-06, + "loss": 0.9226, + "step": 4585 + }, + { + "epoch": 2.54, + "learning_rate": 2.867223859688237e-06, + "loss": 0.9215, + "step": 4590 + }, + { + "epoch": 2.54, + "learning_rate": 2.83365064252345e-06, + "loss": 0.878, + "step": 4595 + }, + { + "epoch": 2.54, + "learning_rate": 2.800263331725078e-06, + "loss": 0.9207, + "step": 4600 + }, + { + "epoch": 2.55, + "learning_rate": 2.7670622073082657e-06, + "loss": 0.9156, + "step": 4605 + }, + { + "epoch": 2.55, + "learning_rate": 2.7340475477266507e-06, + "loss": 0.9296, + "step": 4610 + }, + { + "epoch": 2.55, + "learning_rate": 2.701219629869986e-06, + "loss": 0.9346, + "step": 4615 + }, + { + "epoch": 2.55, + "learning_rate": 2.6685787290618825e-06, + "loss": 0.9237, + "step": 4620 + }, + { + "epoch": 2.56, + "learning_rate": 2.636125119057428e-06, + "loss": 0.9162, + "step": 4625 + }, + { + "epoch": 2.56, + "learning_rate": 2.6038590720409565e-06, + "loss": 0.9145, + "step": 4630 + }, + { + "epoch": 2.56, + "learning_rate": 2.5717808586237067e-06, + "loss": 0.9776, + "step": 4635 + }, + { + "epoch": 2.57, + "learning_rate": 2.539890747841611e-06, + "loss": 0.9519, + "step": 4640 + }, + { + "epoch": 2.57, + "learning_rate": 2.5081890071529695e-06, + "loss": 0.9116, + "step": 4645 + }, + { + "epoch": 2.57, + "learning_rate": 2.4766759024362927e-06, + "loss": 0.9217, + "step": 4650 + }, + { + "epoch": 2.57, + "learning_rate": 2.445351697987988e-06, + "loss": 0.9113, + "step": 4655 + }, + { + "epoch": 2.58, + "learning_rate": 2.414216656520191e-06, + "loss": 0.9089, + "step": 4660 + }, + { + "epoch": 2.58, + "learning_rate": 2.3832710391585605e-06, + "loss": 0.9367, + "step": 4665 + }, + { + "epoch": 2.58, + "learning_rate": 2.3525151054400675e-06, + "loss": 0.9453, + "step": 4670 + }, + { + "epoch": 2.59, + "learning_rate": 2.3219491133108394e-06, + "loss": 0.9187, + "step": 4675 + }, + { + "epoch": 2.59, + "learning_rate": 2.2915733191239824e-06, + "loss": 0.911, + "step": 4680 + }, + { + "epoch": 2.59, + "learning_rate": 2.261387977637436e-06, + "loss": 0.9309, + "step": 4685 + }, + { + "epoch": 2.59, + "learning_rate": 2.2313933420118395e-06, + "loss": 0.8921, + "step": 4690 + }, + { + "epoch": 2.6, + "learning_rate": 2.2015896638084037e-06, + "loss": 0.9332, + "step": 4695 + }, + { + "epoch": 2.6, + "learning_rate": 2.171977192986813e-06, + "loss": 0.9316, + "step": 4700 + }, + { + "epoch": 2.6, + "learning_rate": 2.142556177903096e-06, + "loss": 0.9453, + "step": 4705 + }, + { + "epoch": 2.6, + "learning_rate": 2.1133268653076022e-06, + "loss": 0.937, + "step": 4710 + }, + { + "epoch": 2.61, + "learning_rate": 2.084289500342862e-06, + "loss": 0.9067, + "step": 4715 + }, + { + "epoch": 2.61, + "learning_rate": 2.0554443265415864e-06, + "loss": 0.9694, + "step": 4720 + }, + { + "epoch": 2.61, + "learning_rate": 2.0267915858245943e-06, + "loss": 0.9217, + "step": 4725 + }, + { + "epoch": 2.62, + "learning_rate": 1.998331518498797e-06, + "loss": 0.9399, + "step": 4730 + }, + { + "epoch": 2.62, + "learning_rate": 1.970064363255175e-06, + "loss": 0.9164, + "step": 4735 + }, + { + "epoch": 2.62, + "learning_rate": 1.941990357166784e-06, + "loss": 0.9336, + "step": 4740 + }, + { + "epoch": 2.62, + "learning_rate": 1.9141097356867644e-06, + "loss": 0.926, + "step": 4745 + }, + { + "epoch": 2.63, + "learning_rate": 1.8864227326463452e-06, + "loss": 0.9109, + "step": 4750 + }, + { + "epoch": 2.63, + "learning_rate": 1.8589295802529328e-06, + "loss": 0.9528, + "step": 4755 + }, + { + "epoch": 2.63, + "learning_rate": 1.8316305090881003e-06, + "loss": 0.9142, + "step": 4760 + }, + { + "epoch": 2.63, + "learning_rate": 1.8045257481057204e-06, + "loss": 0.9247, + "step": 4765 + }, + { + "epoch": 2.64, + "learning_rate": 1.7776155246299747e-06, + "loss": 0.9162, + "step": 4770 + }, + { + "epoch": 2.64, + "learning_rate": 1.7509000643535167e-06, + "loss": 0.9084, + "step": 4775 + }, + { + "epoch": 2.64, + "learning_rate": 1.7243795913355148e-06, + "loss": 0.8427, + "step": 4780 + }, + { + "epoch": 2.65, + "learning_rate": 1.6980543279998401e-06, + "loss": 0.8851, + "step": 4785 + }, + { + "epoch": 2.65, + "learning_rate": 1.671924495133126e-06, + "loss": 0.9155, + "step": 4790 + }, + { + "epoch": 2.65, + "learning_rate": 1.6459903118829777e-06, + "loss": 0.9049, + "step": 4795 + }, + { + "epoch": 2.65, + "learning_rate": 1.6202519957561114e-06, + "loss": 0.8831, + "step": 4800 + }, + { + "epoch": 2.66, + "learning_rate": 1.5947097626165252e-06, + "loss": 0.9286, + "step": 4805 + }, + { + "epoch": 2.66, + "learning_rate": 1.5693638266836952e-06, + "loss": 0.9299, + "step": 4810 + }, + { + "epoch": 2.66, + "learning_rate": 1.5442144005307774e-06, + "loss": 0.8739, + "step": 4815 + }, + { + "epoch": 2.67, + "learning_rate": 1.519261695082827e-06, + "loss": 0.9201, + "step": 4820 + }, + { + "epoch": 2.67, + "learning_rate": 1.4945059196150247e-06, + "loss": 0.9062, + "step": 4825 + }, + { + "epoch": 2.67, + "learning_rate": 1.4699472817509248e-06, + "loss": 0.9304, + "step": 4830 + }, + { + "epoch": 2.67, + "learning_rate": 1.4455859874607235e-06, + "loss": 0.9253, + "step": 4835 + }, + { + "epoch": 2.68, + "learning_rate": 1.4214222410594947e-06, + "loss": 0.9261, + "step": 4840 + }, + { + "epoch": 2.68, + "learning_rate": 1.3974562452055418e-06, + "loss": 0.8764, + "step": 4845 + }, + { + "epoch": 2.68, + "learning_rate": 1.3736882008986262e-06, + "loss": 0.8906, + "step": 4850 + }, + { + "epoch": 2.68, + "learning_rate": 1.3501183074783263e-06, + "loss": 0.9671, + "step": 4855 + }, + { + "epoch": 2.69, + "learning_rate": 1.3267467626223606e-06, + "loss": 0.8979, + "step": 4860 + }, + { + "epoch": 2.69, + "learning_rate": 1.3035737623449146e-06, + "loss": 0.9284, + "step": 4865 + }, + { + "epoch": 2.69, + "learning_rate": 1.2805995009950083e-06, + "loss": 0.9338, + "step": 4870 + }, + { + "epoch": 2.7, + "learning_rate": 1.257824171254865e-06, + "loss": 0.9474, + "step": 4875 + }, + { + "epoch": 2.7, + "learning_rate": 1.2352479641382919e-06, + "loss": 0.9307, + "step": 4880 + }, + { + "epoch": 2.7, + "learning_rate": 1.2128710689890826e-06, + "loss": 0.9578, + "step": 4885 + }, + { + "epoch": 2.7, + "learning_rate": 1.1906936734794233e-06, + "loss": 0.9123, + "step": 4890 + }, + { + "epoch": 2.71, + "learning_rate": 1.1687159636083161e-06, + "loss": 0.9157, + "step": 4895 + }, + { + "epoch": 2.71, + "learning_rate": 1.1469381237000476e-06, + "loss": 0.9135, + "step": 4900 + }, + { + "epoch": 2.71, + "learning_rate": 1.1253603364025867e-06, + "loss": 0.9073, + "step": 4905 + }, + { + "epoch": 2.71, + "learning_rate": 1.1039827826861193e-06, + "loss": 0.9198, + "step": 4910 + }, + { + "epoch": 2.72, + "learning_rate": 1.0828056418414695e-06, + "loss": 0.911, + "step": 4915 + }, + { + "epoch": 2.72, + "learning_rate": 1.06182909147865e-06, + "loss": 0.9124, + "step": 4920 + }, + { + "epoch": 2.72, + "learning_rate": 1.0410533075253248e-06, + "loss": 0.9308, + "step": 4925 + }, + { + "epoch": 2.73, + "learning_rate": 1.020478464225369e-06, + "loss": 0.9005, + "step": 4930 + }, + { + "epoch": 2.73, + "learning_rate": 1.0001047341373832e-06, + "loss": 0.9326, + "step": 4935 + }, + { + "epoch": 2.73, + "learning_rate": 9.7993228813327e-07, + "loss": 0.8905, + "step": 4940 + }, + { + "epoch": 2.73, + "learning_rate": 9.599612953967746e-07, + "loss": 0.9238, + "step": 4945 + }, + { + "epoch": 2.74, + "learning_rate": 9.401919234220902e-07, + "loss": 0.8964, + "step": 4950 + }, + { + "epoch": 2.74, + "learning_rate": 9.206243380124352e-07, + "loss": 0.9183, + "step": 4955 + }, + { + "epoch": 2.74, + "learning_rate": 9.012587032786706e-07, + "loss": 0.9074, + "step": 4960 + }, + { + "epoch": 2.75, + "learning_rate": 8.820951816379263e-07, + "loss": 0.9255, + "step": 4965 + }, + { + "epoch": 2.75, + "learning_rate": 8.631339338122324e-07, + "loss": 0.9133, + "step": 4970 + }, + { + "epoch": 2.75, + "learning_rate": 8.443751188271703e-07, + "loss": 0.9056, + "step": 4975 + }, + { + "epoch": 2.75, + "learning_rate": 8.258188940105549e-07, + "loss": 0.8905, + "step": 4980 + }, + { + "epoch": 2.76, + "learning_rate": 8.074654149910821e-07, + "loss": 0.9088, + "step": 4985 + }, + { + "epoch": 2.76, + "learning_rate": 7.893148356970748e-07, + "loss": 0.8994, + "step": 4990 + }, + { + "epoch": 2.76, + "learning_rate": 7.713673083551281e-07, + "loss": 0.8946, + "step": 4995 + }, + { + "epoch": 2.76, + "learning_rate": 7.536229834888913e-07, + "loss": 0.924, + "step": 5000 + }, + { + "epoch": 2.77, + "learning_rate": 7.360820099177712e-07, + "loss": 0.9102, + "step": 5005 + }, + { + "epoch": 2.77, + "learning_rate": 7.187445347556859e-07, + "loss": 0.9198, + "step": 5010 + }, + { + "epoch": 2.77, + "learning_rate": 7.016107034098524e-07, + "loss": 0.9004, + "step": 5015 + }, + { + "epoch": 2.78, + "learning_rate": 6.846806595795424e-07, + "loss": 0.8849, + "step": 5020 + }, + { + "epoch": 2.78, + "learning_rate": 6.679545452548924e-07, + "loss": 0.9473, + "step": 5025 + }, + { + "epoch": 2.78, + "learning_rate": 6.514325007157013e-07, + "loss": 0.9339, + "step": 5030 + }, + { + "epoch": 2.78, + "learning_rate": 6.35114664530273e-07, + "loss": 0.9172, + "step": 5035 + }, + { + "epoch": 2.79, + "learning_rate": 6.190011735542262e-07, + "loss": 0.9796, + "step": 5040 + }, + { + "epoch": 2.79, + "learning_rate": 6.030921629293778e-07, + "loss": 0.9367, + "step": 5045 + }, + { + "epoch": 2.79, + "learning_rate": 5.873877660825783e-07, + "loss": 0.9254, + "step": 5050 + }, + { + "epoch": 2.8, + "learning_rate": 5.718881147246252e-07, + "loss": 0.8976, + "step": 5055 + }, + { + "epoch": 2.8, + "learning_rate": 5.565933388491263e-07, + "loss": 0.9099, + "step": 5060 + }, + { + "epoch": 2.8, + "learning_rate": 5.415035667314328e-07, + "loss": 0.9057, + "step": 5065 + }, + { + "epoch": 2.8, + "learning_rate": 5.266189249275521e-07, + "loss": 0.8978, + "step": 5070 + }, + { + "epoch": 2.81, + "learning_rate": 5.119395382730929e-07, + "loss": 0.9172, + "step": 5075 + }, + { + "epoch": 2.81, + "learning_rate": 4.974655298822129e-07, + "loss": 0.9393, + "step": 5080 + }, + { + "epoch": 2.81, + "learning_rate": 4.831970211465892e-07, + "loss": 0.9137, + "step": 5085 + }, + { + "epoch": 2.81, + "learning_rate": 4.6913413173439723e-07, + "loss": 0.8573, + "step": 5090 + }, + { + "epoch": 2.82, + "learning_rate": 4.552769795893086e-07, + "loss": 0.9533, + "step": 5095 + }, + { + "epoch": 2.82, + "learning_rate": 4.416256809295083e-07, + "loss": 0.8693, + "step": 5100 + }, + { + "epoch": 2.82, + "learning_rate": 4.2818035024670963e-07, + "loss": 0.9531, + "step": 5105 + }, + { + "epoch": 2.83, + "learning_rate": 4.1494110030519397e-07, + "loss": 0.9077, + "step": 5110 + }, + { + "epoch": 2.83, + "learning_rate": 4.019080421408833e-07, + "loss": 0.9061, + "step": 5115 + }, + { + "epoch": 2.83, + "learning_rate": 3.8908128506037756e-07, + "loss": 0.9606, + "step": 5120 + }, + { + "epoch": 2.83, + "learning_rate": 3.7646093664007456e-07, + "loss": 0.9335, + "step": 5125 + }, + { + "epoch": 2.84, + "learning_rate": 3.640471027252346e-07, + "loss": 0.9054, + "step": 5130 + }, + { + "epoch": 2.84, + "learning_rate": 3.5183988742910903e-07, + "loss": 0.8801, + "step": 5135 + }, + { + "epoch": 2.84, + "learning_rate": 3.398393931320687e-07, + "loss": 0.913, + "step": 5140 + }, + { + "epoch": 2.84, + "learning_rate": 3.2804572048074357e-07, + "loss": 0.904, + "step": 5145 + }, + { + "epoch": 2.85, + "learning_rate": 3.164589683871705e-07, + "loss": 0.9144, + "step": 5150 + }, + { + "epoch": 2.85, + "learning_rate": 3.050792340279718e-07, + "loss": 0.9145, + "step": 5155 + }, + { + "epoch": 2.85, + "learning_rate": 2.939066128435419e-07, + "loss": 0.957, + "step": 5160 + }, + { + "epoch": 2.86, + "learning_rate": 2.829411985372399e-07, + "loss": 0.9196, + "step": 5165 + }, + { + "epoch": 2.86, + "learning_rate": 2.7218308307460916e-07, + "loss": 0.8893, + "step": 5170 + }, + { + "epoch": 2.86, + "learning_rate": 2.616323566825979e-07, + "loss": 0.9049, + "step": 5175 + }, + { + "epoch": 2.86, + "learning_rate": 2.51289107848815e-07, + "loss": 0.8977, + "step": 5180 + }, + { + "epoch": 2.87, + "learning_rate": 2.4115342332078074e-07, + "loss": 0.9187, + "step": 5185 + }, + { + "epoch": 2.87, + "learning_rate": 2.312253881051968e-07, + "loss": 0.9355, + "step": 5190 + }, + { + "epoch": 2.87, + "learning_rate": 2.2150508546723848e-07, + "loss": 0.9337, + "step": 5195 + }, + { + "epoch": 2.88, + "learning_rate": 2.119925969298553e-07, + "loss": 0.9011, + "step": 5200 + }, + { + "epoch": 2.88, + "learning_rate": 2.0268800227307982e-07, + "loss": 0.8987, + "step": 5205 + }, + { + "epoch": 2.88, + "learning_rate": 1.9359137953337548e-07, + "loss": 0.9206, + "step": 5210 + }, + { + "epoch": 2.88, + "learning_rate": 1.8470280500296199e-07, + "loss": 0.9485, + "step": 5215 + }, + { + "epoch": 2.89, + "learning_rate": 1.7602235322919102e-07, + "loss": 0.8902, + "step": 5220 + }, + { + "epoch": 2.89, + "learning_rate": 1.6755009701391045e-07, + "loss": 0.9484, + "step": 5225 + }, + { + "epoch": 2.89, + "learning_rate": 1.592861074128621e-07, + "loss": 0.9361, + "step": 5230 + }, + { + "epoch": 2.89, + "learning_rate": 1.5123045373508226e-07, + "loss": 0.9407, + "step": 5235 + }, + { + "epoch": 2.9, + "learning_rate": 1.4338320354231605e-07, + "loss": 0.8792, + "step": 5240 + }, + { + "epoch": 2.9, + "learning_rate": 1.3574442264846222e-07, + "loss": 0.9682, + "step": 5245 + }, + { + "epoch": 2.9, + "learning_rate": 1.2831417511900423e-07, + "loss": 0.8995, + "step": 5250 + }, + { + "epoch": 2.91, + "learning_rate": 1.2109252327048849e-07, + "loss": 0.8877, + "step": 5255 + }, + { + "epoch": 2.91, + "learning_rate": 1.1407952766999686e-07, + "loss": 0.8746, + "step": 5260 + }, + { + "epoch": 2.91, + "learning_rate": 1.0727524713463333e-07, + "loss": 0.9287, + "step": 5265 + }, + { + "epoch": 2.91, + "learning_rate": 1.0067973873104097e-07, + "loss": 0.9045, + "step": 5270 + }, + { + "epoch": 2.92, + "learning_rate": 9.42930577749107e-08, + "loss": 0.942, + "step": 5275 + }, + { + "epoch": 2.92, + "learning_rate": 8.811525783052888e-08, + "loss": 0.885, + "step": 5280 + }, + { + "epoch": 2.92, + "learning_rate": 8.214639071031926e-08, + "loss": 0.8947, + "step": 5285 + }, + { + "epoch": 2.93, + "learning_rate": 7.638650647442125e-08, + "loss": 0.9309, + "step": 5290 + }, + { + "epoch": 2.93, + "learning_rate": 7.083565343024845e-08, + "loss": 0.896, + "step": 5295 + }, + { + "epoch": 2.93, + "learning_rate": 6.549387813210572e-08, + "loss": 0.9132, + "step": 5300 + } + ], + "max_steps": 5424, + "num_train_epochs": 3, + "total_flos": 1.7223020314833715e+18, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-5300/training_args.bin b/checkpoint-5300/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..77433459daca6603950afc463ff989e817f81ac3 --- /dev/null +++ b/checkpoint-5300/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0f6a8853599a52b1f15984d75ce4b6434c335fdc67d4509d0b2ca0d8c613590a +size 3362 diff --git a/checkpoint-5400/README.md b/checkpoint-5400/README.md new file mode 100644 index 0000000000000000000000000000000000000000..53c0fe6b436905ad7a2c060e98fe1ebdeac1a2fe --- /dev/null +++ b/checkpoint-5400/README.md @@ -0,0 +1,15 @@ +--- +library_name: peft +--- +## Training procedure + + +The following `bitsandbytes` quantization config was used during training: +- load_in_8bit: True +- llm_int8_threshold: 6.0 +- llm_int8_skip_modules: None +- llm_int8_enable_fp32_cpu_offload: False +### Framework versions + + +- PEFT 0.5.0 diff --git a/checkpoint-5400/adapter_config.json b/checkpoint-5400/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..514ae99ac7471cc26016f7575e44dd8304f29b07 --- /dev/null +++ b/checkpoint-5400/adapter_config.json @@ -0,0 +1,21 @@ +{ + "auto_mapping": null, + "base_model_name_or_path": "meta-llama/Llama-2-7b-chat-hf", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "q_proj", + "v_proj" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-5400/adapter_model.bin b/checkpoint-5400/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..5726685aa93c304a211ee34c10af725f496865ee --- /dev/null +++ b/checkpoint-5400/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:31fe8938f7cbe4b1cf47ff6bd11e8c5b1b910a8bdc9a5e3a2d3c0145d72c37cb +size 16821197 diff --git a/checkpoint-5400/finetuning_args.json b/checkpoint-5400/finetuning_args.json new file mode 100644 index 0000000000000000000000000000000000000000..d01efc206b59c6f88548e8f3940579f2ed2af33b --- /dev/null +++ b/checkpoint-5400/finetuning_args.json @@ -0,0 +1,16 @@ +{ + "dpo_beta": 0.1, + "finetuning_type": "lora", + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "lora_rank": 8, + "lora_target": [ + "q_proj", + "v_proj" + ], + "name_module_trainable": "mlp", + "num_hidden_layers": 32, + "num_layer_trainable": 3, + "ppo_score_norm": false, + "resume_lora_training": true +} diff --git a/checkpoint-5400/optimizer.pt b/checkpoint-5400/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..3419201c16cd70326d11695360664d1aa3558287 --- /dev/null +++ b/checkpoint-5400/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aa4478843aa44123797e2e0a05a158244495f7b8f9ee8583357dc81873f375c1 +size 33629893 diff --git a/checkpoint-5400/rng_state.pth b/checkpoint-5400/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..407823b2176ca72220aa93b4f54ae008d85b271d --- /dev/null +++ b/checkpoint-5400/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:37383e19c14c8a6691ed96ee211c9d42474187bfa9db493007f9cea18326f8c3 +size 14575 diff --git a/checkpoint-5400/scaler.pt b/checkpoint-5400/scaler.pt new file mode 100644 index 0000000000000000000000000000000000000000..0b68ab17cab147d0105b7b2ed669762d4c4f3bc7 --- /dev/null +++ b/checkpoint-5400/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aa77827a38713cd02e03532f556a57810ca748d63ab74d4b1111e1cbec048d60 +size 557 diff --git a/checkpoint-5400/scheduler.pt b/checkpoint-5400/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..e53d8b2d2b034bcb6a78468fc787809b301b519d --- /dev/null +++ b/checkpoint-5400/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f57928e0b9c3c204e200f83cfd550e14a4e04b9158fe8849d9abe0fb67bcd70d +size 627 diff --git a/checkpoint-5400/trainer_state.json b/checkpoint-5400/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..6628c10cf4c49634d4fafe18aeaafe91e64bee05 --- /dev/null +++ b/checkpoint-5400/trainer_state.json @@ -0,0 +1,6496 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 2.985899917058336, + "global_step": 5400, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.9999895164082156e-05, + "loss": 1.6349, + "step": 5 + }, + { + "epoch": 0.01, + "learning_rate": 4.999958065720787e-05, + "loss": 1.6199, + "step": 10 + }, + { + "epoch": 0.01, + "learning_rate": 4.999905648201487e-05, + "loss": 1.4834, + "step": 15 + }, + { + "epoch": 0.01, + "learning_rate": 4.999832264289934e-05, + "loss": 1.3882, + "step": 20 + }, + { + "epoch": 0.01, + "learning_rate": 4.999737914601591e-05, + "loss": 1.3679, + "step": 25 + }, + { + "epoch": 0.02, + "learning_rate": 4.999622599927756e-05, + "loss": 1.2396, + "step": 30 + }, + { + "epoch": 0.02, + "learning_rate": 4.999486321235559e-05, + "loss": 1.321, + "step": 35 + }, + { + "epoch": 0.02, + "learning_rate": 4.9993290796679516e-05, + "loss": 1.2874, + "step": 40 + }, + { + "epoch": 0.02, + "learning_rate": 4.999150876543699e-05, + "loss": 1.2607, + "step": 45 + }, + { + "epoch": 0.03, + "learning_rate": 4.9989517133573694e-05, + "loss": 1.2454, + "step": 50 + }, + { + "epoch": 0.03, + "learning_rate": 4.9987315917793174e-05, + "loss": 1.2799, + "step": 55 + }, + { + "epoch": 0.03, + "learning_rate": 4.998490513655676e-05, + "loss": 1.2575, + "step": 60 + }, + { + "epoch": 0.04, + "learning_rate": 4.998228481008337e-05, + "loss": 1.2404, + "step": 65 + }, + { + "epoch": 0.04, + "learning_rate": 4.997945496034934e-05, + "loss": 1.2219, + "step": 70 + }, + { + "epoch": 0.04, + "learning_rate": 4.9976415611088267e-05, + "loss": 1.2241, + "step": 75 + }, + { + "epoch": 0.04, + "learning_rate": 4.997316678779079e-05, + "loss": 1.1716, + "step": 80 + }, + { + "epoch": 0.05, + "learning_rate": 4.996970851770438e-05, + "loss": 1.1883, + "step": 85 + }, + { + "epoch": 0.05, + "learning_rate": 4.9966040829833115e-05, + "loss": 1.205, + "step": 90 + }, + { + "epoch": 0.05, + "learning_rate": 4.9962163754937426e-05, + "loss": 1.1246, + "step": 95 + }, + { + "epoch": 0.06, + "learning_rate": 4.995807732553384e-05, + "loss": 1.1636, + "step": 100 + }, + { + "epoch": 0.06, + "learning_rate": 4.9953781575894723e-05, + "loss": 1.158, + "step": 105 + }, + { + "epoch": 0.06, + "learning_rate": 4.9949276542048e-05, + "loss": 1.1477, + "step": 110 + }, + { + "epoch": 0.06, + "learning_rate": 4.9944562261776805e-05, + "loss": 1.1678, + "step": 115 + }, + { + "epoch": 0.07, + "learning_rate": 4.9939638774619216e-05, + "loss": 1.1501, + "step": 120 + }, + { + "epoch": 0.07, + "learning_rate": 4.99345061218679e-05, + "loss": 1.1955, + "step": 125 + }, + { + "epoch": 0.07, + "learning_rate": 4.9929164346569756e-05, + "loss": 1.1724, + "step": 130 + }, + { + "epoch": 0.07, + "learning_rate": 4.9923613493525576e-05, + "loss": 1.177, + "step": 135 + }, + { + "epoch": 0.08, + "learning_rate": 4.991785360928968e-05, + "loss": 1.1418, + "step": 140 + }, + { + "epoch": 0.08, + "learning_rate": 4.991188474216947e-05, + "loss": 1.1898, + "step": 145 + }, + { + "epoch": 0.08, + "learning_rate": 4.9905706942225094e-05, + "loss": 1.1479, + "step": 150 + }, + { + "epoch": 0.09, + "learning_rate": 4.9899320261268966e-05, + "loss": 1.1356, + "step": 155 + }, + { + "epoch": 0.09, + "learning_rate": 4.989272475286537e-05, + "loss": 1.1397, + "step": 160 + }, + { + "epoch": 0.09, + "learning_rate": 4.9885920472330004e-05, + "loss": 1.1215, + "step": 165 + }, + { + "epoch": 0.09, + "learning_rate": 4.9878907476729516e-05, + "loss": 1.167, + "step": 170 + }, + { + "epoch": 0.1, + "learning_rate": 4.9871685824881e-05, + "loss": 1.1219, + "step": 175 + }, + { + "epoch": 0.1, + "learning_rate": 4.9864255577351534e-05, + "loss": 1.0835, + "step": 180 + }, + { + "epoch": 0.1, + "learning_rate": 4.985661679645769e-05, + "loss": 1.0721, + "step": 185 + }, + { + "epoch": 0.11, + "learning_rate": 4.9848769546264915e-05, + "loss": 1.0692, + "step": 190 + }, + { + "epoch": 0.11, + "learning_rate": 4.9840713892587146e-05, + "loss": 1.0488, + "step": 195 + }, + { + "epoch": 0.11, + "learning_rate": 4.983244990298609e-05, + "loss": 1.1285, + "step": 200 + }, + { + "epoch": 0.11, + "learning_rate": 4.982397764677081e-05, + "loss": 1.0832, + "step": 205 + }, + { + "epoch": 0.12, + "learning_rate": 4.981529719499704e-05, + "loss": 1.0652, + "step": 210 + }, + { + "epoch": 0.12, + "learning_rate": 4.980640862046663e-05, + "loss": 1.1043, + "step": 215 + }, + { + "epoch": 0.12, + "learning_rate": 4.979731199772693e-05, + "loss": 1.112, + "step": 220 + }, + { + "epoch": 0.12, + "learning_rate": 4.9788007403070146e-05, + "loss": 1.1029, + "step": 225 + }, + { + "epoch": 0.13, + "learning_rate": 4.977849491453277e-05, + "loss": 1.0869, + "step": 230 + }, + { + "epoch": 0.13, + "learning_rate": 4.976877461189481e-05, + "loss": 1.0843, + "step": 235 + }, + { + "epoch": 0.13, + "learning_rate": 4.975884657667922e-05, + "loss": 1.0789, + "step": 240 + }, + { + "epoch": 0.14, + "learning_rate": 4.974871089215118e-05, + "loss": 1.0449, + "step": 245 + }, + { + "epoch": 0.14, + "learning_rate": 4.9738367643317405e-05, + "loss": 1.1053, + "step": 250 + }, + { + "epoch": 0.14, + "learning_rate": 4.9727816916925395e-05, + "loss": 1.0651, + "step": 255 + }, + { + "epoch": 0.14, + "learning_rate": 4.971705880146276e-05, + "loss": 1.0828, + "step": 260 + }, + { + "epoch": 0.15, + "learning_rate": 4.970609338715646e-05, + "loss": 1.0932, + "step": 265 + }, + { + "epoch": 0.15, + "learning_rate": 4.969492076597203e-05, + "loss": 1.0648, + "step": 270 + }, + { + "epoch": 0.15, + "learning_rate": 4.968354103161283e-05, + "loss": 1.0948, + "step": 275 + }, + { + "epoch": 0.15, + "learning_rate": 4.967195427951926e-05, + "loss": 1.0721, + "step": 280 + }, + { + "epoch": 0.16, + "learning_rate": 4.9660160606867936e-05, + "loss": 1.124, + "step": 285 + }, + { + "epoch": 0.16, + "learning_rate": 4.9648160112570896e-05, + "loss": 1.0963, + "step": 290 + }, + { + "epoch": 0.16, + "learning_rate": 4.9635952897274773e-05, + "loss": 1.1078, + "step": 295 + }, + { + "epoch": 0.17, + "learning_rate": 4.9623539063359925e-05, + "loss": 1.1059, + "step": 300 + }, + { + "epoch": 0.17, + "learning_rate": 4.961091871493962e-05, + "loss": 1.1032, + "step": 305 + }, + { + "epoch": 0.17, + "learning_rate": 4.959809195785912e-05, + "loss": 1.0595, + "step": 310 + }, + { + "epoch": 0.17, + "learning_rate": 4.958505889969481e-05, + "loss": 1.1096, + "step": 315 + }, + { + "epoch": 0.18, + "learning_rate": 4.957181964975329e-05, + "loss": 1.0589, + "step": 320 + }, + { + "epoch": 0.18, + "learning_rate": 4.955837431907049e-05, + "loss": 1.0608, + "step": 325 + }, + { + "epoch": 0.18, + "learning_rate": 4.954472302041069e-05, + "loss": 1.0819, + "step": 330 + }, + { + "epoch": 0.19, + "learning_rate": 4.9530865868265605e-05, + "loss": 1.0759, + "step": 335 + }, + { + "epoch": 0.19, + "learning_rate": 4.951680297885342e-05, + "loss": 1.0515, + "step": 340 + }, + { + "epoch": 0.19, + "learning_rate": 4.950253447011779e-05, + "loss": 1.0371, + "step": 345 + }, + { + "epoch": 0.19, + "learning_rate": 4.948806046172691e-05, + "loss": 1.0619, + "step": 350 + }, + { + "epoch": 0.2, + "learning_rate": 4.947338107507245e-05, + "loss": 1.0757, + "step": 355 + }, + { + "epoch": 0.2, + "learning_rate": 4.945849643326857e-05, + "loss": 1.0686, + "step": 360 + }, + { + "epoch": 0.2, + "learning_rate": 4.9443406661150874e-05, + "loss": 1.0809, + "step": 365 + }, + { + "epoch": 0.2, + "learning_rate": 4.942811188527537e-05, + "loss": 1.0704, + "step": 370 + }, + { + "epoch": 0.21, + "learning_rate": 4.941261223391742e-05, + "loss": 1.0655, + "step": 375 + }, + { + "epoch": 0.21, + "learning_rate": 4.939690783707063e-05, + "loss": 1.1182, + "step": 380 + }, + { + "epoch": 0.21, + "learning_rate": 4.938099882644578e-05, + "loss": 1.081, + "step": 385 + }, + { + "epoch": 0.22, + "learning_rate": 4.9364885335469734e-05, + "loss": 1.0792, + "step": 390 + }, + { + "epoch": 0.22, + "learning_rate": 4.93485674992843e-05, + "loss": 1.0244, + "step": 395 + }, + { + "epoch": 0.22, + "learning_rate": 4.933204545474511e-05, + "loss": 1.0531, + "step": 400 + }, + { + "epoch": 0.22, + "learning_rate": 4.9315319340420465e-05, + "loss": 1.048, + "step": 405 + }, + { + "epoch": 0.23, + "learning_rate": 4.929838929659015e-05, + "loss": 1.04, + "step": 410 + }, + { + "epoch": 0.23, + "learning_rate": 4.9281255465244314e-05, + "loss": 1.0686, + "step": 415 + }, + { + "epoch": 0.23, + "learning_rate": 4.926391799008223e-05, + "loss": 1.0933, + "step": 420 + }, + { + "epoch": 0.24, + "learning_rate": 4.924637701651111e-05, + "loss": 1.0313, + "step": 425 + }, + { + "epoch": 0.24, + "learning_rate": 4.9228632691644874e-05, + "loss": 1.0811, + "step": 430 + }, + { + "epoch": 0.24, + "learning_rate": 4.921068516430293e-05, + "loss": 1.0617, + "step": 435 + }, + { + "epoch": 0.24, + "learning_rate": 4.919253458500892e-05, + "loss": 1.0482, + "step": 440 + }, + { + "epoch": 0.25, + "learning_rate": 4.9174181105989445e-05, + "loss": 1.0681, + "step": 445 + }, + { + "epoch": 0.25, + "learning_rate": 4.9155624881172834e-05, + "loss": 1.0476, + "step": 450 + }, + { + "epoch": 0.25, + "learning_rate": 4.913686606618777e-05, + "loss": 1.0463, + "step": 455 + }, + { + "epoch": 0.25, + "learning_rate": 4.911790481836208e-05, + "loss": 1.0213, + "step": 460 + }, + { + "epoch": 0.26, + "learning_rate": 4.909874129672133e-05, + "loss": 0.9855, + "step": 465 + }, + { + "epoch": 0.26, + "learning_rate": 4.907937566198757e-05, + "loss": 1.045, + "step": 470 + }, + { + "epoch": 0.26, + "learning_rate": 4.9059808076577914e-05, + "loss": 1.0442, + "step": 475 + }, + { + "epoch": 0.27, + "learning_rate": 4.904003870460323e-05, + "loss": 1.0354, + "step": 480 + }, + { + "epoch": 0.27, + "learning_rate": 4.9020067711866735e-05, + "loss": 1.0331, + "step": 485 + }, + { + "epoch": 0.27, + "learning_rate": 4.899989526586261e-05, + "loss": 1.0585, + "step": 490 + }, + { + "epoch": 0.27, + "learning_rate": 4.8979521535774636e-05, + "loss": 1.0223, + "step": 495 + }, + { + "epoch": 0.28, + "learning_rate": 4.895894669247468e-05, + "loss": 1.0118, + "step": 500 + }, + { + "epoch": 0.28, + "learning_rate": 4.8938170908521356e-05, + "loss": 1.0508, + "step": 505 + }, + { + "epoch": 0.28, + "learning_rate": 4.8917194358158534e-05, + "loss": 1.0656, + "step": 510 + }, + { + "epoch": 0.28, + "learning_rate": 4.8896017217313886e-05, + "loss": 1.0655, + "step": 515 + }, + { + "epoch": 0.29, + "learning_rate": 4.887463966359741e-05, + "loss": 1.0833, + "step": 520 + }, + { + "epoch": 0.29, + "learning_rate": 4.8853061876299956e-05, + "loss": 1.068, + "step": 525 + }, + { + "epoch": 0.29, + "learning_rate": 4.8831284036391684e-05, + "loss": 1.0487, + "step": 530 + }, + { + "epoch": 0.3, + "learning_rate": 4.880930632652058e-05, + "loss": 1.059, + "step": 535 + }, + { + "epoch": 0.3, + "learning_rate": 4.878712893101092e-05, + "loss": 1.0408, + "step": 540 + }, + { + "epoch": 0.3, + "learning_rate": 4.876475203586171e-05, + "loss": 0.9976, + "step": 545 + }, + { + "epoch": 0.3, + "learning_rate": 4.874217582874514e-05, + "loss": 1.0358, + "step": 550 + }, + { + "epoch": 0.31, + "learning_rate": 4.8719400499005e-05, + "loss": 1.0073, + "step": 555 + }, + { + "epoch": 0.31, + "learning_rate": 4.869642623765509e-05, + "loss": 1.0742, + "step": 560 + }, + { + "epoch": 0.31, + "learning_rate": 4.867325323737765e-05, + "loss": 1.0463, + "step": 565 + }, + { + "epoch": 0.32, + "learning_rate": 4.864988169252168e-05, + "loss": 1.0242, + "step": 570 + }, + { + "epoch": 0.32, + "learning_rate": 4.8626311799101375e-05, + "loss": 1.0773, + "step": 575 + }, + { + "epoch": 0.32, + "learning_rate": 4.860254375479446e-05, + "loss": 1.0371, + "step": 580 + }, + { + "epoch": 0.32, + "learning_rate": 4.8578577758940504e-05, + "loss": 1.0399, + "step": 585 + }, + { + "epoch": 0.33, + "learning_rate": 4.855441401253928e-05, + "loss": 1.0411, + "step": 590 + }, + { + "epoch": 0.33, + "learning_rate": 4.8530052718249076e-05, + "loss": 1.0476, + "step": 595 + }, + { + "epoch": 0.33, + "learning_rate": 4.850549408038498e-05, + "loss": 1.0075, + "step": 600 + }, + { + "epoch": 0.33, + "learning_rate": 4.848073830491717e-05, + "loss": 1.0045, + "step": 605 + }, + { + "epoch": 0.34, + "learning_rate": 4.845578559946923e-05, + "loss": 1.0368, + "step": 610 + }, + { + "epoch": 0.34, + "learning_rate": 4.8430636173316306e-05, + "loss": 1.0156, + "step": 615 + }, + { + "epoch": 0.34, + "learning_rate": 4.840529023738348e-05, + "loss": 1.0334, + "step": 620 + }, + { + "epoch": 0.35, + "learning_rate": 4.837974800424389e-05, + "loss": 0.9994, + "step": 625 + }, + { + "epoch": 0.35, + "learning_rate": 4.8354009688117026e-05, + "loss": 1.0467, + "step": 630 + }, + { + "epoch": 0.35, + "learning_rate": 4.8328075504866874e-05, + "loss": 0.9968, + "step": 635 + }, + { + "epoch": 0.35, + "learning_rate": 4.8301945672000164e-05, + "loss": 1.041, + "step": 640 + }, + { + "epoch": 0.36, + "learning_rate": 4.8275620408664487e-05, + "loss": 0.9811, + "step": 645 + }, + { + "epoch": 0.36, + "learning_rate": 4.8249099935646494e-05, + "loss": 1.0235, + "step": 650 + }, + { + "epoch": 0.36, + "learning_rate": 4.822238447537003e-05, + "loss": 1.0152, + "step": 655 + }, + { + "epoch": 0.36, + "learning_rate": 4.819547425189429e-05, + "loss": 1.038, + "step": 660 + }, + { + "epoch": 0.37, + "learning_rate": 4.81683694909119e-05, + "loss": 1.0387, + "step": 665 + }, + { + "epoch": 0.37, + "learning_rate": 4.814107041974707e-05, + "loss": 1.019, + "step": 670 + }, + { + "epoch": 0.37, + "learning_rate": 4.811357726735366e-05, + "loss": 1.0403, + "step": 675 + }, + { + "epoch": 0.38, + "learning_rate": 4.808589026431324e-05, + "loss": 1.0828, + "step": 680 + }, + { + "epoch": 0.38, + "learning_rate": 4.805800964283322e-05, + "loss": 1.0128, + "step": 685 + }, + { + "epoch": 0.38, + "learning_rate": 4.802993563674483e-05, + "loss": 1.0502, + "step": 690 + }, + { + "epoch": 0.38, + "learning_rate": 4.80016684815012e-05, + "loss": 1.0169, + "step": 695 + }, + { + "epoch": 0.39, + "learning_rate": 4.7973208414175406e-05, + "loss": 1.0316, + "step": 700 + }, + { + "epoch": 0.39, + "learning_rate": 4.794455567345842e-05, + "loss": 1.0263, + "step": 705 + }, + { + "epoch": 0.39, + "learning_rate": 4.791571049965714e-05, + "loss": 1.0801, + "step": 710 + }, + { + "epoch": 0.4, + "learning_rate": 4.7886673134692404e-05, + "loss": 1.0575, + "step": 715 + }, + { + "epoch": 0.4, + "learning_rate": 4.7857443822096905e-05, + "loss": 0.9767, + "step": 720 + }, + { + "epoch": 0.4, + "learning_rate": 4.782802280701319e-05, + "loss": 0.9666, + "step": 725 + }, + { + "epoch": 0.4, + "learning_rate": 4.77984103361916e-05, + "loss": 0.9983, + "step": 730 + }, + { + "epoch": 0.41, + "learning_rate": 4.776860665798816e-05, + "loss": 1.0219, + "step": 735 + }, + { + "epoch": 0.41, + "learning_rate": 4.773861202236257e-05, + "loss": 0.9963, + "step": 740 + }, + { + "epoch": 0.41, + "learning_rate": 4.770842668087602e-05, + "loss": 1.0219, + "step": 745 + }, + { + "epoch": 0.41, + "learning_rate": 4.767805088668916e-05, + "loss": 1.0029, + "step": 750 + }, + { + "epoch": 0.42, + "learning_rate": 4.7647484894559936e-05, + "loss": 1.0077, + "step": 755 + }, + { + "epoch": 0.42, + "learning_rate": 4.7616728960841444e-05, + "loss": 0.9908, + "step": 760 + }, + { + "epoch": 0.42, + "learning_rate": 4.758578334347981e-05, + "loss": 1.0092, + "step": 765 + }, + { + "epoch": 0.43, + "learning_rate": 4.7554648302012015e-05, + "loss": 0.9874, + "step": 770 + }, + { + "epoch": 0.43, + "learning_rate": 4.7523324097563706e-05, + "loss": 1.0185, + "step": 775 + }, + { + "epoch": 0.43, + "learning_rate": 4.749181099284703e-05, + "loss": 1.0533, + "step": 780 + }, + { + "epoch": 0.43, + "learning_rate": 4.746010925215839e-05, + "loss": 1.0083, + "step": 785 + }, + { + "epoch": 0.44, + "learning_rate": 4.74282191413763e-05, + "loss": 1.0191, + "step": 790 + }, + { + "epoch": 0.44, + "learning_rate": 4.7396140927959045e-05, + "loss": 0.9786, + "step": 795 + }, + { + "epoch": 0.44, + "learning_rate": 4.7363874880942574e-05, + "loss": 1.0197, + "step": 800 + }, + { + "epoch": 0.45, + "learning_rate": 4.733142127093813e-05, + "loss": 1.0471, + "step": 805 + }, + { + "epoch": 0.45, + "learning_rate": 4.7298780370130014e-05, + "loss": 1.0134, + "step": 810 + }, + { + "epoch": 0.45, + "learning_rate": 4.726595245227336e-05, + "loss": 0.9801, + "step": 815 + }, + { + "epoch": 0.45, + "learning_rate": 4.723293779269173e-05, + "loss": 0.9944, + "step": 820 + }, + { + "epoch": 0.46, + "learning_rate": 4.7199736668274924e-05, + "loss": 1.0358, + "step": 825 + }, + { + "epoch": 0.46, + "learning_rate": 4.716634935747655e-05, + "loss": 0.9907, + "step": 830 + }, + { + "epoch": 0.46, + "learning_rate": 4.713277614031177e-05, + "loss": 1.0698, + "step": 835 + }, + { + "epoch": 0.46, + "learning_rate": 4.70990172983549e-05, + "loss": 1.0466, + "step": 840 + }, + { + "epoch": 0.47, + "learning_rate": 4.706507311473707e-05, + "loss": 0.9489, + "step": 845 + }, + { + "epoch": 0.47, + "learning_rate": 4.703094387414385e-05, + "loss": 0.9936, + "step": 850 + }, + { + "epoch": 0.47, + "learning_rate": 4.699662986281288e-05, + "loss": 0.9672, + "step": 855 + }, + { + "epoch": 0.48, + "learning_rate": 4.696213136853141e-05, + "loss": 1.0164, + "step": 860 + }, + { + "epoch": 0.48, + "learning_rate": 4.6927448680633954e-05, + "loss": 0.9817, + "step": 865 + }, + { + "epoch": 0.48, + "learning_rate": 4.689258208999983e-05, + "loss": 1.028, + "step": 870 + }, + { + "epoch": 0.48, + "learning_rate": 4.6857531889050716e-05, + "loss": 1.0239, + "step": 875 + }, + { + "epoch": 0.49, + "learning_rate": 4.682229837174821e-05, + "loss": 1.0378, + "step": 880 + }, + { + "epoch": 0.49, + "learning_rate": 4.678688183359135e-05, + "loss": 0.9908, + "step": 885 + }, + { + "epoch": 0.49, + "learning_rate": 4.675128257161418e-05, + "loss": 0.9384, + "step": 890 + }, + { + "epoch": 0.49, + "learning_rate": 4.671550088438319e-05, + "loss": 0.9481, + "step": 895 + }, + { + "epoch": 0.5, + "learning_rate": 4.6679537071994874e-05, + "loss": 0.9541, + "step": 900 + }, + { + "epoch": 0.5, + "learning_rate": 4.6643391436073165e-05, + "loss": 0.9787, + "step": 905 + }, + { + "epoch": 0.5, + "learning_rate": 4.660706427976693e-05, + "loss": 0.9897, + "step": 910 + }, + { + "epoch": 0.51, + "learning_rate": 4.657055590774745e-05, + "loss": 0.9947, + "step": 915 + }, + { + "epoch": 0.51, + "learning_rate": 4.6533866626205805e-05, + "loss": 0.9614, + "step": 920 + }, + { + "epoch": 0.51, + "learning_rate": 4.649699674285036e-05, + "loss": 1.0323, + "step": 925 + }, + { + "epoch": 0.51, + "learning_rate": 4.645994656690417e-05, + "loss": 1.0345, + "step": 930 + }, + { + "epoch": 0.52, + "learning_rate": 4.642271640910235e-05, + "loss": 1.0432, + "step": 935 + }, + { + "epoch": 0.52, + "learning_rate": 4.638530658168954e-05, + "loss": 1.0102, + "step": 940 + }, + { + "epoch": 0.52, + "learning_rate": 4.6347717398417203e-05, + "loss": 1.0361, + "step": 945 + }, + { + "epoch": 0.53, + "learning_rate": 4.6309949174541096e-05, + "loss": 0.9818, + "step": 950 + }, + { + "epoch": 0.53, + "learning_rate": 4.627200222681851e-05, + "loss": 1.0293, + "step": 955 + }, + { + "epoch": 0.53, + "learning_rate": 4.6233876873505694e-05, + "loss": 1.0125, + "step": 960 + }, + { + "epoch": 0.53, + "learning_rate": 4.619557343435516e-05, + "loss": 1.0039, + "step": 965 + }, + { + "epoch": 0.54, + "learning_rate": 4.615709223061302e-05, + "loss": 1.0146, + "step": 970 + }, + { + "epoch": 0.54, + "learning_rate": 4.611843358501624e-05, + "loss": 1.0352, + "step": 975 + }, + { + "epoch": 0.54, + "learning_rate": 4.6079597821789993e-05, + "loss": 1.0405, + "step": 980 + }, + { + "epoch": 0.54, + "learning_rate": 4.604058526664491e-05, + "loss": 0.9834, + "step": 985 + }, + { + "epoch": 0.55, + "learning_rate": 4.600139624677436e-05, + "loss": 1.0252, + "step": 990 + }, + { + "epoch": 0.55, + "learning_rate": 4.596203109085168e-05, + "loss": 1.0516, + "step": 995 + }, + { + "epoch": 0.55, + "learning_rate": 4.5922490129027464e-05, + "loss": 0.9762, + "step": 1000 + }, + { + "epoch": 0.56, + "learning_rate": 4.588277369292674e-05, + "loss": 0.9801, + "step": 1005 + }, + { + "epoch": 0.56, + "learning_rate": 4.5842882115646234e-05, + "loss": 1.0127, + "step": 1010 + }, + { + "epoch": 0.56, + "learning_rate": 4.580281573175157e-05, + "loss": 1.0415, + "step": 1015 + }, + { + "epoch": 0.56, + "learning_rate": 4.576257487727442e-05, + "loss": 0.9974, + "step": 1020 + }, + { + "epoch": 0.57, + "learning_rate": 4.572215988970974e-05, + "loss": 1.0097, + "step": 1025 + }, + { + "epoch": 0.57, + "learning_rate": 4.568157110801293e-05, + "loss": 1.0004, + "step": 1030 + }, + { + "epoch": 0.57, + "learning_rate": 4.5640808872596944e-05, + "loss": 0.9932, + "step": 1035 + }, + { + "epoch": 0.58, + "learning_rate": 4.5599873525329505e-05, + "loss": 0.9437, + "step": 1040 + }, + { + "epoch": 0.58, + "learning_rate": 4.555876540953019e-05, + "loss": 1.0004, + "step": 1045 + }, + { + "epoch": 0.58, + "learning_rate": 4.551748486996755e-05, + "loss": 0.9743, + "step": 1050 + }, + { + "epoch": 0.58, + "learning_rate": 4.547603225285626e-05, + "loss": 1.0303, + "step": 1055 + }, + { + "epoch": 0.59, + "learning_rate": 4.543440790585417e-05, + "loss": 1.0225, + "step": 1060 + }, + { + "epoch": 0.59, + "learning_rate": 4.539261217805939e-05, + "loss": 1.0102, + "step": 1065 + }, + { + "epoch": 0.59, + "learning_rate": 4.535064542000743e-05, + "loss": 1.0087, + "step": 1070 + }, + { + "epoch": 0.59, + "learning_rate": 4.5308507983668165e-05, + "loss": 0.952, + "step": 1075 + }, + { + "epoch": 0.6, + "learning_rate": 4.526620022244293e-05, + "loss": 0.9767, + "step": 1080 + }, + { + "epoch": 0.6, + "learning_rate": 4.522372249116158e-05, + "loss": 1.0049, + "step": 1085 + }, + { + "epoch": 0.6, + "learning_rate": 4.5181075146079456e-05, + "loss": 1.0397, + "step": 1090 + }, + { + "epoch": 0.61, + "learning_rate": 4.5138258544874455e-05, + "loss": 1.0167, + "step": 1095 + }, + { + "epoch": 0.61, + "learning_rate": 4.5095273046643985e-05, + "loss": 0.9966, + "step": 1100 + }, + { + "epoch": 0.61, + "learning_rate": 4.5052119011901986e-05, + "loss": 0.983, + "step": 1105 + }, + { + "epoch": 0.61, + "learning_rate": 4.500879680257587e-05, + "loss": 1.0301, + "step": 1110 + }, + { + "epoch": 0.62, + "learning_rate": 4.4965306782003535e-05, + "loss": 1.0371, + "step": 1115 + }, + { + "epoch": 0.62, + "learning_rate": 4.492164931493028e-05, + "loss": 1.0083, + "step": 1120 + }, + { + "epoch": 0.62, + "learning_rate": 4.487782476750575e-05, + "loss": 0.9728, + "step": 1125 + }, + { + "epoch": 0.62, + "learning_rate": 4.4833833507280884e-05, + "loss": 0.9892, + "step": 1130 + }, + { + "epoch": 0.63, + "learning_rate": 4.4789675903204805e-05, + "loss": 0.9847, + "step": 1135 + }, + { + "epoch": 0.63, + "learning_rate": 4.474535232562176e-05, + "loss": 1.0545, + "step": 1140 + }, + { + "epoch": 0.63, + "learning_rate": 4.470086314626797e-05, + "loss": 1.0204, + "step": 1145 + }, + { + "epoch": 0.64, + "learning_rate": 4.465620873826856e-05, + "loss": 0.9648, + "step": 1150 + }, + { + "epoch": 0.64, + "learning_rate": 4.46113894761344e-05, + "loss": 0.9545, + "step": 1155 + }, + { + "epoch": 0.64, + "learning_rate": 4.456640573575896e-05, + "loss": 1.0256, + "step": 1160 + }, + { + "epoch": 0.64, + "learning_rate": 4.4521257894415183e-05, + "loss": 0.99, + "step": 1165 + }, + { + "epoch": 0.65, + "learning_rate": 4.44759463307523e-05, + "loss": 0.9906, + "step": 1170 + }, + { + "epoch": 0.65, + "learning_rate": 4.443047142479266e-05, + "loss": 0.9876, + "step": 1175 + }, + { + "epoch": 0.65, + "learning_rate": 4.4384833557928553e-05, + "loss": 1.0495, + "step": 1180 + }, + { + "epoch": 0.66, + "learning_rate": 4.4339033112918966e-05, + "loss": 0.9869, + "step": 1185 + }, + { + "epoch": 0.66, + "learning_rate": 4.4293070473886456e-05, + "loss": 1.0299, + "step": 1190 + }, + { + "epoch": 0.66, + "learning_rate": 4.424694602631385e-05, + "loss": 1.0073, + "step": 1195 + }, + { + "epoch": 0.66, + "learning_rate": 4.420066015704105e-05, + "loss": 1.0023, + "step": 1200 + }, + { + "epoch": 0.67, + "learning_rate": 4.41542132542618e-05, + "loss": 0.9462, + "step": 1205 + }, + { + "epoch": 0.67, + "learning_rate": 4.410760570752037e-05, + "loss": 1.0116, + "step": 1210 + }, + { + "epoch": 0.67, + "learning_rate": 4.4060837907708375e-05, + "loss": 0.9652, + "step": 1215 + }, + { + "epoch": 0.67, + "learning_rate": 4.401391024706142e-05, + "loss": 1.0411, + "step": 1220 + }, + { + "epoch": 0.68, + "learning_rate": 4.396682311915586e-05, + "loss": 0.9691, + "step": 1225 + }, + { + "epoch": 0.68, + "learning_rate": 4.3919576918905495e-05, + "loss": 0.961, + "step": 1230 + }, + { + "epoch": 0.68, + "learning_rate": 4.387217204255819e-05, + "loss": 0.9602, + "step": 1235 + }, + { + "epoch": 0.69, + "learning_rate": 4.3824608887692666e-05, + "loss": 1.0197, + "step": 1240 + }, + { + "epoch": 0.69, + "learning_rate": 4.377688785321507e-05, + "loss": 0.9982, + "step": 1245 + }, + { + "epoch": 0.69, + "learning_rate": 4.372900933935569e-05, + "loss": 0.986, + "step": 1250 + }, + { + "epoch": 0.69, + "learning_rate": 4.368097374766556e-05, + "loss": 0.9744, + "step": 1255 + }, + { + "epoch": 0.7, + "learning_rate": 4.3632781481013105e-05, + "loss": 1.0078, + "step": 1260 + }, + { + "epoch": 0.7, + "learning_rate": 4.358443294358077e-05, + "loss": 0.9865, + "step": 1265 + }, + { + "epoch": 0.7, + "learning_rate": 4.35359285408616e-05, + "loss": 0.976, + "step": 1270 + }, + { + "epoch": 0.71, + "learning_rate": 4.348726867965591e-05, + "loss": 1.0141, + "step": 1275 + }, + { + "epoch": 0.71, + "learning_rate": 4.343845376806777e-05, + "loss": 0.952, + "step": 1280 + }, + { + "epoch": 0.71, + "learning_rate": 4.338948421550169e-05, + "loss": 1.0239, + "step": 1285 + }, + { + "epoch": 0.71, + "learning_rate": 4.334036043265909e-05, + "loss": 0.994, + "step": 1290 + }, + { + "epoch": 0.72, + "learning_rate": 4.329108283153492e-05, + "loss": 1.0505, + "step": 1295 + }, + { + "epoch": 0.72, + "learning_rate": 4.3241651825414195e-05, + "loss": 0.9919, + "step": 1300 + }, + { + "epoch": 0.72, + "learning_rate": 4.31920678288685e-05, + "loss": 1.0169, + "step": 1305 + }, + { + "epoch": 0.72, + "learning_rate": 4.3142331257752546e-05, + "loss": 0.9938, + "step": 1310 + }, + { + "epoch": 0.73, + "learning_rate": 4.309244252920064e-05, + "loss": 0.92, + "step": 1315 + }, + { + "epoch": 0.73, + "learning_rate": 4.304240206162326e-05, + "loss": 0.9537, + "step": 1320 + }, + { + "epoch": 0.73, + "learning_rate": 4.299221027470345e-05, + "loss": 0.996, + "step": 1325 + }, + { + "epoch": 0.74, + "learning_rate": 4.294186758939339e-05, + "loss": 0.9652, + "step": 1330 + }, + { + "epoch": 0.74, + "learning_rate": 4.2891374427910795e-05, + "loss": 1.0138, + "step": 1335 + }, + { + "epoch": 0.74, + "learning_rate": 4.284073121373544e-05, + "loss": 0.9267, + "step": 1340 + }, + { + "epoch": 0.74, + "learning_rate": 4.278993837160553e-05, + "loss": 0.9899, + "step": 1345 + }, + { + "epoch": 0.75, + "learning_rate": 4.273899632751422e-05, + "loss": 0.963, + "step": 1350 + }, + { + "epoch": 0.75, + "learning_rate": 4.2687905508705974e-05, + "loss": 1.0066, + "step": 1355 + }, + { + "epoch": 0.75, + "learning_rate": 4.263666634367303e-05, + "loss": 0.9939, + "step": 1360 + }, + { + "epoch": 0.75, + "learning_rate": 4.258527926215178e-05, + "loss": 0.9667, + "step": 1365 + }, + { + "epoch": 0.76, + "learning_rate": 4.253374469511917e-05, + "loss": 0.9999, + "step": 1370 + }, + { + "epoch": 0.76, + "learning_rate": 4.248206307478909e-05, + "loss": 0.9738, + "step": 1375 + }, + { + "epoch": 0.76, + "learning_rate": 4.243023483460875e-05, + "loss": 0.9829, + "step": 1380 + }, + { + "epoch": 0.77, + "learning_rate": 4.237826040925503e-05, + "loss": 1.0107, + "step": 1385 + }, + { + "epoch": 0.77, + "learning_rate": 4.232614023463088e-05, + "loss": 0.9992, + "step": 1390 + }, + { + "epoch": 0.77, + "learning_rate": 4.227387474786159e-05, + "loss": 0.9566, + "step": 1395 + }, + { + "epoch": 0.77, + "learning_rate": 4.222146438729119e-05, + "loss": 0.9722, + "step": 1400 + }, + { + "epoch": 0.78, + "learning_rate": 4.216890959247873e-05, + "loss": 0.988, + "step": 1405 + }, + { + "epoch": 0.78, + "learning_rate": 4.211621080419463e-05, + "loss": 1.0103, + "step": 1410 + }, + { + "epoch": 0.78, + "learning_rate": 4.206336846441695e-05, + "loss": 0.9805, + "step": 1415 + }, + { + "epoch": 0.79, + "learning_rate": 4.201038301632772e-05, + "loss": 1.0177, + "step": 1420 + }, + { + "epoch": 0.79, + "learning_rate": 4.195725490430917e-05, + "loss": 0.9938, + "step": 1425 + }, + { + "epoch": 0.79, + "learning_rate": 4.190398457394007e-05, + "loss": 1.0276, + "step": 1430 + }, + { + "epoch": 0.79, + "learning_rate": 4.1850572471991924e-05, + "loss": 0.9909, + "step": 1435 + }, + { + "epoch": 0.8, + "learning_rate": 4.1797019046425264e-05, + "loss": 0.9794, + "step": 1440 + }, + { + "epoch": 0.8, + "learning_rate": 4.1743324746385914e-05, + "loss": 1.0073, + "step": 1445 + }, + { + "epoch": 0.8, + "learning_rate": 4.1689490022201154e-05, + "loss": 1.0106, + "step": 1450 + }, + { + "epoch": 0.8, + "learning_rate": 4.163551532537601e-05, + "loss": 1.0241, + "step": 1455 + }, + { + "epoch": 0.81, + "learning_rate": 4.1581401108589425e-05, + "loss": 1.0178, + "step": 1460 + }, + { + "epoch": 0.81, + "learning_rate": 4.1527147825690495e-05, + "loss": 0.9426, + "step": 1465 + }, + { + "epoch": 0.81, + "learning_rate": 4.1472755931694626e-05, + "loss": 0.9642, + "step": 1470 + }, + { + "epoch": 0.82, + "learning_rate": 4.141822588277976e-05, + "loss": 0.9678, + "step": 1475 + }, + { + "epoch": 0.82, + "learning_rate": 4.136355813628251e-05, + "loss": 0.9654, + "step": 1480 + }, + { + "epoch": 0.82, + "learning_rate": 4.130875315069435e-05, + "loss": 0.9748, + "step": 1485 + }, + { + "epoch": 0.82, + "learning_rate": 4.125381138565775e-05, + "loss": 0.9321, + "step": 1490 + }, + { + "epoch": 0.83, + "learning_rate": 4.1198733301962346e-05, + "loss": 0.9885, + "step": 1495 + }, + { + "epoch": 0.83, + "learning_rate": 4.114351936154105e-05, + "loss": 0.9385, + "step": 1500 + }, + { + "epoch": 0.83, + "learning_rate": 4.108817002746619e-05, + "loss": 0.9362, + "step": 1505 + }, + { + "epoch": 0.83, + "learning_rate": 4.1032685763945625e-05, + "loss": 0.9764, + "step": 1510 + }, + { + "epoch": 0.84, + "learning_rate": 4.097706703631886e-05, + "loss": 0.961, + "step": 1515 + }, + { + "epoch": 0.84, + "learning_rate": 4.092131431105312e-05, + "loss": 0.9818, + "step": 1520 + }, + { + "epoch": 0.84, + "learning_rate": 4.086542805573945e-05, + "loss": 0.9855, + "step": 1525 + }, + { + "epoch": 0.85, + "learning_rate": 4.080940873908881e-05, + "loss": 0.9618, + "step": 1530 + }, + { + "epoch": 0.85, + "learning_rate": 4.07532568309281e-05, + "loss": 1.0102, + "step": 1535 + }, + { + "epoch": 0.85, + "learning_rate": 4.069697280219628e-05, + "loss": 0.9773, + "step": 1540 + }, + { + "epoch": 0.85, + "learning_rate": 4.0640557124940376e-05, + "loss": 0.9687, + "step": 1545 + }, + { + "epoch": 0.86, + "learning_rate": 4.058401027231152e-05, + "loss": 0.9818, + "step": 1550 + }, + { + "epoch": 0.86, + "learning_rate": 4.052733271856103e-05, + "loss": 0.9911, + "step": 1555 + }, + { + "epoch": 0.86, + "learning_rate": 4.0470524939036355e-05, + "loss": 0.9694, + "step": 1560 + }, + { + "epoch": 0.87, + "learning_rate": 4.0413587410177155e-05, + "loss": 0.9374, + "step": 1565 + }, + { + "epoch": 0.87, + "learning_rate": 4.035652060951128e-05, + "loss": 0.956, + "step": 1570 + }, + { + "epoch": 0.87, + "learning_rate": 4.0299325015650774e-05, + "loss": 0.9491, + "step": 1575 + }, + { + "epoch": 0.87, + "learning_rate": 4.024200110828783e-05, + "loss": 0.9569, + "step": 1580 + }, + { + "epoch": 0.88, + "learning_rate": 4.018454936819082e-05, + "loss": 0.9627, + "step": 1585 + }, + { + "epoch": 0.88, + "learning_rate": 4.012697027720018e-05, + "loss": 0.9922, + "step": 1590 + }, + { + "epoch": 0.88, + "learning_rate": 4.0069264318224506e-05, + "loss": 0.9703, + "step": 1595 + }, + { + "epoch": 0.88, + "learning_rate": 4.0011431975236337e-05, + "loss": 0.9436, + "step": 1600 + }, + { + "epoch": 0.89, + "learning_rate": 3.995347373326822e-05, + "loss": 0.961, + "step": 1605 + }, + { + "epoch": 0.89, + "learning_rate": 3.989539007840861e-05, + "loss": 0.9247, + "step": 1610 + }, + { + "epoch": 0.89, + "learning_rate": 3.983718149779775e-05, + "loss": 0.9537, + "step": 1615 + }, + { + "epoch": 0.9, + "learning_rate": 3.9778848479623656e-05, + "loss": 0.9415, + "step": 1620 + }, + { + "epoch": 0.9, + "learning_rate": 3.972039151311795e-05, + "loss": 0.9954, + "step": 1625 + }, + { + "epoch": 0.9, + "learning_rate": 3.966181108855183e-05, + "loss": 0.9451, + "step": 1630 + }, + { + "epoch": 0.9, + "learning_rate": 3.960310769723189e-05, + "loss": 0.9977, + "step": 1635 + }, + { + "epoch": 0.91, + "learning_rate": 3.9544281831496034e-05, + "loss": 0.9806, + "step": 1640 + }, + { + "epoch": 0.91, + "learning_rate": 3.9485333984709374e-05, + "loss": 0.9851, + "step": 1645 + }, + { + "epoch": 0.91, + "learning_rate": 3.942626465126001e-05, + "loss": 1.0089, + "step": 1650 + }, + { + "epoch": 0.92, + "learning_rate": 3.9367074326555e-05, + "loss": 0.9562, + "step": 1655 + }, + { + "epoch": 0.92, + "learning_rate": 3.930776350701609e-05, + "loss": 0.9892, + "step": 1660 + }, + { + "epoch": 0.92, + "learning_rate": 3.92483326900756e-05, + "loss": 0.9875, + "step": 1665 + }, + { + "epoch": 0.92, + "learning_rate": 3.91887823741723e-05, + "loss": 1.0012, + "step": 1670 + }, + { + "epoch": 0.93, + "learning_rate": 3.9129113058747136e-05, + "loss": 0.97, + "step": 1675 + }, + { + "epoch": 0.93, + "learning_rate": 3.9069325244239095e-05, + "loss": 0.9793, + "step": 1680 + }, + { + "epoch": 0.93, + "learning_rate": 3.900941943208103e-05, + "loss": 0.9864, + "step": 1685 + }, + { + "epoch": 0.93, + "learning_rate": 3.894939612469539e-05, + "loss": 0.9596, + "step": 1690 + }, + { + "epoch": 0.94, + "learning_rate": 3.888925582549006e-05, + "loss": 0.9679, + "step": 1695 + }, + { + "epoch": 0.94, + "learning_rate": 3.882899903885412e-05, + "loss": 0.9395, + "step": 1700 + }, + { + "epoch": 0.94, + "learning_rate": 3.876862627015361e-05, + "loss": 0.9774, + "step": 1705 + }, + { + "epoch": 0.95, + "learning_rate": 3.87081380257273e-05, + "loss": 0.9686, + "step": 1710 + }, + { + "epoch": 0.95, + "learning_rate": 3.864753481288244e-05, + "loss": 0.953, + "step": 1715 + }, + { + "epoch": 0.95, + "learning_rate": 3.8586817139890515e-05, + "loss": 0.9719, + "step": 1720 + }, + { + "epoch": 0.95, + "learning_rate": 3.852598551598294e-05, + "loss": 0.9748, + "step": 1725 + }, + { + "epoch": 0.96, + "learning_rate": 3.8465040451346874e-05, + "loss": 0.9741, + "step": 1730 + }, + { + "epoch": 0.96, + "learning_rate": 3.8403982457120836e-05, + "loss": 0.9747, + "step": 1735 + }, + { + "epoch": 0.96, + "learning_rate": 3.834281204539051e-05, + "loss": 0.9791, + "step": 1740 + }, + { + "epoch": 0.96, + "learning_rate": 3.828152972918438e-05, + "loss": 0.9704, + "step": 1745 + }, + { + "epoch": 0.97, + "learning_rate": 3.82201360224695e-05, + "loss": 0.9556, + "step": 1750 + }, + { + "epoch": 0.97, + "learning_rate": 3.815863144014711e-05, + "loss": 0.9552, + "step": 1755 + }, + { + "epoch": 0.97, + "learning_rate": 3.809701649804834e-05, + "loss": 0.9594, + "step": 1760 + }, + { + "epoch": 0.98, + "learning_rate": 3.8035291712929926e-05, + "loss": 0.959, + "step": 1765 + }, + { + "epoch": 0.98, + "learning_rate": 3.797345760246982e-05, + "loss": 1.0254, + "step": 1770 + }, + { + "epoch": 0.98, + "learning_rate": 3.791151468526289e-05, + "loss": 0.9925, + "step": 1775 + }, + { + "epoch": 0.98, + "learning_rate": 3.784946348081654e-05, + "loss": 0.9516, + "step": 1780 + }, + { + "epoch": 0.99, + "learning_rate": 3.7787304509546365e-05, + "loss": 0.954, + "step": 1785 + }, + { + "epoch": 0.99, + "learning_rate": 3.7725038292771774e-05, + "loss": 0.9465, + "step": 1790 + }, + { + "epoch": 0.99, + "learning_rate": 3.766266535271167e-05, + "loss": 0.9792, + "step": 1795 + }, + { + "epoch": 1.0, + "learning_rate": 3.760018621248e-05, + "loss": 0.964, + "step": 1800 + }, + { + "epoch": 1.0, + "learning_rate": 3.75376013960814e-05, + "loss": 0.9419, + "step": 1805 + }, + { + "epoch": 1.0, + "learning_rate": 3.747491142840681e-05, + "loss": 0.9818, + "step": 1810 + }, + { + "epoch": 1.0, + "learning_rate": 3.741211683522904e-05, + "loss": 0.9153, + "step": 1815 + }, + { + "epoch": 1.01, + "learning_rate": 3.734921814319841e-05, + "loss": 0.955, + "step": 1820 + }, + { + "epoch": 1.01, + "learning_rate": 3.728621587983828e-05, + "loss": 0.9467, + "step": 1825 + }, + { + "epoch": 1.01, + "learning_rate": 3.722311057354067e-05, + "loss": 0.9816, + "step": 1830 + }, + { + "epoch": 1.01, + "learning_rate": 3.715990275356178e-05, + "loss": 0.9727, + "step": 1835 + }, + { + "epoch": 1.02, + "learning_rate": 3.7096592950017617e-05, + "loss": 0.9822, + "step": 1840 + }, + { + "epoch": 1.02, + "learning_rate": 3.703318169387947e-05, + "loss": 0.942, + "step": 1845 + }, + { + "epoch": 1.02, + "learning_rate": 3.696966951696952e-05, + "loss": 0.9306, + "step": 1850 + }, + { + "epoch": 1.03, + "learning_rate": 3.690605695195637e-05, + "loss": 0.9654, + "step": 1855 + }, + { + "epoch": 1.03, + "learning_rate": 3.684234453235054e-05, + "loss": 0.9592, + "step": 1860 + }, + { + "epoch": 1.03, + "learning_rate": 3.677853279250003e-05, + "loss": 0.988, + "step": 1865 + }, + { + "epoch": 1.03, + "learning_rate": 3.671462226758583e-05, + "loss": 0.9462, + "step": 1870 + }, + { + "epoch": 1.04, + "learning_rate": 3.665061349361742e-05, + "loss": 0.9685, + "step": 1875 + }, + { + "epoch": 1.04, + "learning_rate": 3.658650700742828e-05, + "loss": 0.9772, + "step": 1880 + }, + { + "epoch": 1.04, + "learning_rate": 3.6522303346671404e-05, + "loss": 0.9482, + "step": 1885 + }, + { + "epoch": 1.05, + "learning_rate": 3.645800304981477e-05, + "loss": 1.0069, + "step": 1890 + }, + { + "epoch": 1.05, + "learning_rate": 3.639360665613683e-05, + "loss": 0.9003, + "step": 1895 + }, + { + "epoch": 1.05, + "learning_rate": 3.632911470572197e-05, + "loss": 0.9279, + "step": 1900 + }, + { + "epoch": 1.05, + "learning_rate": 3.626452773945603e-05, + "loss": 0.9237, + "step": 1905 + }, + { + "epoch": 1.06, + "learning_rate": 3.619984629902172e-05, + "loss": 1.0086, + "step": 1910 + }, + { + "epoch": 1.06, + "learning_rate": 3.613507092689409e-05, + "loss": 0.9625, + "step": 1915 + }, + { + "epoch": 1.06, + "learning_rate": 3.607020216633599e-05, + "loss": 0.9297, + "step": 1920 + }, + { + "epoch": 1.06, + "learning_rate": 3.60052405613935e-05, + "loss": 0.9718, + "step": 1925 + }, + { + "epoch": 1.07, + "learning_rate": 3.594018665689139e-05, + "loss": 0.9512, + "step": 1930 + }, + { + "epoch": 1.07, + "learning_rate": 3.5875040998428513e-05, + "loss": 0.9923, + "step": 1935 + }, + { + "epoch": 1.07, + "learning_rate": 3.5809804132373253e-05, + "loss": 0.9518, + "step": 1940 + }, + { + "epoch": 1.08, + "learning_rate": 3.574447660585897e-05, + "loss": 0.9324, + "step": 1945 + }, + { + "epoch": 1.08, + "learning_rate": 3.5679058966779344e-05, + "loss": 0.9724, + "step": 1950 + }, + { + "epoch": 1.08, + "learning_rate": 3.561355176378384e-05, + "loss": 0.978, + "step": 1955 + }, + { + "epoch": 1.08, + "learning_rate": 3.554795554627307e-05, + "loss": 0.9893, + "step": 1960 + }, + { + "epoch": 1.09, + "learning_rate": 3.548227086439422e-05, + "loss": 0.967, + "step": 1965 + }, + { + "epoch": 1.09, + "learning_rate": 3.541649826903639e-05, + "loss": 0.9529, + "step": 1970 + }, + { + "epoch": 1.09, + "learning_rate": 3.535063831182602e-05, + "loss": 0.9506, + "step": 1975 + }, + { + "epoch": 1.09, + "learning_rate": 3.528469154512224e-05, + "loss": 0.9525, + "step": 1980 + }, + { + "epoch": 1.1, + "learning_rate": 3.521865852201223e-05, + "loss": 0.9258, + "step": 1985 + }, + { + "epoch": 1.1, + "learning_rate": 3.5152539796306596e-05, + "loss": 0.9417, + "step": 1990 + }, + { + "epoch": 1.1, + "learning_rate": 3.508633592253472e-05, + "loss": 0.9465, + "step": 1995 + }, + { + "epoch": 1.11, + "learning_rate": 3.502004745594011e-05, + "loss": 0.9494, + "step": 2000 + }, + { + "epoch": 1.11, + "learning_rate": 3.4953674952475755e-05, + "loss": 0.9709, + "step": 2005 + }, + { + "epoch": 1.11, + "learning_rate": 3.488721896879943e-05, + "loss": 0.9581, + "step": 2010 + }, + { + "epoch": 1.11, + "learning_rate": 3.4820680062269074e-05, + "loss": 0.974, + "step": 2015 + }, + { + "epoch": 1.12, + "learning_rate": 3.4754058790938046e-05, + "loss": 0.9768, + "step": 2020 + }, + { + "epoch": 1.12, + "learning_rate": 3.468735571355055e-05, + "loss": 0.982, + "step": 2025 + }, + { + "epoch": 1.12, + "learning_rate": 3.4620571389536825e-05, + "loss": 0.984, + "step": 2030 + }, + { + "epoch": 1.13, + "learning_rate": 3.455370637900856e-05, + "loss": 0.9604, + "step": 2035 + }, + { + "epoch": 1.13, + "learning_rate": 3.448676124275414e-05, + "loss": 0.897, + "step": 2040 + }, + { + "epoch": 1.13, + "learning_rate": 3.4419736542233925e-05, + "loss": 0.9968, + "step": 2045 + }, + { + "epoch": 1.13, + "learning_rate": 3.4352632839575616e-05, + "loss": 0.9479, + "step": 2050 + }, + { + "epoch": 1.14, + "learning_rate": 3.428545069756946e-05, + "loss": 0.9724, + "step": 2055 + }, + { + "epoch": 1.14, + "learning_rate": 3.42181906796636e-05, + "loss": 0.9493, + "step": 2060 + }, + { + "epoch": 1.14, + "learning_rate": 3.415085334995927e-05, + "loss": 0.9348, + "step": 2065 + }, + { + "epoch": 1.14, + "learning_rate": 3.408343927320613e-05, + "loss": 0.9702, + "step": 2070 + }, + { + "epoch": 1.15, + "learning_rate": 3.401594901479753e-05, + "loss": 0.9089, + "step": 2075 + }, + { + "epoch": 1.15, + "learning_rate": 3.394838314076572e-05, + "loss": 0.9606, + "step": 2080 + }, + { + "epoch": 1.15, + "learning_rate": 3.3880742217777115e-05, + "loss": 0.9743, + "step": 2085 + }, + { + "epoch": 1.16, + "learning_rate": 3.381302681312759e-05, + "loss": 0.9469, + "step": 2090 + }, + { + "epoch": 1.16, + "learning_rate": 3.374523749473767e-05, + "loss": 0.949, + "step": 2095 + }, + { + "epoch": 1.16, + "learning_rate": 3.367737483114779e-05, + "loss": 0.9567, + "step": 2100 + }, + { + "epoch": 1.16, + "learning_rate": 3.360943939151351e-05, + "loss": 0.9718, + "step": 2105 + }, + { + "epoch": 1.17, + "learning_rate": 3.354143174560078e-05, + "loss": 0.9626, + "step": 2110 + }, + { + "epoch": 1.17, + "learning_rate": 3.3473352463781105e-05, + "loss": 0.9346, + "step": 2115 + }, + { + "epoch": 1.17, + "learning_rate": 3.340520211702681e-05, + "loss": 0.9208, + "step": 2120 + }, + { + "epoch": 1.18, + "learning_rate": 3.333698127690623e-05, + "loss": 0.9856, + "step": 2125 + }, + { + "epoch": 1.18, + "learning_rate": 3.326869051557891e-05, + "loss": 0.9049, + "step": 2130 + }, + { + "epoch": 1.18, + "learning_rate": 3.320033040579082e-05, + "loss": 0.9222, + "step": 2135 + }, + { + "epoch": 1.18, + "learning_rate": 3.3131901520869565e-05, + "loss": 0.9648, + "step": 2140 + }, + { + "epoch": 1.19, + "learning_rate": 3.306340443471951e-05, + "loss": 0.9538, + "step": 2145 + }, + { + "epoch": 1.19, + "learning_rate": 3.299483972181708e-05, + "loss": 0.9314, + "step": 2150 + }, + { + "epoch": 1.19, + "learning_rate": 3.292620795720583e-05, + "loss": 0.9576, + "step": 2155 + }, + { + "epoch": 1.19, + "learning_rate": 3.285750971649167e-05, + "loss": 0.9499, + "step": 2160 + }, + { + "epoch": 1.2, + "learning_rate": 3.278874557583807e-05, + "loss": 0.9568, + "step": 2165 + }, + { + "epoch": 1.2, + "learning_rate": 3.271991611196117e-05, + "loss": 0.9642, + "step": 2170 + }, + { + "epoch": 1.2, + "learning_rate": 3.265102190212497e-05, + "loss": 0.9526, + "step": 2175 + }, + { + "epoch": 1.21, + "learning_rate": 3.258206352413648e-05, + "loss": 0.933, + "step": 2180 + }, + { + "epoch": 1.21, + "learning_rate": 3.2513041556340887e-05, + "loss": 0.9683, + "step": 2185 + }, + { + "epoch": 1.21, + "learning_rate": 3.244395657761671e-05, + "loss": 0.9155, + "step": 2190 + }, + { + "epoch": 1.21, + "learning_rate": 3.2374809167370924e-05, + "loss": 0.9262, + "step": 2195 + }, + { + "epoch": 1.22, + "learning_rate": 3.230559990553409e-05, + "loss": 0.9778, + "step": 2200 + }, + { + "epoch": 1.22, + "learning_rate": 3.2236329372555544e-05, + "loss": 0.9577, + "step": 2205 + }, + { + "epoch": 1.22, + "learning_rate": 3.2166998149398465e-05, + "loss": 0.9286, + "step": 2210 + }, + { + "epoch": 1.22, + "learning_rate": 3.209760681753505e-05, + "loss": 0.9634, + "step": 2215 + }, + { + "epoch": 1.23, + "learning_rate": 3.2028155958941615e-05, + "loss": 0.9467, + "step": 2220 + }, + { + "epoch": 1.23, + "learning_rate": 3.195864615609373e-05, + "loss": 0.9543, + "step": 2225 + }, + { + "epoch": 1.23, + "learning_rate": 3.1889077991961304e-05, + "loss": 0.9914, + "step": 2230 + }, + { + "epoch": 1.24, + "learning_rate": 3.181945205000373e-05, + "loss": 0.9309, + "step": 2235 + }, + { + "epoch": 1.24, + "learning_rate": 3.1749768914164955e-05, + "loss": 0.9299, + "step": 2240 + }, + { + "epoch": 1.24, + "learning_rate": 3.168002916886864e-05, + "loss": 0.9462, + "step": 2245 + }, + { + "epoch": 1.24, + "learning_rate": 3.1610233399013194e-05, + "loss": 0.948, + "step": 2250 + }, + { + "epoch": 1.25, + "learning_rate": 3.15403821899669e-05, + "loss": 0.9335, + "step": 2255 + }, + { + "epoch": 1.25, + "learning_rate": 3.147047612756302e-05, + "loss": 0.952, + "step": 2260 + }, + { + "epoch": 1.25, + "learning_rate": 3.140051579809484e-05, + "loss": 0.9532, + "step": 2265 + }, + { + "epoch": 1.26, + "learning_rate": 3.133050178831079e-05, + "loss": 0.9853, + "step": 2270 + }, + { + "epoch": 1.26, + "learning_rate": 3.12604346854095e-05, + "loss": 0.8925, + "step": 2275 + }, + { + "epoch": 1.26, + "learning_rate": 3.119031507703491e-05, + "loss": 0.942, + "step": 2280 + }, + { + "epoch": 1.26, + "learning_rate": 3.112014355127129e-05, + "loss": 0.9132, + "step": 2285 + }, + { + "epoch": 1.27, + "learning_rate": 3.104992069663835e-05, + "loss": 0.9335, + "step": 2290 + }, + { + "epoch": 1.27, + "learning_rate": 3.0979647102086273e-05, + "loss": 0.9403, + "step": 2295 + }, + { + "epoch": 1.27, + "learning_rate": 3.090932335699081e-05, + "loss": 0.9246, + "step": 2300 + }, + { + "epoch": 1.27, + "learning_rate": 3.083895005114831e-05, + "loss": 0.912, + "step": 2305 + }, + { + "epoch": 1.28, + "learning_rate": 3.076852777477079e-05, + "loss": 0.9334, + "step": 2310 + }, + { + "epoch": 1.28, + "learning_rate": 3.069805711848096e-05, + "loss": 0.933, + "step": 2315 + }, + { + "epoch": 1.28, + "learning_rate": 3.062753867330729e-05, + "loss": 0.9348, + "step": 2320 + }, + { + "epoch": 1.29, + "learning_rate": 3.055697303067905e-05, + "loss": 0.9997, + "step": 2325 + }, + { + "epoch": 1.29, + "learning_rate": 3.048636078242137e-05, + "loss": 0.9196, + "step": 2330 + }, + { + "epoch": 1.29, + "learning_rate": 3.0415702520750235e-05, + "loss": 0.9735, + "step": 2335 + }, + { + "epoch": 1.29, + "learning_rate": 3.0344998838267525e-05, + "loss": 0.9395, + "step": 2340 + }, + { + "epoch": 1.3, + "learning_rate": 3.0274250327956093e-05, + "loss": 0.9455, + "step": 2345 + }, + { + "epoch": 1.3, + "learning_rate": 3.020345758317474e-05, + "loss": 0.968, + "step": 2350 + }, + { + "epoch": 1.3, + "learning_rate": 3.0132621197653245e-05, + "loss": 0.9403, + "step": 2355 + }, + { + "epoch": 1.3, + "learning_rate": 3.0061741765487418e-05, + "loss": 0.9267, + "step": 2360 + }, + { + "epoch": 1.31, + "learning_rate": 2.9990819881134073e-05, + "loss": 0.9734, + "step": 2365 + }, + { + "epoch": 1.31, + "learning_rate": 2.9919856139406093e-05, + "loss": 0.949, + "step": 2370 + }, + { + "epoch": 1.31, + "learning_rate": 2.9848851135467386e-05, + "loss": 0.9464, + "step": 2375 + }, + { + "epoch": 1.32, + "learning_rate": 2.977780546482794e-05, + "loss": 0.9709, + "step": 2380 + }, + { + "epoch": 1.32, + "learning_rate": 2.9706719723338795e-05, + "loss": 0.9202, + "step": 2385 + }, + { + "epoch": 1.32, + "learning_rate": 2.9635594507187074e-05, + "loss": 0.9625, + "step": 2390 + }, + { + "epoch": 1.32, + "learning_rate": 2.956443041289096e-05, + "loss": 0.9307, + "step": 2395 + }, + { + "epoch": 1.33, + "learning_rate": 2.9493228037294702e-05, + "loss": 0.907, + "step": 2400 + }, + { + "epoch": 1.33, + "learning_rate": 2.9421987977563613e-05, + "loss": 0.9487, + "step": 2405 + }, + { + "epoch": 1.33, + "learning_rate": 2.935071083117907e-05, + "loss": 0.9259, + "step": 2410 + }, + { + "epoch": 1.34, + "learning_rate": 2.9279397195933457e-05, + "loss": 0.9357, + "step": 2415 + }, + { + "epoch": 1.34, + "learning_rate": 2.920804766992521e-05, + "loss": 0.9149, + "step": 2420 + }, + { + "epoch": 1.34, + "learning_rate": 2.9136662851553787e-05, + "loss": 0.9664, + "step": 2425 + }, + { + "epoch": 1.34, + "learning_rate": 2.906524333951461e-05, + "loss": 0.973, + "step": 2430 + }, + { + "epoch": 1.35, + "learning_rate": 2.899378973279409e-05, + "loss": 0.9797, + "step": 2435 + }, + { + "epoch": 1.35, + "learning_rate": 2.892230263066459e-05, + "loss": 0.9604, + "step": 2440 + }, + { + "epoch": 1.35, + "learning_rate": 2.885078263267938e-05, + "loss": 0.9147, + "step": 2445 + }, + { + "epoch": 1.35, + "learning_rate": 2.8779230338667634e-05, + "loss": 0.9827, + "step": 2450 + }, + { + "epoch": 1.36, + "learning_rate": 2.870764634872939e-05, + "loss": 0.9362, + "step": 2455 + }, + { + "epoch": 1.36, + "learning_rate": 2.86360312632305e-05, + "loss": 0.9373, + "step": 2460 + }, + { + "epoch": 1.36, + "learning_rate": 2.8564385682797622e-05, + "loss": 0.955, + "step": 2465 + }, + { + "epoch": 1.37, + "learning_rate": 2.8492710208313177e-05, + "loss": 0.9108, + "step": 2470 + }, + { + "epoch": 1.37, + "learning_rate": 2.8421005440910303e-05, + "loss": 0.9259, + "step": 2475 + }, + { + "epoch": 1.37, + "learning_rate": 2.8349271981967797e-05, + "loss": 0.9333, + "step": 2480 + }, + { + "epoch": 1.37, + "learning_rate": 2.8277510433105102e-05, + "loss": 0.9437, + "step": 2485 + }, + { + "epoch": 1.38, + "learning_rate": 2.820572139617725e-05, + "loss": 0.9189, + "step": 2490 + }, + { + "epoch": 1.38, + "learning_rate": 2.8133905473269802e-05, + "loss": 0.9522, + "step": 2495 + }, + { + "epoch": 1.38, + "learning_rate": 2.8062063266693818e-05, + "loss": 0.9352, + "step": 2500 + }, + { + "epoch": 1.39, + "learning_rate": 2.7990195378980784e-05, + "loss": 0.9232, + "step": 2505 + }, + { + "epoch": 1.39, + "learning_rate": 2.7918302412877583e-05, + "loss": 0.8807, + "step": 2510 + }, + { + "epoch": 1.39, + "learning_rate": 2.7846384971341427e-05, + "loss": 0.9231, + "step": 2515 + }, + { + "epoch": 1.39, + "learning_rate": 2.7774443657534788e-05, + "loss": 0.9253, + "step": 2520 + }, + { + "epoch": 1.4, + "learning_rate": 2.770247907482036e-05, + "loss": 0.9646, + "step": 2525 + }, + { + "epoch": 1.4, + "learning_rate": 2.763049182675599e-05, + "loss": 0.9575, + "step": 2530 + }, + { + "epoch": 1.4, + "learning_rate": 2.7558482517089617e-05, + "loss": 0.9234, + "step": 2535 + }, + { + "epoch": 1.4, + "learning_rate": 2.748645174975421e-05, + "loss": 0.9215, + "step": 2540 + }, + { + "epoch": 1.41, + "learning_rate": 2.74144001288627e-05, + "loss": 0.959, + "step": 2545 + }, + { + "epoch": 1.41, + "learning_rate": 2.7342328258702894e-05, + "loss": 0.9509, + "step": 2550 + }, + { + "epoch": 1.41, + "learning_rate": 2.727023674373246e-05, + "loss": 0.8989, + "step": 2555 + }, + { + "epoch": 1.42, + "learning_rate": 2.7198126188573807e-05, + "loss": 0.9653, + "step": 2560 + }, + { + "epoch": 1.42, + "learning_rate": 2.7125997198009028e-05, + "loss": 0.9046, + "step": 2565 + }, + { + "epoch": 1.42, + "learning_rate": 2.7053850376974848e-05, + "loss": 0.9318, + "step": 2570 + }, + { + "epoch": 1.42, + "learning_rate": 2.6981686330557516e-05, + "loss": 0.9292, + "step": 2575 + }, + { + "epoch": 1.43, + "learning_rate": 2.6909505663987756e-05, + "loss": 0.99, + "step": 2580 + }, + { + "epoch": 1.43, + "learning_rate": 2.6837308982635678e-05, + "loss": 0.9737, + "step": 2585 + }, + { + "epoch": 1.43, + "learning_rate": 2.6765096892005726e-05, + "loss": 0.9649, + "step": 2590 + }, + { + "epoch": 1.43, + "learning_rate": 2.6692869997731545e-05, + "loss": 0.9687, + "step": 2595 + }, + { + "epoch": 1.44, + "learning_rate": 2.6620628905570964e-05, + "loss": 0.9708, + "step": 2600 + }, + { + "epoch": 1.44, + "learning_rate": 2.6548374221400884e-05, + "loss": 0.9498, + "step": 2605 + }, + { + "epoch": 1.44, + "learning_rate": 2.6476106551212188e-05, + "loss": 0.9512, + "step": 2610 + }, + { + "epoch": 1.45, + "learning_rate": 2.6403826501104682e-05, + "loss": 0.964, + "step": 2615 + }, + { + "epoch": 1.45, + "learning_rate": 2.6331534677281998e-05, + "loss": 0.9321, + "step": 2620 + }, + { + "epoch": 1.45, + "learning_rate": 2.6259231686046508e-05, + "loss": 0.9032, + "step": 2625 + }, + { + "epoch": 1.45, + "learning_rate": 2.6186918133794252e-05, + "loss": 0.9543, + "step": 2630 + }, + { + "epoch": 1.46, + "learning_rate": 2.6114594627009847e-05, + "loss": 0.9355, + "step": 2635 + }, + { + "epoch": 1.46, + "learning_rate": 2.604226177226137e-05, + "loss": 0.9111, + "step": 2640 + }, + { + "epoch": 1.46, + "learning_rate": 2.596992017619534e-05, + "loss": 0.9896, + "step": 2645 + }, + { + "epoch": 1.47, + "learning_rate": 2.589757044553155e-05, + "loss": 0.9715, + "step": 2650 + }, + { + "epoch": 1.47, + "learning_rate": 2.5825213187058045e-05, + "loss": 0.8911, + "step": 2655 + }, + { + "epoch": 1.47, + "learning_rate": 2.5752849007625986e-05, + "loss": 0.9446, + "step": 2660 + }, + { + "epoch": 1.47, + "learning_rate": 2.568047851414459e-05, + "loss": 0.9453, + "step": 2665 + }, + { + "epoch": 1.48, + "learning_rate": 2.5608102313576027e-05, + "loss": 0.9244, + "step": 2670 + }, + { + "epoch": 1.48, + "learning_rate": 2.553572101293033e-05, + "loss": 0.9478, + "step": 2675 + }, + { + "epoch": 1.48, + "learning_rate": 2.546333521926031e-05, + "loss": 0.9708, + "step": 2680 + }, + { + "epoch": 1.48, + "learning_rate": 2.5390945539656445e-05, + "loss": 0.9266, + "step": 2685 + }, + { + "epoch": 1.49, + "learning_rate": 2.5318552581241822e-05, + "loss": 0.9199, + "step": 2690 + }, + { + "epoch": 1.49, + "learning_rate": 2.524615695116702e-05, + "loss": 0.9666, + "step": 2695 + }, + { + "epoch": 1.49, + "learning_rate": 2.5173759256605027e-05, + "loss": 0.9247, + "step": 2700 + }, + { + "epoch": 1.5, + "learning_rate": 2.510136010474614e-05, + "loss": 0.9534, + "step": 2705 + }, + { + "epoch": 1.5, + "learning_rate": 2.5028960102792887e-05, + "loss": 0.9502, + "step": 2710 + }, + { + "epoch": 1.5, + "learning_rate": 2.4971039897207112e-05, + "loss": 0.9331, + "step": 2715 + }, + { + "epoch": 1.5, + "learning_rate": 2.4898639895253865e-05, + "loss": 0.9806, + "step": 2720 + }, + { + "epoch": 1.51, + "learning_rate": 2.4826240743394982e-05, + "loss": 0.9525, + "step": 2725 + }, + { + "epoch": 1.51, + "learning_rate": 2.4753843048832985e-05, + "loss": 0.937, + "step": 2730 + }, + { + "epoch": 1.51, + "learning_rate": 2.4681447418758187e-05, + "loss": 0.935, + "step": 2735 + }, + { + "epoch": 1.52, + "learning_rate": 2.460905446034356e-05, + "loss": 0.9662, + "step": 2740 + }, + { + "epoch": 1.52, + "learning_rate": 2.45366647807397e-05, + "loss": 0.9249, + "step": 2745 + }, + { + "epoch": 1.52, + "learning_rate": 2.446427898706967e-05, + "loss": 0.9673, + "step": 2750 + }, + { + "epoch": 1.52, + "learning_rate": 2.439189768642398e-05, + "loss": 0.9316, + "step": 2755 + }, + { + "epoch": 1.53, + "learning_rate": 2.431952148585541e-05, + "loss": 0.9297, + "step": 2760 + }, + { + "epoch": 1.53, + "learning_rate": 2.424715099237402e-05, + "loss": 0.9278, + "step": 2765 + }, + { + "epoch": 1.53, + "learning_rate": 2.4174786812941968e-05, + "loss": 0.8954, + "step": 2770 + }, + { + "epoch": 1.53, + "learning_rate": 2.4102429554468456e-05, + "loss": 0.9586, + "step": 2775 + }, + { + "epoch": 1.54, + "learning_rate": 2.4030079823804673e-05, + "loss": 0.9119, + "step": 2780 + }, + { + "epoch": 1.54, + "learning_rate": 2.395773822773863e-05, + "loss": 0.8949, + "step": 2785 + }, + { + "epoch": 1.54, + "learning_rate": 2.3885405372990166e-05, + "loss": 0.9506, + "step": 2790 + }, + { + "epoch": 1.55, + "learning_rate": 2.3813081866205754e-05, + "loss": 0.9087, + "step": 2795 + }, + { + "epoch": 1.55, + "learning_rate": 2.3740768313953494e-05, + "loss": 0.923, + "step": 2800 + }, + { + "epoch": 1.55, + "learning_rate": 2.3668465322718004e-05, + "loss": 1.0212, + "step": 2805 + }, + { + "epoch": 1.55, + "learning_rate": 2.359617349889532e-05, + "loss": 0.9182, + "step": 2810 + }, + { + "epoch": 1.56, + "learning_rate": 2.3523893448787818e-05, + "loss": 0.9316, + "step": 2815 + }, + { + "epoch": 1.56, + "learning_rate": 2.3451625778599122e-05, + "loss": 0.94, + "step": 2820 + }, + { + "epoch": 1.56, + "learning_rate": 2.3379371094429038e-05, + "loss": 0.9181, + "step": 2825 + }, + { + "epoch": 1.56, + "learning_rate": 2.3307130002268457e-05, + "loss": 0.937, + "step": 2830 + }, + { + "epoch": 1.57, + "learning_rate": 2.3234903107994287e-05, + "loss": 0.9026, + "step": 2835 + }, + { + "epoch": 1.57, + "learning_rate": 2.3162691017364317e-05, + "loss": 0.954, + "step": 2840 + }, + { + "epoch": 1.57, + "learning_rate": 2.3090494336012253e-05, + "loss": 0.9661, + "step": 2845 + }, + { + "epoch": 1.58, + "learning_rate": 2.3018313669442483e-05, + "loss": 0.9127, + "step": 2850 + }, + { + "epoch": 1.58, + "learning_rate": 2.2946149623025158e-05, + "loss": 0.9317, + "step": 2855 + }, + { + "epoch": 1.58, + "learning_rate": 2.2874002801990978e-05, + "loss": 0.9856, + "step": 2860 + }, + { + "epoch": 1.58, + "learning_rate": 2.28018738114262e-05, + "loss": 1.0021, + "step": 2865 + }, + { + "epoch": 1.59, + "learning_rate": 2.272976325626755e-05, + "loss": 0.9655, + "step": 2870 + }, + { + "epoch": 1.59, + "learning_rate": 2.265767174129711e-05, + "loss": 0.9619, + "step": 2875 + }, + { + "epoch": 1.59, + "learning_rate": 2.2585599871137313e-05, + "loss": 0.9383, + "step": 2880 + }, + { + "epoch": 1.6, + "learning_rate": 2.251354825024579e-05, + "loss": 0.9332, + "step": 2885 + }, + { + "epoch": 1.6, + "learning_rate": 2.244151748291039e-05, + "loss": 0.9544, + "step": 2890 + }, + { + "epoch": 1.6, + "learning_rate": 2.236950817324401e-05, + "loss": 0.9343, + "step": 2895 + }, + { + "epoch": 1.6, + "learning_rate": 2.2297520925179647e-05, + "loss": 0.9189, + "step": 2900 + }, + { + "epoch": 1.61, + "learning_rate": 2.222555634246521e-05, + "loss": 0.9341, + "step": 2905 + }, + { + "epoch": 1.61, + "learning_rate": 2.215361502865858e-05, + "loss": 0.9567, + "step": 2910 + }, + { + "epoch": 1.61, + "learning_rate": 2.2081697587122423e-05, + "loss": 0.9047, + "step": 2915 + }, + { + "epoch": 1.61, + "learning_rate": 2.200980462101922e-05, + "loss": 0.9126, + "step": 2920 + }, + { + "epoch": 1.62, + "learning_rate": 2.1937936733306195e-05, + "loss": 0.9523, + "step": 2925 + }, + { + "epoch": 1.62, + "learning_rate": 2.18660945267302e-05, + "loss": 0.8802, + "step": 2930 + }, + { + "epoch": 1.62, + "learning_rate": 2.179427860382276e-05, + "loss": 0.9197, + "step": 2935 + }, + { + "epoch": 1.63, + "learning_rate": 2.1722489566894903e-05, + "loss": 0.9255, + "step": 2940 + }, + { + "epoch": 1.63, + "learning_rate": 2.1650728018032206e-05, + "loss": 0.8921, + "step": 2945 + }, + { + "epoch": 1.63, + "learning_rate": 2.15789945590897e-05, + "loss": 0.9607, + "step": 2950 + }, + { + "epoch": 1.63, + "learning_rate": 2.150728979168683e-05, + "loss": 0.9755, + "step": 2955 + }, + { + "epoch": 1.64, + "learning_rate": 2.1435614317202384e-05, + "loss": 0.9943, + "step": 2960 + }, + { + "epoch": 1.64, + "learning_rate": 2.1363968736769508e-05, + "loss": 0.9463, + "step": 2965 + }, + { + "epoch": 1.64, + "learning_rate": 2.1292353651270617e-05, + "loss": 0.9107, + "step": 2970 + }, + { + "epoch": 1.65, + "learning_rate": 2.1220769661332365e-05, + "loss": 0.9311, + "step": 2975 + }, + { + "epoch": 1.65, + "learning_rate": 2.1149217367320622e-05, + "loss": 0.9459, + "step": 2980 + }, + { + "epoch": 1.65, + "learning_rate": 2.107769736933541e-05, + "loss": 0.9439, + "step": 2985 + }, + { + "epoch": 1.65, + "learning_rate": 2.100621026720591e-05, + "loss": 0.9719, + "step": 2990 + }, + { + "epoch": 1.66, + "learning_rate": 2.093475666048539e-05, + "loss": 0.9569, + "step": 2995 + }, + { + "epoch": 1.66, + "learning_rate": 2.0863337148446222e-05, + "loss": 0.9308, + "step": 3000 + }, + { + "epoch": 1.66, + "learning_rate": 2.07919523300748e-05, + "loss": 0.9269, + "step": 3005 + }, + { + "epoch": 1.66, + "learning_rate": 2.0720602804066552e-05, + "loss": 0.9358, + "step": 3010 + }, + { + "epoch": 1.67, + "learning_rate": 2.0649289168820943e-05, + "loss": 0.9291, + "step": 3015 + }, + { + "epoch": 1.67, + "learning_rate": 2.0578012022436386e-05, + "loss": 0.969, + "step": 3020 + }, + { + "epoch": 1.67, + "learning_rate": 2.0506771962705304e-05, + "loss": 0.9319, + "step": 3025 + }, + { + "epoch": 1.68, + "learning_rate": 2.0435569587109042e-05, + "loss": 0.9574, + "step": 3030 + }, + { + "epoch": 1.68, + "learning_rate": 2.036440549281293e-05, + "loss": 0.9654, + "step": 3035 + }, + { + "epoch": 1.68, + "learning_rate": 2.0293280276661204e-05, + "loss": 0.9742, + "step": 3040 + }, + { + "epoch": 1.68, + "learning_rate": 2.0222194535172067e-05, + "loss": 0.9014, + "step": 3045 + }, + { + "epoch": 1.69, + "learning_rate": 2.0151148864532623e-05, + "loss": 0.9246, + "step": 3050 + }, + { + "epoch": 1.69, + "learning_rate": 2.0080143860593913e-05, + "loss": 0.9522, + "step": 3055 + }, + { + "epoch": 1.69, + "learning_rate": 2.0009180118865933e-05, + "loss": 0.9567, + "step": 3060 + }, + { + "epoch": 1.69, + "learning_rate": 1.9938258234512588e-05, + "loss": 0.983, + "step": 3065 + }, + { + "epoch": 1.7, + "learning_rate": 1.9867378802346764e-05, + "loss": 0.8722, + "step": 3070 + }, + { + "epoch": 1.7, + "learning_rate": 1.979654241682527e-05, + "loss": 0.9122, + "step": 3075 + }, + { + "epoch": 1.7, + "learning_rate": 1.972574967204391e-05, + "loss": 0.9362, + "step": 3080 + }, + { + "epoch": 1.71, + "learning_rate": 1.9655001161732478e-05, + "loss": 0.8944, + "step": 3085 + }, + { + "epoch": 1.71, + "learning_rate": 1.9584297479249774e-05, + "loss": 0.9329, + "step": 3090 + }, + { + "epoch": 1.71, + "learning_rate": 1.9513639217578636e-05, + "loss": 0.9707, + "step": 3095 + }, + { + "epoch": 1.71, + "learning_rate": 1.9443026969320955e-05, + "loss": 0.9367, + "step": 3100 + }, + { + "epoch": 1.72, + "learning_rate": 1.937246132669272e-05, + "loss": 0.896, + "step": 3105 + }, + { + "epoch": 1.72, + "learning_rate": 1.9301942881519047e-05, + "loss": 0.97, + "step": 3110 + }, + { + "epoch": 1.72, + "learning_rate": 1.9231472225229216e-05, + "loss": 0.9638, + "step": 3115 + }, + { + "epoch": 1.73, + "learning_rate": 1.9161049948851684e-05, + "loss": 0.9561, + "step": 3120 + }, + { + "epoch": 1.73, + "learning_rate": 1.9090676643009193e-05, + "loss": 0.9734, + "step": 3125 + }, + { + "epoch": 1.73, + "learning_rate": 1.902035289791373e-05, + "loss": 0.9651, + "step": 3130 + }, + { + "epoch": 1.73, + "learning_rate": 1.8950079303361658e-05, + "loss": 0.9489, + "step": 3135 + }, + { + "epoch": 1.74, + "learning_rate": 1.8879856448728723e-05, + "loss": 0.9893, + "step": 3140 + }, + { + "epoch": 1.74, + "learning_rate": 1.8809684922965097e-05, + "loss": 0.9549, + "step": 3145 + }, + { + "epoch": 1.74, + "learning_rate": 1.8739565314590507e-05, + "loss": 0.9196, + "step": 3150 + }, + { + "epoch": 1.74, + "learning_rate": 1.8669498211689216e-05, + "loss": 0.9568, + "step": 3155 + }, + { + "epoch": 1.75, + "learning_rate": 1.859948420190517e-05, + "loss": 0.904, + "step": 3160 + }, + { + "epoch": 1.75, + "learning_rate": 1.852952387243698e-05, + "loss": 0.9375, + "step": 3165 + }, + { + "epoch": 1.75, + "learning_rate": 1.8459617810033096e-05, + "loss": 0.9048, + "step": 3170 + }, + { + "epoch": 1.76, + "learning_rate": 1.83897666009868e-05, + "loss": 0.9514, + "step": 3175 + }, + { + "epoch": 1.76, + "learning_rate": 1.8319970831131363e-05, + "loss": 0.8855, + "step": 3180 + }, + { + "epoch": 1.76, + "learning_rate": 1.825023108583505e-05, + "loss": 0.9119, + "step": 3185 + }, + { + "epoch": 1.76, + "learning_rate": 1.818054794999628e-05, + "loss": 0.9733, + "step": 3190 + }, + { + "epoch": 1.77, + "learning_rate": 1.8110922008038705e-05, + "loss": 0.9597, + "step": 3195 + }, + { + "epoch": 1.77, + "learning_rate": 1.8041353843906275e-05, + "loss": 0.8952, + "step": 3200 + }, + { + "epoch": 1.77, + "learning_rate": 1.797184404105839e-05, + "loss": 0.8934, + "step": 3205 + }, + { + "epoch": 1.77, + "learning_rate": 1.7902393182464955e-05, + "loss": 0.9299, + "step": 3210 + }, + { + "epoch": 1.78, + "learning_rate": 1.7833001850601544e-05, + "loss": 0.9247, + "step": 3215 + }, + { + "epoch": 1.78, + "learning_rate": 1.7763670627444465e-05, + "loss": 0.9672, + "step": 3220 + }, + { + "epoch": 1.78, + "learning_rate": 1.7694400094465913e-05, + "loss": 0.9451, + "step": 3225 + }, + { + "epoch": 1.79, + "learning_rate": 1.7625190832629085e-05, + "loss": 0.9294, + "step": 3230 + }, + { + "epoch": 1.79, + "learning_rate": 1.7556043422383293e-05, + "loss": 0.9145, + "step": 3235 + }, + { + "epoch": 1.79, + "learning_rate": 1.7486958443659112e-05, + "loss": 0.9508, + "step": 3240 + }, + { + "epoch": 1.79, + "learning_rate": 1.7417936475863526e-05, + "loss": 0.8725, + "step": 3245 + }, + { + "epoch": 1.8, + "learning_rate": 1.7348978097875036e-05, + "loss": 0.9195, + "step": 3250 + }, + { + "epoch": 1.8, + "learning_rate": 1.728008388803883e-05, + "loss": 0.933, + "step": 3255 + }, + { + "epoch": 1.8, + "learning_rate": 1.7211254424161933e-05, + "loss": 0.9747, + "step": 3260 + }, + { + "epoch": 1.81, + "learning_rate": 1.7142490283508324e-05, + "loss": 0.9168, + "step": 3265 + }, + { + "epoch": 1.81, + "learning_rate": 1.707379204279418e-05, + "loss": 0.9844, + "step": 3270 + }, + { + "epoch": 1.81, + "learning_rate": 1.700516027818293e-05, + "loss": 0.9071, + "step": 3275 + }, + { + "epoch": 1.81, + "learning_rate": 1.6936595565280488e-05, + "loss": 0.9311, + "step": 3280 + }, + { + "epoch": 1.82, + "learning_rate": 1.686809847913045e-05, + "loss": 0.9419, + "step": 3285 + }, + { + "epoch": 1.82, + "learning_rate": 1.679966959420918e-05, + "loss": 0.9679, + "step": 3290 + }, + { + "epoch": 1.82, + "learning_rate": 1.67313094844211e-05, + "loss": 0.9601, + "step": 3295 + }, + { + "epoch": 1.82, + "learning_rate": 1.6663018723093774e-05, + "loss": 0.9022, + "step": 3300 + }, + { + "epoch": 1.83, + "learning_rate": 1.6594797882973196e-05, + "loss": 0.8925, + "step": 3305 + }, + { + "epoch": 1.83, + "learning_rate": 1.6526647536218894e-05, + "loss": 0.9238, + "step": 3310 + }, + { + "epoch": 1.83, + "learning_rate": 1.6458568254399225e-05, + "loss": 0.9632, + "step": 3315 + }, + { + "epoch": 1.84, + "learning_rate": 1.6390560608486496e-05, + "loss": 0.9164, + "step": 3320 + }, + { + "epoch": 1.84, + "learning_rate": 1.6322625168852217e-05, + "loss": 0.9505, + "step": 3325 + }, + { + "epoch": 1.84, + "learning_rate": 1.6254762505262338e-05, + "loss": 0.9622, + "step": 3330 + }, + { + "epoch": 1.84, + "learning_rate": 1.618697318687241e-05, + "loss": 0.9204, + "step": 3335 + }, + { + "epoch": 1.85, + "learning_rate": 1.6119257782222895e-05, + "loss": 0.9504, + "step": 3340 + }, + { + "epoch": 1.85, + "learning_rate": 1.6051616859234285e-05, + "loss": 0.9384, + "step": 3345 + }, + { + "epoch": 1.85, + "learning_rate": 1.5984050985202474e-05, + "loss": 0.9374, + "step": 3350 + }, + { + "epoch": 1.86, + "learning_rate": 1.591656072679387e-05, + "loss": 0.9937, + "step": 3355 + }, + { + "epoch": 1.86, + "learning_rate": 1.5849146650040737e-05, + "loss": 0.9587, + "step": 3360 + }, + { + "epoch": 1.86, + "learning_rate": 1.5781809320336412e-05, + "loss": 0.9312, + "step": 3365 + }, + { + "epoch": 1.86, + "learning_rate": 1.5714549302430536e-05, + "loss": 0.9343, + "step": 3370 + }, + { + "epoch": 1.87, + "learning_rate": 1.5647367160424393e-05, + "loss": 0.9419, + "step": 3375 + }, + { + "epoch": 1.87, + "learning_rate": 1.558026345776608e-05, + "loss": 0.9371, + "step": 3380 + }, + { + "epoch": 1.87, + "learning_rate": 1.551323875724587e-05, + "loss": 0.913, + "step": 3385 + }, + { + "epoch": 1.87, + "learning_rate": 1.5446293620991437e-05, + "loss": 0.9195, + "step": 3390 + }, + { + "epoch": 1.88, + "learning_rate": 1.5379428610463174e-05, + "loss": 0.9164, + "step": 3395 + }, + { + "epoch": 1.88, + "learning_rate": 1.531264428644945e-05, + "loss": 0.9256, + "step": 3400 + }, + { + "epoch": 1.88, + "learning_rate": 1.5245941209061953e-05, + "loss": 0.9658, + "step": 3405 + }, + { + "epoch": 1.89, + "learning_rate": 1.517931993773094e-05, + "loss": 0.9397, + "step": 3410 + }, + { + "epoch": 1.89, + "learning_rate": 1.5112781031200569e-05, + "loss": 0.915, + "step": 3415 + }, + { + "epoch": 1.89, + "learning_rate": 1.5046325047524251e-05, + "loss": 0.9116, + "step": 3420 + }, + { + "epoch": 1.89, + "learning_rate": 1.4979952544059888e-05, + "loss": 0.9192, + "step": 3425 + }, + { + "epoch": 1.9, + "learning_rate": 1.4913664077465289e-05, + "loss": 0.924, + "step": 3430 + }, + { + "epoch": 1.9, + "learning_rate": 1.4847460203693408e-05, + "loss": 0.9579, + "step": 3435 + }, + { + "epoch": 1.9, + "learning_rate": 1.4781341477987776e-05, + "loss": 0.9816, + "step": 3440 + }, + { + "epoch": 1.9, + "learning_rate": 1.4715308454877758e-05, + "loss": 0.9412, + "step": 3445 + }, + { + "epoch": 1.91, + "learning_rate": 1.4649361688173979e-05, + "loss": 0.9413, + "step": 3450 + }, + { + "epoch": 1.91, + "learning_rate": 1.458350173096361e-05, + "loss": 0.9425, + "step": 3455 + }, + { + "epoch": 1.91, + "learning_rate": 1.4517729135605795e-05, + "loss": 0.9132, + "step": 3460 + }, + { + "epoch": 1.92, + "learning_rate": 1.4452044453726942e-05, + "loss": 0.9455, + "step": 3465 + }, + { + "epoch": 1.92, + "learning_rate": 1.4386448236216174e-05, + "loss": 0.9543, + "step": 3470 + }, + { + "epoch": 1.92, + "learning_rate": 1.4320941033220667e-05, + "loss": 0.9118, + "step": 3475 + }, + { + "epoch": 1.92, + "learning_rate": 1.4255523394141041e-05, + "loss": 0.9425, + "step": 3480 + }, + { + "epoch": 1.93, + "learning_rate": 1.4190195867626749e-05, + "loss": 0.8958, + "step": 3485 + }, + { + "epoch": 1.93, + "learning_rate": 1.4124959001571497e-05, + "loss": 0.944, + "step": 3490 + }, + { + "epoch": 1.93, + "learning_rate": 1.4059813343108616e-05, + "loss": 0.9611, + "step": 3495 + }, + { + "epoch": 1.94, + "learning_rate": 1.3994759438606501e-05, + "loss": 0.9448, + "step": 3500 + }, + { + "epoch": 1.94, + "learning_rate": 1.3929797833664013e-05, + "loss": 0.9659, + "step": 3505 + }, + { + "epoch": 1.94, + "learning_rate": 1.3864929073105922e-05, + "loss": 0.9178, + "step": 3510 + }, + { + "epoch": 1.94, + "learning_rate": 1.3800153700978282e-05, + "loss": 0.8965, + "step": 3515 + }, + { + "epoch": 1.95, + "learning_rate": 1.373547226054398e-05, + "loss": 0.9198, + "step": 3520 + }, + { + "epoch": 1.95, + "learning_rate": 1.367088529427803e-05, + "loss": 0.9398, + "step": 3525 + }, + { + "epoch": 1.95, + "learning_rate": 1.3606393343863182e-05, + "loss": 0.9423, + "step": 3530 + }, + { + "epoch": 1.95, + "learning_rate": 1.3541996950185227e-05, + "loss": 0.9592, + "step": 3535 + }, + { + "epoch": 1.96, + "learning_rate": 1.3477696653328598e-05, + "loss": 0.9489, + "step": 3540 + }, + { + "epoch": 1.96, + "learning_rate": 1.3413492992571713e-05, + "loss": 0.963, + "step": 3545 + }, + { + "epoch": 1.96, + "learning_rate": 1.3349386506382586e-05, + "loss": 0.9449, + "step": 3550 + }, + { + "epoch": 1.97, + "learning_rate": 1.3285377732414172e-05, + "loss": 0.9043, + "step": 3555 + }, + { + "epoch": 1.97, + "learning_rate": 1.3221467207499972e-05, + "loss": 0.9362, + "step": 3560 + }, + { + "epoch": 1.97, + "learning_rate": 1.3157655467649463e-05, + "loss": 0.945, + "step": 3565 + }, + { + "epoch": 1.97, + "learning_rate": 1.3093943048043634e-05, + "loss": 0.8693, + "step": 3570 + }, + { + "epoch": 1.98, + "learning_rate": 1.3030330483030479e-05, + "loss": 0.9659, + "step": 3575 + }, + { + "epoch": 1.98, + "learning_rate": 1.2966818306120535e-05, + "loss": 0.9036, + "step": 3580 + }, + { + "epoch": 1.98, + "learning_rate": 1.2903407049982386e-05, + "loss": 0.9282, + "step": 3585 + }, + { + "epoch": 1.99, + "learning_rate": 1.2840097246438215e-05, + "loss": 0.8747, + "step": 3590 + }, + { + "epoch": 1.99, + "learning_rate": 1.277688942645934e-05, + "loss": 0.936, + "step": 3595 + }, + { + "epoch": 1.99, + "learning_rate": 1.2713784120161725e-05, + "loss": 0.8945, + "step": 3600 + }, + { + "epoch": 1.99, + "learning_rate": 1.2650781856801598e-05, + "loss": 0.9273, + "step": 3605 + }, + { + "epoch": 2.0, + "learning_rate": 1.258788316477097e-05, + "loss": 0.9632, + "step": 3610 + }, + { + "epoch": 2.0, + "learning_rate": 1.2525088571593202e-05, + "loss": 0.8641, + "step": 3615 + }, + { + "epoch": 2.0, + "learning_rate": 1.2462398603918607e-05, + "loss": 0.9314, + "step": 3620 + }, + { + "epoch": 2.0, + "learning_rate": 1.2399813787520006e-05, + "loss": 0.9169, + "step": 3625 + }, + { + "epoch": 2.01, + "learning_rate": 1.2337334647288334e-05, + "loss": 0.9089, + "step": 3630 + }, + { + "epoch": 2.01, + "learning_rate": 1.2274961707228228e-05, + "loss": 0.9039, + "step": 3635 + }, + { + "epoch": 2.01, + "learning_rate": 1.2212695490453646e-05, + "loss": 0.9378, + "step": 3640 + }, + { + "epoch": 2.02, + "learning_rate": 1.2150536519183475e-05, + "loss": 0.9297, + "step": 3645 + }, + { + "epoch": 2.02, + "learning_rate": 1.2088485314737108e-05, + "loss": 0.9488, + "step": 3650 + }, + { + "epoch": 2.02, + "learning_rate": 1.2026542397530186e-05, + "loss": 0.9625, + "step": 3655 + }, + { + "epoch": 2.02, + "learning_rate": 1.1964708287070073e-05, + "loss": 0.8874, + "step": 3660 + }, + { + "epoch": 2.03, + "learning_rate": 1.1902983501951666e-05, + "loss": 0.9224, + "step": 3665 + }, + { + "epoch": 2.03, + "learning_rate": 1.1841368559852892e-05, + "loss": 0.9442, + "step": 3670 + }, + { + "epoch": 2.03, + "learning_rate": 1.17798639775305e-05, + "loss": 0.8688, + "step": 3675 + }, + { + "epoch": 2.03, + "learning_rate": 1.1718470270815608e-05, + "loss": 0.8912, + "step": 3680 + }, + { + "epoch": 2.04, + "learning_rate": 1.1657187954609496e-05, + "loss": 0.9131, + "step": 3685 + }, + { + "epoch": 2.04, + "learning_rate": 1.1596017542879168e-05, + "loss": 0.9827, + "step": 3690 + }, + { + "epoch": 2.04, + "learning_rate": 1.1534959548653132e-05, + "loss": 0.8588, + "step": 3695 + }, + { + "epoch": 2.05, + "learning_rate": 1.147401448401706e-05, + "loss": 0.9359, + "step": 3700 + }, + { + "epoch": 2.05, + "learning_rate": 1.1413182860109491e-05, + "loss": 0.9375, + "step": 3705 + }, + { + "epoch": 2.05, + "learning_rate": 1.1352465187117562e-05, + "loss": 0.9242, + "step": 3710 + }, + { + "epoch": 2.05, + "learning_rate": 1.1291861974272703e-05, + "loss": 0.9132, + "step": 3715 + }, + { + "epoch": 2.06, + "learning_rate": 1.1231373729846393e-05, + "loss": 0.9142, + "step": 3720 + }, + { + "epoch": 2.06, + "learning_rate": 1.1171000961145883e-05, + "loss": 0.9132, + "step": 3725 + }, + { + "epoch": 2.06, + "learning_rate": 1.1110744174509952e-05, + "loss": 0.9603, + "step": 3730 + }, + { + "epoch": 2.07, + "learning_rate": 1.1050603875304622e-05, + "loss": 0.9442, + "step": 3735 + }, + { + "epoch": 2.07, + "learning_rate": 1.0990580567918979e-05, + "loss": 0.9392, + "step": 3740 + }, + { + "epoch": 2.07, + "learning_rate": 1.0930674755760908e-05, + "loss": 0.9203, + "step": 3745 + }, + { + "epoch": 2.07, + "learning_rate": 1.0870886941252872e-05, + "loss": 0.9468, + "step": 3750 + }, + { + "epoch": 2.08, + "learning_rate": 1.0811217625827705e-05, + "loss": 0.9605, + "step": 3755 + }, + { + "epoch": 2.08, + "learning_rate": 1.0751667309924399e-05, + "loss": 0.9166, + "step": 3760 + }, + { + "epoch": 2.08, + "learning_rate": 1.0692236492983918e-05, + "loss": 0.939, + "step": 3765 + }, + { + "epoch": 2.08, + "learning_rate": 1.0632925673445001e-05, + "loss": 0.9409, + "step": 3770 + }, + { + "epoch": 2.09, + "learning_rate": 1.0573735348739987e-05, + "loss": 0.9523, + "step": 3775 + }, + { + "epoch": 2.09, + "learning_rate": 1.0514666015290645e-05, + "loss": 0.8867, + "step": 3780 + }, + { + "epoch": 2.09, + "learning_rate": 1.0455718168503967e-05, + "loss": 0.9816, + "step": 3785 + }, + { + "epoch": 2.1, + "learning_rate": 1.0396892302768127e-05, + "loss": 0.9465, + "step": 3790 + }, + { + "epoch": 2.1, + "learning_rate": 1.033818891144817e-05, + "loss": 0.9352, + "step": 3795 + }, + { + "epoch": 2.1, + "learning_rate": 1.0279608486882054e-05, + "loss": 0.8877, + "step": 3800 + }, + { + "epoch": 2.1, + "learning_rate": 1.0221151520376343e-05, + "loss": 0.9098, + "step": 3805 + }, + { + "epoch": 2.11, + "learning_rate": 1.0162818502202251e-05, + "loss": 0.8687, + "step": 3810 + }, + { + "epoch": 2.11, + "learning_rate": 1.0104609921591387e-05, + "loss": 0.9256, + "step": 3815 + }, + { + "epoch": 2.11, + "learning_rate": 1.0046526266731782e-05, + "loss": 0.92, + "step": 3820 + }, + { + "epoch": 2.12, + "learning_rate": 9.988568024763673e-06, + "loss": 0.8813, + "step": 3825 + }, + { + "epoch": 2.12, + "learning_rate": 9.930735681775505e-06, + "loss": 0.891, + "step": 3830 + }, + { + "epoch": 2.12, + "learning_rate": 9.87302972279982e-06, + "loss": 0.9479, + "step": 3835 + }, + { + "epoch": 2.12, + "learning_rate": 9.815450631809191e-06, + "loss": 0.8963, + "step": 3840 + }, + { + "epoch": 2.13, + "learning_rate": 9.757998891712172e-06, + "loss": 0.9178, + "step": 3845 + }, + { + "epoch": 2.13, + "learning_rate": 9.700674984349228e-06, + "loss": 0.9072, + "step": 3850 + }, + { + "epoch": 2.13, + "learning_rate": 9.643479390488717e-06, + "loss": 0.8879, + "step": 3855 + }, + { + "epoch": 2.13, + "learning_rate": 9.586412589822846e-06, + "loss": 0.9484, + "step": 3860 + }, + { + "epoch": 2.14, + "learning_rate": 9.529475060963649e-06, + "loss": 0.9173, + "step": 3865 + }, + { + "epoch": 2.14, + "learning_rate": 9.472667281438982e-06, + "loss": 0.9318, + "step": 3870 + }, + { + "epoch": 2.14, + "learning_rate": 9.415989727688484e-06, + "loss": 0.9242, + "step": 3875 + }, + { + "epoch": 2.15, + "learning_rate": 9.359442875059631e-06, + "loss": 0.9314, + "step": 3880 + }, + { + "epoch": 2.15, + "learning_rate": 9.303027197803726e-06, + "loss": 0.9227, + "step": 3885 + }, + { + "epoch": 2.15, + "learning_rate": 9.246743169071906e-06, + "loss": 0.9253, + "step": 3890 + }, + { + "epoch": 2.15, + "learning_rate": 9.190591260911201e-06, + "loss": 0.9457, + "step": 3895 + }, + { + "epoch": 2.16, + "learning_rate": 9.134571944260554e-06, + "loss": 0.9214, + "step": 3900 + }, + { + "epoch": 2.16, + "learning_rate": 9.078685688946884e-06, + "loss": 0.9534, + "step": 3905 + }, + { + "epoch": 2.16, + "learning_rate": 9.022932963681141e-06, + "loss": 0.9236, + "step": 3910 + }, + { + "epoch": 2.16, + "learning_rate": 8.967314236054384e-06, + "loss": 0.8905, + "step": 3915 + }, + { + "epoch": 2.17, + "learning_rate": 8.911829972533817e-06, + "loss": 0.9319, + "step": 3920 + }, + { + "epoch": 2.17, + "learning_rate": 8.856480638458966e-06, + "loss": 0.8989, + "step": 3925 + }, + { + "epoch": 2.17, + "learning_rate": 8.80126669803766e-06, + "loss": 0.9234, + "step": 3930 + }, + { + "epoch": 2.18, + "learning_rate": 8.746188614342263e-06, + "loss": 0.9619, + "step": 3935 + }, + { + "epoch": 2.18, + "learning_rate": 8.691246849305653e-06, + "loss": 0.9537, + "step": 3940 + }, + { + "epoch": 2.18, + "learning_rate": 8.636441863717499e-06, + "loss": 0.9809, + "step": 3945 + }, + { + "epoch": 2.18, + "learning_rate": 8.581774117220238e-06, + "loss": 0.9219, + "step": 3950 + }, + { + "epoch": 2.19, + "learning_rate": 8.52724406830538e-06, + "loss": 0.9502, + "step": 3955 + }, + { + "epoch": 2.19, + "learning_rate": 8.472852174309514e-06, + "loss": 0.9277, + "step": 3960 + }, + { + "epoch": 2.19, + "learning_rate": 8.418598891410581e-06, + "loss": 0.8978, + "step": 3965 + }, + { + "epoch": 2.2, + "learning_rate": 8.364484674624e-06, + "loss": 0.9026, + "step": 3970 + }, + { + "epoch": 2.2, + "learning_rate": 8.31050997779885e-06, + "loss": 0.9658, + "step": 3975 + }, + { + "epoch": 2.2, + "learning_rate": 8.25667525361409e-06, + "loss": 0.9288, + "step": 3980 + }, + { + "epoch": 2.2, + "learning_rate": 8.202980953574735e-06, + "loss": 0.9009, + "step": 3985 + }, + { + "epoch": 2.21, + "learning_rate": 8.14942752800808e-06, + "loss": 0.934, + "step": 3990 + }, + { + "epoch": 2.21, + "learning_rate": 8.09601542605993e-06, + "loss": 0.9261, + "step": 3995 + }, + { + "epoch": 2.21, + "learning_rate": 8.042745095690826e-06, + "loss": 0.9086, + "step": 4000 + }, + { + "epoch": 2.21, + "learning_rate": 7.989616983672288e-06, + "loss": 0.9397, + "step": 4005 + }, + { + "epoch": 2.22, + "learning_rate": 7.936631535583055e-06, + "loss": 0.8661, + "step": 4010 + }, + { + "epoch": 2.22, + "learning_rate": 7.88378919580538e-06, + "loss": 0.9433, + "step": 4015 + }, + { + "epoch": 2.22, + "learning_rate": 7.83109040752128e-06, + "loss": 0.9473, + "step": 4020 + }, + { + "epoch": 2.23, + "learning_rate": 7.778535612708824e-06, + "loss": 0.9177, + "step": 4025 + }, + { + "epoch": 2.23, + "learning_rate": 7.726125252138417e-06, + "loss": 0.8932, + "step": 4030 + }, + { + "epoch": 2.23, + "learning_rate": 7.673859765369126e-06, + "loss": 0.9187, + "step": 4035 + }, + { + "epoch": 2.23, + "learning_rate": 7.62173959074497e-06, + "loss": 0.9009, + "step": 4040 + }, + { + "epoch": 2.24, + "learning_rate": 7.569765165391257e-06, + "loss": 0.9348, + "step": 4045 + }, + { + "epoch": 2.24, + "learning_rate": 7.517936925210917e-06, + "loss": 0.8827, + "step": 4050 + }, + { + "epoch": 2.24, + "learning_rate": 7.466255304880834e-06, + "loss": 0.9668, + "step": 4055 + }, + { + "epoch": 2.24, + "learning_rate": 7.4147207378482295e-06, + "loss": 0.934, + "step": 4060 + }, + { + "epoch": 2.25, + "learning_rate": 7.36333365632697e-06, + "loss": 0.9251, + "step": 4065 + }, + { + "epoch": 2.25, + "learning_rate": 7.312094491294033e-06, + "loss": 0.9225, + "step": 4070 + }, + { + "epoch": 2.25, + "learning_rate": 7.261003672485783e-06, + "loss": 0.9198, + "step": 4075 + }, + { + "epoch": 2.26, + "learning_rate": 7.210061628394477e-06, + "loss": 0.9023, + "step": 4080 + }, + { + "epoch": 2.26, + "learning_rate": 7.159268786264564e-06, + "loss": 0.9378, + "step": 4085 + }, + { + "epoch": 2.26, + "learning_rate": 7.108625572089209e-06, + "loss": 0.9148, + "step": 4090 + }, + { + "epoch": 2.26, + "learning_rate": 7.05813241060661e-06, + "loss": 0.9308, + "step": 4095 + }, + { + "epoch": 2.27, + "learning_rate": 7.007789725296557e-06, + "loss": 0.8979, + "step": 4100 + }, + { + "epoch": 2.27, + "learning_rate": 6.957597938376748e-06, + "loss": 0.9329, + "step": 4105 + }, + { + "epoch": 2.27, + "learning_rate": 6.907557470799358e-06, + "loss": 0.8849, + "step": 4110 + }, + { + "epoch": 2.28, + "learning_rate": 6.857668742247458e-06, + "loss": 0.8879, + "step": 4115 + }, + { + "epoch": 2.28, + "learning_rate": 6.807932171131498e-06, + "loss": 0.9132, + "step": 4120 + }, + { + "epoch": 2.28, + "learning_rate": 6.758348174585804e-06, + "loss": 0.9273, + "step": 4125 + }, + { + "epoch": 2.28, + "learning_rate": 6.7089171684650785e-06, + "loss": 0.9115, + "step": 4130 + }, + { + "epoch": 2.29, + "learning_rate": 6.659639567340914e-06, + "loss": 0.9431, + "step": 4135 + }, + { + "epoch": 2.29, + "learning_rate": 6.610515784498314e-06, + "loss": 0.9515, + "step": 4140 + }, + { + "epoch": 2.29, + "learning_rate": 6.561546231932228e-06, + "loss": 0.9069, + "step": 4145 + }, + { + "epoch": 2.29, + "learning_rate": 6.512731320344101e-06, + "loss": 0.9178, + "step": 4150 + }, + { + "epoch": 2.3, + "learning_rate": 6.464071459138405e-06, + "loss": 0.9156, + "step": 4155 + }, + { + "epoch": 2.3, + "learning_rate": 6.415567056419244e-06, + "loss": 0.9637, + "step": 4160 + }, + { + "epoch": 2.3, + "learning_rate": 6.3672185189869e-06, + "loss": 0.9446, + "step": 4165 + }, + { + "epoch": 2.31, + "learning_rate": 6.319026252334445e-06, + "loss": 0.8726, + "step": 4170 + }, + { + "epoch": 2.31, + "learning_rate": 6.270990660644313e-06, + "loss": 0.9324, + "step": 4175 + }, + { + "epoch": 2.31, + "learning_rate": 6.223112146784935e-06, + "loss": 0.9252, + "step": 4180 + }, + { + "epoch": 2.31, + "learning_rate": 6.1753911123073435e-06, + "loss": 0.8924, + "step": 4185 + }, + { + "epoch": 2.32, + "learning_rate": 6.127827957441817e-06, + "loss": 0.9344, + "step": 4190 + }, + { + "epoch": 2.32, + "learning_rate": 6.080423081094522e-06, + "loss": 0.9341, + "step": 4195 + }, + { + "epoch": 2.32, + "learning_rate": 6.033176880844133e-06, + "loss": 0.905, + "step": 4200 + }, + { + "epoch": 2.33, + "learning_rate": 5.986089752938584e-06, + "loss": 0.9231, + "step": 4205 + }, + { + "epoch": 2.33, + "learning_rate": 5.939162092291622e-06, + "loss": 0.906, + "step": 4210 + }, + { + "epoch": 2.33, + "learning_rate": 5.892394292479633e-06, + "loss": 0.8957, + "step": 4215 + }, + { + "epoch": 2.33, + "learning_rate": 5.8457867457382024e-06, + "loss": 0.867, + "step": 4220 + }, + { + "epoch": 2.34, + "learning_rate": 5.7993398429589506e-06, + "loss": 0.9022, + "step": 4225 + }, + { + "epoch": 2.34, + "learning_rate": 5.753053973686148e-06, + "loss": 0.9064, + "step": 4230 + }, + { + "epoch": 2.34, + "learning_rate": 5.7069295261135525e-06, + "loss": 0.9019, + "step": 4235 + }, + { + "epoch": 2.34, + "learning_rate": 5.66096688708104e-06, + "loss": 0.9397, + "step": 4240 + }, + { + "epoch": 2.35, + "learning_rate": 5.615166442071457e-06, + "loss": 0.8971, + "step": 4245 + }, + { + "epoch": 2.35, + "learning_rate": 5.569528575207339e-06, + "loss": 0.8902, + "step": 4250 + }, + { + "epoch": 2.35, + "learning_rate": 5.5240536692477e-06, + "loss": 0.8982, + "step": 4255 + }, + { + "epoch": 2.36, + "learning_rate": 5.4787421055848164e-06, + "loss": 0.9135, + "step": 4260 + }, + { + "epoch": 2.36, + "learning_rate": 5.433594264241043e-06, + "loss": 0.8969, + "step": 4265 + }, + { + "epoch": 2.36, + "learning_rate": 5.3886105238656055e-06, + "loss": 0.9036, + "step": 4270 + }, + { + "epoch": 2.36, + "learning_rate": 5.3437912617314425e-06, + "loss": 0.9313, + "step": 4275 + }, + { + "epoch": 2.37, + "learning_rate": 5.299136853732034e-06, + "loss": 0.9034, + "step": 4280 + }, + { + "epoch": 2.37, + "learning_rate": 5.254647674378252e-06, + "loss": 0.9673, + "step": 4285 + }, + { + "epoch": 2.37, + "learning_rate": 5.210324096795202e-06, + "loss": 0.9044, + "step": 4290 + }, + { + "epoch": 2.37, + "learning_rate": 5.166166492719124e-06, + "loss": 0.937, + "step": 4295 + }, + { + "epoch": 2.38, + "learning_rate": 5.122175232494255e-06, + "loss": 0.9095, + "step": 4300 + }, + { + "epoch": 2.38, + "learning_rate": 5.078350685069727e-06, + "loss": 0.8787, + "step": 4305 + }, + { + "epoch": 2.38, + "learning_rate": 5.03469321799647e-06, + "loss": 0.8945, + "step": 4310 + }, + { + "epoch": 2.39, + "learning_rate": 4.9912031974241376e-06, + "loss": 0.9285, + "step": 4315 + }, + { + "epoch": 2.39, + "learning_rate": 4.947880988098025e-06, + "loss": 0.9318, + "step": 4320 + }, + { + "epoch": 2.39, + "learning_rate": 4.90472695335602e-06, + "loss": 0.9171, + "step": 4325 + }, + { + "epoch": 2.39, + "learning_rate": 4.8617414551255545e-06, + "loss": 0.923, + "step": 4330 + }, + { + "epoch": 2.4, + "learning_rate": 4.818924853920545e-06, + "loss": 0.9153, + "step": 4335 + }, + { + "epoch": 2.4, + "learning_rate": 4.776277508838428e-06, + "loss": 0.907, + "step": 4340 + }, + { + "epoch": 2.4, + "learning_rate": 4.733799777557069e-06, + "loss": 0.8869, + "step": 4345 + }, + { + "epoch": 2.41, + "learning_rate": 4.691492016331842e-06, + "loss": 0.95, + "step": 4350 + }, + { + "epoch": 2.41, + "learning_rate": 4.6493545799925655e-06, + "loss": 0.9145, + "step": 4355 + }, + { + "epoch": 2.41, + "learning_rate": 4.607387821940609e-06, + "loss": 0.9175, + "step": 4360 + }, + { + "epoch": 2.41, + "learning_rate": 4.565592094145835e-06, + "loss": 0.9125, + "step": 4365 + }, + { + "epoch": 2.42, + "learning_rate": 4.523967747143745e-06, + "loss": 0.8972, + "step": 4370 + }, + { + "epoch": 2.42, + "learning_rate": 4.482515130032453e-06, + "loss": 0.9113, + "step": 4375 + }, + { + "epoch": 2.42, + "learning_rate": 4.441234590469817e-06, + "loss": 0.9447, + "step": 4380 + }, + { + "epoch": 2.42, + "learning_rate": 4.400126474670499e-06, + "loss": 0.9472, + "step": 4385 + }, + { + "epoch": 2.43, + "learning_rate": 4.359191127403059e-06, + "loss": 0.9391, + "step": 4390 + }, + { + "epoch": 2.43, + "learning_rate": 4.318428891987078e-06, + "loss": 0.9171, + "step": 4395 + }, + { + "epoch": 2.43, + "learning_rate": 4.2778401102902595e-06, + "loss": 0.9468, + "step": 4400 + }, + { + "epoch": 2.44, + "learning_rate": 4.237425122725586e-06, + "loss": 0.9034, + "step": 4405 + }, + { + "epoch": 2.44, + "learning_rate": 4.197184268248436e-06, + "loss": 0.8667, + "step": 4410 + }, + { + "epoch": 2.44, + "learning_rate": 4.157117884353765e-06, + "loss": 0.9553, + "step": 4415 + }, + { + "epoch": 2.44, + "learning_rate": 4.117226307073268e-06, + "loss": 0.931, + "step": 4420 + }, + { + "epoch": 2.45, + "learning_rate": 4.07750987097254e-06, + "loss": 0.9287, + "step": 4425 + }, + { + "epoch": 2.45, + "learning_rate": 4.037968909148326e-06, + "loss": 0.8737, + "step": 4430 + }, + { + "epoch": 2.45, + "learning_rate": 3.998603753225647e-06, + "loss": 0.9677, + "step": 4435 + }, + { + "epoch": 2.46, + "learning_rate": 3.959414733355094e-06, + "loss": 0.9194, + "step": 4440 + }, + { + "epoch": 2.46, + "learning_rate": 3.9204021782100115e-06, + "loss": 0.9424, + "step": 4445 + }, + { + "epoch": 2.46, + "learning_rate": 3.8815664149837675e-06, + "loss": 0.9402, + "step": 4450 + }, + { + "epoch": 2.46, + "learning_rate": 3.8429077693869854e-06, + "loss": 0.9196, + "step": 4455 + }, + { + "epoch": 2.47, + "learning_rate": 3.804426565644839e-06, + "loss": 0.9357, + "step": 4460 + }, + { + "epoch": 2.47, + "learning_rate": 3.7661231264943086e-06, + "loss": 0.9234, + "step": 4465 + }, + { + "epoch": 2.47, + "learning_rate": 3.7279977731814963e-06, + "loss": 0.9135, + "step": 4470 + }, + { + "epoch": 2.47, + "learning_rate": 3.690050825458913e-06, + "loss": 0.9074, + "step": 4475 + }, + { + "epoch": 2.48, + "learning_rate": 3.652282601582793e-06, + "loss": 0.8658, + "step": 4480 + }, + { + "epoch": 2.48, + "learning_rate": 3.6146934183104748e-06, + "loss": 0.9522, + "step": 4485 + }, + { + "epoch": 2.48, + "learning_rate": 3.5772835908976538e-06, + "loss": 0.937, + "step": 4490 + }, + { + "epoch": 2.49, + "learning_rate": 3.540053433095841e-06, + "loss": 0.9336, + "step": 4495 + }, + { + "epoch": 2.49, + "learning_rate": 3.503003257149637e-06, + "loss": 0.9437, + "step": 4500 + }, + { + "epoch": 2.49, + "learning_rate": 3.4661333737941976e-06, + "loss": 0.9084, + "step": 4505 + }, + { + "epoch": 2.49, + "learning_rate": 3.429444092252554e-06, + "loss": 0.9378, + "step": 4510 + }, + { + "epoch": 2.5, + "learning_rate": 3.39293572023307e-06, + "loss": 0.8999, + "step": 4515 + }, + { + "epoch": 2.5, + "learning_rate": 3.3566085639268413e-06, + "loss": 0.9417, + "step": 4520 + }, + { + "epoch": 2.5, + "learning_rate": 3.32046292800513e-06, + "loss": 0.9288, + "step": 4525 + }, + { + "epoch": 2.5, + "learning_rate": 3.2844991156168097e-06, + "loss": 0.8957, + "step": 4530 + }, + { + "epoch": 2.51, + "learning_rate": 3.2487174283858223e-06, + "loss": 0.9002, + "step": 4535 + }, + { + "epoch": 2.51, + "learning_rate": 3.2131181664086517e-06, + "loss": 0.9315, + "step": 4540 + }, + { + "epoch": 2.51, + "learning_rate": 3.1777016282517975e-06, + "loss": 0.8948, + "step": 4545 + }, + { + "epoch": 2.52, + "learning_rate": 3.142468110949287e-06, + "loss": 0.9015, + "step": 4550 + }, + { + "epoch": 2.52, + "learning_rate": 3.1074179100001737e-06, + "loss": 0.9273, + "step": 4555 + }, + { + "epoch": 2.52, + "learning_rate": 3.0725513193660404e-06, + "loss": 0.9307, + "step": 4560 + }, + { + "epoch": 2.52, + "learning_rate": 3.0378686314685934e-06, + "loss": 0.9075, + "step": 4565 + }, + { + "epoch": 2.53, + "learning_rate": 3.003370137187128e-06, + "loss": 0.8821, + "step": 4570 + }, + { + "epoch": 2.53, + "learning_rate": 2.969056125856154e-06, + "loss": 0.9245, + "step": 4575 + }, + { + "epoch": 2.53, + "learning_rate": 2.93492688526294e-06, + "loss": 0.9346, + "step": 4580 + }, + { + "epoch": 2.54, + "learning_rate": 2.900982701645111e-06, + "loss": 0.9226, + "step": 4585 + }, + { + "epoch": 2.54, + "learning_rate": 2.867223859688237e-06, + "loss": 0.9215, + "step": 4590 + }, + { + "epoch": 2.54, + "learning_rate": 2.83365064252345e-06, + "loss": 0.878, + "step": 4595 + }, + { + "epoch": 2.54, + "learning_rate": 2.800263331725078e-06, + "loss": 0.9207, + "step": 4600 + }, + { + "epoch": 2.55, + "learning_rate": 2.7670622073082657e-06, + "loss": 0.9156, + "step": 4605 + }, + { + "epoch": 2.55, + "learning_rate": 2.7340475477266507e-06, + "loss": 0.9296, + "step": 4610 + }, + { + "epoch": 2.55, + "learning_rate": 2.701219629869986e-06, + "loss": 0.9346, + "step": 4615 + }, + { + "epoch": 2.55, + "learning_rate": 2.6685787290618825e-06, + "loss": 0.9237, + "step": 4620 + }, + { + "epoch": 2.56, + "learning_rate": 2.636125119057428e-06, + "loss": 0.9162, + "step": 4625 + }, + { + "epoch": 2.56, + "learning_rate": 2.6038590720409565e-06, + "loss": 0.9145, + "step": 4630 + }, + { + "epoch": 2.56, + "learning_rate": 2.5717808586237067e-06, + "loss": 0.9776, + "step": 4635 + }, + { + "epoch": 2.57, + "learning_rate": 2.539890747841611e-06, + "loss": 0.9519, + "step": 4640 + }, + { + "epoch": 2.57, + "learning_rate": 2.5081890071529695e-06, + "loss": 0.9116, + "step": 4645 + }, + { + "epoch": 2.57, + "learning_rate": 2.4766759024362927e-06, + "loss": 0.9217, + "step": 4650 + }, + { + "epoch": 2.57, + "learning_rate": 2.445351697987988e-06, + "loss": 0.9113, + "step": 4655 + }, + { + "epoch": 2.58, + "learning_rate": 2.414216656520191e-06, + "loss": 0.9089, + "step": 4660 + }, + { + "epoch": 2.58, + "learning_rate": 2.3832710391585605e-06, + "loss": 0.9367, + "step": 4665 + }, + { + "epoch": 2.58, + "learning_rate": 2.3525151054400675e-06, + "loss": 0.9453, + "step": 4670 + }, + { + "epoch": 2.59, + "learning_rate": 2.3219491133108394e-06, + "loss": 0.9187, + "step": 4675 + }, + { + "epoch": 2.59, + "learning_rate": 2.2915733191239824e-06, + "loss": 0.911, + "step": 4680 + }, + { + "epoch": 2.59, + "learning_rate": 2.261387977637436e-06, + "loss": 0.9309, + "step": 4685 + }, + { + "epoch": 2.59, + "learning_rate": 2.2313933420118395e-06, + "loss": 0.8921, + "step": 4690 + }, + { + "epoch": 2.6, + "learning_rate": 2.2015896638084037e-06, + "loss": 0.9332, + "step": 4695 + }, + { + "epoch": 2.6, + "learning_rate": 2.171977192986813e-06, + "loss": 0.9316, + "step": 4700 + }, + { + "epoch": 2.6, + "learning_rate": 2.142556177903096e-06, + "loss": 0.9453, + "step": 4705 + }, + { + "epoch": 2.6, + "learning_rate": 2.1133268653076022e-06, + "loss": 0.937, + "step": 4710 + }, + { + "epoch": 2.61, + "learning_rate": 2.084289500342862e-06, + "loss": 0.9067, + "step": 4715 + }, + { + "epoch": 2.61, + "learning_rate": 2.0554443265415864e-06, + "loss": 0.9694, + "step": 4720 + }, + { + "epoch": 2.61, + "learning_rate": 2.0267915858245943e-06, + "loss": 0.9217, + "step": 4725 + }, + { + "epoch": 2.62, + "learning_rate": 1.998331518498797e-06, + "loss": 0.9399, + "step": 4730 + }, + { + "epoch": 2.62, + "learning_rate": 1.970064363255175e-06, + "loss": 0.9164, + "step": 4735 + }, + { + "epoch": 2.62, + "learning_rate": 1.941990357166784e-06, + "loss": 0.9336, + "step": 4740 + }, + { + "epoch": 2.62, + "learning_rate": 1.9141097356867644e-06, + "loss": 0.926, + "step": 4745 + }, + { + "epoch": 2.63, + "learning_rate": 1.8864227326463452e-06, + "loss": 0.9109, + "step": 4750 + }, + { + "epoch": 2.63, + "learning_rate": 1.8589295802529328e-06, + "loss": 0.9528, + "step": 4755 + }, + { + "epoch": 2.63, + "learning_rate": 1.8316305090881003e-06, + "loss": 0.9142, + "step": 4760 + }, + { + "epoch": 2.63, + "learning_rate": 1.8045257481057204e-06, + "loss": 0.9247, + "step": 4765 + }, + { + "epoch": 2.64, + "learning_rate": 1.7776155246299747e-06, + "loss": 0.9162, + "step": 4770 + }, + { + "epoch": 2.64, + "learning_rate": 1.7509000643535167e-06, + "loss": 0.9084, + "step": 4775 + }, + { + "epoch": 2.64, + "learning_rate": 1.7243795913355148e-06, + "loss": 0.8427, + "step": 4780 + }, + { + "epoch": 2.65, + "learning_rate": 1.6980543279998401e-06, + "loss": 0.8851, + "step": 4785 + }, + { + "epoch": 2.65, + "learning_rate": 1.671924495133126e-06, + "loss": 0.9155, + "step": 4790 + }, + { + "epoch": 2.65, + "learning_rate": 1.6459903118829777e-06, + "loss": 0.9049, + "step": 4795 + }, + { + "epoch": 2.65, + "learning_rate": 1.6202519957561114e-06, + "loss": 0.8831, + "step": 4800 + }, + { + "epoch": 2.66, + "learning_rate": 1.5947097626165252e-06, + "loss": 0.9286, + "step": 4805 + }, + { + "epoch": 2.66, + "learning_rate": 1.5693638266836952e-06, + "loss": 0.9299, + "step": 4810 + }, + { + "epoch": 2.66, + "learning_rate": 1.5442144005307774e-06, + "loss": 0.8739, + "step": 4815 + }, + { + "epoch": 2.67, + "learning_rate": 1.519261695082827e-06, + "loss": 0.9201, + "step": 4820 + }, + { + "epoch": 2.67, + "learning_rate": 1.4945059196150247e-06, + "loss": 0.9062, + "step": 4825 + }, + { + "epoch": 2.67, + "learning_rate": 1.4699472817509248e-06, + "loss": 0.9304, + "step": 4830 + }, + { + "epoch": 2.67, + "learning_rate": 1.4455859874607235e-06, + "loss": 0.9253, + "step": 4835 + }, + { + "epoch": 2.68, + "learning_rate": 1.4214222410594947e-06, + "loss": 0.9261, + "step": 4840 + }, + { + "epoch": 2.68, + "learning_rate": 1.3974562452055418e-06, + "loss": 0.8764, + "step": 4845 + }, + { + "epoch": 2.68, + "learning_rate": 1.3736882008986262e-06, + "loss": 0.8906, + "step": 4850 + }, + { + "epoch": 2.68, + "learning_rate": 1.3501183074783263e-06, + "loss": 0.9671, + "step": 4855 + }, + { + "epoch": 2.69, + "learning_rate": 1.3267467626223606e-06, + "loss": 0.8979, + "step": 4860 + }, + { + "epoch": 2.69, + "learning_rate": 1.3035737623449146e-06, + "loss": 0.9284, + "step": 4865 + }, + { + "epoch": 2.69, + "learning_rate": 1.2805995009950083e-06, + "loss": 0.9338, + "step": 4870 + }, + { + "epoch": 2.7, + "learning_rate": 1.257824171254865e-06, + "loss": 0.9474, + "step": 4875 + }, + { + "epoch": 2.7, + "learning_rate": 1.2352479641382919e-06, + "loss": 0.9307, + "step": 4880 + }, + { + "epoch": 2.7, + "learning_rate": 1.2128710689890826e-06, + "loss": 0.9578, + "step": 4885 + }, + { + "epoch": 2.7, + "learning_rate": 1.1906936734794233e-06, + "loss": 0.9123, + "step": 4890 + }, + { + "epoch": 2.71, + "learning_rate": 1.1687159636083161e-06, + "loss": 0.9157, + "step": 4895 + }, + { + "epoch": 2.71, + "learning_rate": 1.1469381237000476e-06, + "loss": 0.9135, + "step": 4900 + }, + { + "epoch": 2.71, + "learning_rate": 1.1253603364025867e-06, + "loss": 0.9073, + "step": 4905 + }, + { + "epoch": 2.71, + "learning_rate": 1.1039827826861193e-06, + "loss": 0.9198, + "step": 4910 + }, + { + "epoch": 2.72, + "learning_rate": 1.0828056418414695e-06, + "loss": 0.911, + "step": 4915 + }, + { + "epoch": 2.72, + "learning_rate": 1.06182909147865e-06, + "loss": 0.9124, + "step": 4920 + }, + { + "epoch": 2.72, + "learning_rate": 1.0410533075253248e-06, + "loss": 0.9308, + "step": 4925 + }, + { + "epoch": 2.73, + "learning_rate": 1.020478464225369e-06, + "loss": 0.9005, + "step": 4930 + }, + { + "epoch": 2.73, + "learning_rate": 1.0001047341373832e-06, + "loss": 0.9326, + "step": 4935 + }, + { + "epoch": 2.73, + "learning_rate": 9.7993228813327e-07, + "loss": 0.8905, + "step": 4940 + }, + { + "epoch": 2.73, + "learning_rate": 9.599612953967746e-07, + "loss": 0.9238, + "step": 4945 + }, + { + "epoch": 2.74, + "learning_rate": 9.401919234220902e-07, + "loss": 0.8964, + "step": 4950 + }, + { + "epoch": 2.74, + "learning_rate": 9.206243380124352e-07, + "loss": 0.9183, + "step": 4955 + }, + { + "epoch": 2.74, + "learning_rate": 9.012587032786706e-07, + "loss": 0.9074, + "step": 4960 + }, + { + "epoch": 2.75, + "learning_rate": 8.820951816379263e-07, + "loss": 0.9255, + "step": 4965 + }, + { + "epoch": 2.75, + "learning_rate": 8.631339338122324e-07, + "loss": 0.9133, + "step": 4970 + }, + { + "epoch": 2.75, + "learning_rate": 8.443751188271703e-07, + "loss": 0.9056, + "step": 4975 + }, + { + "epoch": 2.75, + "learning_rate": 8.258188940105549e-07, + "loss": 0.8905, + "step": 4980 + }, + { + "epoch": 2.76, + "learning_rate": 8.074654149910821e-07, + "loss": 0.9088, + "step": 4985 + }, + { + "epoch": 2.76, + "learning_rate": 7.893148356970748e-07, + "loss": 0.8994, + "step": 4990 + }, + { + "epoch": 2.76, + "learning_rate": 7.713673083551281e-07, + "loss": 0.8946, + "step": 4995 + }, + { + "epoch": 2.76, + "learning_rate": 7.536229834888913e-07, + "loss": 0.924, + "step": 5000 + }, + { + "epoch": 2.77, + "learning_rate": 7.360820099177712e-07, + "loss": 0.9102, + "step": 5005 + }, + { + "epoch": 2.77, + "learning_rate": 7.187445347556859e-07, + "loss": 0.9198, + "step": 5010 + }, + { + "epoch": 2.77, + "learning_rate": 7.016107034098524e-07, + "loss": 0.9004, + "step": 5015 + }, + { + "epoch": 2.78, + "learning_rate": 6.846806595795424e-07, + "loss": 0.8849, + "step": 5020 + }, + { + "epoch": 2.78, + "learning_rate": 6.679545452548924e-07, + "loss": 0.9473, + "step": 5025 + }, + { + "epoch": 2.78, + "learning_rate": 6.514325007157013e-07, + "loss": 0.9339, + "step": 5030 + }, + { + "epoch": 2.78, + "learning_rate": 6.35114664530273e-07, + "loss": 0.9172, + "step": 5035 + }, + { + "epoch": 2.79, + "learning_rate": 6.190011735542262e-07, + "loss": 0.9796, + "step": 5040 + }, + { + "epoch": 2.79, + "learning_rate": 6.030921629293778e-07, + "loss": 0.9367, + "step": 5045 + }, + { + "epoch": 2.79, + "learning_rate": 5.873877660825783e-07, + "loss": 0.9254, + "step": 5050 + }, + { + "epoch": 2.8, + "learning_rate": 5.718881147246252e-07, + "loss": 0.8976, + "step": 5055 + }, + { + "epoch": 2.8, + "learning_rate": 5.565933388491263e-07, + "loss": 0.9099, + "step": 5060 + }, + { + "epoch": 2.8, + "learning_rate": 5.415035667314328e-07, + "loss": 0.9057, + "step": 5065 + }, + { + "epoch": 2.8, + "learning_rate": 5.266189249275521e-07, + "loss": 0.8978, + "step": 5070 + }, + { + "epoch": 2.81, + "learning_rate": 5.119395382730929e-07, + "loss": 0.9172, + "step": 5075 + }, + { + "epoch": 2.81, + "learning_rate": 4.974655298822129e-07, + "loss": 0.9393, + "step": 5080 + }, + { + "epoch": 2.81, + "learning_rate": 4.831970211465892e-07, + "loss": 0.9137, + "step": 5085 + }, + { + "epoch": 2.81, + "learning_rate": 4.6913413173439723e-07, + "loss": 0.8573, + "step": 5090 + }, + { + "epoch": 2.82, + "learning_rate": 4.552769795893086e-07, + "loss": 0.9533, + "step": 5095 + }, + { + "epoch": 2.82, + "learning_rate": 4.416256809295083e-07, + "loss": 0.8693, + "step": 5100 + }, + { + "epoch": 2.82, + "learning_rate": 4.2818035024670963e-07, + "loss": 0.9531, + "step": 5105 + }, + { + "epoch": 2.83, + "learning_rate": 4.1494110030519397e-07, + "loss": 0.9077, + "step": 5110 + }, + { + "epoch": 2.83, + "learning_rate": 4.019080421408833e-07, + "loss": 0.9061, + "step": 5115 + }, + { + "epoch": 2.83, + "learning_rate": 3.8908128506037756e-07, + "loss": 0.9606, + "step": 5120 + }, + { + "epoch": 2.83, + "learning_rate": 3.7646093664007456e-07, + "loss": 0.9335, + "step": 5125 + }, + { + "epoch": 2.84, + "learning_rate": 3.640471027252346e-07, + "loss": 0.9054, + "step": 5130 + }, + { + "epoch": 2.84, + "learning_rate": 3.5183988742910903e-07, + "loss": 0.8801, + "step": 5135 + }, + { + "epoch": 2.84, + "learning_rate": 3.398393931320687e-07, + "loss": 0.913, + "step": 5140 + }, + { + "epoch": 2.84, + "learning_rate": 3.2804572048074357e-07, + "loss": 0.904, + "step": 5145 + }, + { + "epoch": 2.85, + "learning_rate": 3.164589683871705e-07, + "loss": 0.9144, + "step": 5150 + }, + { + "epoch": 2.85, + "learning_rate": 3.050792340279718e-07, + "loss": 0.9145, + "step": 5155 + }, + { + "epoch": 2.85, + "learning_rate": 2.939066128435419e-07, + "loss": 0.957, + "step": 5160 + }, + { + "epoch": 2.86, + "learning_rate": 2.829411985372399e-07, + "loss": 0.9196, + "step": 5165 + }, + { + "epoch": 2.86, + "learning_rate": 2.7218308307460916e-07, + "loss": 0.8893, + "step": 5170 + }, + { + "epoch": 2.86, + "learning_rate": 2.616323566825979e-07, + "loss": 0.9049, + "step": 5175 + }, + { + "epoch": 2.86, + "learning_rate": 2.51289107848815e-07, + "loss": 0.8977, + "step": 5180 + }, + { + "epoch": 2.87, + "learning_rate": 2.4115342332078074e-07, + "loss": 0.9187, + "step": 5185 + }, + { + "epoch": 2.87, + "learning_rate": 2.312253881051968e-07, + "loss": 0.9355, + "step": 5190 + }, + { + "epoch": 2.87, + "learning_rate": 2.2150508546723848e-07, + "loss": 0.9337, + "step": 5195 + }, + { + "epoch": 2.88, + "learning_rate": 2.119925969298553e-07, + "loss": 0.9011, + "step": 5200 + }, + { + "epoch": 2.88, + "learning_rate": 2.0268800227307982e-07, + "loss": 0.8987, + "step": 5205 + }, + { + "epoch": 2.88, + "learning_rate": 1.9359137953337548e-07, + "loss": 0.9206, + "step": 5210 + }, + { + "epoch": 2.88, + "learning_rate": 1.8470280500296199e-07, + "loss": 0.9485, + "step": 5215 + }, + { + "epoch": 2.89, + "learning_rate": 1.7602235322919102e-07, + "loss": 0.8902, + "step": 5220 + }, + { + "epoch": 2.89, + "learning_rate": 1.6755009701391045e-07, + "loss": 0.9484, + "step": 5225 + }, + { + "epoch": 2.89, + "learning_rate": 1.592861074128621e-07, + "loss": 0.9361, + "step": 5230 + }, + { + "epoch": 2.89, + "learning_rate": 1.5123045373508226e-07, + "loss": 0.9407, + "step": 5235 + }, + { + "epoch": 2.9, + "learning_rate": 1.4338320354231605e-07, + "loss": 0.8792, + "step": 5240 + }, + { + "epoch": 2.9, + "learning_rate": 1.3574442264846222e-07, + "loss": 0.9682, + "step": 5245 + }, + { + "epoch": 2.9, + "learning_rate": 1.2831417511900423e-07, + "loss": 0.8995, + "step": 5250 + }, + { + "epoch": 2.91, + "learning_rate": 1.2109252327048849e-07, + "loss": 0.8877, + "step": 5255 + }, + { + "epoch": 2.91, + "learning_rate": 1.1407952766999686e-07, + "loss": 0.8746, + "step": 5260 + }, + { + "epoch": 2.91, + "learning_rate": 1.0727524713463333e-07, + "loss": 0.9287, + "step": 5265 + }, + { + "epoch": 2.91, + "learning_rate": 1.0067973873104097e-07, + "loss": 0.9045, + "step": 5270 + }, + { + "epoch": 2.92, + "learning_rate": 9.42930577749107e-08, + "loss": 0.942, + "step": 5275 + }, + { + "epoch": 2.92, + "learning_rate": 8.811525783052888e-08, + "loss": 0.885, + "step": 5280 + }, + { + "epoch": 2.92, + "learning_rate": 8.214639071031926e-08, + "loss": 0.8947, + "step": 5285 + }, + { + "epoch": 2.93, + "learning_rate": 7.638650647442125e-08, + "loss": 0.9309, + "step": 5290 + }, + { + "epoch": 2.93, + "learning_rate": 7.083565343024845e-08, + "loss": 0.896, + "step": 5295 + }, + { + "epoch": 2.93, + "learning_rate": 6.549387813210572e-08, + "loss": 0.9132, + "step": 5300 + }, + { + "epoch": 2.93, + "learning_rate": 6.036122538078393e-08, + "loss": 0.9185, + "step": 5305 + }, + { + "epoch": 2.94, + "learning_rate": 5.543773822319631e-08, + "loss": 0.9285, + "step": 5310 + }, + { + "epoch": 2.94, + "learning_rate": 5.072345795200384e-08, + "loss": 0.9074, + "step": 5315 + }, + { + "epoch": 2.94, + "learning_rate": 4.621842410527655e-08, + "loss": 0.8915, + "step": 5320 + }, + { + "epoch": 2.94, + "learning_rate": 4.1922674466166045e-08, + "loss": 0.8647, + "step": 5325 + }, + { + "epoch": 2.95, + "learning_rate": 3.783624506257799e-08, + "loss": 0.9393, + "step": 5330 + }, + { + "epoch": 2.95, + "learning_rate": 3.395917016688344e-08, + "loss": 0.9303, + "step": 5335 + }, + { + "epoch": 2.95, + "learning_rate": 3.029148229561629e-08, + "loss": 0.8998, + "step": 5340 + }, + { + "epoch": 2.96, + "learning_rate": 2.6833212209206872e-08, + "loss": 0.8964, + "step": 5345 + }, + { + "epoch": 2.96, + "learning_rate": 2.358438891173487e-08, + "loss": 0.9528, + "step": 5350 + }, + { + "epoch": 2.96, + "learning_rate": 2.0545039650665675e-08, + "loss": 0.8977, + "step": 5355 + }, + { + "epoch": 2.96, + "learning_rate": 1.7715189916636676e-08, + "loss": 0.9175, + "step": 5360 + }, + { + "epoch": 2.97, + "learning_rate": 1.5094863443243513e-08, + "loss": 0.8991, + "step": 5365 + }, + { + "epoch": 2.97, + "learning_rate": 1.2684082206829151e-08, + "loss": 0.9203, + "step": 5370 + }, + { + "epoch": 2.97, + "learning_rate": 1.0482866426311799e-08, + "loss": 0.9347, + "step": 5375 + }, + { + "epoch": 2.97, + "learning_rate": 8.491234563010041e-09, + "loss": 0.9361, + "step": 5380 + }, + { + "epoch": 2.98, + "learning_rate": 6.709203320484636e-09, + "loss": 0.8695, + "step": 5385 + }, + { + "epoch": 2.98, + "learning_rate": 5.13678764441361e-09, + "loss": 0.981, + "step": 5390 + }, + { + "epoch": 2.98, + "learning_rate": 3.774000722439608e-09, + "loss": 0.8908, + "step": 5395 + }, + { + "epoch": 2.99, + "learning_rate": 2.6208539840894e-09, + "loss": 0.9283, + "step": 5400 + } + ], + "max_steps": 5424, + "num_train_epochs": 3, + "total_flos": 1.7547990626677555e+18, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-5400/training_args.bin b/checkpoint-5400/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..77433459daca6603950afc463ff989e817f81ac3 --- /dev/null +++ b/checkpoint-5400/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0f6a8853599a52b1f15984d75ce4b6434c335fdc67d4509d0b2ca0d8c613590a +size 3362 diff --git a/checkpoint-600/README.md b/checkpoint-600/README.md new file mode 100644 index 0000000000000000000000000000000000000000..53c0fe6b436905ad7a2c060e98fe1ebdeac1a2fe --- /dev/null +++ b/checkpoint-600/README.md @@ -0,0 +1,15 @@ +--- +library_name: peft +--- +## Training procedure + + +The following `bitsandbytes` quantization config was used during training: +- load_in_8bit: True +- llm_int8_threshold: 6.0 +- llm_int8_skip_modules: None +- llm_int8_enable_fp32_cpu_offload: False +### Framework versions + + +- PEFT 0.5.0 diff --git a/checkpoint-600/adapter_config.json b/checkpoint-600/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..514ae99ac7471cc26016f7575e44dd8304f29b07 --- /dev/null +++ b/checkpoint-600/adapter_config.json @@ -0,0 +1,21 @@ +{ + "auto_mapping": null, + "base_model_name_or_path": "meta-llama/Llama-2-7b-chat-hf", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "q_proj", + "v_proj" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-600/adapter_model.bin b/checkpoint-600/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..6f608ff6f56584fd9da9378a4c311b48ace0f549 --- /dev/null +++ b/checkpoint-600/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ef845ee7d7eef18796e1049acb997e5b53e281fbd06cae154631cb03d923194f +size 16821197 diff --git a/checkpoint-600/finetuning_args.json b/checkpoint-600/finetuning_args.json new file mode 100644 index 0000000000000000000000000000000000000000..d01efc206b59c6f88548e8f3940579f2ed2af33b --- /dev/null +++ b/checkpoint-600/finetuning_args.json @@ -0,0 +1,16 @@ +{ + "dpo_beta": 0.1, + "finetuning_type": "lora", + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "lora_rank": 8, + "lora_target": [ + "q_proj", + "v_proj" + ], + "name_module_trainable": "mlp", + "num_hidden_layers": 32, + "num_layer_trainable": 3, + "ppo_score_norm": false, + "resume_lora_training": true +} diff --git a/checkpoint-600/optimizer.pt b/checkpoint-600/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..92bda308f4fe0ff3a6ed082bd5a1659387fd512f --- /dev/null +++ b/checkpoint-600/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f8cca74426664985fd766c82a2fa4462b31eb20516b7e127748da57aa74bd2c8 +size 33629893 diff --git a/checkpoint-600/rng_state.pth b/checkpoint-600/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..3bac8011f7cfcf5166187baa2addc49dd4357bd5 --- /dev/null +++ b/checkpoint-600/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a0b63ee0c77eef8548d7a39ba49a013f500f1cb14887b6f7d96e24d6324fa62d +size 14575 diff --git a/checkpoint-600/scaler.pt b/checkpoint-600/scaler.pt new file mode 100644 index 0000000000000000000000000000000000000000..842791b612283ceb8e68b64ed8e40e81c5a97bce --- /dev/null +++ b/checkpoint-600/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5dc9eacfeb00bd0bfeb98934a2309be01be65b288e0d747bbfc423b32679169f +size 557 diff --git a/checkpoint-600/scheduler.pt b/checkpoint-600/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..545257284c7477b152cb4531b9a155372ee96d4b --- /dev/null +++ b/checkpoint-600/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2519b853f2caee5f864ba4e39f6dc583341e51debbbebb895152877faa6f7f2e +size 627 diff --git a/checkpoint-600/trainer_state.json b/checkpoint-600/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..1befe32e3d6933cce348959d89718353a66ac38d --- /dev/null +++ b/checkpoint-600/trainer_state.json @@ -0,0 +1,736 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.3317666574509262, + "global_step": 600, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.9999895164082156e-05, + "loss": 1.6349, + "step": 5 + }, + { + "epoch": 0.01, + "learning_rate": 4.999958065720787e-05, + "loss": 1.6199, + "step": 10 + }, + { + "epoch": 0.01, + "learning_rate": 4.999905648201487e-05, + "loss": 1.4834, + "step": 15 + }, + { + "epoch": 0.01, + "learning_rate": 4.999832264289934e-05, + "loss": 1.3882, + "step": 20 + }, + { + "epoch": 0.01, + "learning_rate": 4.999737914601591e-05, + "loss": 1.3679, + "step": 25 + }, + { + "epoch": 0.02, + "learning_rate": 4.999622599927756e-05, + "loss": 1.2396, + "step": 30 + }, + { + "epoch": 0.02, + "learning_rate": 4.999486321235559e-05, + "loss": 1.321, + "step": 35 + }, + { + "epoch": 0.02, + "learning_rate": 4.9993290796679516e-05, + "loss": 1.2874, + "step": 40 + }, + { + "epoch": 0.02, + "learning_rate": 4.999150876543699e-05, + "loss": 1.2607, + "step": 45 + }, + { + "epoch": 0.03, + "learning_rate": 4.9989517133573694e-05, + "loss": 1.2454, + "step": 50 + }, + { + "epoch": 0.03, + "learning_rate": 4.9987315917793174e-05, + "loss": 1.2799, + "step": 55 + }, + { + "epoch": 0.03, + "learning_rate": 4.998490513655676e-05, + "loss": 1.2575, + "step": 60 + }, + { + "epoch": 0.04, + "learning_rate": 4.998228481008337e-05, + "loss": 1.2404, + "step": 65 + }, + { + "epoch": 0.04, + "learning_rate": 4.997945496034934e-05, + "loss": 1.2219, + "step": 70 + }, + { + "epoch": 0.04, + "learning_rate": 4.9976415611088267e-05, + "loss": 1.2241, + "step": 75 + }, + { + "epoch": 0.04, + "learning_rate": 4.997316678779079e-05, + "loss": 1.1716, + "step": 80 + }, + { + "epoch": 0.05, + "learning_rate": 4.996970851770438e-05, + "loss": 1.1883, + "step": 85 + }, + { + "epoch": 0.05, + "learning_rate": 4.9966040829833115e-05, + "loss": 1.205, + "step": 90 + }, + { + "epoch": 0.05, + "learning_rate": 4.9962163754937426e-05, + "loss": 1.1246, + "step": 95 + }, + { + "epoch": 0.06, + "learning_rate": 4.995807732553384e-05, + "loss": 1.1636, + "step": 100 + }, + { + "epoch": 0.06, + "learning_rate": 4.9953781575894723e-05, + "loss": 1.158, + "step": 105 + }, + { + "epoch": 0.06, + "learning_rate": 4.9949276542048e-05, + "loss": 1.1477, + "step": 110 + }, + { + "epoch": 0.06, + "learning_rate": 4.9944562261776805e-05, + "loss": 1.1678, + "step": 115 + }, + { + "epoch": 0.07, + "learning_rate": 4.9939638774619216e-05, + "loss": 1.1501, + "step": 120 + }, + { + "epoch": 0.07, + "learning_rate": 4.99345061218679e-05, + "loss": 1.1955, + "step": 125 + }, + { + "epoch": 0.07, + "learning_rate": 4.9929164346569756e-05, + "loss": 1.1724, + "step": 130 + }, + { + "epoch": 0.07, + "learning_rate": 4.9923613493525576e-05, + "loss": 1.177, + "step": 135 + }, + { + "epoch": 0.08, + "learning_rate": 4.991785360928968e-05, + "loss": 1.1418, + "step": 140 + }, + { + "epoch": 0.08, + "learning_rate": 4.991188474216947e-05, + "loss": 1.1898, + "step": 145 + }, + { + "epoch": 0.08, + "learning_rate": 4.9905706942225094e-05, + "loss": 1.1479, + "step": 150 + }, + { + "epoch": 0.09, + "learning_rate": 4.9899320261268966e-05, + "loss": 1.1356, + "step": 155 + }, + { + "epoch": 0.09, + "learning_rate": 4.989272475286537e-05, + "loss": 1.1397, + "step": 160 + }, + { + "epoch": 0.09, + "learning_rate": 4.9885920472330004e-05, + "loss": 1.1215, + "step": 165 + }, + { + "epoch": 0.09, + "learning_rate": 4.9878907476729516e-05, + "loss": 1.167, + "step": 170 + }, + { + "epoch": 0.1, + "learning_rate": 4.9871685824881e-05, + "loss": 1.1219, + "step": 175 + }, + { + "epoch": 0.1, + "learning_rate": 4.9864255577351534e-05, + "loss": 1.0835, + "step": 180 + }, + { + "epoch": 0.1, + "learning_rate": 4.985661679645769e-05, + "loss": 1.0721, + "step": 185 + }, + { + "epoch": 0.11, + "learning_rate": 4.9848769546264915e-05, + "loss": 1.0692, + "step": 190 + }, + { + "epoch": 0.11, + "learning_rate": 4.9840713892587146e-05, + "loss": 1.0488, + "step": 195 + }, + { + "epoch": 0.11, + "learning_rate": 4.983244990298609e-05, + "loss": 1.1285, + "step": 200 + }, + { + "epoch": 0.11, + "learning_rate": 4.982397764677081e-05, + "loss": 1.0832, + "step": 205 + }, + { + "epoch": 0.12, + "learning_rate": 4.981529719499704e-05, + "loss": 1.0652, + "step": 210 + }, + { + "epoch": 0.12, + "learning_rate": 4.980640862046663e-05, + "loss": 1.1043, + "step": 215 + }, + { + "epoch": 0.12, + "learning_rate": 4.979731199772693e-05, + "loss": 1.112, + "step": 220 + }, + { + "epoch": 0.12, + "learning_rate": 4.9788007403070146e-05, + "loss": 1.1029, + "step": 225 + }, + { + "epoch": 0.13, + "learning_rate": 4.977849491453277e-05, + "loss": 1.0869, + "step": 230 + }, + { + "epoch": 0.13, + "learning_rate": 4.976877461189481e-05, + "loss": 1.0843, + "step": 235 + }, + { + "epoch": 0.13, + "learning_rate": 4.975884657667922e-05, + "loss": 1.0789, + "step": 240 + }, + { + "epoch": 0.14, + "learning_rate": 4.974871089215118e-05, + "loss": 1.0449, + "step": 245 + }, + { + "epoch": 0.14, + "learning_rate": 4.9738367643317405e-05, + "loss": 1.1053, + "step": 250 + }, + { + "epoch": 0.14, + "learning_rate": 4.9727816916925395e-05, + "loss": 1.0651, + "step": 255 + }, + { + "epoch": 0.14, + "learning_rate": 4.971705880146276e-05, + "loss": 1.0828, + "step": 260 + }, + { + "epoch": 0.15, + "learning_rate": 4.970609338715646e-05, + "loss": 1.0932, + "step": 265 + }, + { + "epoch": 0.15, + "learning_rate": 4.969492076597203e-05, + "loss": 1.0648, + "step": 270 + }, + { + "epoch": 0.15, + "learning_rate": 4.968354103161283e-05, + "loss": 1.0948, + "step": 275 + }, + { + "epoch": 0.15, + "learning_rate": 4.967195427951926e-05, + "loss": 1.0721, + "step": 280 + }, + { + "epoch": 0.16, + "learning_rate": 4.9660160606867936e-05, + "loss": 1.124, + "step": 285 + }, + { + "epoch": 0.16, + "learning_rate": 4.9648160112570896e-05, + "loss": 1.0963, + "step": 290 + }, + { + "epoch": 0.16, + "learning_rate": 4.9635952897274773e-05, + "loss": 1.1078, + "step": 295 + }, + { + "epoch": 0.17, + "learning_rate": 4.9623539063359925e-05, + "loss": 1.1059, + "step": 300 + }, + { + "epoch": 0.17, + "learning_rate": 4.961091871493962e-05, + "loss": 1.1032, + "step": 305 + }, + { + "epoch": 0.17, + "learning_rate": 4.959809195785912e-05, + "loss": 1.0595, + "step": 310 + }, + { + "epoch": 0.17, + "learning_rate": 4.958505889969481e-05, + "loss": 1.1096, + "step": 315 + }, + { + "epoch": 0.18, + "learning_rate": 4.957181964975329e-05, + "loss": 1.0589, + "step": 320 + }, + { + "epoch": 0.18, + "learning_rate": 4.955837431907049e-05, + "loss": 1.0608, + "step": 325 + }, + { + "epoch": 0.18, + "learning_rate": 4.954472302041069e-05, + "loss": 1.0819, + "step": 330 + }, + { + "epoch": 0.19, + "learning_rate": 4.9530865868265605e-05, + "loss": 1.0759, + "step": 335 + }, + { + "epoch": 0.19, + "learning_rate": 4.951680297885342e-05, + "loss": 1.0515, + "step": 340 + }, + { + "epoch": 0.19, + "learning_rate": 4.950253447011779e-05, + "loss": 1.0371, + "step": 345 + }, + { + "epoch": 0.19, + "learning_rate": 4.948806046172691e-05, + "loss": 1.0619, + "step": 350 + }, + { + "epoch": 0.2, + "learning_rate": 4.947338107507245e-05, + "loss": 1.0757, + "step": 355 + }, + { + "epoch": 0.2, + "learning_rate": 4.945849643326857e-05, + "loss": 1.0686, + "step": 360 + }, + { + "epoch": 0.2, + "learning_rate": 4.9443406661150874e-05, + "loss": 1.0809, + "step": 365 + }, + { + "epoch": 0.2, + "learning_rate": 4.942811188527537e-05, + "loss": 1.0704, + "step": 370 + }, + { + "epoch": 0.21, + "learning_rate": 4.941261223391742e-05, + "loss": 1.0655, + "step": 375 + }, + { + "epoch": 0.21, + "learning_rate": 4.939690783707063e-05, + "loss": 1.1182, + "step": 380 + }, + { + "epoch": 0.21, + "learning_rate": 4.938099882644578e-05, + "loss": 1.081, + "step": 385 + }, + { + "epoch": 0.22, + "learning_rate": 4.9364885335469734e-05, + "loss": 1.0792, + "step": 390 + }, + { + "epoch": 0.22, + "learning_rate": 4.93485674992843e-05, + "loss": 1.0244, + "step": 395 + }, + { + "epoch": 0.22, + "learning_rate": 4.933204545474511e-05, + "loss": 1.0531, + "step": 400 + }, + { + "epoch": 0.22, + "learning_rate": 4.9315319340420465e-05, + "loss": 1.048, + "step": 405 + }, + { + "epoch": 0.23, + "learning_rate": 4.929838929659015e-05, + "loss": 1.04, + "step": 410 + }, + { + "epoch": 0.23, + "learning_rate": 4.9281255465244314e-05, + "loss": 1.0686, + "step": 415 + }, + { + "epoch": 0.23, + "learning_rate": 4.926391799008223e-05, + "loss": 1.0933, + "step": 420 + }, + { + "epoch": 0.24, + "learning_rate": 4.924637701651111e-05, + "loss": 1.0313, + "step": 425 + }, + { + "epoch": 0.24, + "learning_rate": 4.9228632691644874e-05, + "loss": 1.0811, + "step": 430 + }, + { + "epoch": 0.24, + "learning_rate": 4.921068516430293e-05, + "loss": 1.0617, + "step": 435 + }, + { + "epoch": 0.24, + "learning_rate": 4.919253458500892e-05, + "loss": 1.0482, + "step": 440 + }, + { + "epoch": 0.25, + "learning_rate": 4.9174181105989445e-05, + "loss": 1.0681, + "step": 445 + }, + { + "epoch": 0.25, + "learning_rate": 4.9155624881172834e-05, + "loss": 1.0476, + "step": 450 + }, + { + "epoch": 0.25, + "learning_rate": 4.913686606618777e-05, + "loss": 1.0463, + "step": 455 + }, + { + "epoch": 0.25, + "learning_rate": 4.911790481836208e-05, + "loss": 1.0213, + "step": 460 + }, + { + "epoch": 0.26, + "learning_rate": 4.909874129672133e-05, + "loss": 0.9855, + "step": 465 + }, + { + "epoch": 0.26, + "learning_rate": 4.907937566198757e-05, + "loss": 1.045, + "step": 470 + }, + { + "epoch": 0.26, + "learning_rate": 4.9059808076577914e-05, + "loss": 1.0442, + "step": 475 + }, + { + "epoch": 0.27, + "learning_rate": 4.904003870460323e-05, + "loss": 1.0354, + "step": 480 + }, + { + "epoch": 0.27, + "learning_rate": 4.9020067711866735e-05, + "loss": 1.0331, + "step": 485 + }, + { + "epoch": 0.27, + "learning_rate": 4.899989526586261e-05, + "loss": 1.0585, + "step": 490 + }, + { + "epoch": 0.27, + "learning_rate": 4.8979521535774636e-05, + "loss": 1.0223, + "step": 495 + }, + { + "epoch": 0.28, + "learning_rate": 4.895894669247468e-05, + "loss": 1.0118, + "step": 500 + }, + { + "epoch": 0.28, + "learning_rate": 4.8938170908521356e-05, + "loss": 1.0508, + "step": 505 + }, + { + "epoch": 0.28, + "learning_rate": 4.8917194358158534e-05, + "loss": 1.0656, + "step": 510 + }, + { + "epoch": 0.28, + "learning_rate": 4.8896017217313886e-05, + "loss": 1.0655, + "step": 515 + }, + { + "epoch": 0.29, + "learning_rate": 4.887463966359741e-05, + "loss": 1.0833, + "step": 520 + }, + { + "epoch": 0.29, + "learning_rate": 4.8853061876299956e-05, + "loss": 1.068, + "step": 525 + }, + { + "epoch": 0.29, + "learning_rate": 4.8831284036391684e-05, + "loss": 1.0487, + "step": 530 + }, + { + "epoch": 0.3, + "learning_rate": 4.880930632652058e-05, + "loss": 1.059, + "step": 535 + }, + { + "epoch": 0.3, + "learning_rate": 4.878712893101092e-05, + "loss": 1.0408, + "step": 540 + }, + { + "epoch": 0.3, + "learning_rate": 4.876475203586171e-05, + "loss": 0.9976, + "step": 545 + }, + { + "epoch": 0.3, + "learning_rate": 4.874217582874514e-05, + "loss": 1.0358, + "step": 550 + }, + { + "epoch": 0.31, + "learning_rate": 4.8719400499005e-05, + "loss": 1.0073, + "step": 555 + }, + { + "epoch": 0.31, + "learning_rate": 4.869642623765509e-05, + "loss": 1.0742, + "step": 560 + }, + { + "epoch": 0.31, + "learning_rate": 4.867325323737765e-05, + "loss": 1.0463, + "step": 565 + }, + { + "epoch": 0.32, + "learning_rate": 4.864988169252168e-05, + "loss": 1.0242, + "step": 570 + }, + { + "epoch": 0.32, + "learning_rate": 4.8626311799101375e-05, + "loss": 1.0773, + "step": 575 + }, + { + "epoch": 0.32, + "learning_rate": 4.860254375479446e-05, + "loss": 1.0371, + "step": 580 + }, + { + "epoch": 0.32, + "learning_rate": 4.8578577758940504e-05, + "loss": 1.0399, + "step": 585 + }, + { + "epoch": 0.33, + "learning_rate": 4.855441401253928e-05, + "loss": 1.0411, + "step": 590 + }, + { + "epoch": 0.33, + "learning_rate": 4.8530052718249076e-05, + "loss": 1.0476, + "step": 595 + }, + { + "epoch": 0.33, + "learning_rate": 4.850549408038498e-05, + "loss": 1.0075, + "step": 600 + } + ], + "max_steps": 5424, + "num_train_epochs": 3, + "total_flos": 1.94982187106304e+17, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-600/training_args.bin b/checkpoint-600/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..77433459daca6603950afc463ff989e817f81ac3 --- /dev/null +++ b/checkpoint-600/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0f6a8853599a52b1f15984d75ce4b6434c335fdc67d4509d0b2ca0d8c613590a +size 3362 diff --git a/checkpoint-700/README.md b/checkpoint-700/README.md new file mode 100644 index 0000000000000000000000000000000000000000..53c0fe6b436905ad7a2c060e98fe1ebdeac1a2fe --- /dev/null +++ b/checkpoint-700/README.md @@ -0,0 +1,15 @@ +--- +library_name: peft +--- +## Training procedure + + +The following `bitsandbytes` quantization config was used during training: +- load_in_8bit: True +- llm_int8_threshold: 6.0 +- llm_int8_skip_modules: None +- llm_int8_enable_fp32_cpu_offload: False +### Framework versions + + +- PEFT 0.5.0 diff --git a/checkpoint-700/adapter_config.json b/checkpoint-700/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..514ae99ac7471cc26016f7575e44dd8304f29b07 --- /dev/null +++ b/checkpoint-700/adapter_config.json @@ -0,0 +1,21 @@ +{ + "auto_mapping": null, + "base_model_name_or_path": "meta-llama/Llama-2-7b-chat-hf", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "q_proj", + "v_proj" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-700/adapter_model.bin b/checkpoint-700/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..80c752e71f07e221e258f205f36c884b4e9bd063 --- /dev/null +++ b/checkpoint-700/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0c495a215a10eea6425385fef49878560261976fd01752ab04030af226bbe056 +size 16821197 diff --git a/checkpoint-700/finetuning_args.json b/checkpoint-700/finetuning_args.json new file mode 100644 index 0000000000000000000000000000000000000000..d01efc206b59c6f88548e8f3940579f2ed2af33b --- /dev/null +++ b/checkpoint-700/finetuning_args.json @@ -0,0 +1,16 @@ +{ + "dpo_beta": 0.1, + "finetuning_type": "lora", + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "lora_rank": 8, + "lora_target": [ + "q_proj", + "v_proj" + ], + "name_module_trainable": "mlp", + "num_hidden_layers": 32, + "num_layer_trainable": 3, + "ppo_score_norm": false, + "resume_lora_training": true +} diff --git a/checkpoint-700/optimizer.pt b/checkpoint-700/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..54ff45ad4a20e756944f9b0c3a904cdda6abe95d --- /dev/null +++ b/checkpoint-700/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:67c3841d1c57ad7da2f0637474039a5a89be6075efad70bd673b501a27c5b1e9 +size 33629893 diff --git a/checkpoint-700/rng_state.pth b/checkpoint-700/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..5355ce70161f4ebbfb71af7a59ac9050244af8b1 --- /dev/null +++ b/checkpoint-700/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2626d41c7f824a8d2a0427abf5a8b60d94e3a74fc37ab15c72ea8deae5b8e3c4 +size 14575 diff --git a/checkpoint-700/scaler.pt b/checkpoint-700/scaler.pt new file mode 100644 index 0000000000000000000000000000000000000000..5f851bd509f220aa69f1c2ca24b4f35d1b63e8a5 --- /dev/null +++ b/checkpoint-700/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eedd043df6cf348e21712f6001f51ee5b59434655346c4271bdeabe13333a8a9 +size 557 diff --git a/checkpoint-700/scheduler.pt b/checkpoint-700/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..d26f74cb3d78f356d03487a127149cda08f3d112 --- /dev/null +++ b/checkpoint-700/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2cfd37841cc59d73f857c2302609c8f1d4041d9f030be54121b47c7fb84e32ad +size 627 diff --git a/checkpoint-700/trainer_state.json b/checkpoint-700/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..54c774cab484e1902ba2160dd57f11d99aff7236 --- /dev/null +++ b/checkpoint-700/trainer_state.json @@ -0,0 +1,856 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.3870611003594139, + "global_step": 700, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.9999895164082156e-05, + "loss": 1.6349, + "step": 5 + }, + { + "epoch": 0.01, + "learning_rate": 4.999958065720787e-05, + "loss": 1.6199, + "step": 10 + }, + { + "epoch": 0.01, + "learning_rate": 4.999905648201487e-05, + "loss": 1.4834, + "step": 15 + }, + { + "epoch": 0.01, + "learning_rate": 4.999832264289934e-05, + "loss": 1.3882, + "step": 20 + }, + { + "epoch": 0.01, + "learning_rate": 4.999737914601591e-05, + "loss": 1.3679, + "step": 25 + }, + { + "epoch": 0.02, + "learning_rate": 4.999622599927756e-05, + "loss": 1.2396, + "step": 30 + }, + { + "epoch": 0.02, + "learning_rate": 4.999486321235559e-05, + "loss": 1.321, + "step": 35 + }, + { + "epoch": 0.02, + "learning_rate": 4.9993290796679516e-05, + "loss": 1.2874, + "step": 40 + }, + { + "epoch": 0.02, + "learning_rate": 4.999150876543699e-05, + "loss": 1.2607, + "step": 45 + }, + { + "epoch": 0.03, + "learning_rate": 4.9989517133573694e-05, + "loss": 1.2454, + "step": 50 + }, + { + "epoch": 0.03, + "learning_rate": 4.9987315917793174e-05, + "loss": 1.2799, + "step": 55 + }, + { + "epoch": 0.03, + "learning_rate": 4.998490513655676e-05, + "loss": 1.2575, + "step": 60 + }, + { + "epoch": 0.04, + "learning_rate": 4.998228481008337e-05, + "loss": 1.2404, + "step": 65 + }, + { + "epoch": 0.04, + "learning_rate": 4.997945496034934e-05, + "loss": 1.2219, + "step": 70 + }, + { + "epoch": 0.04, + "learning_rate": 4.9976415611088267e-05, + "loss": 1.2241, + "step": 75 + }, + { + "epoch": 0.04, + "learning_rate": 4.997316678779079e-05, + "loss": 1.1716, + "step": 80 + }, + { + "epoch": 0.05, + "learning_rate": 4.996970851770438e-05, + "loss": 1.1883, + "step": 85 + }, + { + "epoch": 0.05, + "learning_rate": 4.9966040829833115e-05, + "loss": 1.205, + "step": 90 + }, + { + "epoch": 0.05, + "learning_rate": 4.9962163754937426e-05, + "loss": 1.1246, + "step": 95 + }, + { + "epoch": 0.06, + "learning_rate": 4.995807732553384e-05, + "loss": 1.1636, + "step": 100 + }, + { + "epoch": 0.06, + "learning_rate": 4.9953781575894723e-05, + "loss": 1.158, + "step": 105 + }, + { + "epoch": 0.06, + "learning_rate": 4.9949276542048e-05, + "loss": 1.1477, + "step": 110 + }, + { + "epoch": 0.06, + "learning_rate": 4.9944562261776805e-05, + "loss": 1.1678, + "step": 115 + }, + { + "epoch": 0.07, + "learning_rate": 4.9939638774619216e-05, + "loss": 1.1501, + "step": 120 + }, + { + "epoch": 0.07, + "learning_rate": 4.99345061218679e-05, + "loss": 1.1955, + "step": 125 + }, + { + "epoch": 0.07, + "learning_rate": 4.9929164346569756e-05, + "loss": 1.1724, + "step": 130 + }, + { + "epoch": 0.07, + "learning_rate": 4.9923613493525576e-05, + "loss": 1.177, + "step": 135 + }, + { + "epoch": 0.08, + "learning_rate": 4.991785360928968e-05, + "loss": 1.1418, + "step": 140 + }, + { + "epoch": 0.08, + "learning_rate": 4.991188474216947e-05, + "loss": 1.1898, + "step": 145 + }, + { + "epoch": 0.08, + "learning_rate": 4.9905706942225094e-05, + "loss": 1.1479, + "step": 150 + }, + { + "epoch": 0.09, + "learning_rate": 4.9899320261268966e-05, + "loss": 1.1356, + "step": 155 + }, + { + "epoch": 0.09, + "learning_rate": 4.989272475286537e-05, + "loss": 1.1397, + "step": 160 + }, + { + "epoch": 0.09, + "learning_rate": 4.9885920472330004e-05, + "loss": 1.1215, + "step": 165 + }, + { + "epoch": 0.09, + "learning_rate": 4.9878907476729516e-05, + "loss": 1.167, + "step": 170 + }, + { + "epoch": 0.1, + "learning_rate": 4.9871685824881e-05, + "loss": 1.1219, + "step": 175 + }, + { + "epoch": 0.1, + "learning_rate": 4.9864255577351534e-05, + "loss": 1.0835, + "step": 180 + }, + { + "epoch": 0.1, + "learning_rate": 4.985661679645769e-05, + "loss": 1.0721, + "step": 185 + }, + { + "epoch": 0.11, + "learning_rate": 4.9848769546264915e-05, + "loss": 1.0692, + "step": 190 + }, + { + "epoch": 0.11, + "learning_rate": 4.9840713892587146e-05, + "loss": 1.0488, + "step": 195 + }, + { + "epoch": 0.11, + "learning_rate": 4.983244990298609e-05, + "loss": 1.1285, + "step": 200 + }, + { + "epoch": 0.11, + "learning_rate": 4.982397764677081e-05, + "loss": 1.0832, + "step": 205 + }, + { + "epoch": 0.12, + "learning_rate": 4.981529719499704e-05, + "loss": 1.0652, + "step": 210 + }, + { + "epoch": 0.12, + "learning_rate": 4.980640862046663e-05, + "loss": 1.1043, + "step": 215 + }, + { + "epoch": 0.12, + "learning_rate": 4.979731199772693e-05, + "loss": 1.112, + "step": 220 + }, + { + "epoch": 0.12, + "learning_rate": 4.9788007403070146e-05, + "loss": 1.1029, + "step": 225 + }, + { + "epoch": 0.13, + "learning_rate": 4.977849491453277e-05, + "loss": 1.0869, + "step": 230 + }, + { + "epoch": 0.13, + "learning_rate": 4.976877461189481e-05, + "loss": 1.0843, + "step": 235 + }, + { + "epoch": 0.13, + "learning_rate": 4.975884657667922e-05, + "loss": 1.0789, + "step": 240 + }, + { + "epoch": 0.14, + "learning_rate": 4.974871089215118e-05, + "loss": 1.0449, + "step": 245 + }, + { + "epoch": 0.14, + "learning_rate": 4.9738367643317405e-05, + "loss": 1.1053, + "step": 250 + }, + { + "epoch": 0.14, + "learning_rate": 4.9727816916925395e-05, + "loss": 1.0651, + "step": 255 + }, + { + "epoch": 0.14, + "learning_rate": 4.971705880146276e-05, + "loss": 1.0828, + "step": 260 + }, + { + "epoch": 0.15, + "learning_rate": 4.970609338715646e-05, + "loss": 1.0932, + "step": 265 + }, + { + "epoch": 0.15, + "learning_rate": 4.969492076597203e-05, + "loss": 1.0648, + "step": 270 + }, + { + "epoch": 0.15, + "learning_rate": 4.968354103161283e-05, + "loss": 1.0948, + "step": 275 + }, + { + "epoch": 0.15, + "learning_rate": 4.967195427951926e-05, + "loss": 1.0721, + "step": 280 + }, + { + "epoch": 0.16, + "learning_rate": 4.9660160606867936e-05, + "loss": 1.124, + "step": 285 + }, + { + "epoch": 0.16, + "learning_rate": 4.9648160112570896e-05, + "loss": 1.0963, + "step": 290 + }, + { + "epoch": 0.16, + "learning_rate": 4.9635952897274773e-05, + "loss": 1.1078, + "step": 295 + }, + { + "epoch": 0.17, + "learning_rate": 4.9623539063359925e-05, + "loss": 1.1059, + "step": 300 + }, + { + "epoch": 0.17, + "learning_rate": 4.961091871493962e-05, + "loss": 1.1032, + "step": 305 + }, + { + "epoch": 0.17, + "learning_rate": 4.959809195785912e-05, + "loss": 1.0595, + "step": 310 + }, + { + "epoch": 0.17, + "learning_rate": 4.958505889969481e-05, + "loss": 1.1096, + "step": 315 + }, + { + "epoch": 0.18, + "learning_rate": 4.957181964975329e-05, + "loss": 1.0589, + "step": 320 + }, + { + "epoch": 0.18, + "learning_rate": 4.955837431907049e-05, + "loss": 1.0608, + "step": 325 + }, + { + "epoch": 0.18, + "learning_rate": 4.954472302041069e-05, + "loss": 1.0819, + "step": 330 + }, + { + "epoch": 0.19, + "learning_rate": 4.9530865868265605e-05, + "loss": 1.0759, + "step": 335 + }, + { + "epoch": 0.19, + "learning_rate": 4.951680297885342e-05, + "loss": 1.0515, + "step": 340 + }, + { + "epoch": 0.19, + "learning_rate": 4.950253447011779e-05, + "loss": 1.0371, + "step": 345 + }, + { + "epoch": 0.19, + "learning_rate": 4.948806046172691e-05, + "loss": 1.0619, + "step": 350 + }, + { + "epoch": 0.2, + "learning_rate": 4.947338107507245e-05, + "loss": 1.0757, + "step": 355 + }, + { + "epoch": 0.2, + "learning_rate": 4.945849643326857e-05, + "loss": 1.0686, + "step": 360 + }, + { + "epoch": 0.2, + "learning_rate": 4.9443406661150874e-05, + "loss": 1.0809, + "step": 365 + }, + { + "epoch": 0.2, + "learning_rate": 4.942811188527537e-05, + "loss": 1.0704, + "step": 370 + }, + { + "epoch": 0.21, + "learning_rate": 4.941261223391742e-05, + "loss": 1.0655, + "step": 375 + }, + { + "epoch": 0.21, + "learning_rate": 4.939690783707063e-05, + "loss": 1.1182, + "step": 380 + }, + { + "epoch": 0.21, + "learning_rate": 4.938099882644578e-05, + "loss": 1.081, + "step": 385 + }, + { + "epoch": 0.22, + "learning_rate": 4.9364885335469734e-05, + "loss": 1.0792, + "step": 390 + }, + { + "epoch": 0.22, + "learning_rate": 4.93485674992843e-05, + "loss": 1.0244, + "step": 395 + }, + { + "epoch": 0.22, + "learning_rate": 4.933204545474511e-05, + "loss": 1.0531, + "step": 400 + }, + { + "epoch": 0.22, + "learning_rate": 4.9315319340420465e-05, + "loss": 1.048, + "step": 405 + }, + { + "epoch": 0.23, + "learning_rate": 4.929838929659015e-05, + "loss": 1.04, + "step": 410 + }, + { + "epoch": 0.23, + "learning_rate": 4.9281255465244314e-05, + "loss": 1.0686, + "step": 415 + }, + { + "epoch": 0.23, + "learning_rate": 4.926391799008223e-05, + "loss": 1.0933, + "step": 420 + }, + { + "epoch": 0.24, + "learning_rate": 4.924637701651111e-05, + "loss": 1.0313, + "step": 425 + }, + { + "epoch": 0.24, + "learning_rate": 4.9228632691644874e-05, + "loss": 1.0811, + "step": 430 + }, + { + "epoch": 0.24, + "learning_rate": 4.921068516430293e-05, + "loss": 1.0617, + "step": 435 + }, + { + "epoch": 0.24, + "learning_rate": 4.919253458500892e-05, + "loss": 1.0482, + "step": 440 + }, + { + "epoch": 0.25, + "learning_rate": 4.9174181105989445e-05, + "loss": 1.0681, + "step": 445 + }, + { + "epoch": 0.25, + "learning_rate": 4.9155624881172834e-05, + "loss": 1.0476, + "step": 450 + }, + { + "epoch": 0.25, + "learning_rate": 4.913686606618777e-05, + "loss": 1.0463, + "step": 455 + }, + { + "epoch": 0.25, + "learning_rate": 4.911790481836208e-05, + "loss": 1.0213, + "step": 460 + }, + { + "epoch": 0.26, + "learning_rate": 4.909874129672133e-05, + "loss": 0.9855, + "step": 465 + }, + { + "epoch": 0.26, + "learning_rate": 4.907937566198757e-05, + "loss": 1.045, + "step": 470 + }, + { + "epoch": 0.26, + "learning_rate": 4.9059808076577914e-05, + "loss": 1.0442, + "step": 475 + }, + { + "epoch": 0.27, + "learning_rate": 4.904003870460323e-05, + "loss": 1.0354, + "step": 480 + }, + { + "epoch": 0.27, + "learning_rate": 4.9020067711866735e-05, + "loss": 1.0331, + "step": 485 + }, + { + "epoch": 0.27, + "learning_rate": 4.899989526586261e-05, + "loss": 1.0585, + "step": 490 + }, + { + "epoch": 0.27, + "learning_rate": 4.8979521535774636e-05, + "loss": 1.0223, + "step": 495 + }, + { + "epoch": 0.28, + "learning_rate": 4.895894669247468e-05, + "loss": 1.0118, + "step": 500 + }, + { + "epoch": 0.28, + "learning_rate": 4.8938170908521356e-05, + "loss": 1.0508, + "step": 505 + }, + { + "epoch": 0.28, + "learning_rate": 4.8917194358158534e-05, + "loss": 1.0656, + "step": 510 + }, + { + "epoch": 0.28, + "learning_rate": 4.8896017217313886e-05, + "loss": 1.0655, + "step": 515 + }, + { + "epoch": 0.29, + "learning_rate": 4.887463966359741e-05, + "loss": 1.0833, + "step": 520 + }, + { + "epoch": 0.29, + "learning_rate": 4.8853061876299956e-05, + "loss": 1.068, + "step": 525 + }, + { + "epoch": 0.29, + "learning_rate": 4.8831284036391684e-05, + "loss": 1.0487, + "step": 530 + }, + { + "epoch": 0.3, + "learning_rate": 4.880930632652058e-05, + "loss": 1.059, + "step": 535 + }, + { + "epoch": 0.3, + "learning_rate": 4.878712893101092e-05, + "loss": 1.0408, + "step": 540 + }, + { + "epoch": 0.3, + "learning_rate": 4.876475203586171e-05, + "loss": 0.9976, + "step": 545 + }, + { + "epoch": 0.3, + "learning_rate": 4.874217582874514e-05, + "loss": 1.0358, + "step": 550 + }, + { + "epoch": 0.31, + "learning_rate": 4.8719400499005e-05, + "loss": 1.0073, + "step": 555 + }, + { + "epoch": 0.31, + "learning_rate": 4.869642623765509e-05, + "loss": 1.0742, + "step": 560 + }, + { + "epoch": 0.31, + "learning_rate": 4.867325323737765e-05, + "loss": 1.0463, + "step": 565 + }, + { + "epoch": 0.32, + "learning_rate": 4.864988169252168e-05, + "loss": 1.0242, + "step": 570 + }, + { + "epoch": 0.32, + "learning_rate": 4.8626311799101375e-05, + "loss": 1.0773, + "step": 575 + }, + { + "epoch": 0.32, + "learning_rate": 4.860254375479446e-05, + "loss": 1.0371, + "step": 580 + }, + { + "epoch": 0.32, + "learning_rate": 4.8578577758940504e-05, + "loss": 1.0399, + "step": 585 + }, + { + "epoch": 0.33, + "learning_rate": 4.855441401253928e-05, + "loss": 1.0411, + "step": 590 + }, + { + "epoch": 0.33, + "learning_rate": 4.8530052718249076e-05, + "loss": 1.0476, + "step": 595 + }, + { + "epoch": 0.33, + "learning_rate": 4.850549408038498e-05, + "loss": 1.0075, + "step": 600 + }, + { + "epoch": 0.33, + "learning_rate": 4.848073830491717e-05, + "loss": 1.0045, + "step": 605 + }, + { + "epoch": 0.34, + "learning_rate": 4.845578559946923e-05, + "loss": 1.0368, + "step": 610 + }, + { + "epoch": 0.34, + "learning_rate": 4.8430636173316306e-05, + "loss": 1.0156, + "step": 615 + }, + { + "epoch": 0.34, + "learning_rate": 4.840529023738348e-05, + "loss": 1.0334, + "step": 620 + }, + { + "epoch": 0.35, + "learning_rate": 4.837974800424389e-05, + "loss": 0.9994, + "step": 625 + }, + { + "epoch": 0.35, + "learning_rate": 4.8354009688117026e-05, + "loss": 1.0467, + "step": 630 + }, + { + "epoch": 0.35, + "learning_rate": 4.8328075504866874e-05, + "loss": 0.9968, + "step": 635 + }, + { + "epoch": 0.35, + "learning_rate": 4.8301945672000164e-05, + "loss": 1.041, + "step": 640 + }, + { + "epoch": 0.36, + "learning_rate": 4.8275620408664487e-05, + "loss": 0.9811, + "step": 645 + }, + { + "epoch": 0.36, + "learning_rate": 4.8249099935646494e-05, + "loss": 1.0235, + "step": 650 + }, + { + "epoch": 0.36, + "learning_rate": 4.822238447537003e-05, + "loss": 1.0152, + "step": 655 + }, + { + "epoch": 0.36, + "learning_rate": 4.819547425189429e-05, + "loss": 1.038, + "step": 660 + }, + { + "epoch": 0.37, + "learning_rate": 4.81683694909119e-05, + "loss": 1.0387, + "step": 665 + }, + { + "epoch": 0.37, + "learning_rate": 4.814107041974707e-05, + "loss": 1.019, + "step": 670 + }, + { + "epoch": 0.37, + "learning_rate": 4.811357726735366e-05, + "loss": 1.0403, + "step": 675 + }, + { + "epoch": 0.38, + "learning_rate": 4.808589026431324e-05, + "loss": 1.0828, + "step": 680 + }, + { + "epoch": 0.38, + "learning_rate": 4.805800964283322e-05, + "loss": 1.0128, + "step": 685 + }, + { + "epoch": 0.38, + "learning_rate": 4.802993563674483e-05, + "loss": 1.0502, + "step": 690 + }, + { + "epoch": 0.38, + "learning_rate": 4.80016684815012e-05, + "loss": 1.0169, + "step": 695 + }, + { + "epoch": 0.39, + "learning_rate": 4.7973208414175406e-05, + "loss": 1.0316, + "step": 700 + } + ], + "max_steps": 5424, + "num_train_epochs": 3, + "total_flos": 2.27479218290688e+17, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-700/training_args.bin b/checkpoint-700/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..77433459daca6603950afc463ff989e817f81ac3 --- /dev/null +++ b/checkpoint-700/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0f6a8853599a52b1f15984d75ce4b6434c335fdc67d4509d0b2ca0d8c613590a +size 3362 diff --git a/checkpoint-800/README.md b/checkpoint-800/README.md new file mode 100644 index 0000000000000000000000000000000000000000..53c0fe6b436905ad7a2c060e98fe1ebdeac1a2fe --- /dev/null +++ b/checkpoint-800/README.md @@ -0,0 +1,15 @@ +--- +library_name: peft +--- +## Training procedure + + +The following `bitsandbytes` quantization config was used during training: +- load_in_8bit: True +- llm_int8_threshold: 6.0 +- llm_int8_skip_modules: None +- llm_int8_enable_fp32_cpu_offload: False +### Framework versions + + +- PEFT 0.5.0 diff --git a/checkpoint-800/adapter_config.json b/checkpoint-800/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..514ae99ac7471cc26016f7575e44dd8304f29b07 --- /dev/null +++ b/checkpoint-800/adapter_config.json @@ -0,0 +1,21 @@ +{ + "auto_mapping": null, + "base_model_name_or_path": "meta-llama/Llama-2-7b-chat-hf", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "q_proj", + "v_proj" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-800/adapter_model.bin b/checkpoint-800/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..efd17cf6a33409ff54aea50a9f7679c5d23c4d6c --- /dev/null +++ b/checkpoint-800/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cce29f5a752b011d9b0199d2fcdd4caf056c300ac23c5e169fbb1d73c26c2711 +size 16821197 diff --git a/checkpoint-800/finetuning_args.json b/checkpoint-800/finetuning_args.json new file mode 100644 index 0000000000000000000000000000000000000000..d01efc206b59c6f88548e8f3940579f2ed2af33b --- /dev/null +++ b/checkpoint-800/finetuning_args.json @@ -0,0 +1,16 @@ +{ + "dpo_beta": 0.1, + "finetuning_type": "lora", + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "lora_rank": 8, + "lora_target": [ + "q_proj", + "v_proj" + ], + "name_module_trainable": "mlp", + "num_hidden_layers": 32, + "num_layer_trainable": 3, + "ppo_score_norm": false, + "resume_lora_training": true +} diff --git a/checkpoint-800/optimizer.pt b/checkpoint-800/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..a3808f53241c0d6e40448e8725fffca7c5ec53d8 --- /dev/null +++ b/checkpoint-800/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:89233f8f4a9230c74e67b8a1db69a4c1726e9f77157c5d40bef53a728cd78a8b +size 33629893 diff --git a/checkpoint-800/rng_state.pth b/checkpoint-800/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..cc6d71a568688eb2f01264c8ca7d401f5be9659c --- /dev/null +++ b/checkpoint-800/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:64dc7d8f78e9f08c3dc70ba5e61460ae802e4ac9ff5c6d6217f6809464e38978 +size 14575 diff --git a/checkpoint-800/scaler.pt b/checkpoint-800/scaler.pt new file mode 100644 index 0000000000000000000000000000000000000000..5e01dd7b5d3a8968bb4c73a805f08f0f65c9b57f --- /dev/null +++ b/checkpoint-800/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:27ec07a12731ae6f9765d05fe7c8495505f1d0f90b4cc6255a0853fec3970808 +size 557 diff --git a/checkpoint-800/scheduler.pt b/checkpoint-800/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..4cfc87ea08a0765b87d57d48f092fb63d2411e33 --- /dev/null +++ b/checkpoint-800/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:109f1b5dae123eb244804c29df3849a24f1d3840f1f58cd057f370999b0e2495 +size 627 diff --git a/checkpoint-800/trainer_state.json b/checkpoint-800/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..f4e2e5147915db7c0e511bf9745a347d67449eed --- /dev/null +++ b/checkpoint-800/trainer_state.json @@ -0,0 +1,976 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.44235554326790155, + "global_step": 800, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.9999895164082156e-05, + "loss": 1.6349, + "step": 5 + }, + { + "epoch": 0.01, + "learning_rate": 4.999958065720787e-05, + "loss": 1.6199, + "step": 10 + }, + { + "epoch": 0.01, + "learning_rate": 4.999905648201487e-05, + "loss": 1.4834, + "step": 15 + }, + { + "epoch": 0.01, + "learning_rate": 4.999832264289934e-05, + "loss": 1.3882, + "step": 20 + }, + { + "epoch": 0.01, + "learning_rate": 4.999737914601591e-05, + "loss": 1.3679, + "step": 25 + }, + { + "epoch": 0.02, + "learning_rate": 4.999622599927756e-05, + "loss": 1.2396, + "step": 30 + }, + { + "epoch": 0.02, + "learning_rate": 4.999486321235559e-05, + "loss": 1.321, + "step": 35 + }, + { + "epoch": 0.02, + "learning_rate": 4.9993290796679516e-05, + "loss": 1.2874, + "step": 40 + }, + { + "epoch": 0.02, + "learning_rate": 4.999150876543699e-05, + "loss": 1.2607, + "step": 45 + }, + { + "epoch": 0.03, + "learning_rate": 4.9989517133573694e-05, + "loss": 1.2454, + "step": 50 + }, + { + "epoch": 0.03, + "learning_rate": 4.9987315917793174e-05, + "loss": 1.2799, + "step": 55 + }, + { + "epoch": 0.03, + "learning_rate": 4.998490513655676e-05, + "loss": 1.2575, + "step": 60 + }, + { + "epoch": 0.04, + "learning_rate": 4.998228481008337e-05, + "loss": 1.2404, + "step": 65 + }, + { + "epoch": 0.04, + "learning_rate": 4.997945496034934e-05, + "loss": 1.2219, + "step": 70 + }, + { + "epoch": 0.04, + "learning_rate": 4.9976415611088267e-05, + "loss": 1.2241, + "step": 75 + }, + { + "epoch": 0.04, + "learning_rate": 4.997316678779079e-05, + "loss": 1.1716, + "step": 80 + }, + { + "epoch": 0.05, + "learning_rate": 4.996970851770438e-05, + "loss": 1.1883, + "step": 85 + }, + { + "epoch": 0.05, + "learning_rate": 4.9966040829833115e-05, + "loss": 1.205, + "step": 90 + }, + { + "epoch": 0.05, + "learning_rate": 4.9962163754937426e-05, + "loss": 1.1246, + "step": 95 + }, + { + "epoch": 0.06, + "learning_rate": 4.995807732553384e-05, + "loss": 1.1636, + "step": 100 + }, + { + "epoch": 0.06, + "learning_rate": 4.9953781575894723e-05, + "loss": 1.158, + "step": 105 + }, + { + "epoch": 0.06, + "learning_rate": 4.9949276542048e-05, + "loss": 1.1477, + "step": 110 + }, + { + "epoch": 0.06, + "learning_rate": 4.9944562261776805e-05, + "loss": 1.1678, + "step": 115 + }, + { + "epoch": 0.07, + "learning_rate": 4.9939638774619216e-05, + "loss": 1.1501, + "step": 120 + }, + { + "epoch": 0.07, + "learning_rate": 4.99345061218679e-05, + "loss": 1.1955, + "step": 125 + }, + { + "epoch": 0.07, + "learning_rate": 4.9929164346569756e-05, + "loss": 1.1724, + "step": 130 + }, + { + "epoch": 0.07, + "learning_rate": 4.9923613493525576e-05, + "loss": 1.177, + "step": 135 + }, + { + "epoch": 0.08, + "learning_rate": 4.991785360928968e-05, + "loss": 1.1418, + "step": 140 + }, + { + "epoch": 0.08, + "learning_rate": 4.991188474216947e-05, + "loss": 1.1898, + "step": 145 + }, + { + "epoch": 0.08, + "learning_rate": 4.9905706942225094e-05, + "loss": 1.1479, + "step": 150 + }, + { + "epoch": 0.09, + "learning_rate": 4.9899320261268966e-05, + "loss": 1.1356, + "step": 155 + }, + { + "epoch": 0.09, + "learning_rate": 4.989272475286537e-05, + "loss": 1.1397, + "step": 160 + }, + { + "epoch": 0.09, + "learning_rate": 4.9885920472330004e-05, + "loss": 1.1215, + "step": 165 + }, + { + "epoch": 0.09, + "learning_rate": 4.9878907476729516e-05, + "loss": 1.167, + "step": 170 + }, + { + "epoch": 0.1, + "learning_rate": 4.9871685824881e-05, + "loss": 1.1219, + "step": 175 + }, + { + "epoch": 0.1, + "learning_rate": 4.9864255577351534e-05, + "loss": 1.0835, + "step": 180 + }, + { + "epoch": 0.1, + "learning_rate": 4.985661679645769e-05, + "loss": 1.0721, + "step": 185 + }, + { + "epoch": 0.11, + "learning_rate": 4.9848769546264915e-05, + "loss": 1.0692, + "step": 190 + }, + { + "epoch": 0.11, + "learning_rate": 4.9840713892587146e-05, + "loss": 1.0488, + "step": 195 + }, + { + "epoch": 0.11, + "learning_rate": 4.983244990298609e-05, + "loss": 1.1285, + "step": 200 + }, + { + "epoch": 0.11, + "learning_rate": 4.982397764677081e-05, + "loss": 1.0832, + "step": 205 + }, + { + "epoch": 0.12, + "learning_rate": 4.981529719499704e-05, + "loss": 1.0652, + "step": 210 + }, + { + "epoch": 0.12, + "learning_rate": 4.980640862046663e-05, + "loss": 1.1043, + "step": 215 + }, + { + "epoch": 0.12, + "learning_rate": 4.979731199772693e-05, + "loss": 1.112, + "step": 220 + }, + { + "epoch": 0.12, + "learning_rate": 4.9788007403070146e-05, + "loss": 1.1029, + "step": 225 + }, + { + "epoch": 0.13, + "learning_rate": 4.977849491453277e-05, + "loss": 1.0869, + "step": 230 + }, + { + "epoch": 0.13, + "learning_rate": 4.976877461189481e-05, + "loss": 1.0843, + "step": 235 + }, + { + "epoch": 0.13, + "learning_rate": 4.975884657667922e-05, + "loss": 1.0789, + "step": 240 + }, + { + "epoch": 0.14, + "learning_rate": 4.974871089215118e-05, + "loss": 1.0449, + "step": 245 + }, + { + "epoch": 0.14, + "learning_rate": 4.9738367643317405e-05, + "loss": 1.1053, + "step": 250 + }, + { + "epoch": 0.14, + "learning_rate": 4.9727816916925395e-05, + "loss": 1.0651, + "step": 255 + }, + { + "epoch": 0.14, + "learning_rate": 4.971705880146276e-05, + "loss": 1.0828, + "step": 260 + }, + { + "epoch": 0.15, + "learning_rate": 4.970609338715646e-05, + "loss": 1.0932, + "step": 265 + }, + { + "epoch": 0.15, + "learning_rate": 4.969492076597203e-05, + "loss": 1.0648, + "step": 270 + }, + { + "epoch": 0.15, + "learning_rate": 4.968354103161283e-05, + "loss": 1.0948, + "step": 275 + }, + { + "epoch": 0.15, + "learning_rate": 4.967195427951926e-05, + "loss": 1.0721, + "step": 280 + }, + { + "epoch": 0.16, + "learning_rate": 4.9660160606867936e-05, + "loss": 1.124, + "step": 285 + }, + { + "epoch": 0.16, + "learning_rate": 4.9648160112570896e-05, + "loss": 1.0963, + "step": 290 + }, + { + "epoch": 0.16, + "learning_rate": 4.9635952897274773e-05, + "loss": 1.1078, + "step": 295 + }, + { + "epoch": 0.17, + "learning_rate": 4.9623539063359925e-05, + "loss": 1.1059, + "step": 300 + }, + { + "epoch": 0.17, + "learning_rate": 4.961091871493962e-05, + "loss": 1.1032, + "step": 305 + }, + { + "epoch": 0.17, + "learning_rate": 4.959809195785912e-05, + "loss": 1.0595, + "step": 310 + }, + { + "epoch": 0.17, + "learning_rate": 4.958505889969481e-05, + "loss": 1.1096, + "step": 315 + }, + { + "epoch": 0.18, + "learning_rate": 4.957181964975329e-05, + "loss": 1.0589, + "step": 320 + }, + { + "epoch": 0.18, + "learning_rate": 4.955837431907049e-05, + "loss": 1.0608, + "step": 325 + }, + { + "epoch": 0.18, + "learning_rate": 4.954472302041069e-05, + "loss": 1.0819, + "step": 330 + }, + { + "epoch": 0.19, + "learning_rate": 4.9530865868265605e-05, + "loss": 1.0759, + "step": 335 + }, + { + "epoch": 0.19, + "learning_rate": 4.951680297885342e-05, + "loss": 1.0515, + "step": 340 + }, + { + "epoch": 0.19, + "learning_rate": 4.950253447011779e-05, + "loss": 1.0371, + "step": 345 + }, + { + "epoch": 0.19, + "learning_rate": 4.948806046172691e-05, + "loss": 1.0619, + "step": 350 + }, + { + "epoch": 0.2, + "learning_rate": 4.947338107507245e-05, + "loss": 1.0757, + "step": 355 + }, + { + "epoch": 0.2, + "learning_rate": 4.945849643326857e-05, + "loss": 1.0686, + "step": 360 + }, + { + "epoch": 0.2, + "learning_rate": 4.9443406661150874e-05, + "loss": 1.0809, + "step": 365 + }, + { + "epoch": 0.2, + "learning_rate": 4.942811188527537e-05, + "loss": 1.0704, + "step": 370 + }, + { + "epoch": 0.21, + "learning_rate": 4.941261223391742e-05, + "loss": 1.0655, + "step": 375 + }, + { + "epoch": 0.21, + "learning_rate": 4.939690783707063e-05, + "loss": 1.1182, + "step": 380 + }, + { + "epoch": 0.21, + "learning_rate": 4.938099882644578e-05, + "loss": 1.081, + "step": 385 + }, + { + "epoch": 0.22, + "learning_rate": 4.9364885335469734e-05, + "loss": 1.0792, + "step": 390 + }, + { + "epoch": 0.22, + "learning_rate": 4.93485674992843e-05, + "loss": 1.0244, + "step": 395 + }, + { + "epoch": 0.22, + "learning_rate": 4.933204545474511e-05, + "loss": 1.0531, + "step": 400 + }, + { + "epoch": 0.22, + "learning_rate": 4.9315319340420465e-05, + "loss": 1.048, + "step": 405 + }, + { + "epoch": 0.23, + "learning_rate": 4.929838929659015e-05, + "loss": 1.04, + "step": 410 + }, + { + "epoch": 0.23, + "learning_rate": 4.9281255465244314e-05, + "loss": 1.0686, + "step": 415 + }, + { + "epoch": 0.23, + "learning_rate": 4.926391799008223e-05, + "loss": 1.0933, + "step": 420 + }, + { + "epoch": 0.24, + "learning_rate": 4.924637701651111e-05, + "loss": 1.0313, + "step": 425 + }, + { + "epoch": 0.24, + "learning_rate": 4.9228632691644874e-05, + "loss": 1.0811, + "step": 430 + }, + { + "epoch": 0.24, + "learning_rate": 4.921068516430293e-05, + "loss": 1.0617, + "step": 435 + }, + { + "epoch": 0.24, + "learning_rate": 4.919253458500892e-05, + "loss": 1.0482, + "step": 440 + }, + { + "epoch": 0.25, + "learning_rate": 4.9174181105989445e-05, + "loss": 1.0681, + "step": 445 + }, + { + "epoch": 0.25, + "learning_rate": 4.9155624881172834e-05, + "loss": 1.0476, + "step": 450 + }, + { + "epoch": 0.25, + "learning_rate": 4.913686606618777e-05, + "loss": 1.0463, + "step": 455 + }, + { + "epoch": 0.25, + "learning_rate": 4.911790481836208e-05, + "loss": 1.0213, + "step": 460 + }, + { + "epoch": 0.26, + "learning_rate": 4.909874129672133e-05, + "loss": 0.9855, + "step": 465 + }, + { + "epoch": 0.26, + "learning_rate": 4.907937566198757e-05, + "loss": 1.045, + "step": 470 + }, + { + "epoch": 0.26, + "learning_rate": 4.9059808076577914e-05, + "loss": 1.0442, + "step": 475 + }, + { + "epoch": 0.27, + "learning_rate": 4.904003870460323e-05, + "loss": 1.0354, + "step": 480 + }, + { + "epoch": 0.27, + "learning_rate": 4.9020067711866735e-05, + "loss": 1.0331, + "step": 485 + }, + { + "epoch": 0.27, + "learning_rate": 4.899989526586261e-05, + "loss": 1.0585, + "step": 490 + }, + { + "epoch": 0.27, + "learning_rate": 4.8979521535774636e-05, + "loss": 1.0223, + "step": 495 + }, + { + "epoch": 0.28, + "learning_rate": 4.895894669247468e-05, + "loss": 1.0118, + "step": 500 + }, + { + "epoch": 0.28, + "learning_rate": 4.8938170908521356e-05, + "loss": 1.0508, + "step": 505 + }, + { + "epoch": 0.28, + "learning_rate": 4.8917194358158534e-05, + "loss": 1.0656, + "step": 510 + }, + { + "epoch": 0.28, + "learning_rate": 4.8896017217313886e-05, + "loss": 1.0655, + "step": 515 + }, + { + "epoch": 0.29, + "learning_rate": 4.887463966359741e-05, + "loss": 1.0833, + "step": 520 + }, + { + "epoch": 0.29, + "learning_rate": 4.8853061876299956e-05, + "loss": 1.068, + "step": 525 + }, + { + "epoch": 0.29, + "learning_rate": 4.8831284036391684e-05, + "loss": 1.0487, + "step": 530 + }, + { + "epoch": 0.3, + "learning_rate": 4.880930632652058e-05, + "loss": 1.059, + "step": 535 + }, + { + "epoch": 0.3, + "learning_rate": 4.878712893101092e-05, + "loss": 1.0408, + "step": 540 + }, + { + "epoch": 0.3, + "learning_rate": 4.876475203586171e-05, + "loss": 0.9976, + "step": 545 + }, + { + "epoch": 0.3, + "learning_rate": 4.874217582874514e-05, + "loss": 1.0358, + "step": 550 + }, + { + "epoch": 0.31, + "learning_rate": 4.8719400499005e-05, + "loss": 1.0073, + "step": 555 + }, + { + "epoch": 0.31, + "learning_rate": 4.869642623765509e-05, + "loss": 1.0742, + "step": 560 + }, + { + "epoch": 0.31, + "learning_rate": 4.867325323737765e-05, + "loss": 1.0463, + "step": 565 + }, + { + "epoch": 0.32, + "learning_rate": 4.864988169252168e-05, + "loss": 1.0242, + "step": 570 + }, + { + "epoch": 0.32, + "learning_rate": 4.8626311799101375e-05, + "loss": 1.0773, + "step": 575 + }, + { + "epoch": 0.32, + "learning_rate": 4.860254375479446e-05, + "loss": 1.0371, + "step": 580 + }, + { + "epoch": 0.32, + "learning_rate": 4.8578577758940504e-05, + "loss": 1.0399, + "step": 585 + }, + { + "epoch": 0.33, + "learning_rate": 4.855441401253928e-05, + "loss": 1.0411, + "step": 590 + }, + { + "epoch": 0.33, + "learning_rate": 4.8530052718249076e-05, + "loss": 1.0476, + "step": 595 + }, + { + "epoch": 0.33, + "learning_rate": 4.850549408038498e-05, + "loss": 1.0075, + "step": 600 + }, + { + "epoch": 0.33, + "learning_rate": 4.848073830491717e-05, + "loss": 1.0045, + "step": 605 + }, + { + "epoch": 0.34, + "learning_rate": 4.845578559946923e-05, + "loss": 1.0368, + "step": 610 + }, + { + "epoch": 0.34, + "learning_rate": 4.8430636173316306e-05, + "loss": 1.0156, + "step": 615 + }, + { + "epoch": 0.34, + "learning_rate": 4.840529023738348e-05, + "loss": 1.0334, + "step": 620 + }, + { + "epoch": 0.35, + "learning_rate": 4.837974800424389e-05, + "loss": 0.9994, + "step": 625 + }, + { + "epoch": 0.35, + "learning_rate": 4.8354009688117026e-05, + "loss": 1.0467, + "step": 630 + }, + { + "epoch": 0.35, + "learning_rate": 4.8328075504866874e-05, + "loss": 0.9968, + "step": 635 + }, + { + "epoch": 0.35, + "learning_rate": 4.8301945672000164e-05, + "loss": 1.041, + "step": 640 + }, + { + "epoch": 0.36, + "learning_rate": 4.8275620408664487e-05, + "loss": 0.9811, + "step": 645 + }, + { + "epoch": 0.36, + "learning_rate": 4.8249099935646494e-05, + "loss": 1.0235, + "step": 650 + }, + { + "epoch": 0.36, + "learning_rate": 4.822238447537003e-05, + "loss": 1.0152, + "step": 655 + }, + { + "epoch": 0.36, + "learning_rate": 4.819547425189429e-05, + "loss": 1.038, + "step": 660 + }, + { + "epoch": 0.37, + "learning_rate": 4.81683694909119e-05, + "loss": 1.0387, + "step": 665 + }, + { + "epoch": 0.37, + "learning_rate": 4.814107041974707e-05, + "loss": 1.019, + "step": 670 + }, + { + "epoch": 0.37, + "learning_rate": 4.811357726735366e-05, + "loss": 1.0403, + "step": 675 + }, + { + "epoch": 0.38, + "learning_rate": 4.808589026431324e-05, + "loss": 1.0828, + "step": 680 + }, + { + "epoch": 0.38, + "learning_rate": 4.805800964283322e-05, + "loss": 1.0128, + "step": 685 + }, + { + "epoch": 0.38, + "learning_rate": 4.802993563674483e-05, + "loss": 1.0502, + "step": 690 + }, + { + "epoch": 0.38, + "learning_rate": 4.80016684815012e-05, + "loss": 1.0169, + "step": 695 + }, + { + "epoch": 0.39, + "learning_rate": 4.7973208414175406e-05, + "loss": 1.0316, + "step": 700 + }, + { + "epoch": 0.39, + "learning_rate": 4.794455567345842e-05, + "loss": 1.0263, + "step": 705 + }, + { + "epoch": 0.39, + "learning_rate": 4.791571049965714e-05, + "loss": 1.0801, + "step": 710 + }, + { + "epoch": 0.4, + "learning_rate": 4.7886673134692404e-05, + "loss": 1.0575, + "step": 715 + }, + { + "epoch": 0.4, + "learning_rate": 4.7857443822096905e-05, + "loss": 0.9767, + "step": 720 + }, + { + "epoch": 0.4, + "learning_rate": 4.782802280701319e-05, + "loss": 0.9666, + "step": 725 + }, + { + "epoch": 0.4, + "learning_rate": 4.77984103361916e-05, + "loss": 0.9983, + "step": 730 + }, + { + "epoch": 0.41, + "learning_rate": 4.776860665798816e-05, + "loss": 1.0219, + "step": 735 + }, + { + "epoch": 0.41, + "learning_rate": 4.773861202236257e-05, + "loss": 0.9963, + "step": 740 + }, + { + "epoch": 0.41, + "learning_rate": 4.770842668087602e-05, + "loss": 1.0219, + "step": 745 + }, + { + "epoch": 0.41, + "learning_rate": 4.767805088668916e-05, + "loss": 1.0029, + "step": 750 + }, + { + "epoch": 0.42, + "learning_rate": 4.7647484894559936e-05, + "loss": 1.0077, + "step": 755 + }, + { + "epoch": 0.42, + "learning_rate": 4.7616728960841444e-05, + "loss": 0.9908, + "step": 760 + }, + { + "epoch": 0.42, + "learning_rate": 4.758578334347981e-05, + "loss": 1.0092, + "step": 765 + }, + { + "epoch": 0.43, + "learning_rate": 4.7554648302012015e-05, + "loss": 0.9874, + "step": 770 + }, + { + "epoch": 0.43, + "learning_rate": 4.7523324097563706e-05, + "loss": 1.0185, + "step": 775 + }, + { + "epoch": 0.43, + "learning_rate": 4.749181099284703e-05, + "loss": 1.0533, + "step": 780 + }, + { + "epoch": 0.43, + "learning_rate": 4.746010925215839e-05, + "loss": 1.0083, + "step": 785 + }, + { + "epoch": 0.44, + "learning_rate": 4.74282191413763e-05, + "loss": 1.0191, + "step": 790 + }, + { + "epoch": 0.44, + "learning_rate": 4.7396140927959045e-05, + "loss": 0.9786, + "step": 795 + }, + { + "epoch": 0.44, + "learning_rate": 4.7363874880942574e-05, + "loss": 1.0197, + "step": 800 + } + ], + "max_steps": 5424, + "num_train_epochs": 3, + "total_flos": 2.59976249475072e+17, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-800/training_args.bin b/checkpoint-800/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..77433459daca6603950afc463ff989e817f81ac3 --- /dev/null +++ b/checkpoint-800/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0f6a8853599a52b1f15984d75ce4b6434c335fdc67d4509d0b2ca0d8c613590a +size 3362 diff --git a/checkpoint-900/README.md b/checkpoint-900/README.md new file mode 100644 index 0000000000000000000000000000000000000000..53c0fe6b436905ad7a2c060e98fe1ebdeac1a2fe --- /dev/null +++ b/checkpoint-900/README.md @@ -0,0 +1,15 @@ +--- +library_name: peft +--- +## Training procedure + + +The following `bitsandbytes` quantization config was used during training: +- load_in_8bit: True +- llm_int8_threshold: 6.0 +- llm_int8_skip_modules: None +- llm_int8_enable_fp32_cpu_offload: False +### Framework versions + + +- PEFT 0.5.0 diff --git a/checkpoint-900/adapter_config.json b/checkpoint-900/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..514ae99ac7471cc26016f7575e44dd8304f29b07 --- /dev/null +++ b/checkpoint-900/adapter_config.json @@ -0,0 +1,21 @@ +{ + "auto_mapping": null, + "base_model_name_or_path": "meta-llama/Llama-2-7b-chat-hf", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "q_proj", + "v_proj" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-900/adapter_model.bin b/checkpoint-900/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..483b571e98984b4d29291578323a1cac85c7c42d --- /dev/null +++ b/checkpoint-900/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6bef41421dc42fd41e31ee9cd3310b56fa5b047ad6ee81a9450408bef79cb5ae +size 16821197 diff --git a/checkpoint-900/finetuning_args.json b/checkpoint-900/finetuning_args.json new file mode 100644 index 0000000000000000000000000000000000000000..d01efc206b59c6f88548e8f3940579f2ed2af33b --- /dev/null +++ b/checkpoint-900/finetuning_args.json @@ -0,0 +1,16 @@ +{ + "dpo_beta": 0.1, + "finetuning_type": "lora", + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "lora_rank": 8, + "lora_target": [ + "q_proj", + "v_proj" + ], + "name_module_trainable": "mlp", + "num_hidden_layers": 32, + "num_layer_trainable": 3, + "ppo_score_norm": false, + "resume_lora_training": true +} diff --git a/checkpoint-900/optimizer.pt b/checkpoint-900/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..1c12f476b9e5e681e9953b2205da425d9c9a957f --- /dev/null +++ b/checkpoint-900/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:275702d666fe06c395e50b00d216e320172d565acb8619dda739a645a65a6b90 +size 33629893 diff --git a/checkpoint-900/rng_state.pth b/checkpoint-900/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..ef4f9d7ca5c535f9a0952a467e9752d52a87a59b --- /dev/null +++ b/checkpoint-900/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:86108cf7ed3871bc19b501560535532e1359eb86c2151cfa1e68a2f606c16eab +size 14575 diff --git a/checkpoint-900/scaler.pt b/checkpoint-900/scaler.pt new file mode 100644 index 0000000000000000000000000000000000000000..ce88e214e8dfdcd84c4762e445a17b775da1addc --- /dev/null +++ b/checkpoint-900/scaler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f35d70411c5f2791415a19167968224d98e615ab85a34b84d6ff010df96a1eb6 +size 557 diff --git a/checkpoint-900/scheduler.pt b/checkpoint-900/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..9e8e2db717fe1d1a3060c285feed66a0d728a3f6 --- /dev/null +++ b/checkpoint-900/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6f39b104c6f9212e205d97a7b6077a01aa41dac5f807dd90a89038d00fdae4df +size 627 diff --git a/checkpoint-900/trainer_state.json b/checkpoint-900/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..a308a0eb55d08acbbd88048f4a59936eb488321a --- /dev/null +++ b/checkpoint-900/trainer_state.json @@ -0,0 +1,1096 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.4976499861763893, + "global_step": 900, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.9999895164082156e-05, + "loss": 1.6349, + "step": 5 + }, + { + "epoch": 0.01, + "learning_rate": 4.999958065720787e-05, + "loss": 1.6199, + "step": 10 + }, + { + "epoch": 0.01, + "learning_rate": 4.999905648201487e-05, + "loss": 1.4834, + "step": 15 + }, + { + "epoch": 0.01, + "learning_rate": 4.999832264289934e-05, + "loss": 1.3882, + "step": 20 + }, + { + "epoch": 0.01, + "learning_rate": 4.999737914601591e-05, + "loss": 1.3679, + "step": 25 + }, + { + "epoch": 0.02, + "learning_rate": 4.999622599927756e-05, + "loss": 1.2396, + "step": 30 + }, + { + "epoch": 0.02, + "learning_rate": 4.999486321235559e-05, + "loss": 1.321, + "step": 35 + }, + { + "epoch": 0.02, + "learning_rate": 4.9993290796679516e-05, + "loss": 1.2874, + "step": 40 + }, + { + "epoch": 0.02, + "learning_rate": 4.999150876543699e-05, + "loss": 1.2607, + "step": 45 + }, + { + "epoch": 0.03, + "learning_rate": 4.9989517133573694e-05, + "loss": 1.2454, + "step": 50 + }, + { + "epoch": 0.03, + "learning_rate": 4.9987315917793174e-05, + "loss": 1.2799, + "step": 55 + }, + { + "epoch": 0.03, + "learning_rate": 4.998490513655676e-05, + "loss": 1.2575, + "step": 60 + }, + { + "epoch": 0.04, + "learning_rate": 4.998228481008337e-05, + "loss": 1.2404, + "step": 65 + }, + { + "epoch": 0.04, + "learning_rate": 4.997945496034934e-05, + "loss": 1.2219, + "step": 70 + }, + { + "epoch": 0.04, + "learning_rate": 4.9976415611088267e-05, + "loss": 1.2241, + "step": 75 + }, + { + "epoch": 0.04, + "learning_rate": 4.997316678779079e-05, + "loss": 1.1716, + "step": 80 + }, + { + "epoch": 0.05, + "learning_rate": 4.996970851770438e-05, + "loss": 1.1883, + "step": 85 + }, + { + "epoch": 0.05, + "learning_rate": 4.9966040829833115e-05, + "loss": 1.205, + "step": 90 + }, + { + "epoch": 0.05, + "learning_rate": 4.9962163754937426e-05, + "loss": 1.1246, + "step": 95 + }, + { + "epoch": 0.06, + "learning_rate": 4.995807732553384e-05, + "loss": 1.1636, + "step": 100 + }, + { + "epoch": 0.06, + "learning_rate": 4.9953781575894723e-05, + "loss": 1.158, + "step": 105 + }, + { + "epoch": 0.06, + "learning_rate": 4.9949276542048e-05, + "loss": 1.1477, + "step": 110 + }, + { + "epoch": 0.06, + "learning_rate": 4.9944562261776805e-05, + "loss": 1.1678, + "step": 115 + }, + { + "epoch": 0.07, + "learning_rate": 4.9939638774619216e-05, + "loss": 1.1501, + "step": 120 + }, + { + "epoch": 0.07, + "learning_rate": 4.99345061218679e-05, + "loss": 1.1955, + "step": 125 + }, + { + "epoch": 0.07, + "learning_rate": 4.9929164346569756e-05, + "loss": 1.1724, + "step": 130 + }, + { + "epoch": 0.07, + "learning_rate": 4.9923613493525576e-05, + "loss": 1.177, + "step": 135 + }, + { + "epoch": 0.08, + "learning_rate": 4.991785360928968e-05, + "loss": 1.1418, + "step": 140 + }, + { + "epoch": 0.08, + "learning_rate": 4.991188474216947e-05, + "loss": 1.1898, + "step": 145 + }, + { + "epoch": 0.08, + "learning_rate": 4.9905706942225094e-05, + "loss": 1.1479, + "step": 150 + }, + { + "epoch": 0.09, + "learning_rate": 4.9899320261268966e-05, + "loss": 1.1356, + "step": 155 + }, + { + "epoch": 0.09, + "learning_rate": 4.989272475286537e-05, + "loss": 1.1397, + "step": 160 + }, + { + "epoch": 0.09, + "learning_rate": 4.9885920472330004e-05, + "loss": 1.1215, + "step": 165 + }, + { + "epoch": 0.09, + "learning_rate": 4.9878907476729516e-05, + "loss": 1.167, + "step": 170 + }, + { + "epoch": 0.1, + "learning_rate": 4.9871685824881e-05, + "loss": 1.1219, + "step": 175 + }, + { + "epoch": 0.1, + "learning_rate": 4.9864255577351534e-05, + "loss": 1.0835, + "step": 180 + }, + { + "epoch": 0.1, + "learning_rate": 4.985661679645769e-05, + "loss": 1.0721, + "step": 185 + }, + { + "epoch": 0.11, + "learning_rate": 4.9848769546264915e-05, + "loss": 1.0692, + "step": 190 + }, + { + "epoch": 0.11, + "learning_rate": 4.9840713892587146e-05, + "loss": 1.0488, + "step": 195 + }, + { + "epoch": 0.11, + "learning_rate": 4.983244990298609e-05, + "loss": 1.1285, + "step": 200 + }, + { + "epoch": 0.11, + "learning_rate": 4.982397764677081e-05, + "loss": 1.0832, + "step": 205 + }, + { + "epoch": 0.12, + "learning_rate": 4.981529719499704e-05, + "loss": 1.0652, + "step": 210 + }, + { + "epoch": 0.12, + "learning_rate": 4.980640862046663e-05, + "loss": 1.1043, + "step": 215 + }, + { + "epoch": 0.12, + "learning_rate": 4.979731199772693e-05, + "loss": 1.112, + "step": 220 + }, + { + "epoch": 0.12, + "learning_rate": 4.9788007403070146e-05, + "loss": 1.1029, + "step": 225 + }, + { + "epoch": 0.13, + "learning_rate": 4.977849491453277e-05, + "loss": 1.0869, + "step": 230 + }, + { + "epoch": 0.13, + "learning_rate": 4.976877461189481e-05, + "loss": 1.0843, + "step": 235 + }, + { + "epoch": 0.13, + "learning_rate": 4.975884657667922e-05, + "loss": 1.0789, + "step": 240 + }, + { + "epoch": 0.14, + "learning_rate": 4.974871089215118e-05, + "loss": 1.0449, + "step": 245 + }, + { + "epoch": 0.14, + "learning_rate": 4.9738367643317405e-05, + "loss": 1.1053, + "step": 250 + }, + { + "epoch": 0.14, + "learning_rate": 4.9727816916925395e-05, + "loss": 1.0651, + "step": 255 + }, + { + "epoch": 0.14, + "learning_rate": 4.971705880146276e-05, + "loss": 1.0828, + "step": 260 + }, + { + "epoch": 0.15, + "learning_rate": 4.970609338715646e-05, + "loss": 1.0932, + "step": 265 + }, + { + "epoch": 0.15, + "learning_rate": 4.969492076597203e-05, + "loss": 1.0648, + "step": 270 + }, + { + "epoch": 0.15, + "learning_rate": 4.968354103161283e-05, + "loss": 1.0948, + "step": 275 + }, + { + "epoch": 0.15, + "learning_rate": 4.967195427951926e-05, + "loss": 1.0721, + "step": 280 + }, + { + "epoch": 0.16, + "learning_rate": 4.9660160606867936e-05, + "loss": 1.124, + "step": 285 + }, + { + "epoch": 0.16, + "learning_rate": 4.9648160112570896e-05, + "loss": 1.0963, + "step": 290 + }, + { + "epoch": 0.16, + "learning_rate": 4.9635952897274773e-05, + "loss": 1.1078, + "step": 295 + }, + { + "epoch": 0.17, + "learning_rate": 4.9623539063359925e-05, + "loss": 1.1059, + "step": 300 + }, + { + "epoch": 0.17, + "learning_rate": 4.961091871493962e-05, + "loss": 1.1032, + "step": 305 + }, + { + "epoch": 0.17, + "learning_rate": 4.959809195785912e-05, + "loss": 1.0595, + "step": 310 + }, + { + "epoch": 0.17, + "learning_rate": 4.958505889969481e-05, + "loss": 1.1096, + "step": 315 + }, + { + "epoch": 0.18, + "learning_rate": 4.957181964975329e-05, + "loss": 1.0589, + "step": 320 + }, + { + "epoch": 0.18, + "learning_rate": 4.955837431907049e-05, + "loss": 1.0608, + "step": 325 + }, + { + "epoch": 0.18, + "learning_rate": 4.954472302041069e-05, + "loss": 1.0819, + "step": 330 + }, + { + "epoch": 0.19, + "learning_rate": 4.9530865868265605e-05, + "loss": 1.0759, + "step": 335 + }, + { + "epoch": 0.19, + "learning_rate": 4.951680297885342e-05, + "loss": 1.0515, + "step": 340 + }, + { + "epoch": 0.19, + "learning_rate": 4.950253447011779e-05, + "loss": 1.0371, + "step": 345 + }, + { + "epoch": 0.19, + "learning_rate": 4.948806046172691e-05, + "loss": 1.0619, + "step": 350 + }, + { + "epoch": 0.2, + "learning_rate": 4.947338107507245e-05, + "loss": 1.0757, + "step": 355 + }, + { + "epoch": 0.2, + "learning_rate": 4.945849643326857e-05, + "loss": 1.0686, + "step": 360 + }, + { + "epoch": 0.2, + "learning_rate": 4.9443406661150874e-05, + "loss": 1.0809, + "step": 365 + }, + { + "epoch": 0.2, + "learning_rate": 4.942811188527537e-05, + "loss": 1.0704, + "step": 370 + }, + { + "epoch": 0.21, + "learning_rate": 4.941261223391742e-05, + "loss": 1.0655, + "step": 375 + }, + { + "epoch": 0.21, + "learning_rate": 4.939690783707063e-05, + "loss": 1.1182, + "step": 380 + }, + { + "epoch": 0.21, + "learning_rate": 4.938099882644578e-05, + "loss": 1.081, + "step": 385 + }, + { + "epoch": 0.22, + "learning_rate": 4.9364885335469734e-05, + "loss": 1.0792, + "step": 390 + }, + { + "epoch": 0.22, + "learning_rate": 4.93485674992843e-05, + "loss": 1.0244, + "step": 395 + }, + { + "epoch": 0.22, + "learning_rate": 4.933204545474511e-05, + "loss": 1.0531, + "step": 400 + }, + { + "epoch": 0.22, + "learning_rate": 4.9315319340420465e-05, + "loss": 1.048, + "step": 405 + }, + { + "epoch": 0.23, + "learning_rate": 4.929838929659015e-05, + "loss": 1.04, + "step": 410 + }, + { + "epoch": 0.23, + "learning_rate": 4.9281255465244314e-05, + "loss": 1.0686, + "step": 415 + }, + { + "epoch": 0.23, + "learning_rate": 4.926391799008223e-05, + "loss": 1.0933, + "step": 420 + }, + { + "epoch": 0.24, + "learning_rate": 4.924637701651111e-05, + "loss": 1.0313, + "step": 425 + }, + { + "epoch": 0.24, + "learning_rate": 4.9228632691644874e-05, + "loss": 1.0811, + "step": 430 + }, + { + "epoch": 0.24, + "learning_rate": 4.921068516430293e-05, + "loss": 1.0617, + "step": 435 + }, + { + "epoch": 0.24, + "learning_rate": 4.919253458500892e-05, + "loss": 1.0482, + "step": 440 + }, + { + "epoch": 0.25, + "learning_rate": 4.9174181105989445e-05, + "loss": 1.0681, + "step": 445 + }, + { + "epoch": 0.25, + "learning_rate": 4.9155624881172834e-05, + "loss": 1.0476, + "step": 450 + }, + { + "epoch": 0.25, + "learning_rate": 4.913686606618777e-05, + "loss": 1.0463, + "step": 455 + }, + { + "epoch": 0.25, + "learning_rate": 4.911790481836208e-05, + "loss": 1.0213, + "step": 460 + }, + { + "epoch": 0.26, + "learning_rate": 4.909874129672133e-05, + "loss": 0.9855, + "step": 465 + }, + { + "epoch": 0.26, + "learning_rate": 4.907937566198757e-05, + "loss": 1.045, + "step": 470 + }, + { + "epoch": 0.26, + "learning_rate": 4.9059808076577914e-05, + "loss": 1.0442, + "step": 475 + }, + { + "epoch": 0.27, + "learning_rate": 4.904003870460323e-05, + "loss": 1.0354, + "step": 480 + }, + { + "epoch": 0.27, + "learning_rate": 4.9020067711866735e-05, + "loss": 1.0331, + "step": 485 + }, + { + "epoch": 0.27, + "learning_rate": 4.899989526586261e-05, + "loss": 1.0585, + "step": 490 + }, + { + "epoch": 0.27, + "learning_rate": 4.8979521535774636e-05, + "loss": 1.0223, + "step": 495 + }, + { + "epoch": 0.28, + "learning_rate": 4.895894669247468e-05, + "loss": 1.0118, + "step": 500 + }, + { + "epoch": 0.28, + "learning_rate": 4.8938170908521356e-05, + "loss": 1.0508, + "step": 505 + }, + { + "epoch": 0.28, + "learning_rate": 4.8917194358158534e-05, + "loss": 1.0656, + "step": 510 + }, + { + "epoch": 0.28, + "learning_rate": 4.8896017217313886e-05, + "loss": 1.0655, + "step": 515 + }, + { + "epoch": 0.29, + "learning_rate": 4.887463966359741e-05, + "loss": 1.0833, + "step": 520 + }, + { + "epoch": 0.29, + "learning_rate": 4.8853061876299956e-05, + "loss": 1.068, + "step": 525 + }, + { + "epoch": 0.29, + "learning_rate": 4.8831284036391684e-05, + "loss": 1.0487, + "step": 530 + }, + { + "epoch": 0.3, + "learning_rate": 4.880930632652058e-05, + "loss": 1.059, + "step": 535 + }, + { + "epoch": 0.3, + "learning_rate": 4.878712893101092e-05, + "loss": 1.0408, + "step": 540 + }, + { + "epoch": 0.3, + "learning_rate": 4.876475203586171e-05, + "loss": 0.9976, + "step": 545 + }, + { + "epoch": 0.3, + "learning_rate": 4.874217582874514e-05, + "loss": 1.0358, + "step": 550 + }, + { + "epoch": 0.31, + "learning_rate": 4.8719400499005e-05, + "loss": 1.0073, + "step": 555 + }, + { + "epoch": 0.31, + "learning_rate": 4.869642623765509e-05, + "loss": 1.0742, + "step": 560 + }, + { + "epoch": 0.31, + "learning_rate": 4.867325323737765e-05, + "loss": 1.0463, + "step": 565 + }, + { + "epoch": 0.32, + "learning_rate": 4.864988169252168e-05, + "loss": 1.0242, + "step": 570 + }, + { + "epoch": 0.32, + "learning_rate": 4.8626311799101375e-05, + "loss": 1.0773, + "step": 575 + }, + { + "epoch": 0.32, + "learning_rate": 4.860254375479446e-05, + "loss": 1.0371, + "step": 580 + }, + { + "epoch": 0.32, + "learning_rate": 4.8578577758940504e-05, + "loss": 1.0399, + "step": 585 + }, + { + "epoch": 0.33, + "learning_rate": 4.855441401253928e-05, + "loss": 1.0411, + "step": 590 + }, + { + "epoch": 0.33, + "learning_rate": 4.8530052718249076e-05, + "loss": 1.0476, + "step": 595 + }, + { + "epoch": 0.33, + "learning_rate": 4.850549408038498e-05, + "loss": 1.0075, + "step": 600 + }, + { + "epoch": 0.33, + "learning_rate": 4.848073830491717e-05, + "loss": 1.0045, + "step": 605 + }, + { + "epoch": 0.34, + "learning_rate": 4.845578559946923e-05, + "loss": 1.0368, + "step": 610 + }, + { + "epoch": 0.34, + "learning_rate": 4.8430636173316306e-05, + "loss": 1.0156, + "step": 615 + }, + { + "epoch": 0.34, + "learning_rate": 4.840529023738348e-05, + "loss": 1.0334, + "step": 620 + }, + { + "epoch": 0.35, + "learning_rate": 4.837974800424389e-05, + "loss": 0.9994, + "step": 625 + }, + { + "epoch": 0.35, + "learning_rate": 4.8354009688117026e-05, + "loss": 1.0467, + "step": 630 + }, + { + "epoch": 0.35, + "learning_rate": 4.8328075504866874e-05, + "loss": 0.9968, + "step": 635 + }, + { + "epoch": 0.35, + "learning_rate": 4.8301945672000164e-05, + "loss": 1.041, + "step": 640 + }, + { + "epoch": 0.36, + "learning_rate": 4.8275620408664487e-05, + "loss": 0.9811, + "step": 645 + }, + { + "epoch": 0.36, + "learning_rate": 4.8249099935646494e-05, + "loss": 1.0235, + "step": 650 + }, + { + "epoch": 0.36, + "learning_rate": 4.822238447537003e-05, + "loss": 1.0152, + "step": 655 + }, + { + "epoch": 0.36, + "learning_rate": 4.819547425189429e-05, + "loss": 1.038, + "step": 660 + }, + { + "epoch": 0.37, + "learning_rate": 4.81683694909119e-05, + "loss": 1.0387, + "step": 665 + }, + { + "epoch": 0.37, + "learning_rate": 4.814107041974707e-05, + "loss": 1.019, + "step": 670 + }, + { + "epoch": 0.37, + "learning_rate": 4.811357726735366e-05, + "loss": 1.0403, + "step": 675 + }, + { + "epoch": 0.38, + "learning_rate": 4.808589026431324e-05, + "loss": 1.0828, + "step": 680 + }, + { + "epoch": 0.38, + "learning_rate": 4.805800964283322e-05, + "loss": 1.0128, + "step": 685 + }, + { + "epoch": 0.38, + "learning_rate": 4.802993563674483e-05, + "loss": 1.0502, + "step": 690 + }, + { + "epoch": 0.38, + "learning_rate": 4.80016684815012e-05, + "loss": 1.0169, + "step": 695 + }, + { + "epoch": 0.39, + "learning_rate": 4.7973208414175406e-05, + "loss": 1.0316, + "step": 700 + }, + { + "epoch": 0.39, + "learning_rate": 4.794455567345842e-05, + "loss": 1.0263, + "step": 705 + }, + { + "epoch": 0.39, + "learning_rate": 4.791571049965714e-05, + "loss": 1.0801, + "step": 710 + }, + { + "epoch": 0.4, + "learning_rate": 4.7886673134692404e-05, + "loss": 1.0575, + "step": 715 + }, + { + "epoch": 0.4, + "learning_rate": 4.7857443822096905e-05, + "loss": 0.9767, + "step": 720 + }, + { + "epoch": 0.4, + "learning_rate": 4.782802280701319e-05, + "loss": 0.9666, + "step": 725 + }, + { + "epoch": 0.4, + "learning_rate": 4.77984103361916e-05, + "loss": 0.9983, + "step": 730 + }, + { + "epoch": 0.41, + "learning_rate": 4.776860665798816e-05, + "loss": 1.0219, + "step": 735 + }, + { + "epoch": 0.41, + "learning_rate": 4.773861202236257e-05, + "loss": 0.9963, + "step": 740 + }, + { + "epoch": 0.41, + "learning_rate": 4.770842668087602e-05, + "loss": 1.0219, + "step": 745 + }, + { + "epoch": 0.41, + "learning_rate": 4.767805088668916e-05, + "loss": 1.0029, + "step": 750 + }, + { + "epoch": 0.42, + "learning_rate": 4.7647484894559936e-05, + "loss": 1.0077, + "step": 755 + }, + { + "epoch": 0.42, + "learning_rate": 4.7616728960841444e-05, + "loss": 0.9908, + "step": 760 + }, + { + "epoch": 0.42, + "learning_rate": 4.758578334347981e-05, + "loss": 1.0092, + "step": 765 + }, + { + "epoch": 0.43, + "learning_rate": 4.7554648302012015e-05, + "loss": 0.9874, + "step": 770 + }, + { + "epoch": 0.43, + "learning_rate": 4.7523324097563706e-05, + "loss": 1.0185, + "step": 775 + }, + { + "epoch": 0.43, + "learning_rate": 4.749181099284703e-05, + "loss": 1.0533, + "step": 780 + }, + { + "epoch": 0.43, + "learning_rate": 4.746010925215839e-05, + "loss": 1.0083, + "step": 785 + }, + { + "epoch": 0.44, + "learning_rate": 4.74282191413763e-05, + "loss": 1.0191, + "step": 790 + }, + { + "epoch": 0.44, + "learning_rate": 4.7396140927959045e-05, + "loss": 0.9786, + "step": 795 + }, + { + "epoch": 0.44, + "learning_rate": 4.7363874880942574e-05, + "loss": 1.0197, + "step": 800 + }, + { + "epoch": 0.45, + "learning_rate": 4.733142127093813e-05, + "loss": 1.0471, + "step": 805 + }, + { + "epoch": 0.45, + "learning_rate": 4.7298780370130014e-05, + "loss": 1.0134, + "step": 810 + }, + { + "epoch": 0.45, + "learning_rate": 4.726595245227336e-05, + "loss": 0.9801, + "step": 815 + }, + { + "epoch": 0.45, + "learning_rate": 4.723293779269173e-05, + "loss": 0.9944, + "step": 820 + }, + { + "epoch": 0.46, + "learning_rate": 4.7199736668274924e-05, + "loss": 1.0358, + "step": 825 + }, + { + "epoch": 0.46, + "learning_rate": 4.716634935747655e-05, + "loss": 0.9907, + "step": 830 + }, + { + "epoch": 0.46, + "learning_rate": 4.713277614031177e-05, + "loss": 1.0698, + "step": 835 + }, + { + "epoch": 0.46, + "learning_rate": 4.70990172983549e-05, + "loss": 1.0466, + "step": 840 + }, + { + "epoch": 0.47, + "learning_rate": 4.706507311473707e-05, + "loss": 0.9489, + "step": 845 + }, + { + "epoch": 0.47, + "learning_rate": 4.703094387414385e-05, + "loss": 0.9936, + "step": 850 + }, + { + "epoch": 0.47, + "learning_rate": 4.699662986281288e-05, + "loss": 0.9672, + "step": 855 + }, + { + "epoch": 0.48, + "learning_rate": 4.696213136853141e-05, + "loss": 1.0164, + "step": 860 + }, + { + "epoch": 0.48, + "learning_rate": 4.6927448680633954e-05, + "loss": 0.9817, + "step": 865 + }, + { + "epoch": 0.48, + "learning_rate": 4.689258208999983e-05, + "loss": 1.028, + "step": 870 + }, + { + "epoch": 0.48, + "learning_rate": 4.6857531889050716e-05, + "loss": 1.0239, + "step": 875 + }, + { + "epoch": 0.49, + "learning_rate": 4.682229837174821e-05, + "loss": 1.0378, + "step": 880 + }, + { + "epoch": 0.49, + "learning_rate": 4.678688183359135e-05, + "loss": 0.9908, + "step": 885 + }, + { + "epoch": 0.49, + "learning_rate": 4.675128257161418e-05, + "loss": 0.9384, + "step": 890 + }, + { + "epoch": 0.49, + "learning_rate": 4.671550088438319e-05, + "loss": 0.9481, + "step": 895 + }, + { + "epoch": 0.5, + "learning_rate": 4.6679537071994874e-05, + "loss": 0.9541, + "step": 900 + } + ], + "max_steps": 5424, + "num_train_epochs": 3, + "total_flos": 2.92473280659456e+17, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-900/training_args.bin b/checkpoint-900/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..77433459daca6603950afc463ff989e817f81ac3 --- /dev/null +++ b/checkpoint-900/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0f6a8853599a52b1f15984d75ce4b6434c335fdc67d4509d0b2ca0d8c613590a +size 3362 diff --git a/finetuning_args.json b/finetuning_args.json new file mode 100644 index 0000000000000000000000000000000000000000..d01efc206b59c6f88548e8f3940579f2ed2af33b --- /dev/null +++ b/finetuning_args.json @@ -0,0 +1,16 @@ +{ + "dpo_beta": 0.1, + "finetuning_type": "lora", + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "lora_rank": 8, + "lora_target": [ + "q_proj", + "v_proj" + ], + "name_module_trainable": "mlp", + "num_hidden_layers": 32, + "num_layer_trainable": 3, + "ppo_score_norm": false, + "resume_lora_training": true +} diff --git a/train_results.json b/train_results.json new file mode 100644 index 0000000000000000000000000000000000000000..2d24e725cd885c4d901f65601add6860bc1db71a --- /dev/null +++ b/train_results.json @@ -0,0 +1,7 @@ +{ + "epoch": 3.0, + "train_loss": 0.9648243357649947, + "train_runtime": 56978.2271, + "train_samples_per_second": 1.523, + "train_steps_per_second": 0.095 +} \ No newline at end of file diff --git a/trainer_log.jsonl b/trainer_log.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..974d49cbec43924576b6482f1a20b75b2e8afae1 --- /dev/null +++ b/trainer_log.jsonl @@ -0,0 +1,1085 @@ +{"current_steps": 5, "total_steps": 5424, "loss": 1.6349, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.9999895164082156e-05, "epoch": 0.0, "percentage": 0.09, "elapsed_time": "0:00:53", "remaining_time": "16:02:24"} +{"current_steps": 10, "total_steps": 5424, "loss": 1.6199, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.999958065720787e-05, "epoch": 0.01, "percentage": 0.18, "elapsed_time": "0:01:45", "remaining_time": "15:54:15"} +{"current_steps": 15, "total_steps": 5424, "loss": 1.4834, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.999905648201487e-05, "epoch": 0.01, "percentage": 0.28, "elapsed_time": "0:02:38", "remaining_time": "15:51:45"} +{"current_steps": 20, "total_steps": 5424, "loss": 1.3882, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.999832264289934e-05, "epoch": 0.01, "percentage": 0.37, "elapsed_time": "0:03:30", "remaining_time": "15:50:08"} +{"current_steps": 25, "total_steps": 5424, "loss": 1.3679, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.999737914601591e-05, "epoch": 0.01, "percentage": 0.46, "elapsed_time": "0:04:23", "remaining_time": "15:48:36"} +{"current_steps": 30, "total_steps": 5424, "loss": 1.2396, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.999622599927756e-05, "epoch": 0.02, "percentage": 0.55, "elapsed_time": "0:05:16", "remaining_time": "15:47:21"} +{"current_steps": 35, "total_steps": 5424, "loss": 1.321, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.999486321235559e-05, "epoch": 0.02, "percentage": 0.65, "elapsed_time": "0:06:08", "remaining_time": "15:46:20"} +{"current_steps": 40, "total_steps": 5424, "loss": 1.2874, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.9993290796679516e-05, "epoch": 0.02, "percentage": 0.74, "elapsed_time": "0:07:01", "remaining_time": "15:45:12"} +{"current_steps": 45, "total_steps": 5424, "loss": 1.2607, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.999150876543699e-05, "epoch": 0.02, "percentage": 0.83, "elapsed_time": "0:07:53", "remaining_time": "15:44:13"} +{"current_steps": 50, "total_steps": 5424, "loss": 1.2454, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.9989517133573694e-05, "epoch": 0.03, "percentage": 0.92, "elapsed_time": "0:08:46", "remaining_time": "15:43:10"} +{"current_steps": 55, "total_steps": 5424, "loss": 1.2799, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.9987315917793174e-05, "epoch": 0.03, "percentage": 1.01, "elapsed_time": "0:09:39", "remaining_time": "15:42:14"} +{"current_steps": 60, "total_steps": 5424, "loss": 1.2575, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.998490513655676e-05, "epoch": 0.03, "percentage": 1.11, "elapsed_time": "0:10:31", "remaining_time": "15:41:18"} +{"current_steps": 65, "total_steps": 5424, "loss": 1.2404, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.998228481008337e-05, "epoch": 0.04, "percentage": 1.2, "elapsed_time": "0:11:24", "remaining_time": "15:40:25"} +{"current_steps": 70, "total_steps": 5424, "loss": 1.2219, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.997945496034934e-05, "epoch": 0.04, "percentage": 1.29, "elapsed_time": "0:12:17", "remaining_time": "15:39:32"} +{"current_steps": 75, "total_steps": 5424, "loss": 1.2241, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.9976415611088267e-05, "epoch": 0.04, "percentage": 1.38, "elapsed_time": "0:13:09", "remaining_time": "15:38:36"} +{"current_steps": 80, "total_steps": 5424, "loss": 1.1716, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.997316678779079e-05, "epoch": 0.04, "percentage": 1.47, "elapsed_time": "0:14:02", "remaining_time": "15:37:42"} +{"current_steps": 85, "total_steps": 5424, "loss": 1.1883, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.996970851770438e-05, "epoch": 0.05, "percentage": 1.57, "elapsed_time": "0:14:54", "remaining_time": "15:36:44"} +{"current_steps": 90, "total_steps": 5424, "loss": 1.205, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.9966040829833115e-05, "epoch": 0.05, "percentage": 1.66, "elapsed_time": "0:15:47", "remaining_time": "15:35:48"} +{"current_steps": 95, "total_steps": 5424, "loss": 1.1246, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.9962163754937426e-05, "epoch": 0.05, "percentage": 1.75, "elapsed_time": "0:16:39", "remaining_time": "15:34:53"} +{"current_steps": 100, "total_steps": 5424, "loss": 1.1636, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.995807732553384e-05, "epoch": 0.06, "percentage": 1.84, "elapsed_time": "0:17:32", "remaining_time": "15:33:57"} +{"current_steps": 105, "total_steps": 5424, "loss": 1.158, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.9953781575894723e-05, "epoch": 0.06, "percentage": 1.94, "elapsed_time": "0:18:25", "remaining_time": "15:33:06"} +{"current_steps": 110, "total_steps": 5424, "loss": 1.1477, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.9949276542048e-05, "epoch": 0.06, "percentage": 2.03, "elapsed_time": "0:19:17", "remaining_time": "15:32:09"} +{"current_steps": 115, "total_steps": 5424, "loss": 1.1678, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.9944562261776805e-05, "epoch": 0.06, "percentage": 2.12, "elapsed_time": "0:20:10", "remaining_time": "15:31:15"} +{"current_steps": 120, "total_steps": 5424, "loss": 1.1501, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.9939638774619216e-05, "epoch": 0.07, "percentage": 2.21, "elapsed_time": "0:21:02", "remaining_time": "15:30:23"} +{"current_steps": 125, "total_steps": 5424, "loss": 1.1955, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.99345061218679e-05, "epoch": 0.07, "percentage": 2.3, "elapsed_time": "0:21:55", "remaining_time": "15:29:30"} +{"current_steps": 130, "total_steps": 5424, "loss": 1.1724, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.9929164346569756e-05, "epoch": 0.07, "percentage": 2.4, "elapsed_time": "0:22:48", "remaining_time": "15:28:37"} +{"current_steps": 135, "total_steps": 5424, "loss": 1.177, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.9923613493525576e-05, "epoch": 0.07, "percentage": 2.49, "elapsed_time": "0:23:40", "remaining_time": "15:27:45"} +{"current_steps": 140, "total_steps": 5424, "loss": 1.1418, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.991785360928968e-05, "epoch": 0.08, "percentage": 2.58, "elapsed_time": "0:24:33", "remaining_time": "15:26:54"} +{"current_steps": 145, "total_steps": 5424, "loss": 1.1898, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.991188474216947e-05, "epoch": 0.08, "percentage": 2.67, "elapsed_time": "0:25:26", "remaining_time": "15:26:03"} +{"current_steps": 150, "total_steps": 5424, "loss": 1.1479, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.9905706942225094e-05, "epoch": 0.08, "percentage": 2.77, "elapsed_time": "0:26:18", "remaining_time": "15:25:09"} +{"current_steps": 155, "total_steps": 5424, "loss": 1.1356, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.9899320261268966e-05, "epoch": 0.09, "percentage": 2.86, "elapsed_time": "0:27:11", "remaining_time": "15:24:18"} +{"current_steps": 160, "total_steps": 5424, "loss": 1.1397, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.989272475286537e-05, "epoch": 0.09, "percentage": 2.95, "elapsed_time": "0:28:04", "remaining_time": "15:23:23"} +{"current_steps": 165, "total_steps": 5424, "loss": 1.1215, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.9885920472330004e-05, "epoch": 0.09, "percentage": 3.04, "elapsed_time": "0:28:56", "remaining_time": "15:22:26"} +{"current_steps": 170, "total_steps": 5424, "loss": 1.167, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.9878907476729516e-05, "epoch": 0.09, "percentage": 3.13, "elapsed_time": "0:29:49", "remaining_time": "15:21:31"} +{"current_steps": 175, "total_steps": 5424, "loss": 1.1219, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.9871685824881e-05, "epoch": 0.1, "percentage": 3.23, "elapsed_time": "0:30:41", "remaining_time": "15:20:36"} +{"current_steps": 180, "total_steps": 5424, "loss": 1.0835, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.9864255577351534e-05, "epoch": 0.1, "percentage": 3.32, "elapsed_time": "0:31:34", "remaining_time": "15:19:43"} +{"current_steps": 185, "total_steps": 5424, "loss": 1.0721, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.985661679645769e-05, "epoch": 0.1, "percentage": 3.41, "elapsed_time": "0:32:26", "remaining_time": "15:18:47"} +{"current_steps": 190, "total_steps": 5424, "loss": 1.0692, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.9848769546264915e-05, "epoch": 0.11, "percentage": 3.5, "elapsed_time": "0:33:19", "remaining_time": "15:17:53"} +{"current_steps": 195, "total_steps": 5424, "loss": 1.0488, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.9840713892587146e-05, "epoch": 0.11, "percentage": 3.6, "elapsed_time": "0:34:11", "remaining_time": "15:16:56"} +{"current_steps": 200, "total_steps": 5424, "loss": 1.1285, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.983244990298609e-05, "epoch": 0.11, "percentage": 3.69, "elapsed_time": "0:35:04", "remaining_time": "15:16:00"} +{"current_steps": 205, "total_steps": 5424, "loss": 1.0832, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.982397764677081e-05, "epoch": 0.11, "percentage": 3.78, "elapsed_time": "0:35:56", "remaining_time": "15:15:05"} +{"current_steps": 210, "total_steps": 5424, "loss": 1.0652, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.981529719499704e-05, "epoch": 0.12, "percentage": 3.87, "elapsed_time": "0:36:49", "remaining_time": "15:14:13"} +{"current_steps": 215, "total_steps": 5424, "loss": 1.1043, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.980640862046663e-05, "epoch": 0.12, "percentage": 3.96, "elapsed_time": "0:37:41", "remaining_time": "15:13:16"} +{"current_steps": 220, "total_steps": 5424, "loss": 1.112, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.979731199772693e-05, "epoch": 0.12, "percentage": 4.06, "elapsed_time": "0:38:34", "remaining_time": "15:12:19"} +{"current_steps": 225, "total_steps": 5424, "loss": 1.1029, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.9788007403070146e-05, "epoch": 0.12, "percentage": 4.15, "elapsed_time": "0:39:26", "remaining_time": "15:11:27"} +{"current_steps": 230, "total_steps": 5424, "loss": 1.0869, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.977849491453277e-05, "epoch": 0.13, "percentage": 4.24, "elapsed_time": "0:40:19", "remaining_time": "15:10:37"} +{"current_steps": 235, "total_steps": 5424, "loss": 1.0843, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.976877461189481e-05, "epoch": 0.13, "percentage": 4.33, "elapsed_time": "0:41:12", "remaining_time": "15:09:43"} +{"current_steps": 240, "total_steps": 5424, "loss": 1.0789, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.975884657667922e-05, "epoch": 0.13, "percentage": 4.42, "elapsed_time": "0:42:04", "remaining_time": "15:08:49"} +{"current_steps": 245, "total_steps": 5424, "loss": 1.0449, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.974871089215118e-05, "epoch": 0.14, "percentage": 4.52, "elapsed_time": "0:42:57", "remaining_time": "15:07:55"} +{"current_steps": 250, "total_steps": 5424, "loss": 1.1053, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.9738367643317405e-05, "epoch": 0.14, "percentage": 4.61, "elapsed_time": "0:43:49", "remaining_time": "15:07:03"} +{"current_steps": 255, "total_steps": 5424, "loss": 1.0651, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.9727816916925395e-05, "epoch": 0.14, "percentage": 4.7, "elapsed_time": "0:44:42", "remaining_time": "15:06:10"} +{"current_steps": 260, "total_steps": 5424, "loss": 1.0828, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.971705880146276e-05, "epoch": 0.14, "percentage": 4.79, "elapsed_time": "0:45:34", "remaining_time": "15:05:16"} +{"current_steps": 265, "total_steps": 5424, "loss": 1.0932, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.970609338715646e-05, "epoch": 0.15, "percentage": 4.89, "elapsed_time": "0:46:27", "remaining_time": "15:04:21"} +{"current_steps": 270, "total_steps": 5424, "loss": 1.0648, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.969492076597203e-05, "epoch": 0.15, "percentage": 4.98, "elapsed_time": "0:47:19", "remaining_time": "15:03:26"} +{"current_steps": 275, "total_steps": 5424, "loss": 1.0948, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.968354103161283e-05, "epoch": 0.15, "percentage": 5.07, "elapsed_time": "0:48:12", "remaining_time": "15:02:30"} +{"current_steps": 280, "total_steps": 5424, "loss": 1.0721, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.967195427951926e-05, "epoch": 0.15, "percentage": 5.16, "elapsed_time": "0:49:04", "remaining_time": "15:01:36"} +{"current_steps": 285, "total_steps": 5424, "loss": 1.124, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.9660160606867936e-05, "epoch": 0.16, "percentage": 5.25, "elapsed_time": "0:49:57", "remaining_time": "15:00:42"} +{"current_steps": 290, "total_steps": 5424, "loss": 1.0963, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.9648160112570896e-05, "epoch": 0.16, "percentage": 5.35, "elapsed_time": "0:50:49", "remaining_time": "14:59:49"} +{"current_steps": 295, "total_steps": 5424, "loss": 1.1078, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.9635952897274773e-05, "epoch": 0.16, "percentage": 5.44, "elapsed_time": "0:51:42", "remaining_time": "14:58:54"} +{"current_steps": 300, "total_steps": 5424, "loss": 1.1059, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.9623539063359925e-05, "epoch": 0.17, "percentage": 5.53, "elapsed_time": "0:52:34", "remaining_time": "14:57:59"} +{"current_steps": 305, "total_steps": 5424, "loss": 1.1032, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.961091871493962e-05, "epoch": 0.17, "percentage": 5.62, "elapsed_time": "0:53:27", "remaining_time": "14:57:05"} +{"current_steps": 310, "total_steps": 5424, "loss": 1.0595, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.959809195785912e-05, "epoch": 0.17, "percentage": 5.72, "elapsed_time": "0:54:19", "remaining_time": "14:56:10"} +{"current_steps": 315, "total_steps": 5424, "loss": 1.1096, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.958505889969481e-05, "epoch": 0.17, "percentage": 5.81, "elapsed_time": "0:55:11", "remaining_time": "14:55:14"} +{"current_steps": 320, "total_steps": 5424, "loss": 1.0589, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.957181964975329e-05, "epoch": 0.18, "percentage": 5.9, "elapsed_time": "0:56:04", "remaining_time": "14:54:19"} +{"current_steps": 325, "total_steps": 5424, "loss": 1.0608, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.955837431907049e-05, "epoch": 0.18, "percentage": 5.99, "elapsed_time": "0:56:56", "remaining_time": "14:53:24"} +{"current_steps": 330, "total_steps": 5424, "loss": 1.0819, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.954472302041069e-05, "epoch": 0.18, "percentage": 6.08, "elapsed_time": "0:57:49", "remaining_time": "14:52:29"} +{"current_steps": 335, "total_steps": 5424, "loss": 1.0759, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.9530865868265605e-05, "epoch": 0.19, "percentage": 6.18, "elapsed_time": "0:58:41", "remaining_time": "14:51:34"} +{"current_steps": 340, "total_steps": 5424, "loss": 1.0515, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.951680297885342e-05, "epoch": 0.19, "percentage": 6.27, "elapsed_time": "0:59:33", "remaining_time": "14:50:38"} +{"current_steps": 345, "total_steps": 5424, "loss": 1.0371, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.950253447011779e-05, "epoch": 0.19, "percentage": 6.36, "elapsed_time": "1:00:26", "remaining_time": "14:49:43"} +{"current_steps": 350, "total_steps": 5424, "loss": 1.0619, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.948806046172691e-05, "epoch": 0.19, "percentage": 6.45, "elapsed_time": "1:01:18", "remaining_time": "14:48:48"} +{"current_steps": 355, "total_steps": 5424, "loss": 1.0757, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.947338107507245e-05, "epoch": 0.2, "percentage": 6.54, "elapsed_time": "1:02:10", "remaining_time": "14:47:53"} +{"current_steps": 360, "total_steps": 5424, "loss": 1.0686, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.945849643326857e-05, "epoch": 0.2, "percentage": 6.64, "elapsed_time": "1:03:03", "remaining_time": "14:46:59"} +{"current_steps": 365, "total_steps": 5424, "loss": 1.0809, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.9443406661150874e-05, "epoch": 0.2, "percentage": 6.73, "elapsed_time": "1:03:55", "remaining_time": "14:46:04"} +{"current_steps": 370, "total_steps": 5424, "loss": 1.0704, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.942811188527537e-05, "epoch": 0.2, "percentage": 6.82, "elapsed_time": "1:04:48", "remaining_time": "14:45:10"} +{"current_steps": 375, "total_steps": 5424, "loss": 1.0655, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.941261223391742e-05, "epoch": 0.21, "percentage": 6.91, "elapsed_time": "1:05:40", "remaining_time": "14:44:16"} +{"current_steps": 380, "total_steps": 5424, "loss": 1.1182, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.939690783707063e-05, "epoch": 0.21, "percentage": 7.01, "elapsed_time": "1:06:33", "remaining_time": "14:43:21"} +{"current_steps": 385, "total_steps": 5424, "loss": 1.081, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.938099882644578e-05, "epoch": 0.21, "percentage": 7.1, "elapsed_time": "1:07:25", "remaining_time": "14:42:27"} +{"current_steps": 390, "total_steps": 5424, "loss": 1.0792, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.9364885335469734e-05, "epoch": 0.22, "percentage": 7.19, "elapsed_time": "1:08:17", "remaining_time": "14:41:32"} +{"current_steps": 395, "total_steps": 5424, "loss": 1.0244, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.93485674992843e-05, "epoch": 0.22, "percentage": 7.28, "elapsed_time": "1:09:10", "remaining_time": "14:40:37"} +{"current_steps": 400, "total_steps": 5424, "loss": 1.0531, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.933204545474511e-05, "epoch": 0.22, "percentage": 7.37, "elapsed_time": "1:10:02", "remaining_time": "14:39:42"} +{"current_steps": 405, "total_steps": 5424, "loss": 1.048, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.9315319340420465e-05, "epoch": 0.22, "percentage": 7.47, "elapsed_time": "1:10:54", "remaining_time": "14:38:50"} +{"current_steps": 410, "total_steps": 5424, "loss": 1.04, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.929838929659015e-05, "epoch": 0.23, "percentage": 7.56, "elapsed_time": "1:11:47", "remaining_time": "14:37:55"} +{"current_steps": 415, "total_steps": 5424, "loss": 1.0686, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.9281255465244314e-05, "epoch": 0.23, "percentage": 7.65, "elapsed_time": "1:12:39", "remaining_time": "14:37:01"} +{"current_steps": 420, "total_steps": 5424, "loss": 1.0933, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.926391799008223e-05, "epoch": 0.23, "percentage": 7.74, "elapsed_time": "1:13:32", "remaining_time": "14:36:07"} +{"current_steps": 425, "total_steps": 5424, "loss": 1.0313, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.924637701651111e-05, "epoch": 0.24, "percentage": 7.84, "elapsed_time": "1:14:24", "remaining_time": "14:35:12"} +{"current_steps": 430, "total_steps": 5424, "loss": 1.0811, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.9228632691644874e-05, "epoch": 0.24, "percentage": 7.93, "elapsed_time": "1:15:16", "remaining_time": "14:34:18"} +{"current_steps": 435, "total_steps": 5424, "loss": 1.0617, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.921068516430293e-05, "epoch": 0.24, "percentage": 8.02, "elapsed_time": "1:16:09", "remaining_time": "14:33:24"} +{"current_steps": 440, "total_steps": 5424, "loss": 1.0482, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.919253458500892e-05, "epoch": 0.24, "percentage": 8.11, "elapsed_time": "1:17:01", "remaining_time": "14:32:30"} +{"current_steps": 445, "total_steps": 5424, "loss": 1.0681, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.9174181105989445e-05, "epoch": 0.25, "percentage": 8.2, "elapsed_time": "1:17:54", "remaining_time": "14:31:36"} +{"current_steps": 450, "total_steps": 5424, "loss": 1.0476, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.9155624881172834e-05, "epoch": 0.25, "percentage": 8.3, "elapsed_time": "1:18:46", "remaining_time": "14:30:42"} +{"current_steps": 455, "total_steps": 5424, "loss": 1.0463, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.913686606618777e-05, "epoch": 0.25, "percentage": 8.39, "elapsed_time": "1:19:38", "remaining_time": "14:29:49"} +{"current_steps": 460, "total_steps": 5424, "loss": 1.0213, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.911790481836208e-05, "epoch": 0.25, "percentage": 8.48, "elapsed_time": "1:20:31", "remaining_time": "14:28:55"} +{"current_steps": 465, "total_steps": 5424, "loss": 0.9855, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.909874129672133e-05, "epoch": 0.26, "percentage": 8.57, "elapsed_time": "1:21:23", "remaining_time": "14:28:01"} +{"current_steps": 470, "total_steps": 5424, "loss": 1.045, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.907937566198757e-05, "epoch": 0.26, "percentage": 8.67, "elapsed_time": "1:22:15", "remaining_time": "14:27:07"} +{"current_steps": 475, "total_steps": 5424, "loss": 1.0442, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.9059808076577914e-05, "epoch": 0.26, "percentage": 8.76, "elapsed_time": "1:23:08", "remaining_time": "14:26:12"} +{"current_steps": 480, "total_steps": 5424, "loss": 1.0354, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.904003870460323e-05, "epoch": 0.27, "percentage": 8.85, "elapsed_time": "1:24:00", "remaining_time": "14:25:18"} +{"current_steps": 485, "total_steps": 5424, "loss": 1.0331, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.9020067711866735e-05, "epoch": 0.27, "percentage": 8.94, "elapsed_time": "1:24:53", "remaining_time": "14:24:24"} +{"current_steps": 490, "total_steps": 5424, "loss": 1.0585, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.899989526586261e-05, "epoch": 0.27, "percentage": 9.03, "elapsed_time": "1:25:45", "remaining_time": "14:23:31"} +{"current_steps": 495, "total_steps": 5424, "loss": 1.0223, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.8979521535774636e-05, "epoch": 0.27, "percentage": 9.13, "elapsed_time": "1:26:37", "remaining_time": "14:22:37"} +{"current_steps": 500, "total_steps": 5424, "loss": 1.0118, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.895894669247468e-05, "epoch": 0.28, "percentage": 9.22, "elapsed_time": "1:27:30", "remaining_time": "14:21:43"} +{"current_steps": 505, "total_steps": 5424, "loss": 1.0508, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.8938170908521356e-05, "epoch": 0.28, "percentage": 9.31, "elapsed_time": "1:28:22", "remaining_time": "14:20:50"} +{"current_steps": 510, "total_steps": 5424, "loss": 1.0656, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.8917194358158534e-05, "epoch": 0.28, "percentage": 9.4, "elapsed_time": "1:29:14", "remaining_time": "14:19:56"} +{"current_steps": 515, "total_steps": 5424, "loss": 1.0655, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.8896017217313886e-05, "epoch": 0.28, "percentage": 9.49, "elapsed_time": "1:30:07", "remaining_time": "14:19:02"} +{"current_steps": 520, "total_steps": 5424, "loss": 1.0833, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.887463966359741e-05, "epoch": 0.29, "percentage": 9.59, "elapsed_time": "1:30:59", "remaining_time": "14:18:08"} +{"current_steps": 525, "total_steps": 5424, "loss": 1.068, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.8853061876299956e-05, "epoch": 0.29, "percentage": 9.68, "elapsed_time": "1:31:52", "remaining_time": "14:17:15"} +{"current_steps": 530, "total_steps": 5424, "loss": 1.0487, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.8831284036391684e-05, "epoch": 0.29, "percentage": 9.77, "elapsed_time": "1:32:44", "remaining_time": "14:16:22"} +{"current_steps": 535, "total_steps": 5424, "loss": 1.059, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.880930632652058e-05, "epoch": 0.3, "percentage": 9.86, "elapsed_time": "1:33:36", "remaining_time": "14:15:28"} +{"current_steps": 540, "total_steps": 5424, "loss": 1.0408, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.878712893101092e-05, "epoch": 0.3, "percentage": 9.96, "elapsed_time": "1:34:29", "remaining_time": "14:14:35"} +{"current_steps": 545, "total_steps": 5424, "loss": 0.9976, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.876475203586171e-05, "epoch": 0.3, "percentage": 10.05, "elapsed_time": "1:35:21", "remaining_time": "14:13:41"} +{"current_steps": 550, "total_steps": 5424, "loss": 1.0358, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.874217582874514e-05, "epoch": 0.3, "percentage": 10.14, "elapsed_time": "1:36:13", "remaining_time": "14:12:47"} +{"current_steps": 555, "total_steps": 5424, "loss": 1.0073, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.8719400499005e-05, "epoch": 0.31, "percentage": 10.23, "elapsed_time": "1:37:06", "remaining_time": "14:11:53"} +{"current_steps": 560, "total_steps": 5424, "loss": 1.0742, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.869642623765509e-05, "epoch": 0.31, "percentage": 10.32, "elapsed_time": "1:37:58", "remaining_time": "14:11:00"} +{"current_steps": 565, "total_steps": 5424, "loss": 1.0463, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.867325323737765e-05, "epoch": 0.31, "percentage": 10.42, "elapsed_time": "1:38:51", "remaining_time": "14:10:06"} +{"current_steps": 570, "total_steps": 5424, "loss": 1.0242, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.864988169252168e-05, "epoch": 0.32, "percentage": 10.51, "elapsed_time": "1:39:43", "remaining_time": "14:09:13"} +{"current_steps": 575, "total_steps": 5424, "loss": 1.0773, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.8626311799101375e-05, "epoch": 0.32, "percentage": 10.6, "elapsed_time": "1:40:35", "remaining_time": "14:08:20"} +{"current_steps": 580, "total_steps": 5424, "loss": 1.0371, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.860254375479446e-05, "epoch": 0.32, "percentage": 10.69, "elapsed_time": "1:41:28", "remaining_time": "14:07:26"} +{"current_steps": 585, "total_steps": 5424, "loss": 1.0399, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.8578577758940504e-05, "epoch": 0.32, "percentage": 10.79, "elapsed_time": "1:42:20", "remaining_time": "14:06:33"} +{"current_steps": 590, "total_steps": 5424, "loss": 1.0411, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.855441401253928e-05, "epoch": 0.33, "percentage": 10.88, "elapsed_time": "1:43:12", "remaining_time": "14:05:39"} +{"current_steps": 595, "total_steps": 5424, "loss": 1.0476, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.8530052718249076e-05, "epoch": 0.33, "percentage": 10.97, "elapsed_time": "1:44:05", "remaining_time": "14:04:46"} +{"current_steps": 600, "total_steps": 5424, "loss": 1.0075, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.850549408038498e-05, "epoch": 0.33, "percentage": 11.06, "elapsed_time": "1:44:57", "remaining_time": "14:03:52"} +{"current_steps": 605, "total_steps": 5424, "loss": 1.0045, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.848073830491717e-05, "epoch": 0.33, "percentage": 11.15, "elapsed_time": "1:45:50", "remaining_time": "14:03:00"} +{"current_steps": 610, "total_steps": 5424, "loss": 1.0368, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.845578559946923e-05, "epoch": 0.34, "percentage": 11.25, "elapsed_time": "1:46:42", "remaining_time": "14:02:06"} +{"current_steps": 615, "total_steps": 5424, "loss": 1.0156, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.8430636173316306e-05, "epoch": 0.34, "percentage": 11.34, "elapsed_time": "1:47:34", "remaining_time": "14:01:13"} +{"current_steps": 620, "total_steps": 5424, "loss": 1.0334, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.840529023738348e-05, "epoch": 0.34, "percentage": 11.43, "elapsed_time": "1:48:27", "remaining_time": "14:00:21"} +{"current_steps": 625, "total_steps": 5424, "loss": 0.9994, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.837974800424389e-05, "epoch": 0.35, "percentage": 11.52, "elapsed_time": "1:49:19", "remaining_time": "13:59:28"} +{"current_steps": 630, "total_steps": 5424, "loss": 1.0467, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.8354009688117026e-05, "epoch": 0.35, "percentage": 11.62, "elapsed_time": "1:50:12", "remaining_time": "13:58:35"} +{"current_steps": 635, "total_steps": 5424, "loss": 0.9968, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.8328075504866874e-05, "epoch": 0.35, "percentage": 11.71, "elapsed_time": "1:51:04", "remaining_time": "13:57:42"} +{"current_steps": 640, "total_steps": 5424, "loss": 1.041, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.8301945672000164e-05, "epoch": 0.35, "percentage": 11.8, "elapsed_time": "1:51:56", "remaining_time": "13:56:48"} +{"current_steps": 645, "total_steps": 5424, "loss": 0.9811, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.8275620408664487e-05, "epoch": 0.36, "percentage": 11.89, "elapsed_time": "1:52:49", "remaining_time": "13:55:55"} +{"current_steps": 650, "total_steps": 5424, "loss": 1.0235, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.8249099935646494e-05, "epoch": 0.36, "percentage": 11.98, "elapsed_time": "1:53:41", "remaining_time": "13:55:01"} +{"current_steps": 655, "total_steps": 5424, "loss": 1.0152, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.822238447537003e-05, "epoch": 0.36, "percentage": 12.08, "elapsed_time": "1:54:33", "remaining_time": "13:54:08"} +{"current_steps": 660, "total_steps": 5424, "loss": 1.038, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.819547425189429e-05, "epoch": 0.36, "percentage": 12.17, "elapsed_time": "1:55:26", "remaining_time": "13:53:15"} +{"current_steps": 665, "total_steps": 5424, "loss": 1.0387, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.81683694909119e-05, "epoch": 0.37, "percentage": 12.26, "elapsed_time": "1:56:18", "remaining_time": "13:52:21"} +{"current_steps": 670, "total_steps": 5424, "loss": 1.019, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.814107041974707e-05, "epoch": 0.37, "percentage": 12.35, "elapsed_time": "1:57:10", "remaining_time": "13:51:28"} +{"current_steps": 675, "total_steps": 5424, "loss": 1.0403, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.811357726735366e-05, "epoch": 0.37, "percentage": 12.44, "elapsed_time": "1:58:03", "remaining_time": "13:50:34"} +{"current_steps": 680, "total_steps": 5424, "loss": 1.0828, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.808589026431324e-05, "epoch": 0.38, "percentage": 12.54, "elapsed_time": "1:58:55", "remaining_time": "13:49:41"} +{"current_steps": 685, "total_steps": 5424, "loss": 1.0128, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.805800964283322e-05, "epoch": 0.38, "percentage": 12.63, "elapsed_time": "1:59:47", "remaining_time": "13:48:48"} +{"current_steps": 690, "total_steps": 5424, "loss": 1.0502, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.802993563674483e-05, "epoch": 0.38, "percentage": 12.72, "elapsed_time": "2:00:40", "remaining_time": "13:47:54"} +{"current_steps": 695, "total_steps": 5424, "loss": 1.0169, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.80016684815012e-05, "epoch": 0.38, "percentage": 12.81, "elapsed_time": "2:01:32", "remaining_time": "13:47:01"} +{"current_steps": 700, "total_steps": 5424, "loss": 1.0316, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.7973208414175406e-05, "epoch": 0.39, "percentage": 12.91, "elapsed_time": "2:02:25", "remaining_time": "13:46:08"} +{"current_steps": 705, "total_steps": 5424, "loss": 1.0263, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.794455567345842e-05, "epoch": 0.39, "percentage": 13.0, "elapsed_time": "2:03:17", "remaining_time": "13:45:15"} +{"current_steps": 710, "total_steps": 5424, "loss": 1.0801, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.791571049965714e-05, "epoch": 0.39, "percentage": 13.09, "elapsed_time": "2:04:09", "remaining_time": "13:44:22"} +{"current_steps": 715, "total_steps": 5424, "loss": 1.0575, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.7886673134692404e-05, "epoch": 0.4, "percentage": 13.18, "elapsed_time": "2:05:02", "remaining_time": "13:43:29"} +{"current_steps": 720, "total_steps": 5424, "loss": 0.9767, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.7857443822096905e-05, "epoch": 0.4, "percentage": 13.27, "elapsed_time": "2:05:54", "remaining_time": "13:42:36"} +{"current_steps": 725, "total_steps": 5424, "loss": 0.9666, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.782802280701319e-05, "epoch": 0.4, "percentage": 13.37, "elapsed_time": "2:06:46", "remaining_time": "13:41:43"} +{"current_steps": 730, "total_steps": 5424, "loss": 0.9983, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.77984103361916e-05, "epoch": 0.4, "percentage": 13.46, "elapsed_time": "2:07:39", "remaining_time": "13:40:50"} +{"current_steps": 735, "total_steps": 5424, "loss": 1.0219, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.776860665798816e-05, "epoch": 0.41, "percentage": 13.55, "elapsed_time": "2:08:31", "remaining_time": "13:39:57"} +{"current_steps": 740, "total_steps": 5424, "loss": 0.9963, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.773861202236257e-05, "epoch": 0.41, "percentage": 13.64, "elapsed_time": "2:09:24", "remaining_time": "13:39:04"} +{"current_steps": 745, "total_steps": 5424, "loss": 1.0219, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.770842668087602e-05, "epoch": 0.41, "percentage": 13.74, "elapsed_time": "2:10:16", "remaining_time": "13:38:11"} +{"current_steps": 750, "total_steps": 5424, "loss": 1.0029, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.767805088668916e-05, "epoch": 0.41, "percentage": 13.83, "elapsed_time": "2:11:08", "remaining_time": "13:37:18"} +{"current_steps": 755, "total_steps": 5424, "loss": 1.0077, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.7647484894559936e-05, "epoch": 0.42, "percentage": 13.92, "elapsed_time": "2:12:01", "remaining_time": "13:36:25"} +{"current_steps": 760, "total_steps": 5424, "loss": 0.9908, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.7616728960841444e-05, "epoch": 0.42, "percentage": 14.01, "elapsed_time": "2:12:53", "remaining_time": "13:35:32"} +{"current_steps": 765, "total_steps": 5424, "loss": 1.0092, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.758578334347981e-05, "epoch": 0.42, "percentage": 14.1, "elapsed_time": "2:13:45", "remaining_time": "13:34:38"} +{"current_steps": 770, "total_steps": 5424, "loss": 0.9874, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.7554648302012015e-05, "epoch": 0.43, "percentage": 14.2, "elapsed_time": "2:14:38", "remaining_time": "13:33:45"} +{"current_steps": 775, "total_steps": 5424, "loss": 1.0185, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.7523324097563706e-05, "epoch": 0.43, "percentage": 14.29, "elapsed_time": "2:15:30", "remaining_time": "13:32:52"} +{"current_steps": 780, "total_steps": 5424, "loss": 1.0533, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.749181099284703e-05, "epoch": 0.43, "percentage": 14.38, "elapsed_time": "2:16:22", "remaining_time": "13:31:59"} +{"current_steps": 785, "total_steps": 5424, "loss": 1.0083, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.746010925215839e-05, "epoch": 0.43, "percentage": 14.47, "elapsed_time": "2:17:15", "remaining_time": "13:31:07"} +{"current_steps": 790, "total_steps": 5424, "loss": 1.0191, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.74282191413763e-05, "epoch": 0.44, "percentage": 14.56, "elapsed_time": "2:18:07", "remaining_time": "13:30:15"} +{"current_steps": 795, "total_steps": 5424, "loss": 0.9786, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.7396140927959045e-05, "epoch": 0.44, "percentage": 14.66, "elapsed_time": "2:19:00", "remaining_time": "13:29:23"} +{"current_steps": 800, "total_steps": 5424, "loss": 1.0197, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.7363874880942574e-05, "epoch": 0.44, "percentage": 14.75, "elapsed_time": "2:19:52", "remaining_time": "13:28:30"} +{"current_steps": 805, "total_steps": 5424, "loss": 1.0471, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.733142127093813e-05, "epoch": 0.45, "percentage": 14.84, "elapsed_time": "2:20:45", "remaining_time": "13:27:38"} +{"current_steps": 810, "total_steps": 5424, "loss": 1.0134, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.7298780370130014e-05, "epoch": 0.45, "percentage": 14.93, "elapsed_time": "2:21:37", "remaining_time": "13:26:46"} +{"current_steps": 815, "total_steps": 5424, "loss": 0.9801, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.726595245227336e-05, "epoch": 0.45, "percentage": 15.03, "elapsed_time": "2:22:30", "remaining_time": "13:25:54"} +{"current_steps": 820, "total_steps": 5424, "loss": 0.9944, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.723293779269173e-05, "epoch": 0.45, "percentage": 15.12, "elapsed_time": "2:23:22", "remaining_time": "13:25:01"} +{"current_steps": 825, "total_steps": 5424, "loss": 1.0358, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.7199736668274924e-05, "epoch": 0.46, "percentage": 15.21, "elapsed_time": "2:24:15", "remaining_time": "13:24:09"} +{"current_steps": 830, "total_steps": 5424, "loss": 0.9907, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.716634935747655e-05, "epoch": 0.46, "percentage": 15.3, "elapsed_time": "2:25:07", "remaining_time": "13:23:16"} +{"current_steps": 835, "total_steps": 5424, "loss": 1.0698, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.713277614031177e-05, "epoch": 0.46, "percentage": 15.39, "elapsed_time": "2:26:00", "remaining_time": "13:22:24"} +{"current_steps": 840, "total_steps": 5424, "loss": 1.0466, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.70990172983549e-05, "epoch": 0.46, "percentage": 15.49, "elapsed_time": "2:26:52", "remaining_time": "13:21:32"} +{"current_steps": 845, "total_steps": 5424, "loss": 0.9489, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.706507311473707e-05, "epoch": 0.47, "percentage": 15.58, "elapsed_time": "2:27:45", "remaining_time": "13:20:40"} +{"current_steps": 850, "total_steps": 5424, "loss": 0.9936, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.703094387414385e-05, "epoch": 0.47, "percentage": 15.67, "elapsed_time": "2:28:37", "remaining_time": "13:19:48"} +{"current_steps": 855, "total_steps": 5424, "loss": 0.9672, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.699662986281288e-05, "epoch": 0.47, "percentage": 15.76, "elapsed_time": "2:29:30", "remaining_time": "13:18:56"} +{"current_steps": 860, "total_steps": 5424, "loss": 1.0164, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.696213136853141e-05, "epoch": 0.48, "percentage": 15.86, "elapsed_time": "2:30:22", "remaining_time": "13:18:04"} +{"current_steps": 865, "total_steps": 5424, "loss": 0.9817, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.6927448680633954e-05, "epoch": 0.48, "percentage": 15.95, "elapsed_time": "2:31:15", "remaining_time": "13:17:11"} +{"current_steps": 870, "total_steps": 5424, "loss": 1.028, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.689258208999983e-05, "epoch": 0.48, "percentage": 16.04, "elapsed_time": "2:32:07", "remaining_time": "13:16:18"} +{"current_steps": 875, "total_steps": 5424, "loss": 1.0239, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.6857531889050716e-05, "epoch": 0.48, "percentage": 16.13, "elapsed_time": "2:33:00", "remaining_time": "13:15:26"} +{"current_steps": 880, "total_steps": 5424, "loss": 1.0378, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.682229837174821e-05, "epoch": 0.49, "percentage": 16.22, "elapsed_time": "2:33:52", "remaining_time": "13:14:34"} +{"current_steps": 885, "total_steps": 5424, "loss": 0.9908, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.678688183359135e-05, "epoch": 0.49, "percentage": 16.32, "elapsed_time": "2:34:45", "remaining_time": "13:13:41"} +{"current_steps": 890, "total_steps": 5424, "loss": 0.9384, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.675128257161418e-05, "epoch": 0.49, "percentage": 16.41, "elapsed_time": "2:35:37", "remaining_time": "13:12:49"} +{"current_steps": 895, "total_steps": 5424, "loss": 0.9481, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.671550088438319e-05, "epoch": 0.49, "percentage": 16.5, "elapsed_time": "2:36:30", "remaining_time": "13:11:57"} +{"current_steps": 900, "total_steps": 5424, "loss": 0.9541, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.6679537071994874e-05, "epoch": 0.5, "percentage": 16.59, "elapsed_time": "2:37:22", "remaining_time": "13:11:04"} +{"current_steps": 905, "total_steps": 5424, "loss": 0.9787, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.6643391436073165e-05, "epoch": 0.5, "percentage": 16.69, "elapsed_time": "2:38:14", "remaining_time": "13:10:11"} +{"current_steps": 910, "total_steps": 5424, "loss": 0.9897, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.660706427976693e-05, "epoch": 0.5, "percentage": 16.78, "elapsed_time": "2:39:07", "remaining_time": "13:09:18"} +{"current_steps": 915, "total_steps": 5424, "loss": 0.9947, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.657055590774745e-05, "epoch": 0.51, "percentage": 16.87, "elapsed_time": "2:39:59", "remaining_time": "13:08:25"} +{"current_steps": 920, "total_steps": 5424, "loss": 0.9614, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.6533866626205805e-05, "epoch": 0.51, "percentage": 16.96, "elapsed_time": "2:40:52", "remaining_time": "13:07:32"} +{"current_steps": 925, "total_steps": 5424, "loss": 1.0323, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.649699674285036e-05, "epoch": 0.51, "percentage": 17.05, "elapsed_time": "2:41:44", "remaining_time": "13:06:39"} +{"current_steps": 930, "total_steps": 5424, "loss": 1.0345, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.645994656690417e-05, "epoch": 0.51, "percentage": 17.15, "elapsed_time": "2:42:36", "remaining_time": "13:05:46"} +{"current_steps": 935, "total_steps": 5424, "loss": 1.0432, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.642271640910235e-05, "epoch": 0.52, "percentage": 17.24, "elapsed_time": "2:43:29", "remaining_time": "13:04:54"} +{"current_steps": 940, "total_steps": 5424, "loss": 1.0102, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.638530658168954e-05, "epoch": 0.52, "percentage": 17.33, "elapsed_time": "2:44:21", "remaining_time": "13:04:01"} +{"current_steps": 945, "total_steps": 5424, "loss": 1.0361, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.6347717398417203e-05, "epoch": 0.52, "percentage": 17.42, "elapsed_time": "2:45:13", "remaining_time": "13:03:08"} +{"current_steps": 950, "total_steps": 5424, "loss": 0.9818, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.6309949174541096e-05, "epoch": 0.53, "percentage": 17.51, "elapsed_time": "2:46:06", "remaining_time": "13:02:15"} +{"current_steps": 955, "total_steps": 5424, "loss": 1.0293, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.627200222681851e-05, "epoch": 0.53, "percentage": 17.61, "elapsed_time": "2:46:58", "remaining_time": "13:01:22"} +{"current_steps": 960, "total_steps": 5424, "loss": 1.0125, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.6233876873505694e-05, "epoch": 0.53, "percentage": 17.7, "elapsed_time": "2:47:50", "remaining_time": "13:00:29"} +{"current_steps": 965, "total_steps": 5424, "loss": 1.0039, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.619557343435516e-05, "epoch": 0.53, "percentage": 17.79, "elapsed_time": "2:48:43", "remaining_time": "12:59:36"} +{"current_steps": 970, "total_steps": 5424, "loss": 1.0146, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.615709223061302e-05, "epoch": 0.54, "percentage": 17.88, "elapsed_time": "2:49:35", "remaining_time": "12:58:43"} +{"current_steps": 975, "total_steps": 5424, "loss": 1.0352, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.611843358501624e-05, "epoch": 0.54, "percentage": 17.98, "elapsed_time": "2:50:27", "remaining_time": "12:57:50"} +{"current_steps": 980, "total_steps": 5424, "loss": 1.0405, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.6079597821789993e-05, "epoch": 0.54, "percentage": 18.07, "elapsed_time": "2:51:20", "remaining_time": "12:56:57"} +{"current_steps": 985, "total_steps": 5424, "loss": 0.9834, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.604058526664491e-05, "epoch": 0.54, "percentage": 18.16, "elapsed_time": "2:52:12", "remaining_time": "12:56:04"} +{"current_steps": 990, "total_steps": 5424, "loss": 1.0252, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.600139624677436e-05, "epoch": 0.55, "percentage": 18.25, "elapsed_time": "2:53:04", "remaining_time": "12:55:11"} +{"current_steps": 995, "total_steps": 5424, "loss": 1.0516, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.596203109085168e-05, "epoch": 0.55, "percentage": 18.34, "elapsed_time": "2:53:57", "remaining_time": "12:54:18"} +{"current_steps": 1000, "total_steps": 5424, "loss": 0.9762, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.5922490129027464e-05, "epoch": 0.55, "percentage": 18.44, "elapsed_time": "2:54:49", "remaining_time": "12:53:25"} +{"current_steps": 1005, "total_steps": 5424, "loss": 0.9801, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.588277369292674e-05, "epoch": 0.56, "percentage": 18.53, "elapsed_time": "2:55:41", "remaining_time": "12:52:32"} +{"current_steps": 1010, "total_steps": 5424, "loss": 1.0127, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.5842882115646234e-05, "epoch": 0.56, "percentage": 18.62, "elapsed_time": "2:56:34", "remaining_time": "12:51:40"} +{"current_steps": 1015, "total_steps": 5424, "loss": 1.0415, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.580281573175157e-05, "epoch": 0.56, "percentage": 18.71, "elapsed_time": "2:57:26", "remaining_time": "12:50:47"} +{"current_steps": 1020, "total_steps": 5424, "loss": 0.9974, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.576257487727442e-05, "epoch": 0.56, "percentage": 18.81, "elapsed_time": "2:58:18", "remaining_time": "12:49:54"} +{"current_steps": 1025, "total_steps": 5424, "loss": 1.0097, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.572215988970974e-05, "epoch": 0.57, "percentage": 18.9, "elapsed_time": "2:59:11", "remaining_time": "12:49:01"} +{"current_steps": 1030, "total_steps": 5424, "loss": 1.0004, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.568157110801293e-05, "epoch": 0.57, "percentage": 18.99, "elapsed_time": "3:00:03", "remaining_time": "12:48:09"} +{"current_steps": 1035, "total_steps": 5424, "loss": 0.9932, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.5640808872596944e-05, "epoch": 0.57, "percentage": 19.08, "elapsed_time": "3:00:56", "remaining_time": "12:47:16"} +{"current_steps": 1040, "total_steps": 5424, "loss": 0.9437, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.5599873525329505e-05, "epoch": 0.58, "percentage": 19.17, "elapsed_time": "3:01:48", "remaining_time": "12:46:23"} +{"current_steps": 1045, "total_steps": 5424, "loss": 1.0004, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.555876540953019e-05, "epoch": 0.58, "percentage": 19.27, "elapsed_time": "3:02:40", "remaining_time": "12:45:30"} +{"current_steps": 1050, "total_steps": 5424, "loss": 0.9743, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.551748486996755e-05, "epoch": 0.58, "percentage": 19.36, "elapsed_time": "3:03:33", "remaining_time": "12:44:37"} +{"current_steps": 1055, "total_steps": 5424, "loss": 1.0303, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.547603225285626e-05, "epoch": 0.58, "percentage": 19.45, "elapsed_time": "3:04:25", "remaining_time": "12:43:44"} +{"current_steps": 1060, "total_steps": 5424, "loss": 1.0225, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.543440790585417e-05, "epoch": 0.59, "percentage": 19.54, "elapsed_time": "3:05:17", "remaining_time": "12:42:52"} +{"current_steps": 1065, "total_steps": 5424, "loss": 1.0102, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.539261217805939e-05, "epoch": 0.59, "percentage": 19.63, "elapsed_time": "3:06:10", "remaining_time": "12:41:59"} +{"current_steps": 1070, "total_steps": 5424, "loss": 1.0087, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.535064542000743e-05, "epoch": 0.59, "percentage": 19.73, "elapsed_time": "3:07:02", "remaining_time": "12:41:06"} +{"current_steps": 1075, "total_steps": 5424, "loss": 0.952, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.5308507983668165e-05, "epoch": 0.59, "percentage": 19.82, "elapsed_time": "3:07:55", "remaining_time": "12:40:14"} +{"current_steps": 1080, "total_steps": 5424, "loss": 0.9767, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.526620022244293e-05, "epoch": 0.6, "percentage": 19.91, "elapsed_time": "3:08:47", "remaining_time": "12:39:21"} +{"current_steps": 1085, "total_steps": 5424, "loss": 1.0049, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.522372249116158e-05, "epoch": 0.6, "percentage": 20.0, "elapsed_time": "3:09:39", "remaining_time": "12:38:28"} +{"current_steps": 1090, "total_steps": 5424, "loss": 1.0397, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.5181075146079456e-05, "epoch": 0.6, "percentage": 20.1, "elapsed_time": "3:10:32", "remaining_time": "12:37:35"} +{"current_steps": 1095, "total_steps": 5424, "loss": 1.0167, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.5138258544874455e-05, "epoch": 0.61, "percentage": 20.19, "elapsed_time": "3:11:24", "remaining_time": "12:36:42"} +{"current_steps": 1100, "total_steps": 5424, "loss": 0.9966, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.5095273046643985e-05, "epoch": 0.61, "percentage": 20.28, "elapsed_time": "3:12:16", "remaining_time": "12:35:50"} +{"current_steps": 1105, "total_steps": 5424, "loss": 0.983, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.5052119011901986e-05, "epoch": 0.61, "percentage": 20.37, "elapsed_time": "3:13:09", "remaining_time": "12:34:57"} +{"current_steps": 1110, "total_steps": 5424, "loss": 1.0301, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.500879680257587e-05, "epoch": 0.61, "percentage": 20.46, "elapsed_time": "3:14:01", "remaining_time": "12:34:04"} +{"current_steps": 1115, "total_steps": 5424, "loss": 1.0371, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.4965306782003535e-05, "epoch": 0.62, "percentage": 20.56, "elapsed_time": "3:14:54", "remaining_time": "12:33:12"} +{"current_steps": 1120, "total_steps": 5424, "loss": 1.0083, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.492164931493028e-05, "epoch": 0.62, "percentage": 20.65, "elapsed_time": "3:15:46", "remaining_time": "12:32:19"} +{"current_steps": 1125, "total_steps": 5424, "loss": 0.9728, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.487782476750575e-05, "epoch": 0.62, "percentage": 20.74, "elapsed_time": "3:16:38", "remaining_time": "12:31:27"} +{"current_steps": 1130, "total_steps": 5424, "loss": 0.9892, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.4833833507280884e-05, "epoch": 0.62, "percentage": 20.83, "elapsed_time": "3:17:31", "remaining_time": "12:30:34"} +{"current_steps": 1135, "total_steps": 5424, "loss": 0.9847, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.4789675903204805e-05, "epoch": 0.63, "percentage": 20.93, "elapsed_time": "3:18:23", "remaining_time": "12:29:41"} +{"current_steps": 1140, "total_steps": 5424, "loss": 1.0545, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.474535232562176e-05, "epoch": 0.63, "percentage": 21.02, "elapsed_time": "3:19:15", "remaining_time": "12:28:49"} +{"current_steps": 1145, "total_steps": 5424, "loss": 1.0204, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.470086314626797e-05, "epoch": 0.63, "percentage": 21.11, "elapsed_time": "3:20:08", "remaining_time": "12:27:56"} +{"current_steps": 1150, "total_steps": 5424, "loss": 0.9648, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.465620873826856e-05, "epoch": 0.64, "percentage": 21.2, "elapsed_time": "3:21:00", "remaining_time": "12:27:03"} +{"current_steps": 1155, "total_steps": 5424, "loss": 0.9545, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.46113894761344e-05, "epoch": 0.64, "percentage": 21.29, "elapsed_time": "3:21:52", "remaining_time": "12:26:10"} +{"current_steps": 1160, "total_steps": 5424, "loss": 1.0256, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.456640573575896e-05, "epoch": 0.64, "percentage": 21.39, "elapsed_time": "3:22:45", "remaining_time": "12:25:18"} +{"current_steps": 1165, "total_steps": 5424, "loss": 0.99, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.4521257894415183e-05, "epoch": 0.64, "percentage": 21.48, "elapsed_time": "3:23:37", "remaining_time": "12:24:25"} +{"current_steps": 1170, "total_steps": 5424, "loss": 0.9906, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.44759463307523e-05, "epoch": 0.65, "percentage": 21.57, "elapsed_time": "3:24:29", "remaining_time": "12:23:32"} +{"current_steps": 1175, "total_steps": 5424, "loss": 0.9876, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.443047142479266e-05, "epoch": 0.65, "percentage": 21.66, "elapsed_time": "3:25:22", "remaining_time": "12:22:39"} +{"current_steps": 1180, "total_steps": 5424, "loss": 1.0495, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.4384833557928553e-05, "epoch": 0.65, "percentage": 21.76, "elapsed_time": "3:26:14", "remaining_time": "12:21:47"} +{"current_steps": 1185, "total_steps": 5424, "loss": 0.9869, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.4339033112918966e-05, "epoch": 0.66, "percentage": 21.85, "elapsed_time": "3:27:07", "remaining_time": "12:20:54"} +{"current_steps": 1190, "total_steps": 5424, "loss": 1.0299, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.4293070473886456e-05, "epoch": 0.66, "percentage": 21.94, "elapsed_time": "3:27:59", "remaining_time": "12:20:01"} +{"current_steps": 1195, "total_steps": 5424, "loss": 1.0073, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.424694602631385e-05, "epoch": 0.66, "percentage": 22.03, "elapsed_time": "3:28:51", "remaining_time": "12:19:09"} +{"current_steps": 1200, "total_steps": 5424, "loss": 1.0023, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.420066015704105e-05, "epoch": 0.66, "percentage": 22.12, "elapsed_time": "3:29:44", "remaining_time": "12:18:16"} +{"current_steps": 1205, "total_steps": 5424, "loss": 0.9462, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.41542132542618e-05, "epoch": 0.67, "percentage": 22.22, "elapsed_time": "3:30:36", "remaining_time": "12:17:24"} +{"current_steps": 1210, "total_steps": 5424, "loss": 1.0116, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.410760570752037e-05, "epoch": 0.67, "percentage": 22.31, "elapsed_time": "3:31:29", "remaining_time": "12:16:31"} +{"current_steps": 1215, "total_steps": 5424, "loss": 0.9652, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.4060837907708375e-05, "epoch": 0.67, "percentage": 22.4, "elapsed_time": "3:32:21", "remaining_time": "12:15:38"} +{"current_steps": 1220, "total_steps": 5424, "loss": 1.0411, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.401391024706142e-05, "epoch": 0.67, "percentage": 22.49, "elapsed_time": "3:33:13", "remaining_time": "12:14:46"} +{"current_steps": 1225, "total_steps": 5424, "loss": 0.9691, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.396682311915586e-05, "epoch": 0.68, "percentage": 22.58, "elapsed_time": "3:34:06", "remaining_time": "12:13:53"} +{"current_steps": 1230, "total_steps": 5424, "loss": 0.961, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.3919576918905495e-05, "epoch": 0.68, "percentage": 22.68, "elapsed_time": "3:34:58", "remaining_time": "12:13:01"} +{"current_steps": 1235, "total_steps": 5424, "loss": 0.9602, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.387217204255819e-05, "epoch": 0.68, "percentage": 22.77, "elapsed_time": "3:35:50", "remaining_time": "12:12:08"} +{"current_steps": 1240, "total_steps": 5424, "loss": 1.0197, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.3824608887692666e-05, "epoch": 0.69, "percentage": 22.86, "elapsed_time": "3:36:43", "remaining_time": "12:11:15"} +{"current_steps": 1245, "total_steps": 5424, "loss": 0.9982, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.377688785321507e-05, "epoch": 0.69, "percentage": 22.95, "elapsed_time": "3:37:35", "remaining_time": "12:10:22"} +{"current_steps": 1250, "total_steps": 5424, "loss": 0.986, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.372900933935569e-05, "epoch": 0.69, "percentage": 23.05, "elapsed_time": "3:38:28", "remaining_time": "12:09:30"} +{"current_steps": 1255, "total_steps": 5424, "loss": 0.9744, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.368097374766556e-05, "epoch": 0.69, "percentage": 23.14, "elapsed_time": "3:39:20", "remaining_time": "12:08:37"} +{"current_steps": 1260, "total_steps": 5424, "loss": 1.0078, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.3632781481013105e-05, "epoch": 0.7, "percentage": 23.23, "elapsed_time": "3:40:12", "remaining_time": "12:07:44"} +{"current_steps": 1265, "total_steps": 5424, "loss": 0.9865, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.358443294358077e-05, "epoch": 0.7, "percentage": 23.32, "elapsed_time": "3:41:05", "remaining_time": "12:06:52"} +{"current_steps": 1270, "total_steps": 5424, "loss": 0.976, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.35359285408616e-05, "epoch": 0.7, "percentage": 23.41, "elapsed_time": "3:41:57", "remaining_time": "12:05:59"} +{"current_steps": 1275, "total_steps": 5424, "loss": 1.0141, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.348726867965591e-05, "epoch": 0.71, "percentage": 23.51, "elapsed_time": "3:42:49", "remaining_time": "12:05:06"} +{"current_steps": 1280, "total_steps": 5424, "loss": 0.952, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.343845376806777e-05, "epoch": 0.71, "percentage": 23.6, "elapsed_time": "3:43:42", "remaining_time": "12:04:13"} +{"current_steps": 1285, "total_steps": 5424, "loss": 1.0239, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.338948421550169e-05, "epoch": 0.71, "percentage": 23.69, "elapsed_time": "3:44:34", "remaining_time": "12:03:20"} +{"current_steps": 1290, "total_steps": 5424, "loss": 0.994, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.334036043265909e-05, "epoch": 0.71, "percentage": 23.78, "elapsed_time": "3:45:26", "remaining_time": "12:02:28"} +{"current_steps": 1295, "total_steps": 5424, "loss": 1.0505, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.329108283153492e-05, "epoch": 0.72, "percentage": 23.88, "elapsed_time": "3:46:19", "remaining_time": "12:01:35"} +{"current_steps": 1300, "total_steps": 5424, "loss": 0.9919, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.3241651825414195e-05, "epoch": 0.72, "percentage": 23.97, "elapsed_time": "3:47:11", "remaining_time": "12:00:43"} +{"current_steps": 1305, "total_steps": 5424, "loss": 1.0169, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.31920678288685e-05, "epoch": 0.72, "percentage": 24.06, "elapsed_time": "3:48:03", "remaining_time": "11:59:50"} +{"current_steps": 1310, "total_steps": 5424, "loss": 0.9938, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.3142331257752546e-05, "epoch": 0.72, "percentage": 24.15, "elapsed_time": "3:48:56", "remaining_time": "11:58:58"} +{"current_steps": 1315, "total_steps": 5424, "loss": 0.92, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.309244252920064e-05, "epoch": 0.73, "percentage": 24.24, "elapsed_time": "3:49:48", "remaining_time": "11:58:05"} +{"current_steps": 1320, "total_steps": 5424, "loss": 0.9537, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.304240206162326e-05, "epoch": 0.73, "percentage": 24.34, "elapsed_time": "3:50:41", "remaining_time": "11:57:12"} +{"current_steps": 1325, "total_steps": 5424, "loss": 0.996, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.299221027470345e-05, "epoch": 0.73, "percentage": 24.43, "elapsed_time": "3:51:33", "remaining_time": "11:56:20"} +{"current_steps": 1330, "total_steps": 5424, "loss": 0.9652, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.294186758939339e-05, "epoch": 0.74, "percentage": 24.52, "elapsed_time": "3:52:25", "remaining_time": "11:55:27"} +{"current_steps": 1335, "total_steps": 5424, "loss": 1.0138, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.2891374427910795e-05, "epoch": 0.74, "percentage": 24.61, "elapsed_time": "3:53:18", "remaining_time": "11:54:34"} +{"current_steps": 1340, "total_steps": 5424, "loss": 0.9267, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.284073121373544e-05, "epoch": 0.74, "percentage": 24.71, "elapsed_time": "3:54:10", "remaining_time": "11:53:42"} +{"current_steps": 1345, "total_steps": 5424, "loss": 0.9899, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.278993837160553e-05, "epoch": 0.74, "percentage": 24.8, "elapsed_time": "3:55:02", "remaining_time": "11:52:49"} +{"current_steps": 1350, "total_steps": 5424, "loss": 0.963, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.273899632751422e-05, "epoch": 0.75, "percentage": 24.89, "elapsed_time": "3:55:55", "remaining_time": "11:51:57"} +{"current_steps": 1355, "total_steps": 5424, "loss": 1.0066, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.2687905508705974e-05, "epoch": 0.75, "percentage": 24.98, "elapsed_time": "3:56:47", "remaining_time": "11:51:04"} +{"current_steps": 1360, "total_steps": 5424, "loss": 0.9939, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.263666634367303e-05, "epoch": 0.75, "percentage": 25.07, "elapsed_time": "3:57:39", "remaining_time": "11:50:11"} +{"current_steps": 1365, "total_steps": 5424, "loss": 0.9667, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.258527926215178e-05, "epoch": 0.75, "percentage": 25.17, "elapsed_time": "3:58:32", "remaining_time": "11:49:19"} +{"current_steps": 1370, "total_steps": 5424, "loss": 0.9999, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.253374469511917e-05, "epoch": 0.76, "percentage": 25.26, "elapsed_time": "3:59:24", "remaining_time": "11:48:26"} +{"current_steps": 1375, "total_steps": 5424, "loss": 0.9738, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.248206307478909e-05, "epoch": 0.76, "percentage": 25.35, "elapsed_time": "4:00:16", "remaining_time": "11:47:33"} +{"current_steps": 1380, "total_steps": 5424, "loss": 0.9829, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.243023483460875e-05, "epoch": 0.76, "percentage": 25.44, "elapsed_time": "4:01:09", "remaining_time": "11:46:41"} +{"current_steps": 1385, "total_steps": 5424, "loss": 1.0107, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.237826040925503e-05, "epoch": 0.77, "percentage": 25.53, "elapsed_time": "4:02:01", "remaining_time": "11:45:48"} +{"current_steps": 1390, "total_steps": 5424, "loss": 0.9992, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.232614023463088e-05, "epoch": 0.77, "percentage": 25.63, "elapsed_time": "4:02:54", "remaining_time": "11:44:56"} +{"current_steps": 1395, "total_steps": 5424, "loss": 0.9566, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.227387474786159e-05, "epoch": 0.77, "percentage": 25.72, "elapsed_time": "4:03:46", "remaining_time": "11:44:03"} +{"current_steps": 1400, "total_steps": 5424, "loss": 0.9722, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.222146438729119e-05, "epoch": 0.77, "percentage": 25.81, "elapsed_time": "4:04:38", "remaining_time": "11:43:11"} +{"current_steps": 1405, "total_steps": 5424, "loss": 0.988, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.216890959247873e-05, "epoch": 0.78, "percentage": 25.9, "elapsed_time": "4:05:31", "remaining_time": "11:42:18"} +{"current_steps": 1410, "total_steps": 5424, "loss": 1.0103, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.211621080419463e-05, "epoch": 0.78, "percentage": 26.0, "elapsed_time": "4:06:23", "remaining_time": "11:41:26"} +{"current_steps": 1415, "total_steps": 5424, "loss": 0.9805, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.206336846441695e-05, "epoch": 0.78, "percentage": 26.09, "elapsed_time": "4:07:16", "remaining_time": "11:40:33"} +{"current_steps": 1420, "total_steps": 5424, "loss": 1.0177, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.201038301632772e-05, "epoch": 0.79, "percentage": 26.18, "elapsed_time": "4:08:08", "remaining_time": "11:39:41"} +{"current_steps": 1425, "total_steps": 5424, "loss": 0.9938, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.195725490430917e-05, "epoch": 0.79, "percentage": 26.27, "elapsed_time": "4:09:01", "remaining_time": "11:38:49"} +{"current_steps": 1430, "total_steps": 5424, "loss": 1.0276, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.190398457394007e-05, "epoch": 0.79, "percentage": 26.36, "elapsed_time": "4:09:53", "remaining_time": "11:37:57"} +{"current_steps": 1435, "total_steps": 5424, "loss": 0.9909, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.1850572471991924e-05, "epoch": 0.79, "percentage": 26.46, "elapsed_time": "4:10:46", "remaining_time": "11:37:04"} +{"current_steps": 1440, "total_steps": 5424, "loss": 0.9794, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.1797019046425264e-05, "epoch": 0.8, "percentage": 26.55, "elapsed_time": "4:11:38", "remaining_time": "11:36:12"} +{"current_steps": 1445, "total_steps": 5424, "loss": 1.0073, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.1743324746385914e-05, "epoch": 0.8, "percentage": 26.64, "elapsed_time": "4:12:31", "remaining_time": "11:35:21"} +{"current_steps": 1450, "total_steps": 5424, "loss": 1.0106, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.1689490022201154e-05, "epoch": 0.8, "percentage": 26.73, "elapsed_time": "4:13:23", "remaining_time": "11:34:29"} +{"current_steps": 1455, "total_steps": 5424, "loss": 1.0241, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.163551532537601e-05, "epoch": 0.8, "percentage": 26.83, "elapsed_time": "4:14:16", "remaining_time": "11:33:36"} +{"current_steps": 1460, "total_steps": 5424, "loss": 1.0178, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.1581401108589425e-05, "epoch": 0.81, "percentage": 26.92, "elapsed_time": "4:15:08", "remaining_time": "11:32:44"} +{"current_steps": 1465, "total_steps": 5424, "loss": 0.9426, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.1527147825690495e-05, "epoch": 0.81, "percentage": 27.01, "elapsed_time": "4:16:01", "remaining_time": "11:31:52"} +{"current_steps": 1470, "total_steps": 5424, "loss": 0.9642, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.1472755931694626e-05, "epoch": 0.81, "percentage": 27.1, "elapsed_time": "4:16:54", "remaining_time": "11:31:00"} +{"current_steps": 1475, "total_steps": 5424, "loss": 0.9678, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.141822588277976e-05, "epoch": 0.82, "percentage": 27.19, "elapsed_time": "4:17:46", "remaining_time": "11:30:08"} +{"current_steps": 1480, "total_steps": 5424, "loss": 0.9654, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.136355813628251e-05, "epoch": 0.82, "percentage": 27.29, "elapsed_time": "4:18:38", "remaining_time": "11:29:15"} +{"current_steps": 1485, "total_steps": 5424, "loss": 0.9748, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.130875315069435e-05, "epoch": 0.82, "percentage": 27.38, "elapsed_time": "4:19:31", "remaining_time": "11:28:23"} +{"current_steps": 1490, "total_steps": 5424, "loss": 0.9321, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.125381138565775e-05, "epoch": 0.82, "percentage": 27.47, "elapsed_time": "4:20:23", "remaining_time": "11:27:30"} +{"current_steps": 1495, "total_steps": 5424, "loss": 0.9885, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.1198733301962346e-05, "epoch": 0.83, "percentage": 27.56, "elapsed_time": "4:21:15", "remaining_time": "11:26:37"} +{"current_steps": 1500, "total_steps": 5424, "loss": 0.9385, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.114351936154105e-05, "epoch": 0.83, "percentage": 27.65, "elapsed_time": "4:22:08", "remaining_time": "11:25:45"} +{"current_steps": 1505, "total_steps": 5424, "loss": 0.9362, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.108817002746619e-05, "epoch": 0.83, "percentage": 27.75, "elapsed_time": "4:23:00", "remaining_time": "11:24:52"} +{"current_steps": 1510, "total_steps": 5424, "loss": 0.9764, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.1032685763945625e-05, "epoch": 0.83, "percentage": 27.84, "elapsed_time": "4:23:53", "remaining_time": "11:24:00"} +{"current_steps": 1515, "total_steps": 5424, "loss": 0.961, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.097706703631886e-05, "epoch": 0.84, "percentage": 27.93, "elapsed_time": "4:24:45", "remaining_time": "11:23:07"} +{"current_steps": 1520, "total_steps": 5424, "loss": 0.9818, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.092131431105312e-05, "epoch": 0.84, "percentage": 28.02, "elapsed_time": "4:25:37", "remaining_time": "11:22:15"} +{"current_steps": 1525, "total_steps": 5424, "loss": 0.9855, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.086542805573945e-05, "epoch": 0.84, "percentage": 28.12, "elapsed_time": "4:26:30", "remaining_time": "11:21:22"} +{"current_steps": 1530, "total_steps": 5424, "loss": 0.9618, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.080940873908881e-05, "epoch": 0.85, "percentage": 28.21, "elapsed_time": "4:27:22", "remaining_time": "11:20:30"} +{"current_steps": 1535, "total_steps": 5424, "loss": 1.0102, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.07532568309281e-05, "epoch": 0.85, "percentage": 28.3, "elapsed_time": "4:28:14", "remaining_time": "11:19:37"} +{"current_steps": 1540, "total_steps": 5424, "loss": 0.9773, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.069697280219628e-05, "epoch": 0.85, "percentage": 28.39, "elapsed_time": "4:29:07", "remaining_time": "11:18:44"} +{"current_steps": 1545, "total_steps": 5424, "loss": 0.9687, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.0640557124940376e-05, "epoch": 0.85, "percentage": 28.48, "elapsed_time": "4:29:59", "remaining_time": "11:17:52"} +{"current_steps": 1550, "total_steps": 5424, "loss": 0.9818, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.058401027231152e-05, "epoch": 0.86, "percentage": 28.58, "elapsed_time": "4:30:52", "remaining_time": "11:16:59"} +{"current_steps": 1555, "total_steps": 5424, "loss": 0.9911, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.052733271856103e-05, "epoch": 0.86, "percentage": 28.67, "elapsed_time": "4:31:44", "remaining_time": "11:16:07"} +{"current_steps": 1560, "total_steps": 5424, "loss": 0.9694, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.0470524939036355e-05, "epoch": 0.86, "percentage": 28.76, "elapsed_time": "4:32:36", "remaining_time": "11:15:14"} +{"current_steps": 1565, "total_steps": 5424, "loss": 0.9374, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.0413587410177155e-05, "epoch": 0.87, "percentage": 28.85, "elapsed_time": "4:33:29", "remaining_time": "11:14:21"} +{"current_steps": 1570, "total_steps": 5424, "loss": 0.956, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.035652060951128e-05, "epoch": 0.87, "percentage": 28.95, "elapsed_time": "4:34:21", "remaining_time": "11:13:29"} +{"current_steps": 1575, "total_steps": 5424, "loss": 0.9491, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.0299325015650774e-05, "epoch": 0.87, "percentage": 29.04, "elapsed_time": "4:35:13", "remaining_time": "11:12:36"} +{"current_steps": 1580, "total_steps": 5424, "loss": 0.9569, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.024200110828783e-05, "epoch": 0.87, "percentage": 29.13, "elapsed_time": "4:36:06", "remaining_time": "11:11:43"} +{"current_steps": 1585, "total_steps": 5424, "loss": 0.9627, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.018454936819082e-05, "epoch": 0.88, "percentage": 29.22, "elapsed_time": "4:36:58", "remaining_time": "11:10:51"} +{"current_steps": 1590, "total_steps": 5424, "loss": 0.9922, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.012697027720018e-05, "epoch": 0.88, "percentage": 29.31, "elapsed_time": "4:37:50", "remaining_time": "11:09:59"} +{"current_steps": 1595, "total_steps": 5424, "loss": 0.9703, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.0069264318224506e-05, "epoch": 0.88, "percentage": 29.41, "elapsed_time": "4:38:43", "remaining_time": "11:09:06"} +{"current_steps": 1600, "total_steps": 5424, "loss": 0.9436, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.0011431975236337e-05, "epoch": 0.88, "percentage": 29.5, "elapsed_time": "4:39:35", "remaining_time": "11:08:14"} +{"current_steps": 1605, "total_steps": 5424, "loss": 0.961, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.995347373326822e-05, "epoch": 0.89, "percentage": 29.59, "elapsed_time": "4:40:28", "remaining_time": "11:07:22"} +{"current_steps": 1610, "total_steps": 5424, "loss": 0.9247, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.989539007840861e-05, "epoch": 0.89, "percentage": 29.68, "elapsed_time": "4:41:21", "remaining_time": "11:06:30"} +{"current_steps": 1615, "total_steps": 5424, "loss": 0.9537, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.983718149779775e-05, "epoch": 0.89, "percentage": 29.78, "elapsed_time": "4:42:13", "remaining_time": "11:05:38"} +{"current_steps": 1620, "total_steps": 5424, "loss": 0.9415, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.9778848479623656e-05, "epoch": 0.9, "percentage": 29.87, "elapsed_time": "4:43:06", "remaining_time": "11:04:45"} +{"current_steps": 1625, "total_steps": 5424, "loss": 0.9954, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.972039151311795e-05, "epoch": 0.9, "percentage": 29.96, "elapsed_time": "4:43:58", "remaining_time": "11:03:53"} +{"current_steps": 1630, "total_steps": 5424, "loss": 0.9451, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.966181108855183e-05, "epoch": 0.9, "percentage": 30.05, "elapsed_time": "4:44:50", "remaining_time": "11:03:00"} +{"current_steps": 1635, "total_steps": 5424, "loss": 0.9977, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.960310769723189e-05, "epoch": 0.9, "percentage": 30.14, "elapsed_time": "4:45:43", "remaining_time": "11:02:08"} +{"current_steps": 1640, "total_steps": 5424, "loss": 0.9806, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.9544281831496034e-05, "epoch": 0.91, "percentage": 30.24, "elapsed_time": "4:46:35", "remaining_time": "11:01:15"} +{"current_steps": 1645, "total_steps": 5424, "loss": 0.9851, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.9485333984709374e-05, "epoch": 0.91, "percentage": 30.33, "elapsed_time": "4:47:28", "remaining_time": "11:00:23"} +{"current_steps": 1650, "total_steps": 5424, "loss": 1.0089, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.942626465126001e-05, "epoch": 0.91, "percentage": 30.42, "elapsed_time": "4:48:20", "remaining_time": "10:59:30"} +{"current_steps": 1655, "total_steps": 5424, "loss": 0.9562, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.9367074326555e-05, "epoch": 0.92, "percentage": 30.51, "elapsed_time": "4:49:12", "remaining_time": "10:58:38"} +{"current_steps": 1660, "total_steps": 5424, "loss": 0.9892, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.930776350701609e-05, "epoch": 0.92, "percentage": 30.6, "elapsed_time": "4:50:05", "remaining_time": "10:57:45"} +{"current_steps": 1665, "total_steps": 5424, "loss": 0.9875, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.92483326900756e-05, "epoch": 0.92, "percentage": 30.7, "elapsed_time": "4:50:57", "remaining_time": "10:56:53"} +{"current_steps": 1670, "total_steps": 5424, "loss": 1.0012, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.91887823741723e-05, "epoch": 0.92, "percentage": 30.79, "elapsed_time": "4:51:49", "remaining_time": "10:56:00"} +{"current_steps": 1675, "total_steps": 5424, "loss": 0.97, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.9129113058747136e-05, "epoch": 0.93, "percentage": 30.88, "elapsed_time": "4:52:42", "remaining_time": "10:55:07"} +{"current_steps": 1680, "total_steps": 5424, "loss": 0.9793, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.9069325244239095e-05, "epoch": 0.93, "percentage": 30.97, "elapsed_time": "4:53:34", "remaining_time": "10:54:15"} +{"current_steps": 1685, "total_steps": 5424, "loss": 0.9864, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.900941943208103e-05, "epoch": 0.93, "percentage": 31.07, "elapsed_time": "4:54:27", "remaining_time": "10:53:23"} +{"current_steps": 1690, "total_steps": 5424, "loss": 0.9596, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.894939612469539e-05, "epoch": 0.93, "percentage": 31.16, "elapsed_time": "4:55:19", "remaining_time": "10:52:30"} +{"current_steps": 1695, "total_steps": 5424, "loss": 0.9679, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.888925582549006e-05, "epoch": 0.94, "percentage": 31.25, "elapsed_time": "4:56:11", "remaining_time": "10:51:37"} +{"current_steps": 1700, "total_steps": 5424, "loss": 0.9395, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.882899903885412e-05, "epoch": 0.94, "percentage": 31.34, "elapsed_time": "4:57:04", "remaining_time": "10:50:45"} +{"current_steps": 1705, "total_steps": 5424, "loss": 0.9774, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.876862627015361e-05, "epoch": 0.94, "percentage": 31.43, "elapsed_time": "4:57:56", "remaining_time": "10:49:53"} +{"current_steps": 1710, "total_steps": 5424, "loss": 0.9686, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.87081380257273e-05, "epoch": 0.95, "percentage": 31.53, "elapsed_time": "4:58:48", "remaining_time": "10:49:00"} +{"current_steps": 1715, "total_steps": 5424, "loss": 0.953, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.864753481288244e-05, "epoch": 0.95, "percentage": 31.62, "elapsed_time": "4:59:41", "remaining_time": "10:48:07"} +{"current_steps": 1720, "total_steps": 5424, "loss": 0.9719, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.8586817139890515e-05, "epoch": 0.95, "percentage": 31.71, "elapsed_time": "5:00:33", "remaining_time": "10:47:15"} +{"current_steps": 1725, "total_steps": 5424, "loss": 0.9748, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.852598551598294e-05, "epoch": 0.95, "percentage": 31.8, "elapsed_time": "5:01:26", "remaining_time": "10:46:22"} +{"current_steps": 1730, "total_steps": 5424, "loss": 0.9741, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.8465040451346874e-05, "epoch": 0.96, "percentage": 31.9, "elapsed_time": "5:02:18", "remaining_time": "10:45:30"} +{"current_steps": 1735, "total_steps": 5424, "loss": 0.9747, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.8403982457120836e-05, "epoch": 0.96, "percentage": 31.99, "elapsed_time": "5:03:10", "remaining_time": "10:44:37"} +{"current_steps": 1740, "total_steps": 5424, "loss": 0.9791, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.834281204539051e-05, "epoch": 0.96, "percentage": 32.08, "elapsed_time": "5:04:03", "remaining_time": "10:43:44"} +{"current_steps": 1745, "total_steps": 5424, "loss": 0.9704, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.828152972918438e-05, "epoch": 0.96, "percentage": 32.17, "elapsed_time": "5:04:55", "remaining_time": "10:42:52"} +{"current_steps": 1750, "total_steps": 5424, "loss": 0.9556, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.82201360224695e-05, "epoch": 0.97, "percentage": 32.26, "elapsed_time": "5:05:47", "remaining_time": "10:41:59"} +{"current_steps": 1755, "total_steps": 5424, "loss": 0.9552, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.815863144014711e-05, "epoch": 0.97, "percentage": 32.36, "elapsed_time": "5:06:40", "remaining_time": "10:41:07"} +{"current_steps": 1760, "total_steps": 5424, "loss": 0.9594, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.809701649804834e-05, "epoch": 0.97, "percentage": 32.45, "elapsed_time": "5:07:32", "remaining_time": "10:40:14"} +{"current_steps": 1765, "total_steps": 5424, "loss": 0.959, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.8035291712929926e-05, "epoch": 0.98, "percentage": 32.54, "elapsed_time": "5:08:24", "remaining_time": "10:39:22"} +{"current_steps": 1770, "total_steps": 5424, "loss": 1.0254, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.797345760246982e-05, "epoch": 0.98, "percentage": 32.63, "elapsed_time": "5:09:17", "remaining_time": "10:38:29"} +{"current_steps": 1775, "total_steps": 5424, "loss": 0.9925, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.791151468526289e-05, "epoch": 0.98, "percentage": 32.72, "elapsed_time": "5:10:09", "remaining_time": "10:37:36"} +{"current_steps": 1780, "total_steps": 5424, "loss": 0.9516, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.784946348081654e-05, "epoch": 0.98, "percentage": 32.82, "elapsed_time": "5:11:01", "remaining_time": "10:36:44"} +{"current_steps": 1785, "total_steps": 5424, "loss": 0.954, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.7787304509546365e-05, "epoch": 0.99, "percentage": 32.91, "elapsed_time": "5:11:54", "remaining_time": "10:35:51"} +{"current_steps": 1790, "total_steps": 5424, "loss": 0.9465, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.7725038292771774e-05, "epoch": 0.99, "percentage": 33.0, "elapsed_time": "5:12:46", "remaining_time": "10:34:59"} +{"current_steps": 1795, "total_steps": 5424, "loss": 0.9792, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.766266535271167e-05, "epoch": 0.99, "percentage": 33.09, "elapsed_time": "5:13:38", "remaining_time": "10:34:06"} +{"current_steps": 1800, "total_steps": 5424, "loss": 0.964, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.760018621248e-05, "epoch": 1.0, "percentage": 33.19, "elapsed_time": "5:14:31", "remaining_time": "10:33:14"} +{"current_steps": 1805, "total_steps": 5424, "loss": 0.9419, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.75376013960814e-05, "epoch": 1.0, "percentage": 33.28, "elapsed_time": "5:15:23", "remaining_time": "10:32:21"} +{"current_steps": 1810, "total_steps": 5424, "loss": 0.9818, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.747491142840681e-05, "epoch": 1.0, "percentage": 33.37, "elapsed_time": "5:16:15", "remaining_time": "10:31:28"} +{"current_steps": 1815, "total_steps": 5424, "loss": 0.9153, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.741211683522904e-05, "epoch": 1.0, "percentage": 33.46, "elapsed_time": "5:17:07", "remaining_time": "10:30:35"} +{"current_steps": 1820, "total_steps": 5424, "loss": 0.955, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.734921814319841e-05, "epoch": 1.01, "percentage": 33.55, "elapsed_time": "5:18:00", "remaining_time": "10:29:43"} +{"current_steps": 1825, "total_steps": 5424, "loss": 0.9467, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.728621587983828e-05, "epoch": 1.01, "percentage": 33.65, "elapsed_time": "5:18:52", "remaining_time": "10:28:50"} +{"current_steps": 1830, "total_steps": 5424, "loss": 0.9816, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.722311057354067e-05, "epoch": 1.01, "percentage": 33.74, "elapsed_time": "5:19:45", "remaining_time": "10:27:58"} +{"current_steps": 1835, "total_steps": 5424, "loss": 0.9727, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.715990275356178e-05, "epoch": 1.01, "percentage": 33.83, "elapsed_time": "5:20:37", "remaining_time": "10:27:06"} +{"current_steps": 1840, "total_steps": 5424, "loss": 0.9822, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.7096592950017617e-05, "epoch": 1.02, "percentage": 33.92, "elapsed_time": "5:21:30", "remaining_time": "10:26:13"} +{"current_steps": 1845, "total_steps": 5424, "loss": 0.942, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.703318169387947e-05, "epoch": 1.02, "percentage": 34.02, "elapsed_time": "5:22:22", "remaining_time": "10:25:21"} +{"current_steps": 1850, "total_steps": 5424, "loss": 0.9306, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.696966951696952e-05, "epoch": 1.02, "percentage": 34.11, "elapsed_time": "5:23:15", "remaining_time": "10:24:29"} +{"current_steps": 1855, "total_steps": 5424, "loss": 0.9654, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.690605695195637e-05, "epoch": 1.03, "percentage": 34.2, "elapsed_time": "5:24:07", "remaining_time": "10:23:37"} +{"current_steps": 1860, "total_steps": 5424, "loss": 0.9592, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.684234453235054e-05, "epoch": 1.03, "percentage": 34.29, "elapsed_time": "5:25:00", "remaining_time": "10:22:45"} +{"current_steps": 1865, "total_steps": 5424, "loss": 0.988, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.677853279250003e-05, "epoch": 1.03, "percentage": 34.38, "elapsed_time": "5:25:53", "remaining_time": "10:21:53"} +{"current_steps": 1870, "total_steps": 5424, "loss": 0.9462, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.671462226758583e-05, "epoch": 1.03, "percentage": 34.48, "elapsed_time": "5:26:45", "remaining_time": "10:21:00"} +{"current_steps": 1875, "total_steps": 5424, "loss": 0.9685, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.665061349361742e-05, "epoch": 1.04, "percentage": 34.57, "elapsed_time": "5:27:37", "remaining_time": "10:20:08"} +{"current_steps": 1880, "total_steps": 5424, "loss": 0.9772, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.658650700742828e-05, "epoch": 1.04, "percentage": 34.66, "elapsed_time": "5:28:30", "remaining_time": "10:19:15"} +{"current_steps": 1885, "total_steps": 5424, "loss": 0.9482, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.6522303346671404e-05, "epoch": 1.04, "percentage": 34.75, "elapsed_time": "5:29:22", "remaining_time": "10:18:23"} +{"current_steps": 1890, "total_steps": 5424, "loss": 1.0069, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.645800304981477e-05, "epoch": 1.05, "percentage": 34.85, "elapsed_time": "5:30:14", "remaining_time": "10:17:30"} +{"current_steps": 1895, "total_steps": 5424, "loss": 0.9003, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.639360665613683e-05, "epoch": 1.05, "percentage": 34.94, "elapsed_time": "5:31:07", "remaining_time": "10:16:38"} +{"current_steps": 1900, "total_steps": 5424, "loss": 0.9279, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.632911470572197e-05, "epoch": 1.05, "percentage": 35.03, "elapsed_time": "5:31:59", "remaining_time": "10:15:45"} +{"current_steps": 1905, "total_steps": 5424, "loss": 0.9237, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.626452773945603e-05, "epoch": 1.05, "percentage": 35.12, "elapsed_time": "5:32:52", "remaining_time": "10:14:53"} +{"current_steps": 1910, "total_steps": 5424, "loss": 1.0086, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.619984629902172e-05, "epoch": 1.06, "percentage": 35.21, "elapsed_time": "5:33:44", "remaining_time": "10:14:00"} +{"current_steps": 1915, "total_steps": 5424, "loss": 0.9625, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.613507092689409e-05, "epoch": 1.06, "percentage": 35.31, "elapsed_time": "5:34:36", "remaining_time": "10:13:08"} +{"current_steps": 1920, "total_steps": 5424, "loss": 0.9297, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.607020216633599e-05, "epoch": 1.06, "percentage": 35.4, "elapsed_time": "5:35:29", "remaining_time": "10:12:15"} +{"current_steps": 1925, "total_steps": 5424, "loss": 0.9718, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.60052405613935e-05, "epoch": 1.06, "percentage": 35.49, "elapsed_time": "5:36:21", "remaining_time": "10:11:23"} +{"current_steps": 1930, "total_steps": 5424, "loss": 0.9512, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.594018665689139e-05, "epoch": 1.07, "percentage": 35.58, "elapsed_time": "5:37:13", "remaining_time": "10:10:30"} +{"current_steps": 1935, "total_steps": 5424, "loss": 0.9923, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.5875040998428513e-05, "epoch": 1.07, "percentage": 35.67, "elapsed_time": "5:38:06", "remaining_time": "10:09:38"} +{"current_steps": 1940, "total_steps": 5424, "loss": 0.9518, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.5809804132373253e-05, "epoch": 1.07, "percentage": 35.77, "elapsed_time": "5:38:58", "remaining_time": "10:08:45"} +{"current_steps": 1945, "total_steps": 5424, "loss": 0.9324, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.574447660585897e-05, "epoch": 1.08, "percentage": 35.86, "elapsed_time": "5:39:50", "remaining_time": "10:07:52"} +{"current_steps": 1950, "total_steps": 5424, "loss": 0.9724, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.5679058966779344e-05, "epoch": 1.08, "percentage": 35.95, "elapsed_time": "5:40:43", "remaining_time": "10:07:00"} +{"current_steps": 1955, "total_steps": 5424, "loss": 0.978, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.561355176378384e-05, "epoch": 1.08, "percentage": 36.04, "elapsed_time": "5:41:35", "remaining_time": "10:06:07"} +{"current_steps": 1960, "total_steps": 5424, "loss": 0.9893, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.554795554627307e-05, "epoch": 1.08, "percentage": 36.14, "elapsed_time": "5:42:27", "remaining_time": "10:05:15"} +{"current_steps": 1965, "total_steps": 5424, "loss": 0.967, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.548227086439422e-05, "epoch": 1.09, "percentage": 36.23, "elapsed_time": "5:43:20", "remaining_time": "10:04:22"} +{"current_steps": 1970, "total_steps": 5424, "loss": 0.9529, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.541649826903639e-05, "epoch": 1.09, "percentage": 36.32, "elapsed_time": "5:44:12", "remaining_time": "10:03:30"} +{"current_steps": 1975, "total_steps": 5424, "loss": 0.9506, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.535063831182602e-05, "epoch": 1.09, "percentage": 36.41, "elapsed_time": "5:45:04", "remaining_time": "10:02:37"} +{"current_steps": 1980, "total_steps": 5424, "loss": 0.9525, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.528469154512224e-05, "epoch": 1.09, "percentage": 36.5, "elapsed_time": "5:45:57", "remaining_time": "10:01:45"} +{"current_steps": 1985, "total_steps": 5424, "loss": 0.9258, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.521865852201223e-05, "epoch": 1.1, "percentage": 36.6, "elapsed_time": "5:46:49", "remaining_time": "10:00:52"} +{"current_steps": 1990, "total_steps": 5424, "loss": 0.9417, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.5152539796306596e-05, "epoch": 1.1, "percentage": 36.69, "elapsed_time": "5:47:42", "remaining_time": "10:00:00"} +{"current_steps": 1995, "total_steps": 5424, "loss": 0.9465, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.508633592253472e-05, "epoch": 1.1, "percentage": 36.78, "elapsed_time": "5:48:34", "remaining_time": "9:59:07"} +{"current_steps": 2000, "total_steps": 5424, "loss": 0.9494, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.502004745594011e-05, "epoch": 1.11, "percentage": 36.87, "elapsed_time": "5:49:26", "remaining_time": "9:58:15"} +{"current_steps": 2005, "total_steps": 5424, "loss": 0.9709, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.4953674952475755e-05, "epoch": 1.11, "percentage": 36.97, "elapsed_time": "5:50:19", "remaining_time": "9:57:22"} +{"current_steps": 2010, "total_steps": 5424, "loss": 0.9581, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.488721896879943e-05, "epoch": 1.11, "percentage": 37.06, "elapsed_time": "5:51:11", "remaining_time": "9:56:30"} +{"current_steps": 2015, "total_steps": 5424, "loss": 0.974, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.4820680062269074e-05, "epoch": 1.11, "percentage": 37.15, "elapsed_time": "5:52:04", "remaining_time": "9:55:37"} +{"current_steps": 2020, "total_steps": 5424, "loss": 0.9768, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.4754058790938046e-05, "epoch": 1.12, "percentage": 37.24, "elapsed_time": "5:52:56", "remaining_time": "9:54:45"} +{"current_steps": 2025, "total_steps": 5424, "loss": 0.982, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.468735571355055e-05, "epoch": 1.12, "percentage": 37.33, "elapsed_time": "5:53:48", "remaining_time": "9:53:52"} +{"current_steps": 2030, "total_steps": 5424, "loss": 0.984, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.4620571389536825e-05, "epoch": 1.12, "percentage": 37.43, "elapsed_time": "5:54:41", "remaining_time": "9:53:00"} +{"current_steps": 2035, "total_steps": 5424, "loss": 0.9604, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.455370637900856e-05, "epoch": 1.13, "percentage": 37.52, "elapsed_time": "5:55:33", "remaining_time": "9:52:08"} +{"current_steps": 2040, "total_steps": 5424, "loss": 0.897, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.448676124275414e-05, "epoch": 1.13, "percentage": 37.61, "elapsed_time": "5:56:25", "remaining_time": "9:51:15"} +{"current_steps": 2045, "total_steps": 5424, "loss": 0.9968, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.4419736542233925e-05, "epoch": 1.13, "percentage": 37.7, "elapsed_time": "5:57:18", "remaining_time": "9:50:23"} +{"current_steps": 2050, "total_steps": 5424, "loss": 0.9479, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.4352632839575616e-05, "epoch": 1.13, "percentage": 37.79, "elapsed_time": "5:58:10", "remaining_time": "9:49:30"} +{"current_steps": 2055, "total_steps": 5424, "loss": 0.9724, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.428545069756946e-05, "epoch": 1.14, "percentage": 37.89, "elapsed_time": "5:59:02", "remaining_time": "9:48:37"} +{"current_steps": 2060, "total_steps": 5424, "loss": 0.9493, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.42181906796636e-05, "epoch": 1.14, "percentage": 37.98, "elapsed_time": "5:59:55", "remaining_time": "9:47:45"} +{"current_steps": 2065, "total_steps": 5424, "loss": 0.9348, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.415085334995927e-05, "epoch": 1.14, "percentage": 38.07, "elapsed_time": "6:00:47", "remaining_time": "9:46:52"} +{"current_steps": 2070, "total_steps": 5424, "loss": 0.9702, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.408343927320613e-05, "epoch": 1.14, "percentage": 38.16, "elapsed_time": "6:01:39", "remaining_time": "9:46:00"} +{"current_steps": 2075, "total_steps": 5424, "loss": 0.9089, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.401594901479753e-05, "epoch": 1.15, "percentage": 38.26, "elapsed_time": "6:02:32", "remaining_time": "9:45:07"} +{"current_steps": 2080, "total_steps": 5424, "loss": 0.9606, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.394838314076572e-05, "epoch": 1.15, "percentage": 38.35, "elapsed_time": "6:03:24", "remaining_time": "9:44:14"} +{"current_steps": 2085, "total_steps": 5424, "loss": 0.9743, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.3880742217777115e-05, "epoch": 1.15, "percentage": 38.44, "elapsed_time": "6:04:16", "remaining_time": "9:43:22"} +{"current_steps": 2090, "total_steps": 5424, "loss": 0.9469, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.381302681312759e-05, "epoch": 1.16, "percentage": 38.53, "elapsed_time": "6:05:09", "remaining_time": "9:42:29"} +{"current_steps": 2095, "total_steps": 5424, "loss": 0.949, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.374523749473767e-05, "epoch": 1.16, "percentage": 38.62, "elapsed_time": "6:06:01", "remaining_time": "9:41:37"} +{"current_steps": 2100, "total_steps": 5424, "loss": 0.9567, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.367737483114779e-05, "epoch": 1.16, "percentage": 38.72, "elapsed_time": "6:06:53", "remaining_time": "9:40:44"} +{"current_steps": 2105, "total_steps": 5424, "loss": 0.9718, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.360943939151351e-05, "epoch": 1.16, "percentage": 38.81, "elapsed_time": "6:07:46", "remaining_time": "9:39:52"} +{"current_steps": 2110, "total_steps": 5424, "loss": 0.9626, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.354143174560078e-05, "epoch": 1.17, "percentage": 38.9, "elapsed_time": "6:08:38", "remaining_time": "9:38:59"} +{"current_steps": 2115, "total_steps": 5424, "loss": 0.9346, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.3473352463781105e-05, "epoch": 1.17, "percentage": 38.99, "elapsed_time": "6:09:30", "remaining_time": "9:38:06"} +{"current_steps": 2120, "total_steps": 5424, "loss": 0.9208, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.340520211702681e-05, "epoch": 1.17, "percentage": 39.09, "elapsed_time": "6:10:23", "remaining_time": "9:37:14"} +{"current_steps": 2125, "total_steps": 5424, "loss": 0.9856, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.333698127690623e-05, "epoch": 1.18, "percentage": 39.18, "elapsed_time": "6:11:15", "remaining_time": "9:36:21"} +{"current_steps": 2130, "total_steps": 5424, "loss": 0.9049, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.326869051557891e-05, "epoch": 1.18, "percentage": 39.27, "elapsed_time": "6:12:07", "remaining_time": "9:35:29"} +{"current_steps": 2135, "total_steps": 5424, "loss": 0.9222, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.320033040579082e-05, "epoch": 1.18, "percentage": 39.36, "elapsed_time": "6:12:59", "remaining_time": "9:34:36"} +{"current_steps": 2140, "total_steps": 5424, "loss": 0.9648, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.3131901520869565e-05, "epoch": 1.18, "percentage": 39.45, "elapsed_time": "6:13:52", "remaining_time": "9:33:44"} +{"current_steps": 2145, "total_steps": 5424, "loss": 0.9538, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.306340443471951e-05, "epoch": 1.19, "percentage": 39.55, "elapsed_time": "6:14:44", "remaining_time": "9:32:51"} +{"current_steps": 2150, "total_steps": 5424, "loss": 0.9314, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.299483972181708e-05, "epoch": 1.19, "percentage": 39.64, "elapsed_time": "6:15:36", "remaining_time": "9:31:58"} +{"current_steps": 2155, "total_steps": 5424, "loss": 0.9576, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.292620795720583e-05, "epoch": 1.19, "percentage": 39.73, "elapsed_time": "6:16:29", "remaining_time": "9:31:06"} +{"current_steps": 2160, "total_steps": 5424, "loss": 0.9499, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.285750971649167e-05, "epoch": 1.19, "percentage": 39.82, "elapsed_time": "6:17:21", "remaining_time": "9:30:13"} +{"current_steps": 2165, "total_steps": 5424, "loss": 0.9568, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.278874557583807e-05, "epoch": 1.2, "percentage": 39.92, "elapsed_time": "6:18:13", "remaining_time": "9:29:21"} +{"current_steps": 2170, "total_steps": 5424, "loss": 0.9642, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.271991611196117e-05, "epoch": 1.2, "percentage": 40.01, "elapsed_time": "6:19:06", "remaining_time": "9:28:28"} +{"current_steps": 2175, "total_steps": 5424, "loss": 0.9526, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.265102190212497e-05, "epoch": 1.2, "percentage": 40.1, "elapsed_time": "6:19:58", "remaining_time": "9:27:36"} +{"current_steps": 2180, "total_steps": 5424, "loss": 0.933, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.258206352413648e-05, "epoch": 1.21, "percentage": 40.19, "elapsed_time": "6:20:50", "remaining_time": "9:26:43"} +{"current_steps": 2185, "total_steps": 5424, "loss": 0.9683, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.2513041556340887e-05, "epoch": 1.21, "percentage": 40.28, "elapsed_time": "6:21:43", "remaining_time": "9:25:51"} +{"current_steps": 2190, "total_steps": 5424, "loss": 0.9155, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.244395657761671e-05, "epoch": 1.21, "percentage": 40.38, "elapsed_time": "6:22:35", "remaining_time": "9:24:58"} +{"current_steps": 2195, "total_steps": 5424, "loss": 0.9262, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.2374809167370924e-05, "epoch": 1.21, "percentage": 40.47, "elapsed_time": "6:23:27", "remaining_time": "9:24:06"} +{"current_steps": 2200, "total_steps": 5424, "loss": 0.9778, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.230559990553409e-05, "epoch": 1.22, "percentage": 40.56, "elapsed_time": "6:24:20", "remaining_time": "9:23:13"} +{"current_steps": 2205, "total_steps": 5424, "loss": 0.9577, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.2236329372555544e-05, "epoch": 1.22, "percentage": 40.65, "elapsed_time": "6:25:12", "remaining_time": "9:22:21"} +{"current_steps": 2210, "total_steps": 5424, "loss": 0.9286, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.2166998149398465e-05, "epoch": 1.22, "percentage": 40.74, "elapsed_time": "6:26:05", "remaining_time": "9:21:28"} +{"current_steps": 2215, "total_steps": 5424, "loss": 0.9634, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.209760681753505e-05, "epoch": 1.22, "percentage": 40.84, "elapsed_time": "6:26:57", "remaining_time": "9:20:36"} +{"current_steps": 2220, "total_steps": 5424, "loss": 0.9467, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.2028155958941615e-05, "epoch": 1.23, "percentage": 40.93, "elapsed_time": "6:27:49", "remaining_time": "9:19:43"} +{"current_steps": 2225, "total_steps": 5424, "loss": 0.9543, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.195864615609373e-05, "epoch": 1.23, "percentage": 41.02, "elapsed_time": "6:28:42", "remaining_time": "9:18:51"} +{"current_steps": 2230, "total_steps": 5424, "loss": 0.9914, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.1889077991961304e-05, "epoch": 1.23, "percentage": 41.11, "elapsed_time": "6:29:34", "remaining_time": "9:17:58"} +{"current_steps": 2235, "total_steps": 5424, "loss": 0.9309, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.181945205000373e-05, "epoch": 1.24, "percentage": 41.21, "elapsed_time": "6:30:26", "remaining_time": "9:17:06"} +{"current_steps": 2240, "total_steps": 5424, "loss": 0.9299, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.1749768914164955e-05, "epoch": 1.24, "percentage": 41.3, "elapsed_time": "6:31:18", "remaining_time": "9:16:13"} +{"current_steps": 2245, "total_steps": 5424, "loss": 0.9462, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.168002916886864e-05, "epoch": 1.24, "percentage": 41.39, "elapsed_time": "6:32:11", "remaining_time": "9:15:21"} +{"current_steps": 2250, "total_steps": 5424, "loss": 0.948, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.1610233399013194e-05, "epoch": 1.24, "percentage": 41.48, "elapsed_time": "6:33:03", "remaining_time": "9:14:28"} +{"current_steps": 2255, "total_steps": 5424, "loss": 0.9335, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.15403821899669e-05, "epoch": 1.25, "percentage": 41.57, "elapsed_time": "6:33:55", "remaining_time": "9:13:36"} +{"current_steps": 2260, "total_steps": 5424, "loss": 0.952, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.147047612756302e-05, "epoch": 1.25, "percentage": 41.67, "elapsed_time": "6:34:48", "remaining_time": "9:12:43"} +{"current_steps": 2265, "total_steps": 5424, "loss": 0.9532, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.140051579809484e-05, "epoch": 1.25, "percentage": 41.76, "elapsed_time": "6:35:40", "remaining_time": "9:11:51"} +{"current_steps": 2270, "total_steps": 5424, "loss": 0.9853, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.133050178831079e-05, "epoch": 1.26, "percentage": 41.85, "elapsed_time": "6:36:32", "remaining_time": "9:10:58"} +{"current_steps": 2275, "total_steps": 5424, "loss": 0.8925, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.12604346854095e-05, "epoch": 1.26, "percentage": 41.94, "elapsed_time": "6:37:25", "remaining_time": "9:10:06"} +{"current_steps": 2280, "total_steps": 5424, "loss": 0.942, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.119031507703491e-05, "epoch": 1.26, "percentage": 42.04, "elapsed_time": "6:38:17", "remaining_time": "9:09:13"} +{"current_steps": 2285, "total_steps": 5424, "loss": 0.9132, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.112014355127129e-05, "epoch": 1.26, "percentage": 42.13, "elapsed_time": "6:39:09", "remaining_time": "9:08:21"} +{"current_steps": 2290, "total_steps": 5424, "loss": 0.9335, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.104992069663835e-05, "epoch": 1.27, "percentage": 42.22, "elapsed_time": "6:40:02", "remaining_time": "9:07:28"} +{"current_steps": 2295, "total_steps": 5424, "loss": 0.9403, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.0979647102086273e-05, "epoch": 1.27, "percentage": 42.31, "elapsed_time": "6:40:54", "remaining_time": "9:06:36"} +{"current_steps": 2300, "total_steps": 5424, "loss": 0.9246, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.090932335699081e-05, "epoch": 1.27, "percentage": 42.4, "elapsed_time": "6:41:46", "remaining_time": "9:05:43"} +{"current_steps": 2305, "total_steps": 5424, "loss": 0.912, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.083895005114831e-05, "epoch": 1.27, "percentage": 42.5, "elapsed_time": "6:42:39", "remaining_time": "9:04:51"} +{"current_steps": 2310, "total_steps": 5424, "loss": 0.9334, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.076852777477079e-05, "epoch": 1.28, "percentage": 42.59, "elapsed_time": "6:43:31", "remaining_time": "9:03:58"} +{"current_steps": 2315, "total_steps": 5424, "loss": 0.933, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.069805711848096e-05, "epoch": 1.28, "percentage": 42.68, "elapsed_time": "6:44:24", "remaining_time": "9:03:06"} +{"current_steps": 2320, "total_steps": 5424, "loss": 0.9348, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.062753867330729e-05, "epoch": 1.28, "percentage": 42.77, "elapsed_time": "6:45:16", "remaining_time": "9:02:13"} +{"current_steps": 2325, "total_steps": 5424, "loss": 0.9997, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.055697303067905e-05, "epoch": 1.29, "percentage": 42.87, "elapsed_time": "6:46:08", "remaining_time": "9:01:21"} +{"current_steps": 2330, "total_steps": 5424, "loss": 0.9196, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.048636078242137e-05, "epoch": 1.29, "percentage": 42.96, "elapsed_time": "6:47:01", "remaining_time": "9:00:28"} +{"current_steps": 2335, "total_steps": 5424, "loss": 0.9735, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.0415702520750235e-05, "epoch": 1.29, "percentage": 43.05, "elapsed_time": "6:47:53", "remaining_time": "8:59:36"} +{"current_steps": 2340, "total_steps": 5424, "loss": 0.9395, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.0344998838267525e-05, "epoch": 1.29, "percentage": 43.14, "elapsed_time": "6:48:45", "remaining_time": "8:58:43"} +{"current_steps": 2345, "total_steps": 5424, "loss": 0.9455, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.0274250327956093e-05, "epoch": 1.3, "percentage": 43.23, "elapsed_time": "6:49:38", "remaining_time": "8:57:51"} +{"current_steps": 2350, "total_steps": 5424, "loss": 0.968, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.020345758317474e-05, "epoch": 1.3, "percentage": 43.33, "elapsed_time": "6:50:30", "remaining_time": "8:56:59"} +{"current_steps": 2355, "total_steps": 5424, "loss": 0.9403, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.0132621197653245e-05, "epoch": 1.3, "percentage": 43.42, "elapsed_time": "6:51:23", "remaining_time": "8:56:06"} +{"current_steps": 2360, "total_steps": 5424, "loss": 0.9267, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.0061741765487418e-05, "epoch": 1.3, "percentage": 43.51, "elapsed_time": "6:52:15", "remaining_time": "8:55:14"} +{"current_steps": 2365, "total_steps": 5424, "loss": 0.9734, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.9990819881134073e-05, "epoch": 1.31, "percentage": 43.6, "elapsed_time": "6:53:07", "remaining_time": "8:54:21"} +{"current_steps": 2370, "total_steps": 5424, "loss": 0.949, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.9919856139406093e-05, "epoch": 1.31, "percentage": 43.69, "elapsed_time": "6:53:59", "remaining_time": "8:53:28"} +{"current_steps": 2375, "total_steps": 5424, "loss": 0.9464, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.9848851135467386e-05, "epoch": 1.31, "percentage": 43.79, "elapsed_time": "6:54:52", "remaining_time": "8:52:36"} +{"current_steps": 2380, "total_steps": 5424, "loss": 0.9709, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.977780546482794e-05, "epoch": 1.32, "percentage": 43.88, "elapsed_time": "6:55:44", "remaining_time": "8:51:44"} +{"current_steps": 2385, "total_steps": 5424, "loss": 0.9202, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.9706719723338795e-05, "epoch": 1.32, "percentage": 43.97, "elapsed_time": "6:56:37", "remaining_time": "8:50:51"} +{"current_steps": 2390, "total_steps": 5424, "loss": 0.9625, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.9635594507187074e-05, "epoch": 1.32, "percentage": 44.06, "elapsed_time": "6:57:29", "remaining_time": "8:49:59"} +{"current_steps": 2395, "total_steps": 5424, "loss": 0.9307, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.956443041289096e-05, "epoch": 1.32, "percentage": 44.16, "elapsed_time": "6:58:21", "remaining_time": "8:49:06"} +{"current_steps": 2400, "total_steps": 5424, "loss": 0.907, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.9493228037294702e-05, "epoch": 1.33, "percentage": 44.25, "elapsed_time": "6:59:13", "remaining_time": "8:48:13"} +{"current_steps": 2405, "total_steps": 5424, "loss": 0.9487, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.9421987977563613e-05, "epoch": 1.33, "percentage": 44.34, "elapsed_time": "7:00:06", "remaining_time": "8:47:21"} +{"current_steps": 2410, "total_steps": 5424, "loss": 0.9259, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.935071083117907e-05, "epoch": 1.33, "percentage": 44.43, "elapsed_time": "7:00:58", "remaining_time": "8:46:29"} +{"current_steps": 2415, "total_steps": 5424, "loss": 0.9357, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.9279397195933457e-05, "epoch": 1.34, "percentage": 44.52, "elapsed_time": "7:01:51", "remaining_time": "8:45:36"} +{"current_steps": 2420, "total_steps": 5424, "loss": 0.9149, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.920804766992521e-05, "epoch": 1.34, "percentage": 44.62, "elapsed_time": "7:02:43", "remaining_time": "8:44:44"} +{"current_steps": 2425, "total_steps": 5424, "loss": 0.9664, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.9136662851553787e-05, "epoch": 1.34, "percentage": 44.71, "elapsed_time": "7:03:35", "remaining_time": "8:43:51"} +{"current_steps": 2430, "total_steps": 5424, "loss": 0.973, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.906524333951461e-05, "epoch": 1.34, "percentage": 44.8, "elapsed_time": "7:04:28", "remaining_time": "8:42:59"} +{"current_steps": 2435, "total_steps": 5424, "loss": 0.9797, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.899378973279409e-05, "epoch": 1.35, "percentage": 44.89, "elapsed_time": "7:05:20", "remaining_time": "8:42:06"} +{"current_steps": 2440, "total_steps": 5424, "loss": 0.9604, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.892230263066459e-05, "epoch": 1.35, "percentage": 44.99, "elapsed_time": "7:06:13", "remaining_time": "8:41:14"} +{"current_steps": 2445, "total_steps": 5424, "loss": 0.9147, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.885078263267938e-05, "epoch": 1.35, "percentage": 45.08, "elapsed_time": "7:07:05", "remaining_time": "8:40:22"} +{"current_steps": 2450, "total_steps": 5424, "loss": 0.9827, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.8779230338667634e-05, "epoch": 1.35, "percentage": 45.17, "elapsed_time": "7:07:57", "remaining_time": "8:39:29"} +{"current_steps": 2455, "total_steps": 5424, "loss": 0.9362, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.870764634872939e-05, "epoch": 1.36, "percentage": 45.26, "elapsed_time": "7:08:50", "remaining_time": "8:38:37"} +{"current_steps": 2460, "total_steps": 5424, "loss": 0.9373, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.86360312632305e-05, "epoch": 1.36, "percentage": 45.35, "elapsed_time": "7:09:42", "remaining_time": "8:37:44"} +{"current_steps": 2465, "total_steps": 5424, "loss": 0.955, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.8564385682797622e-05, "epoch": 1.36, "percentage": 45.45, "elapsed_time": "7:10:34", "remaining_time": "8:36:52"} +{"current_steps": 2470, "total_steps": 5424, "loss": 0.9108, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.8492710208313177e-05, "epoch": 1.37, "percentage": 45.54, "elapsed_time": "7:11:27", "remaining_time": "8:35:59"} +{"current_steps": 2475, "total_steps": 5424, "loss": 0.9259, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.8421005440910303e-05, "epoch": 1.37, "percentage": 45.63, "elapsed_time": "7:12:19", "remaining_time": "8:35:07"} +{"current_steps": 2480, "total_steps": 5424, "loss": 0.9333, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.8349271981967797e-05, "epoch": 1.37, "percentage": 45.72, "elapsed_time": "7:13:11", "remaining_time": "8:34:14"} +{"current_steps": 2485, "total_steps": 5424, "loss": 0.9437, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.8277510433105102e-05, "epoch": 1.37, "percentage": 45.81, "elapsed_time": "7:14:04", "remaining_time": "8:33:22"} +{"current_steps": 2490, "total_steps": 5424, "loss": 0.9189, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.820572139617725e-05, "epoch": 1.38, "percentage": 45.91, "elapsed_time": "7:14:56", "remaining_time": "8:32:29"} +{"current_steps": 2495, "total_steps": 5424, "loss": 0.9522, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.8133905473269802e-05, "epoch": 1.38, "percentage": 46.0, "elapsed_time": "7:15:48", "remaining_time": "8:31:37"} +{"current_steps": 2500, "total_steps": 5424, "loss": 0.9352, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.8062063266693818e-05, "epoch": 1.38, "percentage": 46.09, "elapsed_time": "7:16:41", "remaining_time": "8:30:45"} +{"current_steps": 2505, "total_steps": 5424, "loss": 0.9232, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.7990195378980784e-05, "epoch": 1.39, "percentage": 46.18, "elapsed_time": "7:17:33", "remaining_time": "8:29:52"} +{"current_steps": 2510, "total_steps": 5424, "loss": 0.8807, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.7918302412877583e-05, "epoch": 1.39, "percentage": 46.28, "elapsed_time": "7:18:26", "remaining_time": "8:29:00"} +{"current_steps": 2515, "total_steps": 5424, "loss": 0.9231, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.7846384971341427e-05, "epoch": 1.39, "percentage": 46.37, "elapsed_time": "7:19:18", "remaining_time": "8:28:07"} +{"current_steps": 2520, "total_steps": 5424, "loss": 0.9253, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.7774443657534788e-05, "epoch": 1.39, "percentage": 46.46, "elapsed_time": "7:20:10", "remaining_time": "8:27:15"} +{"current_steps": 2525, "total_steps": 5424, "loss": 0.9646, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.770247907482036e-05, "epoch": 1.4, "percentage": 46.55, "elapsed_time": "7:21:03", "remaining_time": "8:26:22"} +{"current_steps": 2530, "total_steps": 5424, "loss": 0.9575, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.763049182675599e-05, "epoch": 1.4, "percentage": 46.64, "elapsed_time": "7:21:55", "remaining_time": "8:25:30"} +{"current_steps": 2535, "total_steps": 5424, "loss": 0.9234, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.7558482517089617e-05, "epoch": 1.4, "percentage": 46.74, "elapsed_time": "7:22:47", "remaining_time": "8:24:37"} +{"current_steps": 2540, "total_steps": 5424, "loss": 0.9215, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.748645174975421e-05, "epoch": 1.4, "percentage": 46.83, "elapsed_time": "7:23:40", "remaining_time": "8:23:45"} +{"current_steps": 2545, "total_steps": 5424, "loss": 0.959, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.74144001288627e-05, "epoch": 1.41, "percentage": 46.92, "elapsed_time": "7:24:32", "remaining_time": "8:22:52"} +{"current_steps": 2550, "total_steps": 5424, "loss": 0.9509, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.7342328258702894e-05, "epoch": 1.41, "percentage": 47.01, "elapsed_time": "7:25:24", "remaining_time": "8:22:00"} +{"current_steps": 2555, "total_steps": 5424, "loss": 0.8989, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.727023674373246e-05, "epoch": 1.41, "percentage": 47.11, "elapsed_time": "7:26:17", "remaining_time": "8:21:07"} +{"current_steps": 2560, "total_steps": 5424, "loss": 0.9653, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.7198126188573807e-05, "epoch": 1.42, "percentage": 47.2, "elapsed_time": "7:27:09", "remaining_time": "8:20:15"} +{"current_steps": 2565, "total_steps": 5424, "loss": 0.9046, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.7125997198009028e-05, "epoch": 1.42, "percentage": 47.29, "elapsed_time": "7:28:01", "remaining_time": "8:19:23"} +{"current_steps": 2570, "total_steps": 5424, "loss": 0.9318, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.7053850376974848e-05, "epoch": 1.42, "percentage": 47.38, "elapsed_time": "7:28:54", "remaining_time": "8:18:30"} +{"current_steps": 2575, "total_steps": 5424, "loss": 0.9292, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.6981686330557516e-05, "epoch": 1.42, "percentage": 47.47, "elapsed_time": "7:29:46", "remaining_time": "8:17:38"} +{"current_steps": 2580, "total_steps": 5424, "loss": 0.99, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.6909505663987756e-05, "epoch": 1.43, "percentage": 47.57, "elapsed_time": "7:30:38", "remaining_time": "8:16:45"} +{"current_steps": 2585, "total_steps": 5424, "loss": 0.9737, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.6837308982635678e-05, "epoch": 1.43, "percentage": 47.66, "elapsed_time": "7:31:31", "remaining_time": "8:15:53"} +{"current_steps": 2590, "total_steps": 5424, "loss": 0.9649, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.6765096892005726e-05, "epoch": 1.43, "percentage": 47.75, "elapsed_time": "7:32:23", "remaining_time": "8:15:00"} +{"current_steps": 2595, "total_steps": 5424, "loss": 0.9687, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.6692869997731545e-05, "epoch": 1.43, "percentage": 47.84, "elapsed_time": "7:33:15", "remaining_time": "8:14:08"} +{"current_steps": 2600, "total_steps": 5424, "loss": 0.9708, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.6620628905570964e-05, "epoch": 1.44, "percentage": 47.94, "elapsed_time": "7:34:08", "remaining_time": "8:13:15"} +{"current_steps": 2605, "total_steps": 5424, "loss": 0.9498, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.6548374221400884e-05, "epoch": 1.44, "percentage": 48.03, "elapsed_time": "7:35:00", "remaining_time": "8:12:23"} +{"current_steps": 2610, "total_steps": 5424, "loss": 0.9512, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.6476106551212188e-05, "epoch": 1.44, "percentage": 48.12, "elapsed_time": "7:35:52", "remaining_time": "8:11:30"} +{"current_steps": 2615, "total_steps": 5424, "loss": 0.964, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.6403826501104682e-05, "epoch": 1.45, "percentage": 48.21, "elapsed_time": "7:36:45", "remaining_time": "8:10:38"} +{"current_steps": 2620, "total_steps": 5424, "loss": 0.9321, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.6331534677281998e-05, "epoch": 1.45, "percentage": 48.3, "elapsed_time": "7:37:37", "remaining_time": "8:09:46"} +{"current_steps": 2625, "total_steps": 5424, "loss": 0.9032, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.6259231686046508e-05, "epoch": 1.45, "percentage": 48.4, "elapsed_time": "7:38:30", "remaining_time": "8:08:53"} +{"current_steps": 2630, "total_steps": 5424, "loss": 0.9543, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.6186918133794252e-05, "epoch": 1.45, "percentage": 48.49, "elapsed_time": "7:39:22", "remaining_time": "8:08:01"} +{"current_steps": 2635, "total_steps": 5424, "loss": 0.9355, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.6114594627009847e-05, "epoch": 1.46, "percentage": 48.58, "elapsed_time": "7:40:14", "remaining_time": "8:07:08"} +{"current_steps": 2640, "total_steps": 5424, "loss": 0.9111, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.604226177226137e-05, "epoch": 1.46, "percentage": 48.67, "elapsed_time": "7:41:07", "remaining_time": "8:06:16"} +{"current_steps": 2645, "total_steps": 5424, "loss": 0.9896, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.596992017619534e-05, "epoch": 1.46, "percentage": 48.76, "elapsed_time": "7:41:59", "remaining_time": "8:05:23"} +{"current_steps": 2650, "total_steps": 5424, "loss": 0.9715, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.589757044553155e-05, "epoch": 1.47, "percentage": 48.86, "elapsed_time": "7:42:51", "remaining_time": "8:04:31"} +{"current_steps": 2655, "total_steps": 5424, "loss": 0.8911, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.5825213187058045e-05, "epoch": 1.47, "percentage": 48.95, "elapsed_time": "7:43:44", "remaining_time": "8:03:38"} +{"current_steps": 2660, "total_steps": 5424, "loss": 0.9446, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.5752849007625986e-05, "epoch": 1.47, "percentage": 49.04, "elapsed_time": "7:44:36", "remaining_time": "8:02:46"} +{"current_steps": 2665, "total_steps": 5424, "loss": 0.9453, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.568047851414459e-05, "epoch": 1.47, "percentage": 49.13, "elapsed_time": "7:45:28", "remaining_time": "8:01:54"} +{"current_steps": 2670, "total_steps": 5424, "loss": 0.9244, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.5608102313576027e-05, "epoch": 1.48, "percentage": 49.23, "elapsed_time": "7:46:21", "remaining_time": "8:01:01"} +{"current_steps": 2675, "total_steps": 5424, "loss": 0.9478, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.553572101293033e-05, "epoch": 1.48, "percentage": 49.32, "elapsed_time": "7:47:13", "remaining_time": "8:00:09"} +{"current_steps": 2680, "total_steps": 5424, "loss": 0.9708, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.546333521926031e-05, "epoch": 1.48, "percentage": 49.41, "elapsed_time": "7:48:06", "remaining_time": "7:59:16"} +{"current_steps": 2685, "total_steps": 5424, "loss": 0.9266, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.5390945539656445e-05, "epoch": 1.48, "percentage": 49.5, "elapsed_time": "7:48:58", "remaining_time": "7:58:24"} +{"current_steps": 2690, "total_steps": 5424, "loss": 0.9199, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.5318552581241822e-05, "epoch": 1.49, "percentage": 49.59, "elapsed_time": "7:49:50", "remaining_time": "7:57:32"} +{"current_steps": 2695, "total_steps": 5424, "loss": 0.9666, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.524615695116702e-05, "epoch": 1.49, "percentage": 49.69, "elapsed_time": "7:50:43", "remaining_time": "7:56:39"} +{"current_steps": 2700, "total_steps": 5424, "loss": 0.9247, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.5173759256605027e-05, "epoch": 1.49, "percentage": 49.78, "elapsed_time": "7:51:35", "remaining_time": "7:55:47"} +{"current_steps": 2705, "total_steps": 5424, "loss": 0.9534, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.510136010474614e-05, "epoch": 1.5, "percentage": 49.87, "elapsed_time": "7:52:28", "remaining_time": "7:54:54"} +{"current_steps": 2710, "total_steps": 5424, "loss": 0.9502, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.5028960102792887e-05, "epoch": 1.5, "percentage": 49.96, "elapsed_time": "7:53:20", "remaining_time": "7:54:02"} +{"current_steps": 2715, "total_steps": 5424, "loss": 0.9331, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.4971039897207112e-05, "epoch": 1.5, "percentage": 50.06, "elapsed_time": "7:54:12", "remaining_time": "7:53:09"} +{"current_steps": 2720, "total_steps": 5424, "loss": 0.9806, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.4898639895253865e-05, "epoch": 1.5, "percentage": 50.15, "elapsed_time": "7:55:05", "remaining_time": "7:52:17"} +{"current_steps": 2725, "total_steps": 5424, "loss": 0.9525, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.4826240743394982e-05, "epoch": 1.51, "percentage": 50.24, "elapsed_time": "7:55:57", "remaining_time": "7:51:25"} +{"current_steps": 2730, "total_steps": 5424, "loss": 0.937, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.4753843048832985e-05, "epoch": 1.51, "percentage": 50.33, "elapsed_time": "7:56:49", "remaining_time": "7:50:32"} +{"current_steps": 2735, "total_steps": 5424, "loss": 0.935, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.4681447418758187e-05, "epoch": 1.51, "percentage": 50.42, "elapsed_time": "7:57:42", "remaining_time": "7:49:40"} +{"current_steps": 2740, "total_steps": 5424, "loss": 0.9662, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.460905446034356e-05, "epoch": 1.52, "percentage": 50.52, "elapsed_time": "7:58:34", "remaining_time": "7:48:47"} +{"current_steps": 2745, "total_steps": 5424, "loss": 0.9249, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.45366647807397e-05, "epoch": 1.52, "percentage": 50.61, "elapsed_time": "7:59:27", "remaining_time": "7:47:55"} +{"current_steps": 2750, "total_steps": 5424, "loss": 0.9673, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.446427898706967e-05, "epoch": 1.52, "percentage": 50.7, "elapsed_time": "8:00:19", "remaining_time": "7:47:03"} +{"current_steps": 2755, "total_steps": 5424, "loss": 0.9316, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.439189768642398e-05, "epoch": 1.52, "percentage": 50.79, "elapsed_time": "8:01:11", "remaining_time": "7:46:10"} +{"current_steps": 2760, "total_steps": 5424, "loss": 0.9297, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.431952148585541e-05, "epoch": 1.53, "percentage": 50.88, "elapsed_time": "8:02:04", "remaining_time": "7:45:18"} +{"current_steps": 2765, "total_steps": 5424, "loss": 0.9278, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.424715099237402e-05, "epoch": 1.53, "percentage": 50.98, "elapsed_time": "8:02:56", "remaining_time": "7:44:25"} +{"current_steps": 2770, "total_steps": 5424, "loss": 0.8954, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.4174786812941968e-05, "epoch": 1.53, "percentage": 51.07, "elapsed_time": "8:03:49", "remaining_time": "7:43:33"} +{"current_steps": 2775, "total_steps": 5424, "loss": 0.9586, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.4102429554468456e-05, "epoch": 1.53, "percentage": 51.16, "elapsed_time": "8:04:41", "remaining_time": "7:42:41"} +{"current_steps": 2780, "total_steps": 5424, "loss": 0.9119, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.4030079823804673e-05, "epoch": 1.54, "percentage": 51.25, "elapsed_time": "8:05:33", "remaining_time": "7:41:48"} +{"current_steps": 2785, "total_steps": 5424, "loss": 0.8949, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.395773822773863e-05, "epoch": 1.54, "percentage": 51.35, "elapsed_time": "8:06:26", "remaining_time": "7:40:56"} +{"current_steps": 2790, "total_steps": 5424, "loss": 0.9506, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.3885405372990166e-05, "epoch": 1.54, "percentage": 51.44, "elapsed_time": "8:07:18", "remaining_time": "7:40:03"} +{"current_steps": 2795, "total_steps": 5424, "loss": 0.9087, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.3813081866205754e-05, "epoch": 1.55, "percentage": 51.53, "elapsed_time": "8:08:11", "remaining_time": "7:39:11"} +{"current_steps": 2800, "total_steps": 5424, "loss": 0.923, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.3740768313953494e-05, "epoch": 1.55, "percentage": 51.62, "elapsed_time": "8:09:03", "remaining_time": "7:38:19"} +{"current_steps": 2805, "total_steps": 5424, "loss": 1.0212, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.3668465322718004e-05, "epoch": 1.55, "percentage": 51.71, "elapsed_time": "8:09:55", "remaining_time": "7:37:26"} +{"current_steps": 2810, "total_steps": 5424, "loss": 0.9182, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.359617349889532e-05, "epoch": 1.55, "percentage": 51.81, "elapsed_time": "8:10:48", "remaining_time": "7:36:34"} +{"current_steps": 2815, "total_steps": 5424, "loss": 0.9316, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.3523893448787818e-05, "epoch": 1.56, "percentage": 51.9, "elapsed_time": "8:11:40", "remaining_time": "7:35:41"} +{"current_steps": 2820, "total_steps": 5424, "loss": 0.94, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.3451625778599122e-05, "epoch": 1.56, "percentage": 51.99, "elapsed_time": "8:12:33", "remaining_time": "7:34:49"} +{"current_steps": 2825, "total_steps": 5424, "loss": 0.9181, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.3379371094429038e-05, "epoch": 1.56, "percentage": 52.08, "elapsed_time": "8:13:25", "remaining_time": "7:33:56"} +{"current_steps": 2830, "total_steps": 5424, "loss": 0.937, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.3307130002268457e-05, "epoch": 1.56, "percentage": 52.18, "elapsed_time": "8:14:17", "remaining_time": "7:33:04"} +{"current_steps": 2835, "total_steps": 5424, "loss": 0.9026, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.3234903107994287e-05, "epoch": 1.57, "percentage": 52.27, "elapsed_time": "8:15:10", "remaining_time": "7:32:12"} +{"current_steps": 2840, "total_steps": 5424, "loss": 0.954, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.3162691017364317e-05, "epoch": 1.57, "percentage": 52.36, "elapsed_time": "8:16:02", "remaining_time": "7:31:19"} +{"current_steps": 2845, "total_steps": 5424, "loss": 0.9661, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.3090494336012253e-05, "epoch": 1.57, "percentage": 52.45, "elapsed_time": "8:16:54", "remaining_time": "7:30:27"} +{"current_steps": 2850, "total_steps": 5424, "loss": 0.9127, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.3018313669442483e-05, "epoch": 1.58, "percentage": 52.54, "elapsed_time": "8:17:47", "remaining_time": "7:29:34"} +{"current_steps": 2855, "total_steps": 5424, "loss": 0.9317, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.2946149623025158e-05, "epoch": 1.58, "percentage": 52.64, "elapsed_time": "8:18:39", "remaining_time": "7:28:42"} +{"current_steps": 2860, "total_steps": 5424, "loss": 0.9856, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.2874002801990978e-05, "epoch": 1.58, "percentage": 52.73, "elapsed_time": "8:19:32", "remaining_time": "7:27:50"} +{"current_steps": 2865, "total_steps": 5424, "loss": 1.0021, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.28018738114262e-05, "epoch": 1.58, "percentage": 52.82, "elapsed_time": "8:20:24", "remaining_time": "7:26:57"} +{"current_steps": 2870, "total_steps": 5424, "loss": 0.9655, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.272976325626755e-05, "epoch": 1.59, "percentage": 52.91, "elapsed_time": "8:21:16", "remaining_time": "7:26:05"} +{"current_steps": 2875, "total_steps": 5424, "loss": 0.9619, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.265767174129711e-05, "epoch": 1.59, "percentage": 53.01, "elapsed_time": "8:22:09", "remaining_time": "7:25:12"} +{"current_steps": 2880, "total_steps": 5424, "loss": 0.9383, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.2585599871137313e-05, "epoch": 1.59, "percentage": 53.1, "elapsed_time": "8:23:01", "remaining_time": "7:24:20"} +{"current_steps": 2885, "total_steps": 5424, "loss": 0.9332, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.251354825024579e-05, "epoch": 1.6, "percentage": 53.19, "elapsed_time": "8:23:54", "remaining_time": "7:23:28"} +{"current_steps": 2890, "total_steps": 5424, "loss": 0.9544, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.244151748291039e-05, "epoch": 1.6, "percentage": 53.28, "elapsed_time": "8:24:46", "remaining_time": "7:22:35"} +{"current_steps": 2895, "total_steps": 5424, "loss": 0.9343, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.236950817324401e-05, "epoch": 1.6, "percentage": 53.37, "elapsed_time": "8:25:38", "remaining_time": "7:21:43"} +{"current_steps": 2900, "total_steps": 5424, "loss": 0.9189, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.2297520925179647e-05, "epoch": 1.6, "percentage": 53.47, "elapsed_time": "8:26:31", "remaining_time": "7:20:50"} +{"current_steps": 2905, "total_steps": 5424, "loss": 0.9341, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.222555634246521e-05, "epoch": 1.61, "percentage": 53.56, "elapsed_time": "8:27:23", "remaining_time": "7:19:58"} +{"current_steps": 2910, "total_steps": 5424, "loss": 0.9567, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.215361502865858e-05, "epoch": 1.61, "percentage": 53.65, "elapsed_time": "8:28:16", "remaining_time": "7:19:06"} +{"current_steps": 2915, "total_steps": 5424, "loss": 0.9047, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.2081697587122423e-05, "epoch": 1.61, "percentage": 53.74, "elapsed_time": "8:29:08", "remaining_time": "7:18:13"} +{"current_steps": 2920, "total_steps": 5424, "loss": 0.9126, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.200980462101922e-05, "epoch": 1.61, "percentage": 53.83, "elapsed_time": "8:30:00", "remaining_time": "7:17:21"} +{"current_steps": 2925, "total_steps": 5424, "loss": 0.9523, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.1937936733306195e-05, "epoch": 1.62, "percentage": 53.93, "elapsed_time": "8:30:53", "remaining_time": "7:16:29"} +{"current_steps": 2930, "total_steps": 5424, "loss": 0.8802, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.18660945267302e-05, "epoch": 1.62, "percentage": 54.02, "elapsed_time": "8:31:45", "remaining_time": "7:15:36"} +{"current_steps": 2935, "total_steps": 5424, "loss": 0.9197, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.179427860382276e-05, "epoch": 1.62, "percentage": 54.11, "elapsed_time": "8:32:38", "remaining_time": "7:14:44"} +{"current_steps": 2940, "total_steps": 5424, "loss": 0.9255, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.1722489566894903e-05, "epoch": 1.63, "percentage": 54.2, "elapsed_time": "8:33:30", "remaining_time": "7:13:51"} +{"current_steps": 2945, "total_steps": 5424, "loss": 0.8921, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.1650728018032206e-05, "epoch": 1.63, "percentage": 54.3, "elapsed_time": "8:34:23", "remaining_time": "7:12:59"} +{"current_steps": 2950, "total_steps": 5424, "loss": 0.9607, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.15789945590897e-05, "epoch": 1.63, "percentage": 54.39, "elapsed_time": "8:35:15", "remaining_time": "7:12:07"} +{"current_steps": 2955, "total_steps": 5424, "loss": 0.9755, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.150728979168683e-05, "epoch": 1.63, "percentage": 54.48, "elapsed_time": "8:36:07", "remaining_time": "7:11:14"} +{"current_steps": 2960, "total_steps": 5424, "loss": 0.9943, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.1435614317202384e-05, "epoch": 1.64, "percentage": 54.57, "elapsed_time": "8:37:00", "remaining_time": "7:10:22"} +{"current_steps": 2965, "total_steps": 5424, "loss": 0.9463, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.1363968736769508e-05, "epoch": 1.64, "percentage": 54.66, "elapsed_time": "8:37:52", "remaining_time": "7:09:29"} +{"current_steps": 2970, "total_steps": 5424, "loss": 0.9107, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.1292353651270617e-05, "epoch": 1.64, "percentage": 54.76, "elapsed_time": "8:38:45", "remaining_time": "7:08:37"} +{"current_steps": 2975, "total_steps": 5424, "loss": 0.9311, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.1220769661332365e-05, "epoch": 1.65, "percentage": 54.85, "elapsed_time": "8:39:37", "remaining_time": "7:07:45"} +{"current_steps": 2980, "total_steps": 5424, "loss": 0.9459, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.1149217367320622e-05, "epoch": 1.65, "percentage": 54.94, "elapsed_time": "8:40:30", "remaining_time": "7:06:52"} +{"current_steps": 2985, "total_steps": 5424, "loss": 0.9439, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.107769736933541e-05, "epoch": 1.65, "percentage": 55.03, "elapsed_time": "8:41:22", "remaining_time": "7:06:00"} +{"current_steps": 2990, "total_steps": 5424, "loss": 0.9719, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.100621026720591e-05, "epoch": 1.65, "percentage": 55.13, "elapsed_time": "8:42:14", "remaining_time": "7:05:08"} +{"current_steps": 2995, "total_steps": 5424, "loss": 0.9569, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.093475666048539e-05, "epoch": 1.66, "percentage": 55.22, "elapsed_time": "8:43:07", "remaining_time": "7:04:15"} +{"current_steps": 3000, "total_steps": 5424, "loss": 0.9308, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.0863337148446222e-05, "epoch": 1.66, "percentage": 55.31, "elapsed_time": "8:43:59", "remaining_time": "7:03:23"} +{"current_steps": 3005, "total_steps": 5424, "loss": 0.9269, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.07919523300748e-05, "epoch": 1.66, "percentage": 55.4, "elapsed_time": "8:44:52", "remaining_time": "7:02:30"} +{"current_steps": 3010, "total_steps": 5424, "loss": 0.9358, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.0720602804066552e-05, "epoch": 1.66, "percentage": 55.49, "elapsed_time": "8:45:44", "remaining_time": "7:01:38"} +{"current_steps": 3015, "total_steps": 5424, "loss": 0.9291, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.0649289168820943e-05, "epoch": 1.67, "percentage": 55.59, "elapsed_time": "8:46:36", "remaining_time": "7:00:46"} +{"current_steps": 3020, "total_steps": 5424, "loss": 0.969, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.0578012022436386e-05, "epoch": 1.67, "percentage": 55.68, "elapsed_time": "8:47:29", "remaining_time": "6:59:53"} +{"current_steps": 3025, "total_steps": 5424, "loss": 0.9319, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.0506771962705304e-05, "epoch": 1.67, "percentage": 55.77, "elapsed_time": "8:48:21", "remaining_time": "6:59:01"} +{"current_steps": 3030, "total_steps": 5424, "loss": 0.9574, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.0435569587109042e-05, "epoch": 1.68, "percentage": 55.86, "elapsed_time": "8:49:14", "remaining_time": "6:58:08"} +{"current_steps": 3035, "total_steps": 5424, "loss": 0.9654, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.036440549281293e-05, "epoch": 1.68, "percentage": 55.96, "elapsed_time": "8:50:06", "remaining_time": "6:57:16"} +{"current_steps": 3040, "total_steps": 5424, "loss": 0.9742, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.0293280276661204e-05, "epoch": 1.68, "percentage": 56.05, "elapsed_time": "8:50:59", "remaining_time": "6:56:24"} +{"current_steps": 3045, "total_steps": 5424, "loss": 0.9014, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.0222194535172067e-05, "epoch": 1.68, "percentage": 56.14, "elapsed_time": "8:51:51", "remaining_time": "6:55:31"} +{"current_steps": 3050, "total_steps": 5424, "loss": 0.9246, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.0151148864532623e-05, "epoch": 1.69, "percentage": 56.23, "elapsed_time": "8:52:43", "remaining_time": "6:54:39"} +{"current_steps": 3055, "total_steps": 5424, "loss": 0.9522, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.0080143860593913e-05, "epoch": 1.69, "percentage": 56.32, "elapsed_time": "8:53:36", "remaining_time": "6:53:47"} +{"current_steps": 3060, "total_steps": 5424, "loss": 0.9567, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.0009180118865933e-05, "epoch": 1.69, "percentage": 56.42, "elapsed_time": "8:54:28", "remaining_time": "6:52:54"} +{"current_steps": 3065, "total_steps": 5424, "loss": 0.983, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.9938258234512588e-05, "epoch": 1.69, "percentage": 56.51, "elapsed_time": "8:55:21", "remaining_time": "6:52:02"} +{"current_steps": 3070, "total_steps": 5424, "loss": 0.8722, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.9867378802346764e-05, "epoch": 1.7, "percentage": 56.6, "elapsed_time": "8:56:13", "remaining_time": "6:51:10"} +{"current_steps": 3075, "total_steps": 5424, "loss": 0.9122, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.979654241682527e-05, "epoch": 1.7, "percentage": 56.69, "elapsed_time": "8:57:06", "remaining_time": "6:50:17"} +{"current_steps": 3080, "total_steps": 5424, "loss": 0.9362, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.972574967204391e-05, "epoch": 1.7, "percentage": 56.78, "elapsed_time": "8:57:59", "remaining_time": "6:49:25"} +{"current_steps": 3085, "total_steps": 5424, "loss": 0.8944, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.9655001161732478e-05, "epoch": 1.71, "percentage": 56.88, "elapsed_time": "8:58:51", "remaining_time": "6:48:33"} +{"current_steps": 3090, "total_steps": 5424, "loss": 0.9329, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.9584297479249774e-05, "epoch": 1.71, "percentage": 56.97, "elapsed_time": "8:59:44", "remaining_time": "6:47:41"} +{"current_steps": 3095, "total_steps": 5424, "loss": 0.9707, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.9513639217578636e-05, "epoch": 1.71, "percentage": 57.06, "elapsed_time": "9:00:37", "remaining_time": "6:46:49"} +{"current_steps": 3100, "total_steps": 5424, "loss": 0.9367, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.9443026969320955e-05, "epoch": 1.71, "percentage": 57.15, "elapsed_time": "9:01:29", "remaining_time": "6:45:56"} +{"current_steps": 3105, "total_steps": 5424, "loss": 0.896, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.937246132669272e-05, "epoch": 1.72, "percentage": 57.25, "elapsed_time": "9:02:22", "remaining_time": "6:45:04"} +{"current_steps": 3110, "total_steps": 5424, "loss": 0.97, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.9301942881519047e-05, "epoch": 1.72, "percentage": 57.34, "elapsed_time": "9:03:14", "remaining_time": "6:44:12"} +{"current_steps": 3115, "total_steps": 5424, "loss": 0.9638, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.9231472225229216e-05, "epoch": 1.72, "percentage": 57.43, "elapsed_time": "9:04:07", "remaining_time": "6:43:20"} +{"current_steps": 3120, "total_steps": 5424, "loss": 0.9561, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.9161049948851684e-05, "epoch": 1.73, "percentage": 57.52, "elapsed_time": "9:05:00", "remaining_time": "6:42:27"} +{"current_steps": 3125, "total_steps": 5424, "loss": 0.9734, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.9090676643009193e-05, "epoch": 1.73, "percentage": 57.61, "elapsed_time": "9:05:52", "remaining_time": "6:41:35"} +{"current_steps": 3130, "total_steps": 5424, "loss": 0.9651, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.902035289791373e-05, "epoch": 1.73, "percentage": 57.71, "elapsed_time": "9:06:45", "remaining_time": "6:40:43"} +{"current_steps": 3135, "total_steps": 5424, "loss": 0.9489, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.8950079303361658e-05, "epoch": 1.73, "percentage": 57.8, "elapsed_time": "9:07:37", "remaining_time": "6:39:50"} +{"current_steps": 3140, "total_steps": 5424, "loss": 0.9893, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.8879856448728723e-05, "epoch": 1.74, "percentage": 57.89, "elapsed_time": "9:08:30", "remaining_time": "6:38:58"} +{"current_steps": 3145, "total_steps": 5424, "loss": 0.9549, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.8809684922965097e-05, "epoch": 1.74, "percentage": 57.98, "elapsed_time": "9:09:22", "remaining_time": "6:38:06"} +{"current_steps": 3150, "total_steps": 5424, "loss": 0.9196, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.8739565314590507e-05, "epoch": 1.74, "percentage": 58.08, "elapsed_time": "9:10:15", "remaining_time": "6:37:13"} +{"current_steps": 3155, "total_steps": 5424, "loss": 0.9568, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.8669498211689216e-05, "epoch": 1.74, "percentage": 58.17, "elapsed_time": "9:11:07", "remaining_time": "6:36:21"} +{"current_steps": 3160, "total_steps": 5424, "loss": 0.904, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.859948420190517e-05, "epoch": 1.75, "percentage": 58.26, "elapsed_time": "9:12:00", "remaining_time": "6:35:29"} +{"current_steps": 3165, "total_steps": 5424, "loss": 0.9375, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.852952387243698e-05, "epoch": 1.75, "percentage": 58.35, "elapsed_time": "9:12:52", "remaining_time": "6:34:36"} +{"current_steps": 3170, "total_steps": 5424, "loss": 0.9048, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.8459617810033096e-05, "epoch": 1.75, "percentage": 58.44, "elapsed_time": "9:13:45", "remaining_time": "6:33:44"} +{"current_steps": 3175, "total_steps": 5424, "loss": 0.9514, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.83897666009868e-05, "epoch": 1.76, "percentage": 58.54, "elapsed_time": "9:14:37", "remaining_time": "6:32:52"} +{"current_steps": 3180, "total_steps": 5424, "loss": 0.8855, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.8319970831131363e-05, "epoch": 1.76, "percentage": 58.63, "elapsed_time": "9:15:30", "remaining_time": "6:31:59"} +{"current_steps": 3185, "total_steps": 5424, "loss": 0.9119, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.825023108583505e-05, "epoch": 1.76, "percentage": 58.72, "elapsed_time": "9:16:23", "remaining_time": "6:31:07"} +{"current_steps": 3190, "total_steps": 5424, "loss": 0.9733, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.818054794999628e-05, "epoch": 1.76, "percentage": 58.81, "elapsed_time": "9:17:15", "remaining_time": "6:30:15"} +{"current_steps": 3195, "total_steps": 5424, "loss": 0.9597, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.8110922008038705e-05, "epoch": 1.77, "percentage": 58.9, "elapsed_time": "9:18:08", "remaining_time": "6:29:22"} +{"current_steps": 3200, "total_steps": 5424, "loss": 0.8952, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.8041353843906275e-05, "epoch": 1.77, "percentage": 59.0, "elapsed_time": "9:19:00", "remaining_time": "6:28:30"} +{"current_steps": 3205, "total_steps": 5424, "loss": 0.8934, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.797184404105839e-05, "epoch": 1.77, "percentage": 59.09, "elapsed_time": "9:19:53", "remaining_time": "6:27:38"} +{"current_steps": 3210, "total_steps": 5424, "loss": 0.9299, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.7902393182464955e-05, "epoch": 1.77, "percentage": 59.18, "elapsed_time": "9:20:45", "remaining_time": "6:26:46"} +{"current_steps": 3215, "total_steps": 5424, "loss": 0.9247, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.7833001850601544e-05, "epoch": 1.78, "percentage": 59.27, "elapsed_time": "9:21:38", "remaining_time": "6:25:53"} +{"current_steps": 3220, "total_steps": 5424, "loss": 0.9672, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.7763670627444465e-05, "epoch": 1.78, "percentage": 59.37, "elapsed_time": "9:22:30", "remaining_time": "6:25:01"} +{"current_steps": 3225, "total_steps": 5424, "loss": 0.9451, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.7694400094465913e-05, "epoch": 1.78, "percentage": 59.46, "elapsed_time": "9:23:23", "remaining_time": "6:24:09"} +{"current_steps": 3230, "total_steps": 5424, "loss": 0.9294, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.7625190832629085e-05, "epoch": 1.79, "percentage": 59.55, "elapsed_time": "9:24:15", "remaining_time": "6:23:16"} +{"current_steps": 3235, "total_steps": 5424, "loss": 0.9145, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.7556043422383293e-05, "epoch": 1.79, "percentage": 59.64, "elapsed_time": "9:25:08", "remaining_time": "6:22:24"} +{"current_steps": 3240, "total_steps": 5424, "loss": 0.9508, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.7486958443659112e-05, "epoch": 1.79, "percentage": 59.73, "elapsed_time": "9:26:00", "remaining_time": "6:21:32"} +{"current_steps": 3245, "total_steps": 5424, "loss": 0.8725, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.7417936475863526e-05, "epoch": 1.79, "percentage": 59.83, "elapsed_time": "9:26:53", "remaining_time": "6:20:39"} +{"current_steps": 3250, "total_steps": 5424, "loss": 0.9195, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.7348978097875036e-05, "epoch": 1.8, "percentage": 59.92, "elapsed_time": "9:27:45", "remaining_time": "6:19:47"} +{"current_steps": 3255, "total_steps": 5424, "loss": 0.933, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.728008388803883e-05, "epoch": 1.8, "percentage": 60.01, "elapsed_time": "9:28:38", "remaining_time": "6:18:55"} +{"current_steps": 3260, "total_steps": 5424, "loss": 0.9747, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.7211254424161933e-05, "epoch": 1.8, "percentage": 60.1, "elapsed_time": "9:29:31", "remaining_time": "6:18:02"} +{"current_steps": 3265, "total_steps": 5424, "loss": 0.9168, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.7142490283508324e-05, "epoch": 1.81, "percentage": 60.2, "elapsed_time": "9:30:23", "remaining_time": "6:17:10"} +{"current_steps": 3270, "total_steps": 5424, "loss": 0.9844, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.707379204279418e-05, "epoch": 1.81, "percentage": 60.29, "elapsed_time": "9:31:16", "remaining_time": "6:16:18"} +{"current_steps": 3275, "total_steps": 5424, "loss": 0.9071, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.700516027818293e-05, "epoch": 1.81, "percentage": 60.38, "elapsed_time": "9:32:09", "remaining_time": "6:15:26"} +{"current_steps": 3280, "total_steps": 5424, "loss": 0.9311, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.6936595565280488e-05, "epoch": 1.81, "percentage": 60.47, "elapsed_time": "9:33:01", "remaining_time": "6:14:33"} +{"current_steps": 3285, "total_steps": 5424, "loss": 0.9419, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.686809847913045e-05, "epoch": 1.82, "percentage": 60.56, "elapsed_time": "9:33:54", "remaining_time": "6:13:41"} +{"current_steps": 3290, "total_steps": 5424, "loss": 0.9679, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.679966959420918e-05, "epoch": 1.82, "percentage": 60.66, "elapsed_time": "9:34:47", "remaining_time": "6:12:49"} +{"current_steps": 3295, "total_steps": 5424, "loss": 0.9601, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.67313094844211e-05, "epoch": 1.82, "percentage": 60.75, "elapsed_time": "9:35:39", "remaining_time": "6:11:57"} +{"current_steps": 3300, "total_steps": 5424, "loss": 0.9022, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.6663018723093774e-05, "epoch": 1.82, "percentage": 60.84, "elapsed_time": "9:36:32", "remaining_time": "6:11:05"} +{"current_steps": 3305, "total_steps": 5424, "loss": 0.8925, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.6594797882973196e-05, "epoch": 1.83, "percentage": 60.93, "elapsed_time": "9:37:25", "remaining_time": "6:10:12"} +{"current_steps": 3310, "total_steps": 5424, "loss": 0.9238, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.6526647536218894e-05, "epoch": 1.83, "percentage": 61.03, "elapsed_time": "9:38:18", "remaining_time": "6:09:20"} +{"current_steps": 3315, "total_steps": 5424, "loss": 0.9632, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.6458568254399225e-05, "epoch": 1.83, "percentage": 61.12, "elapsed_time": "9:39:10", "remaining_time": "6:08:28"} +{"current_steps": 3320, "total_steps": 5424, "loss": 0.9164, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.6390560608486496e-05, "epoch": 1.84, "percentage": 61.21, "elapsed_time": "9:40:03", "remaining_time": "6:07:36"} +{"current_steps": 3325, "total_steps": 5424, "loss": 0.9505, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.6322625168852217e-05, "epoch": 1.84, "percentage": 61.3, "elapsed_time": "9:40:56", "remaining_time": "6:06:43"} +{"current_steps": 3330, "total_steps": 5424, "loss": 0.9622, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.6254762505262338e-05, "epoch": 1.84, "percentage": 61.39, "elapsed_time": "9:41:48", "remaining_time": "6:05:51"} +{"current_steps": 3335, "total_steps": 5424, "loss": 0.9204, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.618697318687241e-05, "epoch": 1.84, "percentage": 61.49, "elapsed_time": "9:42:41", "remaining_time": "6:04:59"} +{"current_steps": 3340, "total_steps": 5424, "loss": 0.9504, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.6119257782222895e-05, "epoch": 1.85, "percentage": 61.58, "elapsed_time": "9:43:34", "remaining_time": "6:04:07"} +{"current_steps": 3345, "total_steps": 5424, "loss": 0.9384, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.6051616859234285e-05, "epoch": 1.85, "percentage": 61.67, "elapsed_time": "9:44:27", "remaining_time": "6:03:15"} +{"current_steps": 3350, "total_steps": 5424, "loss": 0.9374, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.5984050985202474e-05, "epoch": 1.85, "percentage": 61.76, "elapsed_time": "9:45:19", "remaining_time": "6:02:22"} +{"current_steps": 3355, "total_steps": 5424, "loss": 0.9937, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.591656072679387e-05, "epoch": 1.86, "percentage": 61.85, "elapsed_time": "9:46:12", "remaining_time": "6:01:30"} +{"current_steps": 3360, "total_steps": 5424, "loss": 0.9587, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.5849146650040737e-05, "epoch": 1.86, "percentage": 61.95, "elapsed_time": "9:47:05", "remaining_time": "6:00:38"} +{"current_steps": 3365, "total_steps": 5424, "loss": 0.9312, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.5781809320336412e-05, "epoch": 1.86, "percentage": 62.04, "elapsed_time": "9:47:58", "remaining_time": "5:59:46"} +{"current_steps": 3370, "total_steps": 5424, "loss": 0.9343, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.5714549302430536e-05, "epoch": 1.86, "percentage": 62.13, "elapsed_time": "9:48:50", "remaining_time": "5:58:53"} +{"current_steps": 3375, "total_steps": 5424, "loss": 0.9419, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.5647367160424393e-05, "epoch": 1.87, "percentage": 62.22, "elapsed_time": "9:49:43", "remaining_time": "5:58:01"} +{"current_steps": 3380, "total_steps": 5424, "loss": 0.9371, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.558026345776608e-05, "epoch": 1.87, "percentage": 62.32, "elapsed_time": "9:50:36", "remaining_time": "5:57:09"} +{"current_steps": 3385, "total_steps": 5424, "loss": 0.913, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.551323875724587e-05, "epoch": 1.87, "percentage": 62.41, "elapsed_time": "9:51:28", "remaining_time": "5:56:17"} +{"current_steps": 3390, "total_steps": 5424, "loss": 0.9195, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.5446293620991437e-05, "epoch": 1.87, "percentage": 62.5, "elapsed_time": "9:52:21", "remaining_time": "5:55:24"} +{"current_steps": 3395, "total_steps": 5424, "loss": 0.9164, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.5379428610463174e-05, "epoch": 1.88, "percentage": 62.59, "elapsed_time": "9:53:13", "remaining_time": "5:54:32"} +{"current_steps": 3400, "total_steps": 5424, "loss": 0.9256, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.531264428644945e-05, "epoch": 1.88, "percentage": 62.68, "elapsed_time": "9:54:06", "remaining_time": "5:53:40"} +{"current_steps": 3405, "total_steps": 5424, "loss": 0.9658, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.5245941209061953e-05, "epoch": 1.88, "percentage": 62.78, "elapsed_time": "9:54:59", "remaining_time": "5:52:47"} +{"current_steps": 3410, "total_steps": 5424, "loss": 0.9397, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.517931993773094e-05, "epoch": 1.89, "percentage": 62.87, "elapsed_time": "9:55:51", "remaining_time": "5:51:55"} +{"current_steps": 3415, "total_steps": 5424, "loss": 0.915, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.5112781031200569e-05, "epoch": 1.89, "percentage": 62.96, "elapsed_time": "9:56:44", "remaining_time": "5:51:03"} +{"current_steps": 3420, "total_steps": 5424, "loss": 0.9116, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.5046325047524251e-05, "epoch": 1.89, "percentage": 63.05, "elapsed_time": "9:57:37", "remaining_time": "5:50:10"} +{"current_steps": 3425, "total_steps": 5424, "loss": 0.9192, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.4979952544059888e-05, "epoch": 1.89, "percentage": 63.15, "elapsed_time": "9:58:29", "remaining_time": "5:49:18"} +{"current_steps": 3430, "total_steps": 5424, "loss": 0.924, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.4913664077465289e-05, "epoch": 1.9, "percentage": 63.24, "elapsed_time": "9:59:22", "remaining_time": "5:48:26"} +{"current_steps": 3435, "total_steps": 5424, "loss": 0.9579, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.4847460203693408e-05, "epoch": 1.9, "percentage": 63.33, "elapsed_time": "10:00:14", "remaining_time": "5:47:33"} +{"current_steps": 3440, "total_steps": 5424, "loss": 0.9816, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.4781341477987776e-05, "epoch": 1.9, "percentage": 63.42, "elapsed_time": "10:01:07", "remaining_time": "5:46:41"} +{"current_steps": 3445, "total_steps": 5424, "loss": 0.9412, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.4715308454877758e-05, "epoch": 1.9, "percentage": 63.51, "elapsed_time": "10:01:59", "remaining_time": "5:45:49"} +{"current_steps": 3450, "total_steps": 5424, "loss": 0.9413, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.4649361688173979e-05, "epoch": 1.91, "percentage": 63.61, "elapsed_time": "10:02:52", "remaining_time": "5:44:56"} +{"current_steps": 3455, "total_steps": 5424, "loss": 0.9425, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.458350173096361e-05, "epoch": 1.91, "percentage": 63.7, "elapsed_time": "10:03:45", "remaining_time": "5:44:04"} +{"current_steps": 3460, "total_steps": 5424, "loss": 0.9132, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.4517729135605795e-05, "epoch": 1.91, "percentage": 63.79, "elapsed_time": "10:04:37", "remaining_time": "5:43:12"} +{"current_steps": 3465, "total_steps": 5424, "loss": 0.9455, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.4452044453726942e-05, "epoch": 1.92, "percentage": 63.88, "elapsed_time": "10:05:30", "remaining_time": "5:42:20"} +{"current_steps": 3470, "total_steps": 5424, "loss": 0.9543, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.4386448236216174e-05, "epoch": 1.92, "percentage": 63.97, "elapsed_time": "10:06:22", "remaining_time": "5:41:27"} +{"current_steps": 3475, "total_steps": 5424, "loss": 0.9118, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.4320941033220667e-05, "epoch": 1.92, "percentage": 64.07, "elapsed_time": "10:07:15", "remaining_time": "5:40:35"} +{"current_steps": 3480, "total_steps": 5424, "loss": 0.9425, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.4255523394141041e-05, "epoch": 1.92, "percentage": 64.16, "elapsed_time": "10:08:08", "remaining_time": "5:39:43"} +{"current_steps": 3485, "total_steps": 5424, "loss": 0.8958, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.4190195867626749e-05, "epoch": 1.93, "percentage": 64.25, "elapsed_time": "10:09:00", "remaining_time": "5:38:50"} +{"current_steps": 3490, "total_steps": 5424, "loss": 0.944, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.4124959001571497e-05, "epoch": 1.93, "percentage": 64.34, "elapsed_time": "10:09:53", "remaining_time": "5:37:58"} +{"current_steps": 3495, "total_steps": 5424, "loss": 0.9611, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.4059813343108616e-05, "epoch": 1.93, "percentage": 64.44, "elapsed_time": "10:10:45", "remaining_time": "5:37:05"} +{"current_steps": 3500, "total_steps": 5424, "loss": 0.9448, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.3994759438606501e-05, "epoch": 1.94, "percentage": 64.53, "elapsed_time": "10:11:38", "remaining_time": "5:36:13"} +{"current_steps": 3505, "total_steps": 5424, "loss": 0.9659, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.3929797833664013e-05, "epoch": 1.94, "percentage": 64.62, "elapsed_time": "10:12:31", "remaining_time": "5:35:21"} +{"current_steps": 3510, "total_steps": 5424, "loss": 0.9178, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.3864929073105922e-05, "epoch": 1.94, "percentage": 64.71, "elapsed_time": "10:13:23", "remaining_time": "5:34:29"} +{"current_steps": 3515, "total_steps": 5424, "loss": 0.8965, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.3800153700978282e-05, "epoch": 1.94, "percentage": 64.8, "elapsed_time": "10:14:16", "remaining_time": "5:33:36"} +{"current_steps": 3520, "total_steps": 5424, "loss": 0.9198, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.373547226054398e-05, "epoch": 1.95, "percentage": 64.9, "elapsed_time": "10:15:09", "remaining_time": "5:32:44"} +{"current_steps": 3525, "total_steps": 5424, "loss": 0.9398, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.367088529427803e-05, "epoch": 1.95, "percentage": 64.99, "elapsed_time": "10:16:01", "remaining_time": "5:31:52"} +{"current_steps": 3530, "total_steps": 5424, "loss": 0.9423, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.3606393343863182e-05, "epoch": 1.95, "percentage": 65.08, "elapsed_time": "10:16:54", "remaining_time": "5:30:59"} +{"current_steps": 3535, "total_steps": 5424, "loss": 0.9592, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.3541996950185227e-05, "epoch": 1.95, "percentage": 65.17, "elapsed_time": "10:17:46", "remaining_time": "5:30:07"} +{"current_steps": 3540, "total_steps": 5424, "loss": 0.9489, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.3477696653328598e-05, "epoch": 1.96, "percentage": 65.27, "elapsed_time": "10:18:39", "remaining_time": "5:29:15"} +{"current_steps": 3545, "total_steps": 5424, "loss": 0.963, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.3413492992571713e-05, "epoch": 1.96, "percentage": 65.36, "elapsed_time": "10:19:31", "remaining_time": "5:28:22"} +{"current_steps": 3550, "total_steps": 5424, "loss": 0.9449, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.3349386506382586e-05, "epoch": 1.96, "percentage": 65.45, "elapsed_time": "10:20:24", "remaining_time": "5:27:30"} +{"current_steps": 3555, "total_steps": 5424, "loss": 0.9043, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.3285377732414172e-05, "epoch": 1.97, "percentage": 65.54, "elapsed_time": "10:21:16", "remaining_time": "5:26:37"} +{"current_steps": 3560, "total_steps": 5424, "loss": 0.9362, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.3221467207499972e-05, "epoch": 1.97, "percentage": 65.63, "elapsed_time": "10:22:09", "remaining_time": "5:25:45"} +{"current_steps": 3565, "total_steps": 5424, "loss": 0.945, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.3157655467649463e-05, "epoch": 1.97, "percentage": 65.73, "elapsed_time": "10:23:01", "remaining_time": "5:24:53"} +{"current_steps": 3570, "total_steps": 5424, "loss": 0.8693, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.3093943048043634e-05, "epoch": 1.97, "percentage": 65.82, "elapsed_time": "10:23:54", "remaining_time": "5:24:00"} +{"current_steps": 3575, "total_steps": 5424, "loss": 0.9659, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.3030330483030479e-05, "epoch": 1.98, "percentage": 65.91, "elapsed_time": "10:24:46", "remaining_time": "5:23:08"} +{"current_steps": 3580, "total_steps": 5424, "loss": 0.9036, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.2966818306120535e-05, "epoch": 1.98, "percentage": 66.0, "elapsed_time": "10:25:39", "remaining_time": "5:22:15"} +{"current_steps": 3585, "total_steps": 5424, "loss": 0.9282, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.2903407049982386e-05, "epoch": 1.98, "percentage": 66.1, "elapsed_time": "10:26:31", "remaining_time": "5:21:23"} +{"current_steps": 3590, "total_steps": 5424, "loss": 0.8747, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.2840097246438215e-05, "epoch": 1.99, "percentage": 66.19, "elapsed_time": "10:27:24", "remaining_time": "5:20:31"} +{"current_steps": 3595, "total_steps": 5424, "loss": 0.936, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.277688942645934e-05, "epoch": 1.99, "percentage": 66.28, "elapsed_time": "10:28:16", "remaining_time": "5:19:38"} +{"current_steps": 3600, "total_steps": 5424, "loss": 0.8945, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.2713784120161725e-05, "epoch": 1.99, "percentage": 66.37, "elapsed_time": "10:29:09", "remaining_time": "5:18:46"} +{"current_steps": 3605, "total_steps": 5424, "loss": 0.9273, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.2650781856801598e-05, "epoch": 1.99, "percentage": 66.46, "elapsed_time": "10:30:01", "remaining_time": "5:17:53"} +{"current_steps": 3610, "total_steps": 5424, "loss": 0.9632, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.258788316477097e-05, "epoch": 2.0, "percentage": 66.56, "elapsed_time": "10:30:54", "remaining_time": "5:17:01"} +{"current_steps": 3615, "total_steps": 5424, "loss": 0.8641, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.2525088571593202e-05, "epoch": 2.0, "percentage": 66.65, "elapsed_time": "10:31:46", "remaining_time": "5:16:09"} +{"current_steps": 3620, "total_steps": 5424, "loss": 0.9314, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.2462398603918607e-05, "epoch": 2.0, "percentage": 66.74, "elapsed_time": "10:32:38", "remaining_time": "5:15:16"} +{"current_steps": 3625, "total_steps": 5424, "loss": 0.9169, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.2399813787520006e-05, "epoch": 2.0, "percentage": 66.83, "elapsed_time": "10:33:31", "remaining_time": "5:14:24"} +{"current_steps": 3630, "total_steps": 5424, "loss": 0.9089, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.2337334647288334e-05, "epoch": 2.01, "percentage": 66.92, "elapsed_time": "10:34:23", "remaining_time": "5:13:31"} +{"current_steps": 3635, "total_steps": 5424, "loss": 0.9039, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.2274961707228228e-05, "epoch": 2.01, "percentage": 67.02, "elapsed_time": "10:35:16", "remaining_time": "5:12:39"} +{"current_steps": 3640, "total_steps": 5424, "loss": 0.9378, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.2212695490453646e-05, "epoch": 2.01, "percentage": 67.11, "elapsed_time": "10:36:08", "remaining_time": "5:11:46"} +{"current_steps": 3645, "total_steps": 5424, "loss": 0.9297, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.2150536519183475e-05, "epoch": 2.02, "percentage": 67.2, "elapsed_time": "10:37:01", "remaining_time": "5:10:54"} +{"current_steps": 3650, "total_steps": 5424, "loss": 0.9488, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.2088485314737108e-05, "epoch": 2.02, "percentage": 67.29, "elapsed_time": "10:37:54", "remaining_time": "5:10:02"} +{"current_steps": 3655, "total_steps": 5424, "loss": 0.9625, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.2026542397530186e-05, "epoch": 2.02, "percentage": 67.39, "elapsed_time": "10:38:46", "remaining_time": "5:09:09"} +{"current_steps": 3660, "total_steps": 5424, "loss": 0.8874, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.1964708287070073e-05, "epoch": 2.02, "percentage": 67.48, "elapsed_time": "10:39:39", "remaining_time": "5:08:17"} +{"current_steps": 3665, "total_steps": 5424, "loss": 0.9224, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.1902983501951666e-05, "epoch": 2.03, "percentage": 67.57, "elapsed_time": "10:40:31", "remaining_time": "5:07:25"} +{"current_steps": 3670, "total_steps": 5424, "loss": 0.9442, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.1841368559852892e-05, "epoch": 2.03, "percentage": 67.66, "elapsed_time": "10:41:24", "remaining_time": "5:06:32"} +{"current_steps": 3675, "total_steps": 5424, "loss": 0.8688, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.17798639775305e-05, "epoch": 2.03, "percentage": 67.75, "elapsed_time": "10:42:16", "remaining_time": "5:05:40"} +{"current_steps": 3680, "total_steps": 5424, "loss": 0.8912, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.1718470270815608e-05, "epoch": 2.03, "percentage": 67.85, "elapsed_time": "10:43:09", "remaining_time": "5:04:48"} +{"current_steps": 3685, "total_steps": 5424, "loss": 0.9131, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.1657187954609496e-05, "epoch": 2.04, "percentage": 67.94, "elapsed_time": "10:44:02", "remaining_time": "5:03:55"} +{"current_steps": 3690, "total_steps": 5424, "loss": 0.9827, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.1596017542879168e-05, "epoch": 2.04, "percentage": 68.03, "elapsed_time": "10:44:54", "remaining_time": "5:03:03"} +{"current_steps": 3695, "total_steps": 5424, "loss": 0.8588, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.1534959548653132e-05, "epoch": 2.04, "percentage": 68.12, "elapsed_time": "10:45:47", "remaining_time": "5:02:10"} +{"current_steps": 3700, "total_steps": 5424, "loss": 0.9359, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.147401448401706e-05, "epoch": 2.05, "percentage": 68.22, "elapsed_time": "10:46:39", "remaining_time": "5:01:18"} +{"current_steps": 3705, "total_steps": 5424, "loss": 0.9375, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.1413182860109491e-05, "epoch": 2.05, "percentage": 68.31, "elapsed_time": "10:47:32", "remaining_time": "5:00:26"} +{"current_steps": 3710, "total_steps": 5424, "loss": 0.9242, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.1352465187117562e-05, "epoch": 2.05, "percentage": 68.4, "elapsed_time": "10:48:24", "remaining_time": "4:59:33"} +{"current_steps": 3715, "total_steps": 5424, "loss": 0.9132, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.1291861974272703e-05, "epoch": 2.05, "percentage": 68.49, "elapsed_time": "10:49:17", "remaining_time": "4:58:41"} +{"current_steps": 3720, "total_steps": 5424, "loss": 0.9142, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.1231373729846393e-05, "epoch": 2.06, "percentage": 68.58, "elapsed_time": "10:50:10", "remaining_time": "4:57:49"} +{"current_steps": 3725, "total_steps": 5424, "loss": 0.9132, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.1171000961145883e-05, "epoch": 2.06, "percentage": 68.68, "elapsed_time": "10:51:02", "remaining_time": "4:56:56"} +{"current_steps": 3730, "total_steps": 5424, "loss": 0.9603, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.1110744174509952e-05, "epoch": 2.06, "percentage": 68.77, "elapsed_time": "10:51:55", "remaining_time": "4:56:04"} +{"current_steps": 3735, "total_steps": 5424, "loss": 0.9442, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.1050603875304622e-05, "epoch": 2.07, "percentage": 68.86, "elapsed_time": "10:52:47", "remaining_time": "4:55:12"} +{"current_steps": 3740, "total_steps": 5424, "loss": 0.9392, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.0990580567918979e-05, "epoch": 2.07, "percentage": 68.95, "elapsed_time": "10:53:40", "remaining_time": "4:54:19"} +{"current_steps": 3745, "total_steps": 5424, "loss": 0.9203, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.0930674755760908e-05, "epoch": 2.07, "percentage": 69.04, "elapsed_time": "10:54:32", "remaining_time": "4:53:27"} +{"current_steps": 3750, "total_steps": 5424, "loss": 0.9468, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.0870886941252872e-05, "epoch": 2.07, "percentage": 69.14, "elapsed_time": "10:55:25", "remaining_time": "4:52:34"} +{"current_steps": 3755, "total_steps": 5424, "loss": 0.9605, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.0811217625827705e-05, "epoch": 2.08, "percentage": 69.23, "elapsed_time": "10:56:18", "remaining_time": "4:51:42"} +{"current_steps": 3760, "total_steps": 5424, "loss": 0.9166, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.0751667309924399e-05, "epoch": 2.08, "percentage": 69.32, "elapsed_time": "10:57:10", "remaining_time": "4:50:50"} +{"current_steps": 3765, "total_steps": 5424, "loss": 0.939, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.0692236492983918e-05, "epoch": 2.08, "percentage": 69.41, "elapsed_time": "10:58:03", "remaining_time": "4:49:57"} +{"current_steps": 3770, "total_steps": 5424, "loss": 0.9409, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.0632925673445001e-05, "epoch": 2.08, "percentage": 69.51, "elapsed_time": "10:58:56", "remaining_time": "4:49:05"} +{"current_steps": 3775, "total_steps": 5424, "loss": 0.9523, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.0573735348739987e-05, "epoch": 2.09, "percentage": 69.6, "elapsed_time": "10:59:49", "remaining_time": "4:48:13"} +{"current_steps": 3780, "total_steps": 5424, "loss": 0.8867, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.0514666015290645e-05, "epoch": 2.09, "percentage": 69.69, "elapsed_time": "11:00:41", "remaining_time": "4:47:21"} +{"current_steps": 3785, "total_steps": 5424, "loss": 0.9816, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.0455718168503967e-05, "epoch": 2.09, "percentage": 69.78, "elapsed_time": "11:01:34", "remaining_time": "4:46:28"} +{"current_steps": 3790, "total_steps": 5424, "loss": 0.9465, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.0396892302768127e-05, "epoch": 2.1, "percentage": 69.87, "elapsed_time": "11:02:27", "remaining_time": "4:45:36"} +{"current_steps": 3795, "total_steps": 5424, "loss": 0.9352, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.033818891144817e-05, "epoch": 2.1, "percentage": 69.97, "elapsed_time": "11:03:19", "remaining_time": "4:44:44"} +{"current_steps": 3800, "total_steps": 5424, "loss": 0.8877, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.0279608486882054e-05, "epoch": 2.1, "percentage": 70.06, "elapsed_time": "11:04:12", "remaining_time": "4:43:51"} +{"current_steps": 3805, "total_steps": 5424, "loss": 0.9098, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.0221151520376343e-05, "epoch": 2.1, "percentage": 70.15, "elapsed_time": "11:05:05", "remaining_time": "4:42:59"} +{"current_steps": 3810, "total_steps": 5424, "loss": 0.8687, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.0162818502202251e-05, "epoch": 2.11, "percentage": 70.24, "elapsed_time": "11:05:57", "remaining_time": "4:42:06"} +{"current_steps": 3815, "total_steps": 5424, "loss": 0.9256, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.0104609921591387e-05, "epoch": 2.11, "percentage": 70.34, "elapsed_time": "11:06:50", "remaining_time": "4:41:14"} +{"current_steps": 3820, "total_steps": 5424, "loss": 0.92, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.0046526266731782e-05, "epoch": 2.11, "percentage": 70.43, "elapsed_time": "11:07:42", "remaining_time": "4:40:22"} +{"current_steps": 3825, "total_steps": 5424, "loss": 0.8813, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.988568024763673e-06, "epoch": 2.12, "percentage": 70.52, "elapsed_time": "11:08:35", "remaining_time": "4:39:29"} +{"current_steps": 3830, "total_steps": 5424, "loss": 0.891, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.930735681775505e-06, "epoch": 2.12, "percentage": 70.61, "elapsed_time": "11:09:27", "remaining_time": "4:38:37"} +{"current_steps": 3835, "total_steps": 5424, "loss": 0.9479, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.87302972279982e-06, "epoch": 2.12, "percentage": 70.7, "elapsed_time": "11:10:20", "remaining_time": "4:37:44"} +{"current_steps": 3840, "total_steps": 5424, "loss": 0.8963, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.815450631809191e-06, "epoch": 2.12, "percentage": 70.8, "elapsed_time": "11:11:12", "remaining_time": "4:36:52"} +{"current_steps": 3845, "total_steps": 5424, "loss": 0.9178, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.757998891712172e-06, "epoch": 2.13, "percentage": 70.89, "elapsed_time": "11:12:05", "remaining_time": "4:36:00"} +{"current_steps": 3850, "total_steps": 5424, "loss": 0.9072, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.700674984349228e-06, "epoch": 2.13, "percentage": 70.98, "elapsed_time": "11:12:58", "remaining_time": "4:35:07"} +{"current_steps": 3855, "total_steps": 5424, "loss": 0.8879, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.643479390488717e-06, "epoch": 2.13, "percentage": 71.07, "elapsed_time": "11:13:50", "remaining_time": "4:34:15"} +{"current_steps": 3860, "total_steps": 5424, "loss": 0.9484, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.586412589822846e-06, "epoch": 2.13, "percentage": 71.17, "elapsed_time": "11:14:43", "remaining_time": "4:33:23"} +{"current_steps": 3865, "total_steps": 5424, "loss": 0.9173, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.529475060963649e-06, "epoch": 2.14, "percentage": 71.26, "elapsed_time": "11:15:35", "remaining_time": "4:32:30"} +{"current_steps": 3870, "total_steps": 5424, "loss": 0.9318, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.472667281438982e-06, "epoch": 2.14, "percentage": 71.35, "elapsed_time": "11:16:28", "remaining_time": "4:31:38"} +{"current_steps": 3875, "total_steps": 5424, "loss": 0.9242, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.415989727688484e-06, "epoch": 2.14, "percentage": 71.44, "elapsed_time": "11:17:20", "remaining_time": "4:30:45"} +{"current_steps": 3880, "total_steps": 5424, "loss": 0.9314, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.359442875059631e-06, "epoch": 2.15, "percentage": 71.53, "elapsed_time": "11:18:13", "remaining_time": "4:29:53"} +{"current_steps": 3885, "total_steps": 5424, "loss": 0.9227, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.303027197803726e-06, "epoch": 2.15, "percentage": 71.63, "elapsed_time": "11:19:05", "remaining_time": "4:29:01"} +{"current_steps": 3890, "total_steps": 5424, "loss": 0.9253, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.246743169071906e-06, "epoch": 2.15, "percentage": 71.72, "elapsed_time": "11:19:58", "remaining_time": "4:28:08"} +{"current_steps": 3895, "total_steps": 5424, "loss": 0.9457, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.190591260911201e-06, "epoch": 2.15, "percentage": 71.81, "elapsed_time": "11:20:51", "remaining_time": "4:27:16"} +{"current_steps": 3900, "total_steps": 5424, "loss": 0.9214, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.134571944260554e-06, "epoch": 2.16, "percentage": 71.9, "elapsed_time": "11:21:43", "remaining_time": "4:26:23"} +{"current_steps": 3905, "total_steps": 5424, "loss": 0.9534, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.078685688946884e-06, "epoch": 2.16, "percentage": 71.99, "elapsed_time": "11:22:36", "remaining_time": "4:25:31"} +{"current_steps": 3910, "total_steps": 5424, "loss": 0.9236, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.022932963681141e-06, "epoch": 2.16, "percentage": 72.09, "elapsed_time": "11:23:28", "remaining_time": "4:24:39"} +{"current_steps": 3915, "total_steps": 5424, "loss": 0.8905, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.967314236054384e-06, "epoch": 2.16, "percentage": 72.18, "elapsed_time": "11:24:21", "remaining_time": "4:23:46"} +{"current_steps": 3920, "total_steps": 5424, "loss": 0.9319, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.911829972533817e-06, "epoch": 2.17, "percentage": 72.27, "elapsed_time": "11:25:13", "remaining_time": "4:22:54"} +{"current_steps": 3925, "total_steps": 5424, "loss": 0.8989, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.856480638458966e-06, "epoch": 2.17, "percentage": 72.36, "elapsed_time": "11:26:06", "remaining_time": "4:22:01"} +{"current_steps": 3930, "total_steps": 5424, "loss": 0.9234, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.80126669803766e-06, "epoch": 2.17, "percentage": 72.46, "elapsed_time": "11:26:58", "remaining_time": "4:21:09"} +{"current_steps": 3935, "total_steps": 5424, "loss": 0.9619, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.746188614342263e-06, "epoch": 2.18, "percentage": 72.55, "elapsed_time": "11:27:51", "remaining_time": "4:20:17"} +{"current_steps": 3940, "total_steps": 5424, "loss": 0.9537, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.691246849305653e-06, "epoch": 2.18, "percentage": 72.64, "elapsed_time": "11:28:44", "remaining_time": "4:19:24"} +{"current_steps": 3945, "total_steps": 5424, "loss": 0.9809, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.636441863717499e-06, "epoch": 2.18, "percentage": 72.73, "elapsed_time": "11:29:36", "remaining_time": "4:18:32"} +{"current_steps": 3950, "total_steps": 5424, "loss": 0.9219, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.581774117220238e-06, "epoch": 2.18, "percentage": 72.82, "elapsed_time": "11:30:29", "remaining_time": "4:17:39"} +{"current_steps": 3955, "total_steps": 5424, "loss": 0.9502, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.52724406830538e-06, "epoch": 2.19, "percentage": 72.92, "elapsed_time": "11:31:21", "remaining_time": "4:16:47"} +{"current_steps": 3960, "total_steps": 5424, "loss": 0.9277, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.472852174309514e-06, "epoch": 2.19, "percentage": 73.01, "elapsed_time": "11:32:14", "remaining_time": "4:15:55"} +{"current_steps": 3965, "total_steps": 5424, "loss": 0.8978, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.418598891410581e-06, "epoch": 2.19, "percentage": 73.1, "elapsed_time": "11:33:07", "remaining_time": "4:15:02"} +{"current_steps": 3970, "total_steps": 5424, "loss": 0.9026, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.364484674624e-06, "epoch": 2.2, "percentage": 73.19, "elapsed_time": "11:33:59", "remaining_time": "4:14:10"} +{"current_steps": 3975, "total_steps": 5424, "loss": 0.9658, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.31050997779885e-06, "epoch": 2.2, "percentage": 73.29, "elapsed_time": "11:34:52", "remaining_time": "4:13:17"} +{"current_steps": 3980, "total_steps": 5424, "loss": 0.9288, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.25667525361409e-06, "epoch": 2.2, "percentage": 73.38, "elapsed_time": "11:35:44", "remaining_time": "4:12:25"} +{"current_steps": 3985, "total_steps": 5424, "loss": 0.9009, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.202980953574735e-06, "epoch": 2.2, "percentage": 73.47, "elapsed_time": "11:36:37", "remaining_time": "4:11:33"} +{"current_steps": 3990, "total_steps": 5424, "loss": 0.934, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.14942752800808e-06, "epoch": 2.21, "percentage": 73.56, "elapsed_time": "11:37:30", "remaining_time": "4:10:40"} +{"current_steps": 3995, "total_steps": 5424, "loss": 0.9261, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.09601542605993e-06, "epoch": 2.21, "percentage": 73.65, "elapsed_time": "11:38:22", "remaining_time": "4:09:48"} +{"current_steps": 4000, "total_steps": 5424, "loss": 0.9086, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.042745095690826e-06, "epoch": 2.21, "percentage": 73.75, "elapsed_time": "11:39:15", "remaining_time": "4:08:56"} +{"current_steps": 4005, "total_steps": 5424, "loss": 0.9397, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 7.989616983672288e-06, "epoch": 2.21, "percentage": 73.84, "elapsed_time": "11:40:08", "remaining_time": "4:08:03"} +{"current_steps": 4010, "total_steps": 5424, "loss": 0.8661, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 7.936631535583055e-06, "epoch": 2.22, "percentage": 73.93, "elapsed_time": "11:41:00", "remaining_time": "4:07:11"} +{"current_steps": 4015, "total_steps": 5424, "loss": 0.9433, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 7.88378919580538e-06, "epoch": 2.22, "percentage": 74.02, "elapsed_time": "11:41:53", "remaining_time": "4:06:18"} +{"current_steps": 4020, "total_steps": 5424, "loss": 0.9473, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 7.83109040752128e-06, "epoch": 2.22, "percentage": 74.12, "elapsed_time": "11:42:45", "remaining_time": "4:05:26"} +{"current_steps": 4025, "total_steps": 5424, "loss": 0.9177, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 7.778535612708824e-06, "epoch": 2.23, "percentage": 74.21, "elapsed_time": "11:43:38", "remaining_time": "4:04:34"} +{"current_steps": 4030, "total_steps": 5424, "loss": 0.8932, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 7.726125252138417e-06, "epoch": 2.23, "percentage": 74.3, "elapsed_time": "11:44:30", "remaining_time": "4:03:41"} +{"current_steps": 4035, "total_steps": 5424, "loss": 0.9187, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 7.673859765369126e-06, "epoch": 2.23, "percentage": 74.39, "elapsed_time": "11:45:23", "remaining_time": "4:02:49"} +{"current_steps": 4040, "total_steps": 5424, "loss": 0.9009, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 7.62173959074497e-06, "epoch": 2.23, "percentage": 74.48, "elapsed_time": "11:46:16", "remaining_time": "4:01:56"} +{"current_steps": 4045, "total_steps": 5424, "loss": 0.9348, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 7.569765165391257e-06, "epoch": 2.24, "percentage": 74.58, "elapsed_time": "11:47:08", "remaining_time": "4:01:04"} +{"current_steps": 4050, "total_steps": 5424, "loss": 0.8827, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 7.517936925210917e-06, "epoch": 2.24, "percentage": 74.67, "elapsed_time": "11:48:01", "remaining_time": "4:00:12"} +{"current_steps": 4055, "total_steps": 5424, "loss": 0.9668, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 7.466255304880834e-06, "epoch": 2.24, "percentage": 74.76, "elapsed_time": "11:48:53", "remaining_time": "3:59:19"} +{"current_steps": 4060, "total_steps": 5424, "loss": 0.934, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 7.4147207378482295e-06, "epoch": 2.24, "percentage": 74.85, "elapsed_time": "11:49:46", "remaining_time": "3:58:27"} +{"current_steps": 4065, "total_steps": 5424, "loss": 0.9251, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 7.36333365632697e-06, "epoch": 2.25, "percentage": 74.94, "elapsed_time": "11:50:38", "remaining_time": "3:57:34"} +{"current_steps": 4070, "total_steps": 5424, "loss": 0.9225, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 7.312094491294033e-06, "epoch": 2.25, "percentage": 75.04, "elapsed_time": "11:51:31", "remaining_time": "3:56:42"} +{"current_steps": 4075, "total_steps": 5424, "loss": 0.9198, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 7.261003672485783e-06, "epoch": 2.25, "percentage": 75.13, "elapsed_time": "11:52:24", "remaining_time": "3:55:50"} +{"current_steps": 4080, "total_steps": 5424, "loss": 0.9023, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 7.210061628394477e-06, "epoch": 2.26, "percentage": 75.22, "elapsed_time": "11:53:16", "remaining_time": "3:54:57"} +{"current_steps": 4085, "total_steps": 5424, "loss": 0.9378, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 7.159268786264564e-06, "epoch": 2.26, "percentage": 75.31, "elapsed_time": "11:54:09", "remaining_time": "3:54:05"} +{"current_steps": 4090, "total_steps": 5424, "loss": 0.9148, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 7.108625572089209e-06, "epoch": 2.26, "percentage": 75.41, "elapsed_time": "11:55:01", "remaining_time": "3:53:12"} +{"current_steps": 4095, "total_steps": 5424, "loss": 0.9308, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 7.05813241060661e-06, "epoch": 2.26, "percentage": 75.5, "elapsed_time": "11:55:54", "remaining_time": "3:52:20"} +{"current_steps": 4100, "total_steps": 5424, "loss": 0.8979, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 7.007789725296557e-06, "epoch": 2.27, "percentage": 75.59, "elapsed_time": "11:56:46", "remaining_time": "3:51:28"} +{"current_steps": 4105, "total_steps": 5424, "loss": 0.9329, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 6.957597938376748e-06, "epoch": 2.27, "percentage": 75.68, "elapsed_time": "11:57:39", "remaining_time": "3:50:35"} +{"current_steps": 4110, "total_steps": 5424, "loss": 0.8849, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 6.907557470799358e-06, "epoch": 2.27, "percentage": 75.77, "elapsed_time": "11:58:32", "remaining_time": "3:49:43"} +{"current_steps": 4115, "total_steps": 5424, "loss": 0.8879, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 6.857668742247458e-06, "epoch": 2.28, "percentage": 75.87, "elapsed_time": "11:59:24", "remaining_time": "3:48:50"} +{"current_steps": 4120, "total_steps": 5424, "loss": 0.9132, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 6.807932171131498e-06, "epoch": 2.28, "percentage": 75.96, "elapsed_time": "12:00:17", "remaining_time": "3:47:58"} +{"current_steps": 4125, "total_steps": 5424, "loss": 0.9273, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 6.758348174585804e-06, "epoch": 2.28, "percentage": 76.05, "elapsed_time": "12:01:09", "remaining_time": "3:47:06"} +{"current_steps": 4130, "total_steps": 5424, "loss": 0.9115, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 6.7089171684650785e-06, "epoch": 2.28, "percentage": 76.14, "elapsed_time": "12:02:02", "remaining_time": "3:46:13"} +{"current_steps": 4135, "total_steps": 5424, "loss": 0.9431, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 6.659639567340914e-06, "epoch": 2.29, "percentage": 76.24, "elapsed_time": "12:02:55", "remaining_time": "3:45:21"} +{"current_steps": 4140, "total_steps": 5424, "loss": 0.9515, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 6.610515784498314e-06, "epoch": 2.29, "percentage": 76.33, "elapsed_time": "12:03:47", "remaining_time": "3:44:28"} +{"current_steps": 4145, "total_steps": 5424, "loss": 0.9069, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 6.561546231932228e-06, "epoch": 2.29, "percentage": 76.42, "elapsed_time": "12:04:40", "remaining_time": "3:43:36"} +{"current_steps": 4150, "total_steps": 5424, "loss": 0.9178, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 6.512731320344101e-06, "epoch": 2.29, "percentage": 76.51, "elapsed_time": "12:05:32", "remaining_time": "3:42:44"} +{"current_steps": 4155, "total_steps": 5424, "loss": 0.9156, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 6.464071459138405e-06, "epoch": 2.3, "percentage": 76.6, "elapsed_time": "12:06:25", "remaining_time": "3:41:51"} +{"current_steps": 4160, "total_steps": 5424, "loss": 0.9637, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 6.415567056419244e-06, "epoch": 2.3, "percentage": 76.7, "elapsed_time": "12:07:17", "remaining_time": "3:40:59"} +{"current_steps": 4165, "total_steps": 5424, "loss": 0.9446, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 6.3672185189869e-06, "epoch": 2.3, "percentage": 76.79, "elapsed_time": "12:08:10", "remaining_time": "3:40:06"} +{"current_steps": 4170, "total_steps": 5424, "loss": 0.8726, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 6.319026252334445e-06, "epoch": 2.31, "percentage": 76.88, "elapsed_time": "12:09:03", "remaining_time": "3:39:14"} +{"current_steps": 4175, "total_steps": 5424, "loss": 0.9324, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 6.270990660644313e-06, "epoch": 2.31, "percentage": 76.97, "elapsed_time": "12:09:55", "remaining_time": "3:38:21"} +{"current_steps": 4180, "total_steps": 5424, "loss": 0.9252, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 6.223112146784935e-06, "epoch": 2.31, "percentage": 77.06, "elapsed_time": "12:10:48", "remaining_time": "3:37:29"} +{"current_steps": 4185, "total_steps": 5424, "loss": 0.8924, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 6.1753911123073435e-06, "epoch": 2.31, "percentage": 77.16, "elapsed_time": "12:11:40", "remaining_time": "3:36:37"} +{"current_steps": 4190, "total_steps": 5424, "loss": 0.9344, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 6.127827957441817e-06, "epoch": 2.32, "percentage": 77.25, "elapsed_time": "12:12:33", "remaining_time": "3:35:44"} +{"current_steps": 4195, "total_steps": 5424, "loss": 0.9341, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 6.080423081094522e-06, "epoch": 2.32, "percentage": 77.34, "elapsed_time": "12:13:26", "remaining_time": "3:34:52"} +{"current_steps": 4200, "total_steps": 5424, "loss": 0.905, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 6.033176880844133e-06, "epoch": 2.32, "percentage": 77.43, "elapsed_time": "12:14:18", "remaining_time": "3:33:59"} +{"current_steps": 4205, "total_steps": 5424, "loss": 0.9231, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 5.986089752938584e-06, "epoch": 2.33, "percentage": 77.53, "elapsed_time": "12:15:11", "remaining_time": "3:33:07"} +{"current_steps": 4210, "total_steps": 5424, "loss": 0.906, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 5.939162092291622e-06, "epoch": 2.33, "percentage": 77.62, "elapsed_time": "12:16:04", "remaining_time": "3:32:15"} +{"current_steps": 4215, "total_steps": 5424, "loss": 0.8957, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 5.892394292479633e-06, "epoch": 2.33, "percentage": 77.71, "elapsed_time": "12:16:56", "remaining_time": "3:31:22"} +{"current_steps": 4220, "total_steps": 5424, "loss": 0.867, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 5.8457867457382024e-06, "epoch": 2.33, "percentage": 77.8, "elapsed_time": "12:17:49", "remaining_time": "3:30:30"} +{"current_steps": 4225, "total_steps": 5424, "loss": 0.9022, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 5.7993398429589506e-06, "epoch": 2.34, "percentage": 77.89, "elapsed_time": "12:18:41", "remaining_time": "3:29:37"} +{"current_steps": 4230, "total_steps": 5424, "loss": 0.9064, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 5.753053973686148e-06, "epoch": 2.34, "percentage": 77.99, "elapsed_time": "12:19:34", "remaining_time": "3:28:45"} +{"current_steps": 4235, "total_steps": 5424, "loss": 0.9019, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 5.7069295261135525e-06, "epoch": 2.34, "percentage": 78.08, "elapsed_time": "12:20:27", "remaining_time": "3:27:53"} +{"current_steps": 4240, "total_steps": 5424, "loss": 0.9397, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 5.66096688708104e-06, "epoch": 2.34, "percentage": 78.17, "elapsed_time": "12:21:19", "remaining_time": "3:27:00"} +{"current_steps": 4245, "total_steps": 5424, "loss": 0.8971, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 5.615166442071457e-06, "epoch": 2.35, "percentage": 78.26, "elapsed_time": "12:22:12", "remaining_time": "3:26:08"} +{"current_steps": 4250, "total_steps": 5424, "loss": 0.8902, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 5.569528575207339e-06, "epoch": 2.35, "percentage": 78.36, "elapsed_time": "12:23:04", "remaining_time": "3:25:15"} +{"current_steps": 4255, "total_steps": 5424, "loss": 0.8982, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 5.5240536692477e-06, "epoch": 2.35, "percentage": 78.45, "elapsed_time": "12:23:57", "remaining_time": "3:24:23"} +{"current_steps": 4260, "total_steps": 5424, "loss": 0.9135, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 5.4787421055848164e-06, "epoch": 2.36, "percentage": 78.54, "elapsed_time": "12:24:50", "remaining_time": "3:23:31"} +{"current_steps": 4265, "total_steps": 5424, "loss": 0.8969, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 5.433594264241043e-06, "epoch": 2.36, "percentage": 78.63, "elapsed_time": "12:25:42", "remaining_time": "3:22:38"} +{"current_steps": 4270, "total_steps": 5424, "loss": 0.9036, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 5.3886105238656055e-06, "epoch": 2.36, "percentage": 78.72, "elapsed_time": "12:26:35", "remaining_time": "3:21:46"} +{"current_steps": 4275, "total_steps": 5424, "loss": 0.9313, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 5.3437912617314425e-06, "epoch": 2.36, "percentage": 78.82, "elapsed_time": "12:27:27", "remaining_time": "3:20:53"} +{"current_steps": 4280, "total_steps": 5424, "loss": 0.9034, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 5.299136853732034e-06, "epoch": 2.37, "percentage": 78.91, "elapsed_time": "12:28:20", "remaining_time": "3:20:01"} +{"current_steps": 4285, "total_steps": 5424, "loss": 0.9673, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 5.254647674378252e-06, "epoch": 2.37, "percentage": 79.0, "elapsed_time": "12:29:13", "remaining_time": "3:19:09"} +{"current_steps": 4290, "total_steps": 5424, "loss": 0.9044, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 5.210324096795202e-06, "epoch": 2.37, "percentage": 79.09, "elapsed_time": "12:30:05", "remaining_time": "3:18:16"} +{"current_steps": 4295, "total_steps": 5424, "loss": 0.937, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 5.166166492719124e-06, "epoch": 2.37, "percentage": 79.19, "elapsed_time": "12:30:58", "remaining_time": "3:17:24"} +{"current_steps": 4300, "total_steps": 5424, "loss": 0.9095, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 5.122175232494255e-06, "epoch": 2.38, "percentage": 79.28, "elapsed_time": "12:31:50", "remaining_time": "3:16:31"} +{"current_steps": 4305, "total_steps": 5424, "loss": 0.8787, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 5.078350685069727e-06, "epoch": 2.38, "percentage": 79.37, "elapsed_time": "12:32:43", "remaining_time": "3:15:39"} +{"current_steps": 4310, "total_steps": 5424, "loss": 0.8945, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 5.03469321799647e-06, "epoch": 2.38, "percentage": 79.46, "elapsed_time": "12:33:36", "remaining_time": "3:14:46"} +{"current_steps": 4315, "total_steps": 5424, "loss": 0.9285, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.9912031974241376e-06, "epoch": 2.39, "percentage": 79.55, "elapsed_time": "12:34:29", "remaining_time": "3:13:54"} +{"current_steps": 4320, "total_steps": 5424, "loss": 0.9318, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.947880988098025e-06, "epoch": 2.39, "percentage": 79.65, "elapsed_time": "12:35:21", "remaining_time": "3:13:02"} +{"current_steps": 4325, "total_steps": 5424, "loss": 0.9171, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.90472695335602e-06, "epoch": 2.39, "percentage": 79.74, "elapsed_time": "12:36:14", "remaining_time": "3:12:09"} +{"current_steps": 4330, "total_steps": 5424, "loss": 0.923, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.8617414551255545e-06, "epoch": 2.39, "percentage": 79.83, "elapsed_time": "12:37:06", "remaining_time": "3:11:17"} +{"current_steps": 4335, "total_steps": 5424, "loss": 0.9153, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.818924853920545e-06, "epoch": 2.4, "percentage": 79.92, "elapsed_time": "12:37:59", "remaining_time": "3:10:24"} +{"current_steps": 4340, "total_steps": 5424, "loss": 0.907, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.776277508838428e-06, "epoch": 2.4, "percentage": 80.01, "elapsed_time": "12:38:52", "remaining_time": "3:09:32"} +{"current_steps": 4345, "total_steps": 5424, "loss": 0.8869, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.733799777557069e-06, "epoch": 2.4, "percentage": 80.11, "elapsed_time": "12:39:44", "remaining_time": "3:08:40"} +{"current_steps": 4350, "total_steps": 5424, "loss": 0.95, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.691492016331842e-06, "epoch": 2.41, "percentage": 80.2, "elapsed_time": "12:40:37", "remaining_time": "3:07:47"} +{"current_steps": 4355, "total_steps": 5424, "loss": 0.9145, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.6493545799925655e-06, "epoch": 2.41, "percentage": 80.29, "elapsed_time": "12:41:29", "remaining_time": "3:06:55"} +{"current_steps": 4360, "total_steps": 5424, "loss": 0.9175, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.607387821940609e-06, "epoch": 2.41, "percentage": 80.38, "elapsed_time": "12:42:22", "remaining_time": "3:06:02"} +{"current_steps": 4365, "total_steps": 5424, "loss": 0.9125, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.565592094145835e-06, "epoch": 2.41, "percentage": 80.48, "elapsed_time": "12:43:15", "remaining_time": "3:05:10"} +{"current_steps": 4370, "total_steps": 5424, "loss": 0.8972, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.523967747143745e-06, "epoch": 2.42, "percentage": 80.57, "elapsed_time": "12:44:07", "remaining_time": "3:04:18"} +{"current_steps": 4375, "total_steps": 5424, "loss": 0.9113, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.482515130032453e-06, "epoch": 2.42, "percentage": 80.66, "elapsed_time": "12:45:00", "remaining_time": "3:03:25"} +{"current_steps": 4380, "total_steps": 5424, "loss": 0.9447, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.441234590469817e-06, "epoch": 2.42, "percentage": 80.75, "elapsed_time": "12:45:53", "remaining_time": "3:02:33"} +{"current_steps": 4385, "total_steps": 5424, "loss": 0.9472, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.400126474670499e-06, "epoch": 2.42, "percentage": 80.84, "elapsed_time": "12:46:46", "remaining_time": "3:01:40"} +{"current_steps": 4390, "total_steps": 5424, "loss": 0.9391, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.359191127403059e-06, "epoch": 2.43, "percentage": 80.94, "elapsed_time": "12:47:38", "remaining_time": "3:00:48"} +{"current_steps": 4395, "total_steps": 5424, "loss": 0.9171, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.318428891987078e-06, "epoch": 2.43, "percentage": 81.03, "elapsed_time": "12:48:31", "remaining_time": "2:59:56"} +{"current_steps": 4400, "total_steps": 5424, "loss": 0.9468, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.2778401102902595e-06, "epoch": 2.43, "percentage": 81.12, "elapsed_time": "12:49:23", "remaining_time": "2:59:03"} +{"current_steps": 4405, "total_steps": 5424, "loss": 0.9034, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.237425122725586e-06, "epoch": 2.44, "percentage": 81.21, "elapsed_time": "12:50:16", "remaining_time": "2:58:11"} +{"current_steps": 4410, "total_steps": 5424, "loss": 0.8667, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.197184268248436e-06, "epoch": 2.44, "percentage": 81.31, "elapsed_time": "12:51:09", "remaining_time": "2:57:18"} +{"current_steps": 4415, "total_steps": 5424, "loss": 0.9553, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.157117884353765e-06, "epoch": 2.44, "percentage": 81.4, "elapsed_time": "12:52:01", "remaining_time": "2:56:26"} +{"current_steps": 4420, "total_steps": 5424, "loss": 0.931, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.117226307073268e-06, "epoch": 2.44, "percentage": 81.49, "elapsed_time": "12:52:54", "remaining_time": "2:55:33"} +{"current_steps": 4425, "total_steps": 5424, "loss": 0.9287, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.07750987097254e-06, "epoch": 2.45, "percentage": 81.58, "elapsed_time": "12:53:47", "remaining_time": "2:54:41"} +{"current_steps": 4430, "total_steps": 5424, "loss": 0.8737, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.037968909148326e-06, "epoch": 2.45, "percentage": 81.67, "elapsed_time": "12:54:39", "remaining_time": "2:53:49"} +{"current_steps": 4435, "total_steps": 5424, "loss": 0.9677, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.998603753225647e-06, "epoch": 2.45, "percentage": 81.77, "elapsed_time": "12:55:32", "remaining_time": "2:52:56"} +{"current_steps": 4440, "total_steps": 5424, "loss": 0.9194, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.959414733355094e-06, "epoch": 2.46, "percentage": 81.86, "elapsed_time": "12:56:24", "remaining_time": "2:52:04"} +{"current_steps": 4445, "total_steps": 5424, "loss": 0.9424, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.9204021782100115e-06, "epoch": 2.46, "percentage": 81.95, "elapsed_time": "12:57:17", "remaining_time": "2:51:11"} +{"current_steps": 4450, "total_steps": 5424, "loss": 0.9402, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.8815664149837675e-06, "epoch": 2.46, "percentage": 82.04, "elapsed_time": "12:58:10", "remaining_time": "2:50:19"} +{"current_steps": 4455, "total_steps": 5424, "loss": 0.9196, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.8429077693869854e-06, "epoch": 2.46, "percentage": 82.13, "elapsed_time": "12:59:02", "remaining_time": "2:49:26"} +{"current_steps": 4460, "total_steps": 5424, "loss": 0.9357, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.804426565644839e-06, "epoch": 2.47, "percentage": 82.23, "elapsed_time": "12:59:55", "remaining_time": "2:48:34"} +{"current_steps": 4465, "total_steps": 5424, "loss": 0.9234, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.7661231264943086e-06, "epoch": 2.47, "percentage": 82.32, "elapsed_time": "13:00:48", "remaining_time": "2:47:42"} +{"current_steps": 4470, "total_steps": 5424, "loss": 0.9135, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.7279977731814963e-06, "epoch": 2.47, "percentage": 82.41, "elapsed_time": "13:01:40", "remaining_time": "2:46:49"} +{"current_steps": 4475, "total_steps": 5424, "loss": 0.9074, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.690050825458913e-06, "epoch": 2.47, "percentage": 82.5, "elapsed_time": "13:02:33", "remaining_time": "2:45:57"} +{"current_steps": 4480, "total_steps": 5424, "loss": 0.8658, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.652282601582793e-06, "epoch": 2.48, "percentage": 82.6, "elapsed_time": "13:03:25", "remaining_time": "2:45:04"} +{"current_steps": 4485, "total_steps": 5424, "loss": 0.9522, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.6146934183104748e-06, "epoch": 2.48, "percentage": 82.69, "elapsed_time": "13:04:18", "remaining_time": "2:44:12"} +{"current_steps": 4490, "total_steps": 5424, "loss": 0.937, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.5772835908976538e-06, "epoch": 2.48, "percentage": 82.78, "elapsed_time": "13:05:11", "remaining_time": "2:43:19"} +{"current_steps": 4495, "total_steps": 5424, "loss": 0.9336, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.540053433095841e-06, "epoch": 2.49, "percentage": 82.87, "elapsed_time": "13:06:03", "remaining_time": "2:42:27"} +{"current_steps": 4500, "total_steps": 5424, "loss": 0.9437, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.503003257149637e-06, "epoch": 2.49, "percentage": 82.96, "elapsed_time": "13:06:56", "remaining_time": "2:41:35"} +{"current_steps": 4505, "total_steps": 5424, "loss": 0.9084, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.4661333737941976e-06, "epoch": 2.49, "percentage": 83.06, "elapsed_time": "13:07:49", "remaining_time": "2:40:42"} +{"current_steps": 4510, "total_steps": 5424, "loss": 0.9378, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.429444092252554e-06, "epoch": 2.49, "percentage": 83.15, "elapsed_time": "13:08:41", "remaining_time": "2:39:50"} +{"current_steps": 4515, "total_steps": 5424, "loss": 0.8999, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.39293572023307e-06, "epoch": 2.5, "percentage": 83.24, "elapsed_time": "13:09:34", "remaining_time": "2:38:57"} +{"current_steps": 4520, "total_steps": 5424, "loss": 0.9417, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.3566085639268413e-06, "epoch": 2.5, "percentage": 83.33, "elapsed_time": "13:10:27", "remaining_time": "2:38:05"} +{"current_steps": 4525, "total_steps": 5424, "loss": 0.9288, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.32046292800513e-06, "epoch": 2.5, "percentage": 83.43, "elapsed_time": "13:11:19", "remaining_time": "2:37:13"} +{"current_steps": 4530, "total_steps": 5424, "loss": 0.8957, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.2844991156168097e-06, "epoch": 2.5, "percentage": 83.52, "elapsed_time": "13:12:12", "remaining_time": "2:36:20"} +{"current_steps": 4535, "total_steps": 5424, "loss": 0.9002, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.2487174283858223e-06, "epoch": 2.51, "percentage": 83.61, "elapsed_time": "13:13:05", "remaining_time": "2:35:28"} +{"current_steps": 4540, "total_steps": 5424, "loss": 0.9315, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.2131181664086517e-06, "epoch": 2.51, "percentage": 83.7, "elapsed_time": "13:13:57", "remaining_time": "2:34:35"} +{"current_steps": 4545, "total_steps": 5424, "loss": 0.8948, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.1777016282517975e-06, "epoch": 2.51, "percentage": 83.79, "elapsed_time": "13:14:50", "remaining_time": "2:33:43"} +{"current_steps": 4550, "total_steps": 5424, "loss": 0.9015, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.142468110949287e-06, "epoch": 2.52, "percentage": 83.89, "elapsed_time": "13:15:43", "remaining_time": "2:32:50"} +{"current_steps": 4555, "total_steps": 5424, "loss": 0.9273, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.1074179100001737e-06, "epoch": 2.52, "percentage": 83.98, "elapsed_time": "13:16:35", "remaining_time": "2:31:58"} +{"current_steps": 4560, "total_steps": 5424, "loss": 0.9307, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.0725513193660404e-06, "epoch": 2.52, "percentage": 84.07, "elapsed_time": "13:17:28", "remaining_time": "2:31:05"} +{"current_steps": 4565, "total_steps": 5424, "loss": 0.9075, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.0378686314685934e-06, "epoch": 2.52, "percentage": 84.16, "elapsed_time": "13:18:20", "remaining_time": "2:30:13"} +{"current_steps": 4570, "total_steps": 5424, "loss": 0.8821, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.003370137187128e-06, "epoch": 2.53, "percentage": 84.26, "elapsed_time": "13:19:13", "remaining_time": "2:29:21"} +{"current_steps": 4575, "total_steps": 5424, "loss": 0.9245, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.969056125856154e-06, "epoch": 2.53, "percentage": 84.35, "elapsed_time": "13:20:06", "remaining_time": "2:28:28"} +{"current_steps": 4580, "total_steps": 5424, "loss": 0.9346, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.93492688526294e-06, "epoch": 2.53, "percentage": 84.44, "elapsed_time": "13:20:58", "remaining_time": "2:27:36"} +{"current_steps": 4585, "total_steps": 5424, "loss": 0.9226, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.900982701645111e-06, "epoch": 2.54, "percentage": 84.53, "elapsed_time": "13:21:51", "remaining_time": "2:26:43"} +{"current_steps": 4590, "total_steps": 5424, "loss": 0.9215, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.867223859688237e-06, "epoch": 2.54, "percentage": 84.62, "elapsed_time": "13:22:43", "remaining_time": "2:25:51"} +{"current_steps": 4595, "total_steps": 5424, "loss": 0.878, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.83365064252345e-06, "epoch": 2.54, "percentage": 84.72, "elapsed_time": "13:23:36", "remaining_time": "2:24:58"} +{"current_steps": 4600, "total_steps": 5424, "loss": 0.9207, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.800263331725078e-06, "epoch": 2.54, "percentage": 84.81, "elapsed_time": "13:24:29", "remaining_time": "2:24:06"} +{"current_steps": 4605, "total_steps": 5424, "loss": 0.9156, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.7670622073082657e-06, "epoch": 2.55, "percentage": 84.9, "elapsed_time": "13:25:21", "remaining_time": "2:23:14"} +{"current_steps": 4610, "total_steps": 5424, "loss": 0.9296, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.7340475477266507e-06, "epoch": 2.55, "percentage": 84.99, "elapsed_time": "13:26:14", "remaining_time": "2:22:21"} +{"current_steps": 4615, "total_steps": 5424, "loss": 0.9346, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.701219629869986e-06, "epoch": 2.55, "percentage": 85.08, "elapsed_time": "13:27:06", "remaining_time": "2:21:29"} +{"current_steps": 4620, "total_steps": 5424, "loss": 0.9237, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.6685787290618825e-06, "epoch": 2.55, "percentage": 85.18, "elapsed_time": "13:27:59", "remaining_time": "2:20:36"} +{"current_steps": 4625, "total_steps": 5424, "loss": 0.9162, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.636125119057428e-06, "epoch": 2.56, "percentage": 85.27, "elapsed_time": "13:28:52", "remaining_time": "2:19:44"} +{"current_steps": 4630, "total_steps": 5424, "loss": 0.9145, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.6038590720409565e-06, "epoch": 2.56, "percentage": 85.36, "elapsed_time": "13:29:44", "remaining_time": "2:18:51"} +{"current_steps": 4635, "total_steps": 5424, "loss": 0.9776, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.5717808586237067e-06, "epoch": 2.56, "percentage": 85.45, "elapsed_time": "13:30:37", "remaining_time": "2:17:59"} +{"current_steps": 4640, "total_steps": 5424, "loss": 0.9519, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.539890747841611e-06, "epoch": 2.57, "percentage": 85.55, "elapsed_time": "13:31:30", "remaining_time": "2:17:06"} +{"current_steps": 4645, "total_steps": 5424, "loss": 0.9116, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.5081890071529695e-06, "epoch": 2.57, "percentage": 85.64, "elapsed_time": "13:32:22", "remaining_time": "2:16:14"} +{"current_steps": 4650, "total_steps": 5424, "loss": 0.9217, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.4766759024362927e-06, "epoch": 2.57, "percentage": 85.73, "elapsed_time": "13:33:15", "remaining_time": "2:15:22"} +{"current_steps": 4655, "total_steps": 5424, "loss": 0.9113, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.445351697987988e-06, "epoch": 2.57, "percentage": 85.82, "elapsed_time": "13:34:07", "remaining_time": "2:14:29"} +{"current_steps": 4660, "total_steps": 5424, "loss": 0.9089, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.414216656520191e-06, "epoch": 2.58, "percentage": 85.91, "elapsed_time": "13:35:00", "remaining_time": "2:13:37"} +{"current_steps": 4665, "total_steps": 5424, "loss": 0.9367, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.3832710391585605e-06, "epoch": 2.58, "percentage": 86.01, "elapsed_time": "13:35:52", "remaining_time": "2:12:44"} +{"current_steps": 4670, "total_steps": 5424, "loss": 0.9453, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.3525151054400675e-06, "epoch": 2.58, "percentage": 86.1, "elapsed_time": "13:36:45", "remaining_time": "2:11:52"} +{"current_steps": 4675, "total_steps": 5424, "loss": 0.9187, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.3219491133108394e-06, "epoch": 2.59, "percentage": 86.19, "elapsed_time": "13:37:38", "remaining_time": "2:10:59"} +{"current_steps": 4680, "total_steps": 5424, "loss": 0.911, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.2915733191239824e-06, "epoch": 2.59, "percentage": 86.28, "elapsed_time": "13:38:31", "remaining_time": "2:10:07"} +{"current_steps": 4685, "total_steps": 5424, "loss": 0.9309, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.261387977637436e-06, "epoch": 2.59, "percentage": 86.38, "elapsed_time": "13:39:24", "remaining_time": "2:09:15"} +{"current_steps": 4690, "total_steps": 5424, "loss": 0.8921, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.2313933420118395e-06, "epoch": 2.59, "percentage": 86.47, "elapsed_time": "13:40:18", "remaining_time": "2:08:22"} +{"current_steps": 4695, "total_steps": 5424, "loss": 0.9332, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.2015896638084037e-06, "epoch": 2.6, "percentage": 86.56, "elapsed_time": "13:41:10", "remaining_time": "2:07:30"} +{"current_steps": 4700, "total_steps": 5424, "loss": 0.9316, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.171977192986813e-06, "epoch": 2.6, "percentage": 86.65, "elapsed_time": "13:42:03", "remaining_time": "2:06:37"} +{"current_steps": 4705, "total_steps": 5424, "loss": 0.9453, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.142556177903096e-06, "epoch": 2.6, "percentage": 86.74, "elapsed_time": "13:42:56", "remaining_time": "2:05:45"} +{"current_steps": 4710, "total_steps": 5424, "loss": 0.937, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.1133268653076022e-06, "epoch": 2.6, "percentage": 86.84, "elapsed_time": "13:43:49", "remaining_time": "2:04:53"} +{"current_steps": 4715, "total_steps": 5424, "loss": 0.9067, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.084289500342862e-06, "epoch": 2.61, "percentage": 86.93, "elapsed_time": "13:44:42", "remaining_time": "2:04:00"} +{"current_steps": 4720, "total_steps": 5424, "loss": 0.9694, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.0554443265415864e-06, "epoch": 2.61, "percentage": 87.02, "elapsed_time": "13:45:36", "remaining_time": "2:03:08"} +{"current_steps": 4725, "total_steps": 5424, "loss": 0.9217, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.0267915858245943e-06, "epoch": 2.61, "percentage": 87.11, "elapsed_time": "13:46:29", "remaining_time": "2:02:16"} +{"current_steps": 4730, "total_steps": 5424, "loss": 0.9399, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.998331518498797e-06, "epoch": 2.62, "percentage": 87.21, "elapsed_time": "13:47:22", "remaining_time": "2:01:23"} +{"current_steps": 4735, "total_steps": 5424, "loss": 0.9164, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.970064363255175e-06, "epoch": 2.62, "percentage": 87.3, "elapsed_time": "13:48:15", "remaining_time": "2:00:31"} +{"current_steps": 4740, "total_steps": 5424, "loss": 0.9336, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.941990357166784e-06, "epoch": 2.62, "percentage": 87.39, "elapsed_time": "13:49:08", "remaining_time": "1:59:38"} +{"current_steps": 4745, "total_steps": 5424, "loss": 0.926, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.9141097356867644e-06, "epoch": 2.62, "percentage": 87.48, "elapsed_time": "13:50:00", "remaining_time": "1:58:46"} +{"current_steps": 4750, "total_steps": 5424, "loss": 0.9109, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.8864227326463452e-06, "epoch": 2.63, "percentage": 87.57, "elapsed_time": "13:50:53", "remaining_time": "1:57:53"} +{"current_steps": 4755, "total_steps": 5424, "loss": 0.9528, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.8589295802529328e-06, "epoch": 2.63, "percentage": 87.67, "elapsed_time": "13:51:45", "remaining_time": "1:57:01"} +{"current_steps": 4760, "total_steps": 5424, "loss": 0.9142, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.8316305090881003e-06, "epoch": 2.63, "percentage": 87.76, "elapsed_time": "13:52:38", "remaining_time": "1:56:09"} +{"current_steps": 4765, "total_steps": 5424, "loss": 0.9247, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.8045257481057204e-06, "epoch": 2.63, "percentage": 87.85, "elapsed_time": "13:53:31", "remaining_time": "1:55:16"} +{"current_steps": 4770, "total_steps": 5424, "loss": 0.9162, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.7776155246299747e-06, "epoch": 2.64, "percentage": 87.94, "elapsed_time": "13:54:24", "remaining_time": "1:54:24"} +{"current_steps": 4775, "total_steps": 5424, "loss": 0.9084, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.7509000643535167e-06, "epoch": 2.64, "percentage": 88.03, "elapsed_time": "13:55:17", "remaining_time": "1:53:31"} +{"current_steps": 4780, "total_steps": 5424, "loss": 0.8427, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.7243795913355148e-06, "epoch": 2.64, "percentage": 88.13, "elapsed_time": "13:56:09", "remaining_time": "1:52:39"} +{"current_steps": 4785, "total_steps": 5424, "loss": 0.8851, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.6980543279998401e-06, "epoch": 2.65, "percentage": 88.22, "elapsed_time": "13:57:02", "remaining_time": "1:51:46"} +{"current_steps": 4790, "total_steps": 5424, "loss": 0.9155, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.671924495133126e-06, "epoch": 2.65, "percentage": 88.31, "elapsed_time": "13:57:55", "remaining_time": "1:50:54"} +{"current_steps": 4795, "total_steps": 5424, "loss": 0.9049, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.6459903118829777e-06, "epoch": 2.65, "percentage": 88.4, "elapsed_time": "13:58:48", "remaining_time": "1:50:02"} +{"current_steps": 4800, "total_steps": 5424, "loss": 0.8831, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.6202519957561114e-06, "epoch": 2.65, "percentage": 88.5, "elapsed_time": "13:59:41", "remaining_time": "1:49:09"} +{"current_steps": 4805, "total_steps": 5424, "loss": 0.9286, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.5947097626165252e-06, "epoch": 2.66, "percentage": 88.59, "elapsed_time": "14:00:34", "remaining_time": "1:48:17"} +{"current_steps": 4810, "total_steps": 5424, "loss": 0.9299, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.5693638266836952e-06, "epoch": 2.66, "percentage": 88.68, "elapsed_time": "14:01:27", "remaining_time": "1:47:24"} +{"current_steps": 4815, "total_steps": 5424, "loss": 0.8739, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.5442144005307774e-06, "epoch": 2.66, "percentage": 88.77, "elapsed_time": "14:02:19", "remaining_time": "1:46:32"} +{"current_steps": 4820, "total_steps": 5424, "loss": 0.9201, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.519261695082827e-06, "epoch": 2.67, "percentage": 88.86, "elapsed_time": "14:03:12", "remaining_time": "1:45:39"} +{"current_steps": 4825, "total_steps": 5424, "loss": 0.9062, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.4945059196150247e-06, "epoch": 2.67, "percentage": 88.96, "elapsed_time": "14:04:05", "remaining_time": "1:44:47"} +{"current_steps": 4830, "total_steps": 5424, "loss": 0.9304, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.4699472817509248e-06, "epoch": 2.67, "percentage": 89.05, "elapsed_time": "14:04:57", "remaining_time": "1:43:54"} +{"current_steps": 4835, "total_steps": 5424, "loss": 0.9253, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.4455859874607235e-06, "epoch": 2.67, "percentage": 89.14, "elapsed_time": "14:05:50", "remaining_time": "1:43:02"} +{"current_steps": 4840, "total_steps": 5424, "loss": 0.9261, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.4214222410594947e-06, "epoch": 2.68, "percentage": 89.23, "elapsed_time": "14:06:43", "remaining_time": "1:42:10"} +{"current_steps": 4845, "total_steps": 5424, "loss": 0.8764, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.3974562452055418e-06, "epoch": 2.68, "percentage": 89.33, "elapsed_time": "14:07:37", "remaining_time": "1:41:17"} +{"current_steps": 4850, "total_steps": 5424, "loss": 0.8906, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.3736882008986262e-06, "epoch": 2.68, "percentage": 89.42, "elapsed_time": "14:08:30", "remaining_time": "1:40:25"} +{"current_steps": 4855, "total_steps": 5424, "loss": 0.9671, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.3501183074783263e-06, "epoch": 2.68, "percentage": 89.51, "elapsed_time": "14:09:23", "remaining_time": "1:39:32"} +{"current_steps": 4860, "total_steps": 5424, "loss": 0.8979, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.3267467626223606e-06, "epoch": 2.69, "percentage": 89.6, "elapsed_time": "14:10:17", "remaining_time": "1:38:40"} +{"current_steps": 4865, "total_steps": 5424, "loss": 0.9284, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.3035737623449146e-06, "epoch": 2.69, "percentage": 89.69, "elapsed_time": "14:11:10", "remaining_time": "1:37:48"} +{"current_steps": 4870, "total_steps": 5424, "loss": 0.9338, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.2805995009950083e-06, "epoch": 2.69, "percentage": 89.79, "elapsed_time": "14:12:03", "remaining_time": "1:36:55"} +{"current_steps": 4875, "total_steps": 5424, "loss": 0.9474, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.257824171254865e-06, "epoch": 2.7, "percentage": 89.88, "elapsed_time": "14:12:56", "remaining_time": "1:36:03"} +{"current_steps": 4880, "total_steps": 5424, "loss": 0.9307, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.2352479641382919e-06, "epoch": 2.7, "percentage": 89.97, "elapsed_time": "14:13:49", "remaining_time": "1:35:10"} +{"current_steps": 4885, "total_steps": 5424, "loss": 0.9578, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.2128710689890826e-06, "epoch": 2.7, "percentage": 90.06, "elapsed_time": "14:14:42", "remaining_time": "1:34:18"} +{"current_steps": 4890, "total_steps": 5424, "loss": 0.9123, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.1906936734794233e-06, "epoch": 2.7, "percentage": 90.15, "elapsed_time": "14:15:34", "remaining_time": "1:33:25"} +{"current_steps": 4895, "total_steps": 5424, "loss": 0.9157, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.1687159636083161e-06, "epoch": 2.71, "percentage": 90.25, "elapsed_time": "14:16:27", "remaining_time": "1:32:33"} +{"current_steps": 4900, "total_steps": 5424, "loss": 0.9135, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.1469381237000476e-06, "epoch": 2.71, "percentage": 90.34, "elapsed_time": "14:17:20", "remaining_time": "1:31:40"} +{"current_steps": 4905, "total_steps": 5424, "loss": 0.9073, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.1253603364025867e-06, "epoch": 2.71, "percentage": 90.43, "elapsed_time": "14:18:12", "remaining_time": "1:30:48"} +{"current_steps": 4910, "total_steps": 5424, "loss": 0.9198, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.1039827826861193e-06, "epoch": 2.71, "percentage": 90.52, "elapsed_time": "14:19:05", "remaining_time": "1:29:56"} +{"current_steps": 4915, "total_steps": 5424, "loss": 0.911, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.0828056418414695e-06, "epoch": 2.72, "percentage": 90.62, "elapsed_time": "14:19:58", "remaining_time": "1:29:03"} +{"current_steps": 4920, "total_steps": 5424, "loss": 0.9124, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.06182909147865e-06, "epoch": 2.72, "percentage": 90.71, "elapsed_time": "14:20:51", "remaining_time": "1:28:11"} +{"current_steps": 4925, "total_steps": 5424, "loss": 0.9308, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.0410533075253248e-06, "epoch": 2.72, "percentage": 90.8, "elapsed_time": "14:21:43", "remaining_time": "1:27:18"} +{"current_steps": 4930, "total_steps": 5424, "loss": 0.9005, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.020478464225369e-06, "epoch": 2.73, "percentage": 90.89, "elapsed_time": "14:22:36", "remaining_time": "1:26:26"} +{"current_steps": 4935, "total_steps": 5424, "loss": 0.9326, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.0001047341373832e-06, "epoch": 2.73, "percentage": 90.98, "elapsed_time": "14:23:29", "remaining_time": "1:25:33"} +{"current_steps": 4940, "total_steps": 5424, "loss": 0.8905, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.7993228813327e-07, "epoch": 2.73, "percentage": 91.08, "elapsed_time": "14:24:21", "remaining_time": "1:24:41"} +{"current_steps": 4945, "total_steps": 5424, "loss": 0.9238, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.599612953967746e-07, "epoch": 2.73, "percentage": 91.17, "elapsed_time": "14:25:14", "remaining_time": "1:23:48"} +{"current_steps": 4950, "total_steps": 5424, "loss": 0.8964, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.401919234220902e-07, "epoch": 2.74, "percentage": 91.26, "elapsed_time": "14:26:07", "remaining_time": "1:22:56"} +{"current_steps": 4955, "total_steps": 5424, "loss": 0.9183, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.206243380124352e-07, "epoch": 2.74, "percentage": 91.35, "elapsed_time": "14:26:59", "remaining_time": "1:22:03"} +{"current_steps": 4960, "total_steps": 5424, "loss": 0.9074, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.012587032786706e-07, "epoch": 2.74, "percentage": 91.45, "elapsed_time": "14:27:52", "remaining_time": "1:21:11"} +{"current_steps": 4965, "total_steps": 5424, "loss": 0.9255, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.820951816379263e-07, "epoch": 2.75, "percentage": 91.54, "elapsed_time": "14:28:45", "remaining_time": "1:20:18"} +{"current_steps": 4970, "total_steps": 5424, "loss": 0.9133, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.631339338122324e-07, "epoch": 2.75, "percentage": 91.63, "elapsed_time": "14:29:37", "remaining_time": "1:19:26"} +{"current_steps": 4975, "total_steps": 5424, "loss": 0.9056, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.443751188271703e-07, "epoch": 2.75, "percentage": 91.72, "elapsed_time": "14:30:30", "remaining_time": "1:18:33"} +{"current_steps": 4980, "total_steps": 5424, "loss": 0.8905, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.258188940105549e-07, "epoch": 2.75, "percentage": 91.81, "elapsed_time": "14:31:23", "remaining_time": "1:17:41"} +{"current_steps": 4985, "total_steps": 5424, "loss": 0.9088, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.074654149910821e-07, "epoch": 2.76, "percentage": 91.91, "elapsed_time": "14:32:16", "remaining_time": "1:16:48"} +{"current_steps": 4990, "total_steps": 5424, "loss": 0.8994, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 7.893148356970748e-07, "epoch": 2.76, "percentage": 92.0, "elapsed_time": "14:33:08", "remaining_time": "1:15:56"} +{"current_steps": 4995, "total_steps": 5424, "loss": 0.8946, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 7.713673083551281e-07, "epoch": 2.76, "percentage": 92.09, "elapsed_time": "14:34:01", "remaining_time": "1:15:04"} +{"current_steps": 5000, "total_steps": 5424, "loss": 0.924, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 7.536229834888913e-07, "epoch": 2.76, "percentage": 92.18, "elapsed_time": "14:34:54", "remaining_time": "1:14:11"} +{"current_steps": 5005, "total_steps": 5424, "loss": 0.9102, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 7.360820099177712e-07, "epoch": 2.77, "percentage": 92.28, "elapsed_time": "14:35:47", "remaining_time": "1:13:19"} +{"current_steps": 5010, "total_steps": 5424, "loss": 0.9198, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 7.187445347556859e-07, "epoch": 2.77, "percentage": 92.37, "elapsed_time": "14:36:40", "remaining_time": "1:12:26"} +{"current_steps": 5015, "total_steps": 5424, "loss": 0.9004, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 7.016107034098524e-07, "epoch": 2.77, "percentage": 92.46, "elapsed_time": "14:37:33", "remaining_time": "1:11:34"} +{"current_steps": 5020, "total_steps": 5424, "loss": 0.8849, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 6.846806595795424e-07, "epoch": 2.78, "percentage": 92.55, "elapsed_time": "14:38:25", "remaining_time": "1:10:41"} +{"current_steps": 5025, "total_steps": 5424, "loss": 0.9473, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 6.679545452548924e-07, "epoch": 2.78, "percentage": 92.64, "elapsed_time": "14:39:18", "remaining_time": "1:09:49"} +{"current_steps": 5030, "total_steps": 5424, "loss": 0.9339, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 6.514325007157013e-07, "epoch": 2.78, "percentage": 92.74, "elapsed_time": "14:40:11", "remaining_time": "1:08:56"} +{"current_steps": 5035, "total_steps": 5424, "loss": 0.9172, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 6.35114664530273e-07, "epoch": 2.78, "percentage": 92.83, "elapsed_time": "14:41:04", "remaining_time": "1:08:04"} +{"current_steps": 5040, "total_steps": 5424, "loss": 0.9796, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 6.190011735542262e-07, "epoch": 2.79, "percentage": 92.92, "elapsed_time": "14:41:57", "remaining_time": "1:07:11"} +{"current_steps": 5045, "total_steps": 5424, "loss": 0.9367, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 6.030921629293778e-07, "epoch": 2.79, "percentage": 93.01, "elapsed_time": "14:42:49", "remaining_time": "1:06:19"} +{"current_steps": 5050, "total_steps": 5424, "loss": 0.9254, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 5.873877660825783e-07, "epoch": 2.79, "percentage": 93.1, "elapsed_time": "14:43:42", "remaining_time": "1:05:26"} +{"current_steps": 5055, "total_steps": 5424, "loss": 0.8976, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 5.718881147246252e-07, "epoch": 2.8, "percentage": 93.2, "elapsed_time": "14:44:35", "remaining_time": "1:04:34"} +{"current_steps": 5060, "total_steps": 5424, "loss": 0.9099, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 5.565933388491263e-07, "epoch": 2.8, "percentage": 93.29, "elapsed_time": "14:45:28", "remaining_time": "1:03:41"} +{"current_steps": 5065, "total_steps": 5424, "loss": 0.9057, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 5.415035667314328e-07, "epoch": 2.8, "percentage": 93.38, "elapsed_time": "14:46:21", "remaining_time": "1:02:49"} +{"current_steps": 5070, "total_steps": 5424, "loss": 0.8978, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 5.266189249275521e-07, "epoch": 2.8, "percentage": 93.47, "elapsed_time": "14:47:14", "remaining_time": "1:01:56"} +{"current_steps": 5075, "total_steps": 5424, "loss": 0.9172, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 5.119395382730929e-07, "epoch": 2.81, "percentage": 93.57, "elapsed_time": "14:48:06", "remaining_time": "1:01:04"} +{"current_steps": 5080, "total_steps": 5424, "loss": 0.9393, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.974655298822129e-07, "epoch": 2.81, "percentage": 93.66, "elapsed_time": "14:48:59", "remaining_time": "1:00:11"} +{"current_steps": 5085, "total_steps": 5424, "loss": 0.9137, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.831970211465892e-07, "epoch": 2.81, "percentage": 93.75, "elapsed_time": "14:49:52", "remaining_time": "0:59:19"} +{"current_steps": 5090, "total_steps": 5424, "loss": 0.8573, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.6913413173439723e-07, "epoch": 2.81, "percentage": 93.84, "elapsed_time": "14:50:45", "remaining_time": "0:58:27"} +{"current_steps": 5095, "total_steps": 5424, "loss": 0.9533, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.552769795893086e-07, "epoch": 2.82, "percentage": 93.93, "elapsed_time": "14:51:38", "remaining_time": "0:57:34"} +{"current_steps": 5100, "total_steps": 5424, "loss": 0.8693, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.416256809295083e-07, "epoch": 2.82, "percentage": 94.03, "elapsed_time": "14:52:31", "remaining_time": "0:56:42"} +{"current_steps": 5105, "total_steps": 5424, "loss": 0.9531, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.2818035024670963e-07, "epoch": 2.82, "percentage": 94.12, "elapsed_time": "14:53:24", "remaining_time": "0:55:49"} +{"current_steps": 5110, "total_steps": 5424, "loss": 0.9077, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.1494110030519397e-07, "epoch": 2.83, "percentage": 94.21, "elapsed_time": "14:54:17", "remaining_time": "0:54:57"} +{"current_steps": 5115, "total_steps": 5424, "loss": 0.9061, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.019080421408833e-07, "epoch": 2.83, "percentage": 94.3, "elapsed_time": "14:55:10", "remaining_time": "0:54:04"} +{"current_steps": 5120, "total_steps": 5424, "loss": 0.9606, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.8908128506037756e-07, "epoch": 2.83, "percentage": 94.4, "elapsed_time": "14:56:04", "remaining_time": "0:53:12"} +{"current_steps": 5125, "total_steps": 5424, "loss": 0.9335, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.7646093664007456e-07, "epoch": 2.83, "percentage": 94.49, "elapsed_time": "14:56:56", "remaining_time": "0:52:19"} +{"current_steps": 5130, "total_steps": 5424, "loss": 0.9054, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.640471027252346e-07, "epoch": 2.84, "percentage": 94.58, "elapsed_time": "14:57:49", "remaining_time": "0:51:27"} +{"current_steps": 5135, "total_steps": 5424, "loss": 0.8801, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.5183988742910903e-07, "epoch": 2.84, "percentage": 94.67, "elapsed_time": "14:58:42", "remaining_time": "0:50:34"} +{"current_steps": 5140, "total_steps": 5424, "loss": 0.913, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.398393931320687e-07, "epoch": 2.84, "percentage": 94.76, "elapsed_time": "14:59:35", "remaining_time": "0:49:42"} +{"current_steps": 5145, "total_steps": 5424, "loss": 0.904, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.2804572048074357e-07, "epoch": 2.84, "percentage": 94.86, "elapsed_time": "15:00:28", "remaining_time": "0:48:49"} +{"current_steps": 5150, "total_steps": 5424, "loss": 0.9144, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.164589683871705e-07, "epoch": 2.85, "percentage": 94.95, "elapsed_time": "15:01:21", "remaining_time": "0:47:57"} +{"current_steps": 5155, "total_steps": 5424, "loss": 0.9145, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.050792340279718e-07, "epoch": 2.85, "percentage": 95.04, "elapsed_time": "15:02:14", "remaining_time": "0:47:04"} +{"current_steps": 5160, "total_steps": 5424, "loss": 0.957, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.939066128435419e-07, "epoch": 2.85, "percentage": 95.13, "elapsed_time": "15:03:07", "remaining_time": "0:46:12"} +{"current_steps": 5165, "total_steps": 5424, "loss": 0.9196, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.829411985372399e-07, "epoch": 2.86, "percentage": 95.22, "elapsed_time": "15:04:00", "remaining_time": "0:45:19"} +{"current_steps": 5170, "total_steps": 5424, "loss": 0.8893, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.7218308307460916e-07, "epoch": 2.86, "percentage": 95.32, "elapsed_time": "15:04:53", "remaining_time": "0:44:27"} +{"current_steps": 5175, "total_steps": 5424, "loss": 0.9049, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.616323566825979e-07, "epoch": 2.86, "percentage": 95.41, "elapsed_time": "15:05:45", "remaining_time": "0:43:34"} +{"current_steps": 5180, "total_steps": 5424, "loss": 0.8977, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.51289107848815e-07, "epoch": 2.86, "percentage": 95.5, "elapsed_time": "15:06:38", "remaining_time": "0:42:42"} +{"current_steps": 5185, "total_steps": 5424, "loss": 0.9187, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.4115342332078074e-07, "epoch": 2.87, "percentage": 95.59, "elapsed_time": "15:07:31", "remaining_time": "0:41:49"} +{"current_steps": 5190, "total_steps": 5424, "loss": 0.9355, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.312253881051968e-07, "epoch": 2.87, "percentage": 95.69, "elapsed_time": "15:08:24", "remaining_time": "0:40:57"} +{"current_steps": 5195, "total_steps": 5424, "loss": 0.9337, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.2150508546723848e-07, "epoch": 2.87, "percentage": 95.78, "elapsed_time": "15:09:16", "remaining_time": "0:40:04"} +{"current_steps": 5200, "total_steps": 5424, "loss": 0.9011, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.119925969298553e-07, "epoch": 2.88, "percentage": 95.87, "elapsed_time": "15:10:09", "remaining_time": "0:39:12"} +{"current_steps": 5205, "total_steps": 5424, "loss": 0.8987, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.0268800227307982e-07, "epoch": 2.88, "percentage": 95.96, "elapsed_time": "15:11:02", "remaining_time": "0:38:19"} +{"current_steps": 5210, "total_steps": 5424, "loss": 0.9206, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.9359137953337548e-07, "epoch": 2.88, "percentage": 96.05, "elapsed_time": "15:11:55", "remaining_time": "0:37:27"} +{"current_steps": 5215, "total_steps": 5424, "loss": 0.9485, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.8470280500296199e-07, "epoch": 2.88, "percentage": 96.15, "elapsed_time": "15:12:47", "remaining_time": "0:36:34"} +{"current_steps": 5220, "total_steps": 5424, "loss": 0.8902, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.7602235322919102e-07, "epoch": 2.89, "percentage": 96.24, "elapsed_time": "15:13:40", "remaining_time": "0:35:42"} +{"current_steps": 5225, "total_steps": 5424, "loss": 0.9484, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.6755009701391045e-07, "epoch": 2.89, "percentage": 96.33, "elapsed_time": "15:14:33", "remaining_time": "0:34:49"} +{"current_steps": 5230, "total_steps": 5424, "loss": 0.9361, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.592861074128621e-07, "epoch": 2.89, "percentage": 96.42, "elapsed_time": "15:15:25", "remaining_time": "0:33:57"} +{"current_steps": 5235, "total_steps": 5424, "loss": 0.9407, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.5123045373508226e-07, "epoch": 2.89, "percentage": 96.52, "elapsed_time": "15:16:18", "remaining_time": "0:33:04"} +{"current_steps": 5240, "total_steps": 5424, "loss": 0.8792, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.4338320354231605e-07, "epoch": 2.9, "percentage": 96.61, "elapsed_time": "15:17:11", "remaining_time": "0:32:12"} +{"current_steps": 5245, "total_steps": 5424, "loss": 0.9682, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.3574442264846222e-07, "epoch": 2.9, "percentage": 96.7, "elapsed_time": "15:18:04", "remaining_time": "0:31:19"} +{"current_steps": 5250, "total_steps": 5424, "loss": 0.8995, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.2831417511900423e-07, "epoch": 2.9, "percentage": 96.79, "elapsed_time": "15:18:56", "remaining_time": "0:30:27"} +{"current_steps": 5255, "total_steps": 5424, "loss": 0.8877, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.2109252327048849e-07, "epoch": 2.91, "percentage": 96.88, "elapsed_time": "15:19:49", "remaining_time": "0:29:34"} +{"current_steps": 5260, "total_steps": 5424, "loss": 0.8746, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.1407952766999686e-07, "epoch": 2.91, "percentage": 96.98, "elapsed_time": "15:20:42", "remaining_time": "0:28:42"} +{"current_steps": 5265, "total_steps": 5424, "loss": 0.9287, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.0727524713463333e-07, "epoch": 2.91, "percentage": 97.07, "elapsed_time": "15:21:34", "remaining_time": "0:27:49"} +{"current_steps": 5270, "total_steps": 5424, "loss": 0.9045, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.0067973873104097e-07, "epoch": 2.91, "percentage": 97.16, "elapsed_time": "15:22:27", "remaining_time": "0:26:57"} +{"current_steps": 5275, "total_steps": 5424, "loss": 0.942, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.42930577749107e-08, "epoch": 2.92, "percentage": 97.25, "elapsed_time": "15:23:20", "remaining_time": "0:26:04"} +{"current_steps": 5280, "total_steps": 5424, "loss": 0.885, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.811525783052888e-08, "epoch": 2.92, "percentage": 97.35, "elapsed_time": "15:24:12", "remaining_time": "0:25:12"} +{"current_steps": 5285, "total_steps": 5424, "loss": 0.8947, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.214639071031926e-08, "epoch": 2.92, "percentage": 97.44, "elapsed_time": "15:25:05", "remaining_time": "0:24:19"} +{"current_steps": 5290, "total_steps": 5424, "loss": 0.9309, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 7.638650647442125e-08, "epoch": 2.93, "percentage": 97.53, "elapsed_time": "15:25:58", "remaining_time": "0:23:27"} +{"current_steps": 5295, "total_steps": 5424, "loss": 0.896, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 7.083565343024845e-08, "epoch": 2.93, "percentage": 97.62, "elapsed_time": "15:26:50", "remaining_time": "0:22:34"} +{"current_steps": 5300, "total_steps": 5424, "loss": 0.9132, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 6.549387813210572e-08, "epoch": 2.93, "percentage": 97.71, "elapsed_time": "15:27:43", "remaining_time": "0:21:42"} +{"current_steps": 5305, "total_steps": 5424, "loss": 0.9185, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 6.036122538078393e-08, "epoch": 2.93, "percentage": 97.81, "elapsed_time": "15:28:36", "remaining_time": "0:20:49"} +{"current_steps": 5310, "total_steps": 5424, "loss": 0.9285, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 5.543773822319631e-08, "epoch": 2.94, "percentage": 97.9, "elapsed_time": "15:29:28", "remaining_time": "0:19:57"} +{"current_steps": 5315, "total_steps": 5424, "loss": 0.9074, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 5.072345795200384e-08, "epoch": 2.94, "percentage": 97.99, "elapsed_time": "15:30:21", "remaining_time": "0:19:04"} +{"current_steps": 5320, "total_steps": 5424, "loss": 0.8915, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.621842410527655e-08, "epoch": 2.94, "percentage": 98.08, "elapsed_time": "15:31:14", "remaining_time": "0:18:12"} +{"current_steps": 5325, "total_steps": 5424, "loss": 0.8647, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.1922674466166045e-08, "epoch": 2.94, "percentage": 98.17, "elapsed_time": "15:32:07", "remaining_time": "0:17:19"} +{"current_steps": 5330, "total_steps": 5424, "loss": 0.9393, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.783624506257799e-08, "epoch": 2.95, "percentage": 98.27, "elapsed_time": "15:32:59", "remaining_time": "0:16:27"} +{"current_steps": 5335, "total_steps": 5424, "loss": 0.9303, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.395917016688344e-08, "epoch": 2.95, "percentage": 98.36, "elapsed_time": "15:33:52", "remaining_time": "0:15:34"} +{"current_steps": 5340, "total_steps": 5424, "loss": 0.8998, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.029148229561629e-08, "epoch": 2.95, "percentage": 98.45, "elapsed_time": "15:34:45", "remaining_time": "0:14:42"} +{"current_steps": 5345, "total_steps": 5424, "loss": 0.8964, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.6833212209206872e-08, "epoch": 2.96, "percentage": 98.54, "elapsed_time": "15:35:37", "remaining_time": "0:13:49"} +{"current_steps": 5350, "total_steps": 5424, "loss": 0.9528, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.358438891173487e-08, "epoch": 2.96, "percentage": 98.64, "elapsed_time": "15:36:30", "remaining_time": "0:12:57"} +{"current_steps": 5355, "total_steps": 5424, "loss": 0.8977, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.0545039650665675e-08, "epoch": 2.96, "percentage": 98.73, "elapsed_time": "15:37:23", "remaining_time": "0:12:04"} +{"current_steps": 5360, "total_steps": 5424, "loss": 0.9175, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.7715189916636676e-08, "epoch": 2.96, "percentage": 98.82, "elapsed_time": "15:38:15", "remaining_time": "0:11:12"} +{"current_steps": 5365, "total_steps": 5424, "loss": 0.8991, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.5094863443243513e-08, "epoch": 2.97, "percentage": 98.91, "elapsed_time": "15:39:08", "remaining_time": "0:10:19"} +{"current_steps": 5370, "total_steps": 5424, "loss": 0.9203, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.2684082206829151e-08, "epoch": 2.97, "percentage": 99.0, "elapsed_time": "15:40:01", "remaining_time": "0:09:27"} +{"current_steps": 5375, "total_steps": 5424, "loss": 0.9347, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.0482866426311799e-08, "epoch": 2.97, "percentage": 99.1, "elapsed_time": "15:40:53", "remaining_time": "0:08:34"} +{"current_steps": 5380, "total_steps": 5424, "loss": 0.9361, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.491234563010041e-09, "epoch": 2.97, "percentage": 99.19, "elapsed_time": "15:41:46", "remaining_time": "0:07:42"} +{"current_steps": 5385, "total_steps": 5424, "loss": 0.8695, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 6.709203320484636e-09, "epoch": 2.98, "percentage": 99.28, "elapsed_time": "15:42:39", "remaining_time": "0:06:49"} +{"current_steps": 5390, "total_steps": 5424, "loss": 0.981, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 5.13678764441361e-09, "epoch": 2.98, "percentage": 99.37, "elapsed_time": "15:43:31", "remaining_time": "0:05:57"} +{"current_steps": 5395, "total_steps": 5424, "loss": 0.8908, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.774000722439608e-09, "epoch": 2.98, "percentage": 99.47, "elapsed_time": "15:44:24", "remaining_time": "0:05:04"} +{"current_steps": 5400, "total_steps": 5424, "loss": 0.9283, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.6208539840894e-09, "epoch": 2.99, "percentage": 99.56, "elapsed_time": "15:45:17", "remaining_time": "0:04:12"} +{"current_steps": 5405, "total_steps": 5424, "loss": 0.9211, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.6773571006573062e-09, "epoch": 2.99, "percentage": 99.65, "elapsed_time": "15:46:10", "remaining_time": "0:03:19"} +{"current_steps": 5410, "total_steps": 5424, "loss": 0.8897, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.435179851330355e-10, "epoch": 2.99, "percentage": 99.74, "elapsed_time": "15:47:02", "remaining_time": "0:02:27"} +{"current_steps": 5415, "total_steps": 5424, "loss": 0.9343, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.1934279213229346e-10, "epoch": 2.99, "percentage": 99.83, "elapsed_time": "15:47:55", "remaining_time": "0:01:34"} +{"current_steps": 5420, "total_steps": 5424, "loss": 0.931, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.0483591784404834e-10, "epoch": 3.0, "percentage": 99.93, "elapsed_time": "15:48:48", "remaining_time": "0:00:42"} +{"current_steps": 5424, "total_steps": 5424, "loss": null, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": null, "epoch": 3.0, "percentage": 100.0, "elapsed_time": "15:49:30", "remaining_time": "0:00:00"} diff --git a/trainer_state.json b/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..c2c78b900ffe7c9c8fca51287d41812f2e4e65e0 --- /dev/null +++ b/trainer_state.json @@ -0,0 +1,6529 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 2.9991705833563724, + "global_step": 5424, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.9999895164082156e-05, + "loss": 1.6349, + "step": 5 + }, + { + "epoch": 0.01, + "learning_rate": 4.999958065720787e-05, + "loss": 1.6199, + "step": 10 + }, + { + "epoch": 0.01, + "learning_rate": 4.999905648201487e-05, + "loss": 1.4834, + "step": 15 + }, + { + "epoch": 0.01, + "learning_rate": 4.999832264289934e-05, + "loss": 1.3882, + "step": 20 + }, + { + "epoch": 0.01, + "learning_rate": 4.999737914601591e-05, + "loss": 1.3679, + "step": 25 + }, + { + "epoch": 0.02, + "learning_rate": 4.999622599927756e-05, + "loss": 1.2396, + "step": 30 + }, + { + "epoch": 0.02, + "learning_rate": 4.999486321235559e-05, + "loss": 1.321, + "step": 35 + }, + { + "epoch": 0.02, + "learning_rate": 4.9993290796679516e-05, + "loss": 1.2874, + "step": 40 + }, + { + "epoch": 0.02, + "learning_rate": 4.999150876543699e-05, + "loss": 1.2607, + "step": 45 + }, + { + "epoch": 0.03, + "learning_rate": 4.9989517133573694e-05, + "loss": 1.2454, + "step": 50 + }, + { + "epoch": 0.03, + "learning_rate": 4.9987315917793174e-05, + "loss": 1.2799, + "step": 55 + }, + { + "epoch": 0.03, + "learning_rate": 4.998490513655676e-05, + "loss": 1.2575, + "step": 60 + }, + { + "epoch": 0.04, + "learning_rate": 4.998228481008337e-05, + "loss": 1.2404, + "step": 65 + }, + { + "epoch": 0.04, + "learning_rate": 4.997945496034934e-05, + "loss": 1.2219, + "step": 70 + }, + { + "epoch": 0.04, + "learning_rate": 4.9976415611088267e-05, + "loss": 1.2241, + "step": 75 + }, + { + "epoch": 0.04, + "learning_rate": 4.997316678779079e-05, + "loss": 1.1716, + "step": 80 + }, + { + "epoch": 0.05, + "learning_rate": 4.996970851770438e-05, + "loss": 1.1883, + "step": 85 + }, + { + "epoch": 0.05, + "learning_rate": 4.9966040829833115e-05, + "loss": 1.205, + "step": 90 + }, + { + "epoch": 0.05, + "learning_rate": 4.9962163754937426e-05, + "loss": 1.1246, + "step": 95 + }, + { + "epoch": 0.06, + "learning_rate": 4.995807732553384e-05, + "loss": 1.1636, + "step": 100 + }, + { + "epoch": 0.06, + "learning_rate": 4.9953781575894723e-05, + "loss": 1.158, + "step": 105 + }, + { + "epoch": 0.06, + "learning_rate": 4.9949276542048e-05, + "loss": 1.1477, + "step": 110 + }, + { + "epoch": 0.06, + "learning_rate": 4.9944562261776805e-05, + "loss": 1.1678, + "step": 115 + }, + { + "epoch": 0.07, + "learning_rate": 4.9939638774619216e-05, + "loss": 1.1501, + "step": 120 + }, + { + "epoch": 0.07, + "learning_rate": 4.99345061218679e-05, + "loss": 1.1955, + "step": 125 + }, + { + "epoch": 0.07, + "learning_rate": 4.9929164346569756e-05, + "loss": 1.1724, + "step": 130 + }, + { + "epoch": 0.07, + "learning_rate": 4.9923613493525576e-05, + "loss": 1.177, + "step": 135 + }, + { + "epoch": 0.08, + "learning_rate": 4.991785360928968e-05, + "loss": 1.1418, + "step": 140 + }, + { + "epoch": 0.08, + "learning_rate": 4.991188474216947e-05, + "loss": 1.1898, + "step": 145 + }, + { + "epoch": 0.08, + "learning_rate": 4.9905706942225094e-05, + "loss": 1.1479, + "step": 150 + }, + { + "epoch": 0.09, + "learning_rate": 4.9899320261268966e-05, + "loss": 1.1356, + "step": 155 + }, + { + "epoch": 0.09, + "learning_rate": 4.989272475286537e-05, + "loss": 1.1397, + "step": 160 + }, + { + "epoch": 0.09, + "learning_rate": 4.9885920472330004e-05, + "loss": 1.1215, + "step": 165 + }, + { + "epoch": 0.09, + "learning_rate": 4.9878907476729516e-05, + "loss": 1.167, + "step": 170 + }, + { + "epoch": 0.1, + "learning_rate": 4.9871685824881e-05, + "loss": 1.1219, + "step": 175 + }, + { + "epoch": 0.1, + "learning_rate": 4.9864255577351534e-05, + "loss": 1.0835, + "step": 180 + }, + { + "epoch": 0.1, + "learning_rate": 4.985661679645769e-05, + "loss": 1.0721, + "step": 185 + }, + { + "epoch": 0.11, + "learning_rate": 4.9848769546264915e-05, + "loss": 1.0692, + "step": 190 + }, + { + "epoch": 0.11, + "learning_rate": 4.9840713892587146e-05, + "loss": 1.0488, + "step": 195 + }, + { + "epoch": 0.11, + "learning_rate": 4.983244990298609e-05, + "loss": 1.1285, + "step": 200 + }, + { + "epoch": 0.11, + "learning_rate": 4.982397764677081e-05, + "loss": 1.0832, + "step": 205 + }, + { + "epoch": 0.12, + "learning_rate": 4.981529719499704e-05, + "loss": 1.0652, + "step": 210 + }, + { + "epoch": 0.12, + "learning_rate": 4.980640862046663e-05, + "loss": 1.1043, + "step": 215 + }, + { + "epoch": 0.12, + "learning_rate": 4.979731199772693e-05, + "loss": 1.112, + "step": 220 + }, + { + "epoch": 0.12, + "learning_rate": 4.9788007403070146e-05, + "loss": 1.1029, + "step": 225 + }, + { + "epoch": 0.13, + "learning_rate": 4.977849491453277e-05, + "loss": 1.0869, + "step": 230 + }, + { + "epoch": 0.13, + "learning_rate": 4.976877461189481e-05, + "loss": 1.0843, + "step": 235 + }, + { + "epoch": 0.13, + "learning_rate": 4.975884657667922e-05, + "loss": 1.0789, + "step": 240 + }, + { + "epoch": 0.14, + "learning_rate": 4.974871089215118e-05, + "loss": 1.0449, + "step": 245 + }, + { + "epoch": 0.14, + "learning_rate": 4.9738367643317405e-05, + "loss": 1.1053, + "step": 250 + }, + { + "epoch": 0.14, + "learning_rate": 4.9727816916925395e-05, + "loss": 1.0651, + "step": 255 + }, + { + "epoch": 0.14, + "learning_rate": 4.971705880146276e-05, + "loss": 1.0828, + "step": 260 + }, + { + "epoch": 0.15, + "learning_rate": 4.970609338715646e-05, + "loss": 1.0932, + "step": 265 + }, + { + "epoch": 0.15, + "learning_rate": 4.969492076597203e-05, + "loss": 1.0648, + "step": 270 + }, + { + "epoch": 0.15, + "learning_rate": 4.968354103161283e-05, + "loss": 1.0948, + "step": 275 + }, + { + "epoch": 0.15, + "learning_rate": 4.967195427951926e-05, + "loss": 1.0721, + "step": 280 + }, + { + "epoch": 0.16, + "learning_rate": 4.9660160606867936e-05, + "loss": 1.124, + "step": 285 + }, + { + "epoch": 0.16, + "learning_rate": 4.9648160112570896e-05, + "loss": 1.0963, + "step": 290 + }, + { + "epoch": 0.16, + "learning_rate": 4.9635952897274773e-05, + "loss": 1.1078, + "step": 295 + }, + { + "epoch": 0.17, + "learning_rate": 4.9623539063359925e-05, + "loss": 1.1059, + "step": 300 + }, + { + "epoch": 0.17, + "learning_rate": 4.961091871493962e-05, + "loss": 1.1032, + "step": 305 + }, + { + "epoch": 0.17, + "learning_rate": 4.959809195785912e-05, + "loss": 1.0595, + "step": 310 + }, + { + "epoch": 0.17, + "learning_rate": 4.958505889969481e-05, + "loss": 1.1096, + "step": 315 + }, + { + "epoch": 0.18, + "learning_rate": 4.957181964975329e-05, + "loss": 1.0589, + "step": 320 + }, + { + "epoch": 0.18, + "learning_rate": 4.955837431907049e-05, + "loss": 1.0608, + "step": 325 + }, + { + "epoch": 0.18, + "learning_rate": 4.954472302041069e-05, + "loss": 1.0819, + "step": 330 + }, + { + "epoch": 0.19, + "learning_rate": 4.9530865868265605e-05, + "loss": 1.0759, + "step": 335 + }, + { + "epoch": 0.19, + "learning_rate": 4.951680297885342e-05, + "loss": 1.0515, + "step": 340 + }, + { + "epoch": 0.19, + "learning_rate": 4.950253447011779e-05, + "loss": 1.0371, + "step": 345 + }, + { + "epoch": 0.19, + "learning_rate": 4.948806046172691e-05, + "loss": 1.0619, + "step": 350 + }, + { + "epoch": 0.2, + "learning_rate": 4.947338107507245e-05, + "loss": 1.0757, + "step": 355 + }, + { + "epoch": 0.2, + "learning_rate": 4.945849643326857e-05, + "loss": 1.0686, + "step": 360 + }, + { + "epoch": 0.2, + "learning_rate": 4.9443406661150874e-05, + "loss": 1.0809, + "step": 365 + }, + { + "epoch": 0.2, + "learning_rate": 4.942811188527537e-05, + "loss": 1.0704, + "step": 370 + }, + { + "epoch": 0.21, + "learning_rate": 4.941261223391742e-05, + "loss": 1.0655, + "step": 375 + }, + { + "epoch": 0.21, + "learning_rate": 4.939690783707063e-05, + "loss": 1.1182, + "step": 380 + }, + { + "epoch": 0.21, + "learning_rate": 4.938099882644578e-05, + "loss": 1.081, + "step": 385 + }, + { + "epoch": 0.22, + "learning_rate": 4.9364885335469734e-05, + "loss": 1.0792, + "step": 390 + }, + { + "epoch": 0.22, + "learning_rate": 4.93485674992843e-05, + "loss": 1.0244, + "step": 395 + }, + { + "epoch": 0.22, + "learning_rate": 4.933204545474511e-05, + "loss": 1.0531, + "step": 400 + }, + { + "epoch": 0.22, + "learning_rate": 4.9315319340420465e-05, + "loss": 1.048, + "step": 405 + }, + { + "epoch": 0.23, + "learning_rate": 4.929838929659015e-05, + "loss": 1.04, + "step": 410 + }, + { + "epoch": 0.23, + "learning_rate": 4.9281255465244314e-05, + "loss": 1.0686, + "step": 415 + }, + { + "epoch": 0.23, + "learning_rate": 4.926391799008223e-05, + "loss": 1.0933, + "step": 420 + }, + { + "epoch": 0.24, + "learning_rate": 4.924637701651111e-05, + "loss": 1.0313, + "step": 425 + }, + { + "epoch": 0.24, + "learning_rate": 4.9228632691644874e-05, + "loss": 1.0811, + "step": 430 + }, + { + "epoch": 0.24, + "learning_rate": 4.921068516430293e-05, + "loss": 1.0617, + "step": 435 + }, + { + "epoch": 0.24, + "learning_rate": 4.919253458500892e-05, + "loss": 1.0482, + "step": 440 + }, + { + "epoch": 0.25, + "learning_rate": 4.9174181105989445e-05, + "loss": 1.0681, + "step": 445 + }, + { + "epoch": 0.25, + "learning_rate": 4.9155624881172834e-05, + "loss": 1.0476, + "step": 450 + }, + { + "epoch": 0.25, + "learning_rate": 4.913686606618777e-05, + "loss": 1.0463, + "step": 455 + }, + { + "epoch": 0.25, + "learning_rate": 4.911790481836208e-05, + "loss": 1.0213, + "step": 460 + }, + { + "epoch": 0.26, + "learning_rate": 4.909874129672133e-05, + "loss": 0.9855, + "step": 465 + }, + { + "epoch": 0.26, + "learning_rate": 4.907937566198757e-05, + "loss": 1.045, + "step": 470 + }, + { + "epoch": 0.26, + "learning_rate": 4.9059808076577914e-05, + "loss": 1.0442, + "step": 475 + }, + { + "epoch": 0.27, + "learning_rate": 4.904003870460323e-05, + "loss": 1.0354, + "step": 480 + }, + { + "epoch": 0.27, + "learning_rate": 4.9020067711866735e-05, + "loss": 1.0331, + "step": 485 + }, + { + "epoch": 0.27, + "learning_rate": 4.899989526586261e-05, + "loss": 1.0585, + "step": 490 + }, + { + "epoch": 0.27, + "learning_rate": 4.8979521535774636e-05, + "loss": 1.0223, + "step": 495 + }, + { + "epoch": 0.28, + "learning_rate": 4.895894669247468e-05, + "loss": 1.0118, + "step": 500 + }, + { + "epoch": 0.28, + "learning_rate": 4.8938170908521356e-05, + "loss": 1.0508, + "step": 505 + }, + { + "epoch": 0.28, + "learning_rate": 4.8917194358158534e-05, + "loss": 1.0656, + "step": 510 + }, + { + "epoch": 0.28, + "learning_rate": 4.8896017217313886e-05, + "loss": 1.0655, + "step": 515 + }, + { + "epoch": 0.29, + "learning_rate": 4.887463966359741e-05, + "loss": 1.0833, + "step": 520 + }, + { + "epoch": 0.29, + "learning_rate": 4.8853061876299956e-05, + "loss": 1.068, + "step": 525 + }, + { + "epoch": 0.29, + "learning_rate": 4.8831284036391684e-05, + "loss": 1.0487, + "step": 530 + }, + { + "epoch": 0.3, + "learning_rate": 4.880930632652058e-05, + "loss": 1.059, + "step": 535 + }, + { + "epoch": 0.3, + "learning_rate": 4.878712893101092e-05, + "loss": 1.0408, + "step": 540 + }, + { + "epoch": 0.3, + "learning_rate": 4.876475203586171e-05, + "loss": 0.9976, + "step": 545 + }, + { + "epoch": 0.3, + "learning_rate": 4.874217582874514e-05, + "loss": 1.0358, + "step": 550 + }, + { + "epoch": 0.31, + "learning_rate": 4.8719400499005e-05, + "loss": 1.0073, + "step": 555 + }, + { + "epoch": 0.31, + "learning_rate": 4.869642623765509e-05, + "loss": 1.0742, + "step": 560 + }, + { + "epoch": 0.31, + "learning_rate": 4.867325323737765e-05, + "loss": 1.0463, + "step": 565 + }, + { + "epoch": 0.32, + "learning_rate": 4.864988169252168e-05, + "loss": 1.0242, + "step": 570 + }, + { + "epoch": 0.32, + "learning_rate": 4.8626311799101375e-05, + "loss": 1.0773, + "step": 575 + }, + { + "epoch": 0.32, + "learning_rate": 4.860254375479446e-05, + "loss": 1.0371, + "step": 580 + }, + { + "epoch": 0.32, + "learning_rate": 4.8578577758940504e-05, + "loss": 1.0399, + "step": 585 + }, + { + "epoch": 0.33, + "learning_rate": 4.855441401253928e-05, + "loss": 1.0411, + "step": 590 + }, + { + "epoch": 0.33, + "learning_rate": 4.8530052718249076e-05, + "loss": 1.0476, + "step": 595 + }, + { + "epoch": 0.33, + "learning_rate": 4.850549408038498e-05, + "loss": 1.0075, + "step": 600 + }, + { + "epoch": 0.33, + "learning_rate": 4.848073830491717e-05, + "loss": 1.0045, + "step": 605 + }, + { + "epoch": 0.34, + "learning_rate": 4.845578559946923e-05, + "loss": 1.0368, + "step": 610 + }, + { + "epoch": 0.34, + "learning_rate": 4.8430636173316306e-05, + "loss": 1.0156, + "step": 615 + }, + { + "epoch": 0.34, + "learning_rate": 4.840529023738348e-05, + "loss": 1.0334, + "step": 620 + }, + { + "epoch": 0.35, + "learning_rate": 4.837974800424389e-05, + "loss": 0.9994, + "step": 625 + }, + { + "epoch": 0.35, + "learning_rate": 4.8354009688117026e-05, + "loss": 1.0467, + "step": 630 + }, + { + "epoch": 0.35, + "learning_rate": 4.8328075504866874e-05, + "loss": 0.9968, + "step": 635 + }, + { + "epoch": 0.35, + "learning_rate": 4.8301945672000164e-05, + "loss": 1.041, + "step": 640 + }, + { + "epoch": 0.36, + "learning_rate": 4.8275620408664487e-05, + "loss": 0.9811, + "step": 645 + }, + { + "epoch": 0.36, + "learning_rate": 4.8249099935646494e-05, + "loss": 1.0235, + "step": 650 + }, + { + "epoch": 0.36, + "learning_rate": 4.822238447537003e-05, + "loss": 1.0152, + "step": 655 + }, + { + "epoch": 0.36, + "learning_rate": 4.819547425189429e-05, + "loss": 1.038, + "step": 660 + }, + { + "epoch": 0.37, + "learning_rate": 4.81683694909119e-05, + "loss": 1.0387, + "step": 665 + }, + { + "epoch": 0.37, + "learning_rate": 4.814107041974707e-05, + "loss": 1.019, + "step": 670 + }, + { + "epoch": 0.37, + "learning_rate": 4.811357726735366e-05, + "loss": 1.0403, + "step": 675 + }, + { + "epoch": 0.38, + "learning_rate": 4.808589026431324e-05, + "loss": 1.0828, + "step": 680 + }, + { + "epoch": 0.38, + "learning_rate": 4.805800964283322e-05, + "loss": 1.0128, + "step": 685 + }, + { + "epoch": 0.38, + "learning_rate": 4.802993563674483e-05, + "loss": 1.0502, + "step": 690 + }, + { + "epoch": 0.38, + "learning_rate": 4.80016684815012e-05, + "loss": 1.0169, + "step": 695 + }, + { + "epoch": 0.39, + "learning_rate": 4.7973208414175406e-05, + "loss": 1.0316, + "step": 700 + }, + { + "epoch": 0.39, + "learning_rate": 4.794455567345842e-05, + "loss": 1.0263, + "step": 705 + }, + { + "epoch": 0.39, + "learning_rate": 4.791571049965714e-05, + "loss": 1.0801, + "step": 710 + }, + { + "epoch": 0.4, + "learning_rate": 4.7886673134692404e-05, + "loss": 1.0575, + "step": 715 + }, + { + "epoch": 0.4, + "learning_rate": 4.7857443822096905e-05, + "loss": 0.9767, + "step": 720 + }, + { + "epoch": 0.4, + "learning_rate": 4.782802280701319e-05, + "loss": 0.9666, + "step": 725 + }, + { + "epoch": 0.4, + "learning_rate": 4.77984103361916e-05, + "loss": 0.9983, + "step": 730 + }, + { + "epoch": 0.41, + "learning_rate": 4.776860665798816e-05, + "loss": 1.0219, + "step": 735 + }, + { + "epoch": 0.41, + "learning_rate": 4.773861202236257e-05, + "loss": 0.9963, + "step": 740 + }, + { + "epoch": 0.41, + "learning_rate": 4.770842668087602e-05, + "loss": 1.0219, + "step": 745 + }, + { + "epoch": 0.41, + "learning_rate": 4.767805088668916e-05, + "loss": 1.0029, + "step": 750 + }, + { + "epoch": 0.42, + "learning_rate": 4.7647484894559936e-05, + "loss": 1.0077, + "step": 755 + }, + { + "epoch": 0.42, + "learning_rate": 4.7616728960841444e-05, + "loss": 0.9908, + "step": 760 + }, + { + "epoch": 0.42, + "learning_rate": 4.758578334347981e-05, + "loss": 1.0092, + "step": 765 + }, + { + "epoch": 0.43, + "learning_rate": 4.7554648302012015e-05, + "loss": 0.9874, + "step": 770 + }, + { + "epoch": 0.43, + "learning_rate": 4.7523324097563706e-05, + "loss": 1.0185, + "step": 775 + }, + { + "epoch": 0.43, + "learning_rate": 4.749181099284703e-05, + "loss": 1.0533, + "step": 780 + }, + { + "epoch": 0.43, + "learning_rate": 4.746010925215839e-05, + "loss": 1.0083, + "step": 785 + }, + { + "epoch": 0.44, + "learning_rate": 4.74282191413763e-05, + "loss": 1.0191, + "step": 790 + }, + { + "epoch": 0.44, + "learning_rate": 4.7396140927959045e-05, + "loss": 0.9786, + "step": 795 + }, + { + "epoch": 0.44, + "learning_rate": 4.7363874880942574e-05, + "loss": 1.0197, + "step": 800 + }, + { + "epoch": 0.45, + "learning_rate": 4.733142127093813e-05, + "loss": 1.0471, + "step": 805 + }, + { + "epoch": 0.45, + "learning_rate": 4.7298780370130014e-05, + "loss": 1.0134, + "step": 810 + }, + { + "epoch": 0.45, + "learning_rate": 4.726595245227336e-05, + "loss": 0.9801, + "step": 815 + }, + { + "epoch": 0.45, + "learning_rate": 4.723293779269173e-05, + "loss": 0.9944, + "step": 820 + }, + { + "epoch": 0.46, + "learning_rate": 4.7199736668274924e-05, + "loss": 1.0358, + "step": 825 + }, + { + "epoch": 0.46, + "learning_rate": 4.716634935747655e-05, + "loss": 0.9907, + "step": 830 + }, + { + "epoch": 0.46, + "learning_rate": 4.713277614031177e-05, + "loss": 1.0698, + "step": 835 + }, + { + "epoch": 0.46, + "learning_rate": 4.70990172983549e-05, + "loss": 1.0466, + "step": 840 + }, + { + "epoch": 0.47, + "learning_rate": 4.706507311473707e-05, + "loss": 0.9489, + "step": 845 + }, + { + "epoch": 0.47, + "learning_rate": 4.703094387414385e-05, + "loss": 0.9936, + "step": 850 + }, + { + "epoch": 0.47, + "learning_rate": 4.699662986281288e-05, + "loss": 0.9672, + "step": 855 + }, + { + "epoch": 0.48, + "learning_rate": 4.696213136853141e-05, + "loss": 1.0164, + "step": 860 + }, + { + "epoch": 0.48, + "learning_rate": 4.6927448680633954e-05, + "loss": 0.9817, + "step": 865 + }, + { + "epoch": 0.48, + "learning_rate": 4.689258208999983e-05, + "loss": 1.028, + "step": 870 + }, + { + "epoch": 0.48, + "learning_rate": 4.6857531889050716e-05, + "loss": 1.0239, + "step": 875 + }, + { + "epoch": 0.49, + "learning_rate": 4.682229837174821e-05, + "loss": 1.0378, + "step": 880 + }, + { + "epoch": 0.49, + "learning_rate": 4.678688183359135e-05, + "loss": 0.9908, + "step": 885 + }, + { + "epoch": 0.49, + "learning_rate": 4.675128257161418e-05, + "loss": 0.9384, + "step": 890 + }, + { + "epoch": 0.49, + "learning_rate": 4.671550088438319e-05, + "loss": 0.9481, + "step": 895 + }, + { + "epoch": 0.5, + "learning_rate": 4.6679537071994874e-05, + "loss": 0.9541, + "step": 900 + }, + { + "epoch": 0.5, + "learning_rate": 4.6643391436073165e-05, + "loss": 0.9787, + "step": 905 + }, + { + "epoch": 0.5, + "learning_rate": 4.660706427976693e-05, + "loss": 0.9897, + "step": 910 + }, + { + "epoch": 0.51, + "learning_rate": 4.657055590774745e-05, + "loss": 0.9947, + "step": 915 + }, + { + "epoch": 0.51, + "learning_rate": 4.6533866626205805e-05, + "loss": 0.9614, + "step": 920 + }, + { + "epoch": 0.51, + "learning_rate": 4.649699674285036e-05, + "loss": 1.0323, + "step": 925 + }, + { + "epoch": 0.51, + "learning_rate": 4.645994656690417e-05, + "loss": 1.0345, + "step": 930 + }, + { + "epoch": 0.52, + "learning_rate": 4.642271640910235e-05, + "loss": 1.0432, + "step": 935 + }, + { + "epoch": 0.52, + "learning_rate": 4.638530658168954e-05, + "loss": 1.0102, + "step": 940 + }, + { + "epoch": 0.52, + "learning_rate": 4.6347717398417203e-05, + "loss": 1.0361, + "step": 945 + }, + { + "epoch": 0.53, + "learning_rate": 4.6309949174541096e-05, + "loss": 0.9818, + "step": 950 + }, + { + "epoch": 0.53, + "learning_rate": 4.627200222681851e-05, + "loss": 1.0293, + "step": 955 + }, + { + "epoch": 0.53, + "learning_rate": 4.6233876873505694e-05, + "loss": 1.0125, + "step": 960 + }, + { + "epoch": 0.53, + "learning_rate": 4.619557343435516e-05, + "loss": 1.0039, + "step": 965 + }, + { + "epoch": 0.54, + "learning_rate": 4.615709223061302e-05, + "loss": 1.0146, + "step": 970 + }, + { + "epoch": 0.54, + "learning_rate": 4.611843358501624e-05, + "loss": 1.0352, + "step": 975 + }, + { + "epoch": 0.54, + "learning_rate": 4.6079597821789993e-05, + "loss": 1.0405, + "step": 980 + }, + { + "epoch": 0.54, + "learning_rate": 4.604058526664491e-05, + "loss": 0.9834, + "step": 985 + }, + { + "epoch": 0.55, + "learning_rate": 4.600139624677436e-05, + "loss": 1.0252, + "step": 990 + }, + { + "epoch": 0.55, + "learning_rate": 4.596203109085168e-05, + "loss": 1.0516, + "step": 995 + }, + { + "epoch": 0.55, + "learning_rate": 4.5922490129027464e-05, + "loss": 0.9762, + "step": 1000 + }, + { + "epoch": 0.56, + "learning_rate": 4.588277369292674e-05, + "loss": 0.9801, + "step": 1005 + }, + { + "epoch": 0.56, + "learning_rate": 4.5842882115646234e-05, + "loss": 1.0127, + "step": 1010 + }, + { + "epoch": 0.56, + "learning_rate": 4.580281573175157e-05, + "loss": 1.0415, + "step": 1015 + }, + { + "epoch": 0.56, + "learning_rate": 4.576257487727442e-05, + "loss": 0.9974, + "step": 1020 + }, + { + "epoch": 0.57, + "learning_rate": 4.572215988970974e-05, + "loss": 1.0097, + "step": 1025 + }, + { + "epoch": 0.57, + "learning_rate": 4.568157110801293e-05, + "loss": 1.0004, + "step": 1030 + }, + { + "epoch": 0.57, + "learning_rate": 4.5640808872596944e-05, + "loss": 0.9932, + "step": 1035 + }, + { + "epoch": 0.58, + "learning_rate": 4.5599873525329505e-05, + "loss": 0.9437, + "step": 1040 + }, + { + "epoch": 0.58, + "learning_rate": 4.555876540953019e-05, + "loss": 1.0004, + "step": 1045 + }, + { + "epoch": 0.58, + "learning_rate": 4.551748486996755e-05, + "loss": 0.9743, + "step": 1050 + }, + { + "epoch": 0.58, + "learning_rate": 4.547603225285626e-05, + "loss": 1.0303, + "step": 1055 + }, + { + "epoch": 0.59, + "learning_rate": 4.543440790585417e-05, + "loss": 1.0225, + "step": 1060 + }, + { + "epoch": 0.59, + "learning_rate": 4.539261217805939e-05, + "loss": 1.0102, + "step": 1065 + }, + { + "epoch": 0.59, + "learning_rate": 4.535064542000743e-05, + "loss": 1.0087, + "step": 1070 + }, + { + "epoch": 0.59, + "learning_rate": 4.5308507983668165e-05, + "loss": 0.952, + "step": 1075 + }, + { + "epoch": 0.6, + "learning_rate": 4.526620022244293e-05, + "loss": 0.9767, + "step": 1080 + }, + { + "epoch": 0.6, + "learning_rate": 4.522372249116158e-05, + "loss": 1.0049, + "step": 1085 + }, + { + "epoch": 0.6, + "learning_rate": 4.5181075146079456e-05, + "loss": 1.0397, + "step": 1090 + }, + { + "epoch": 0.61, + "learning_rate": 4.5138258544874455e-05, + "loss": 1.0167, + "step": 1095 + }, + { + "epoch": 0.61, + "learning_rate": 4.5095273046643985e-05, + "loss": 0.9966, + "step": 1100 + }, + { + "epoch": 0.61, + "learning_rate": 4.5052119011901986e-05, + "loss": 0.983, + "step": 1105 + }, + { + "epoch": 0.61, + "learning_rate": 4.500879680257587e-05, + "loss": 1.0301, + "step": 1110 + }, + { + "epoch": 0.62, + "learning_rate": 4.4965306782003535e-05, + "loss": 1.0371, + "step": 1115 + }, + { + "epoch": 0.62, + "learning_rate": 4.492164931493028e-05, + "loss": 1.0083, + "step": 1120 + }, + { + "epoch": 0.62, + "learning_rate": 4.487782476750575e-05, + "loss": 0.9728, + "step": 1125 + }, + { + "epoch": 0.62, + "learning_rate": 4.4833833507280884e-05, + "loss": 0.9892, + "step": 1130 + }, + { + "epoch": 0.63, + "learning_rate": 4.4789675903204805e-05, + "loss": 0.9847, + "step": 1135 + }, + { + "epoch": 0.63, + "learning_rate": 4.474535232562176e-05, + "loss": 1.0545, + "step": 1140 + }, + { + "epoch": 0.63, + "learning_rate": 4.470086314626797e-05, + "loss": 1.0204, + "step": 1145 + }, + { + "epoch": 0.64, + "learning_rate": 4.465620873826856e-05, + "loss": 0.9648, + "step": 1150 + }, + { + "epoch": 0.64, + "learning_rate": 4.46113894761344e-05, + "loss": 0.9545, + "step": 1155 + }, + { + "epoch": 0.64, + "learning_rate": 4.456640573575896e-05, + "loss": 1.0256, + "step": 1160 + }, + { + "epoch": 0.64, + "learning_rate": 4.4521257894415183e-05, + "loss": 0.99, + "step": 1165 + }, + { + "epoch": 0.65, + "learning_rate": 4.44759463307523e-05, + "loss": 0.9906, + "step": 1170 + }, + { + "epoch": 0.65, + "learning_rate": 4.443047142479266e-05, + "loss": 0.9876, + "step": 1175 + }, + { + "epoch": 0.65, + "learning_rate": 4.4384833557928553e-05, + "loss": 1.0495, + "step": 1180 + }, + { + "epoch": 0.66, + "learning_rate": 4.4339033112918966e-05, + "loss": 0.9869, + "step": 1185 + }, + { + "epoch": 0.66, + "learning_rate": 4.4293070473886456e-05, + "loss": 1.0299, + "step": 1190 + }, + { + "epoch": 0.66, + "learning_rate": 4.424694602631385e-05, + "loss": 1.0073, + "step": 1195 + }, + { + "epoch": 0.66, + "learning_rate": 4.420066015704105e-05, + "loss": 1.0023, + "step": 1200 + }, + { + "epoch": 0.67, + "learning_rate": 4.41542132542618e-05, + "loss": 0.9462, + "step": 1205 + }, + { + "epoch": 0.67, + "learning_rate": 4.410760570752037e-05, + "loss": 1.0116, + "step": 1210 + }, + { + "epoch": 0.67, + "learning_rate": 4.4060837907708375e-05, + "loss": 0.9652, + "step": 1215 + }, + { + "epoch": 0.67, + "learning_rate": 4.401391024706142e-05, + "loss": 1.0411, + "step": 1220 + }, + { + "epoch": 0.68, + "learning_rate": 4.396682311915586e-05, + "loss": 0.9691, + "step": 1225 + }, + { + "epoch": 0.68, + "learning_rate": 4.3919576918905495e-05, + "loss": 0.961, + "step": 1230 + }, + { + "epoch": 0.68, + "learning_rate": 4.387217204255819e-05, + "loss": 0.9602, + "step": 1235 + }, + { + "epoch": 0.69, + "learning_rate": 4.3824608887692666e-05, + "loss": 1.0197, + "step": 1240 + }, + { + "epoch": 0.69, + "learning_rate": 4.377688785321507e-05, + "loss": 0.9982, + "step": 1245 + }, + { + "epoch": 0.69, + "learning_rate": 4.372900933935569e-05, + "loss": 0.986, + "step": 1250 + }, + { + "epoch": 0.69, + "learning_rate": 4.368097374766556e-05, + "loss": 0.9744, + "step": 1255 + }, + { + "epoch": 0.7, + "learning_rate": 4.3632781481013105e-05, + "loss": 1.0078, + "step": 1260 + }, + { + "epoch": 0.7, + "learning_rate": 4.358443294358077e-05, + "loss": 0.9865, + "step": 1265 + }, + { + "epoch": 0.7, + "learning_rate": 4.35359285408616e-05, + "loss": 0.976, + "step": 1270 + }, + { + "epoch": 0.71, + "learning_rate": 4.348726867965591e-05, + "loss": 1.0141, + "step": 1275 + }, + { + "epoch": 0.71, + "learning_rate": 4.343845376806777e-05, + "loss": 0.952, + "step": 1280 + }, + { + "epoch": 0.71, + "learning_rate": 4.338948421550169e-05, + "loss": 1.0239, + "step": 1285 + }, + { + "epoch": 0.71, + "learning_rate": 4.334036043265909e-05, + "loss": 0.994, + "step": 1290 + }, + { + "epoch": 0.72, + "learning_rate": 4.329108283153492e-05, + "loss": 1.0505, + "step": 1295 + }, + { + "epoch": 0.72, + "learning_rate": 4.3241651825414195e-05, + "loss": 0.9919, + "step": 1300 + }, + { + "epoch": 0.72, + "learning_rate": 4.31920678288685e-05, + "loss": 1.0169, + "step": 1305 + }, + { + "epoch": 0.72, + "learning_rate": 4.3142331257752546e-05, + "loss": 0.9938, + "step": 1310 + }, + { + "epoch": 0.73, + "learning_rate": 4.309244252920064e-05, + "loss": 0.92, + "step": 1315 + }, + { + "epoch": 0.73, + "learning_rate": 4.304240206162326e-05, + "loss": 0.9537, + "step": 1320 + }, + { + "epoch": 0.73, + "learning_rate": 4.299221027470345e-05, + "loss": 0.996, + "step": 1325 + }, + { + "epoch": 0.74, + "learning_rate": 4.294186758939339e-05, + "loss": 0.9652, + "step": 1330 + }, + { + "epoch": 0.74, + "learning_rate": 4.2891374427910795e-05, + "loss": 1.0138, + "step": 1335 + }, + { + "epoch": 0.74, + "learning_rate": 4.284073121373544e-05, + "loss": 0.9267, + "step": 1340 + }, + { + "epoch": 0.74, + "learning_rate": 4.278993837160553e-05, + "loss": 0.9899, + "step": 1345 + }, + { + "epoch": 0.75, + "learning_rate": 4.273899632751422e-05, + "loss": 0.963, + "step": 1350 + }, + { + "epoch": 0.75, + "learning_rate": 4.2687905508705974e-05, + "loss": 1.0066, + "step": 1355 + }, + { + "epoch": 0.75, + "learning_rate": 4.263666634367303e-05, + "loss": 0.9939, + "step": 1360 + }, + { + "epoch": 0.75, + "learning_rate": 4.258527926215178e-05, + "loss": 0.9667, + "step": 1365 + }, + { + "epoch": 0.76, + "learning_rate": 4.253374469511917e-05, + "loss": 0.9999, + "step": 1370 + }, + { + "epoch": 0.76, + "learning_rate": 4.248206307478909e-05, + "loss": 0.9738, + "step": 1375 + }, + { + "epoch": 0.76, + "learning_rate": 4.243023483460875e-05, + "loss": 0.9829, + "step": 1380 + }, + { + "epoch": 0.77, + "learning_rate": 4.237826040925503e-05, + "loss": 1.0107, + "step": 1385 + }, + { + "epoch": 0.77, + "learning_rate": 4.232614023463088e-05, + "loss": 0.9992, + "step": 1390 + }, + { + "epoch": 0.77, + "learning_rate": 4.227387474786159e-05, + "loss": 0.9566, + "step": 1395 + }, + { + "epoch": 0.77, + "learning_rate": 4.222146438729119e-05, + "loss": 0.9722, + "step": 1400 + }, + { + "epoch": 0.78, + "learning_rate": 4.216890959247873e-05, + "loss": 0.988, + "step": 1405 + }, + { + "epoch": 0.78, + "learning_rate": 4.211621080419463e-05, + "loss": 1.0103, + "step": 1410 + }, + { + "epoch": 0.78, + "learning_rate": 4.206336846441695e-05, + "loss": 0.9805, + "step": 1415 + }, + { + "epoch": 0.79, + "learning_rate": 4.201038301632772e-05, + "loss": 1.0177, + "step": 1420 + }, + { + "epoch": 0.79, + "learning_rate": 4.195725490430917e-05, + "loss": 0.9938, + "step": 1425 + }, + { + "epoch": 0.79, + "learning_rate": 4.190398457394007e-05, + "loss": 1.0276, + "step": 1430 + }, + { + "epoch": 0.79, + "learning_rate": 4.1850572471991924e-05, + "loss": 0.9909, + "step": 1435 + }, + { + "epoch": 0.8, + "learning_rate": 4.1797019046425264e-05, + "loss": 0.9794, + "step": 1440 + }, + { + "epoch": 0.8, + "learning_rate": 4.1743324746385914e-05, + "loss": 1.0073, + "step": 1445 + }, + { + "epoch": 0.8, + "learning_rate": 4.1689490022201154e-05, + "loss": 1.0106, + "step": 1450 + }, + { + "epoch": 0.8, + "learning_rate": 4.163551532537601e-05, + "loss": 1.0241, + "step": 1455 + }, + { + "epoch": 0.81, + "learning_rate": 4.1581401108589425e-05, + "loss": 1.0178, + "step": 1460 + }, + { + "epoch": 0.81, + "learning_rate": 4.1527147825690495e-05, + "loss": 0.9426, + "step": 1465 + }, + { + "epoch": 0.81, + "learning_rate": 4.1472755931694626e-05, + "loss": 0.9642, + "step": 1470 + }, + { + "epoch": 0.82, + "learning_rate": 4.141822588277976e-05, + "loss": 0.9678, + "step": 1475 + }, + { + "epoch": 0.82, + "learning_rate": 4.136355813628251e-05, + "loss": 0.9654, + "step": 1480 + }, + { + "epoch": 0.82, + "learning_rate": 4.130875315069435e-05, + "loss": 0.9748, + "step": 1485 + }, + { + "epoch": 0.82, + "learning_rate": 4.125381138565775e-05, + "loss": 0.9321, + "step": 1490 + }, + { + "epoch": 0.83, + "learning_rate": 4.1198733301962346e-05, + "loss": 0.9885, + "step": 1495 + }, + { + "epoch": 0.83, + "learning_rate": 4.114351936154105e-05, + "loss": 0.9385, + "step": 1500 + }, + { + "epoch": 0.83, + "learning_rate": 4.108817002746619e-05, + "loss": 0.9362, + "step": 1505 + }, + { + "epoch": 0.83, + "learning_rate": 4.1032685763945625e-05, + "loss": 0.9764, + "step": 1510 + }, + { + "epoch": 0.84, + "learning_rate": 4.097706703631886e-05, + "loss": 0.961, + "step": 1515 + }, + { + "epoch": 0.84, + "learning_rate": 4.092131431105312e-05, + "loss": 0.9818, + "step": 1520 + }, + { + "epoch": 0.84, + "learning_rate": 4.086542805573945e-05, + "loss": 0.9855, + "step": 1525 + }, + { + "epoch": 0.85, + "learning_rate": 4.080940873908881e-05, + "loss": 0.9618, + "step": 1530 + }, + { + "epoch": 0.85, + "learning_rate": 4.07532568309281e-05, + "loss": 1.0102, + "step": 1535 + }, + { + "epoch": 0.85, + "learning_rate": 4.069697280219628e-05, + "loss": 0.9773, + "step": 1540 + }, + { + "epoch": 0.85, + "learning_rate": 4.0640557124940376e-05, + "loss": 0.9687, + "step": 1545 + }, + { + "epoch": 0.86, + "learning_rate": 4.058401027231152e-05, + "loss": 0.9818, + "step": 1550 + }, + { + "epoch": 0.86, + "learning_rate": 4.052733271856103e-05, + "loss": 0.9911, + "step": 1555 + }, + { + "epoch": 0.86, + "learning_rate": 4.0470524939036355e-05, + "loss": 0.9694, + "step": 1560 + }, + { + "epoch": 0.87, + "learning_rate": 4.0413587410177155e-05, + "loss": 0.9374, + "step": 1565 + }, + { + "epoch": 0.87, + "learning_rate": 4.035652060951128e-05, + "loss": 0.956, + "step": 1570 + }, + { + "epoch": 0.87, + "learning_rate": 4.0299325015650774e-05, + "loss": 0.9491, + "step": 1575 + }, + { + "epoch": 0.87, + "learning_rate": 4.024200110828783e-05, + "loss": 0.9569, + "step": 1580 + }, + { + "epoch": 0.88, + "learning_rate": 4.018454936819082e-05, + "loss": 0.9627, + "step": 1585 + }, + { + "epoch": 0.88, + "learning_rate": 4.012697027720018e-05, + "loss": 0.9922, + "step": 1590 + }, + { + "epoch": 0.88, + "learning_rate": 4.0069264318224506e-05, + "loss": 0.9703, + "step": 1595 + }, + { + "epoch": 0.88, + "learning_rate": 4.0011431975236337e-05, + "loss": 0.9436, + "step": 1600 + }, + { + "epoch": 0.89, + "learning_rate": 3.995347373326822e-05, + "loss": 0.961, + "step": 1605 + }, + { + "epoch": 0.89, + "learning_rate": 3.989539007840861e-05, + "loss": 0.9247, + "step": 1610 + }, + { + "epoch": 0.89, + "learning_rate": 3.983718149779775e-05, + "loss": 0.9537, + "step": 1615 + }, + { + "epoch": 0.9, + "learning_rate": 3.9778848479623656e-05, + "loss": 0.9415, + "step": 1620 + }, + { + "epoch": 0.9, + "learning_rate": 3.972039151311795e-05, + "loss": 0.9954, + "step": 1625 + }, + { + "epoch": 0.9, + "learning_rate": 3.966181108855183e-05, + "loss": 0.9451, + "step": 1630 + }, + { + "epoch": 0.9, + "learning_rate": 3.960310769723189e-05, + "loss": 0.9977, + "step": 1635 + }, + { + "epoch": 0.91, + "learning_rate": 3.9544281831496034e-05, + "loss": 0.9806, + "step": 1640 + }, + { + "epoch": 0.91, + "learning_rate": 3.9485333984709374e-05, + "loss": 0.9851, + "step": 1645 + }, + { + "epoch": 0.91, + "learning_rate": 3.942626465126001e-05, + "loss": 1.0089, + "step": 1650 + }, + { + "epoch": 0.92, + "learning_rate": 3.9367074326555e-05, + "loss": 0.9562, + "step": 1655 + }, + { + "epoch": 0.92, + "learning_rate": 3.930776350701609e-05, + "loss": 0.9892, + "step": 1660 + }, + { + "epoch": 0.92, + "learning_rate": 3.92483326900756e-05, + "loss": 0.9875, + "step": 1665 + }, + { + "epoch": 0.92, + "learning_rate": 3.91887823741723e-05, + "loss": 1.0012, + "step": 1670 + }, + { + "epoch": 0.93, + "learning_rate": 3.9129113058747136e-05, + "loss": 0.97, + "step": 1675 + }, + { + "epoch": 0.93, + "learning_rate": 3.9069325244239095e-05, + "loss": 0.9793, + "step": 1680 + }, + { + "epoch": 0.93, + "learning_rate": 3.900941943208103e-05, + "loss": 0.9864, + "step": 1685 + }, + { + "epoch": 0.93, + "learning_rate": 3.894939612469539e-05, + "loss": 0.9596, + "step": 1690 + }, + { + "epoch": 0.94, + "learning_rate": 3.888925582549006e-05, + "loss": 0.9679, + "step": 1695 + }, + { + "epoch": 0.94, + "learning_rate": 3.882899903885412e-05, + "loss": 0.9395, + "step": 1700 + }, + { + "epoch": 0.94, + "learning_rate": 3.876862627015361e-05, + "loss": 0.9774, + "step": 1705 + }, + { + "epoch": 0.95, + "learning_rate": 3.87081380257273e-05, + "loss": 0.9686, + "step": 1710 + }, + { + "epoch": 0.95, + "learning_rate": 3.864753481288244e-05, + "loss": 0.953, + "step": 1715 + }, + { + "epoch": 0.95, + "learning_rate": 3.8586817139890515e-05, + "loss": 0.9719, + "step": 1720 + }, + { + "epoch": 0.95, + "learning_rate": 3.852598551598294e-05, + "loss": 0.9748, + "step": 1725 + }, + { + "epoch": 0.96, + "learning_rate": 3.8465040451346874e-05, + "loss": 0.9741, + "step": 1730 + }, + { + "epoch": 0.96, + "learning_rate": 3.8403982457120836e-05, + "loss": 0.9747, + "step": 1735 + }, + { + "epoch": 0.96, + "learning_rate": 3.834281204539051e-05, + "loss": 0.9791, + "step": 1740 + }, + { + "epoch": 0.96, + "learning_rate": 3.828152972918438e-05, + "loss": 0.9704, + "step": 1745 + }, + { + "epoch": 0.97, + "learning_rate": 3.82201360224695e-05, + "loss": 0.9556, + "step": 1750 + }, + { + "epoch": 0.97, + "learning_rate": 3.815863144014711e-05, + "loss": 0.9552, + "step": 1755 + }, + { + "epoch": 0.97, + "learning_rate": 3.809701649804834e-05, + "loss": 0.9594, + "step": 1760 + }, + { + "epoch": 0.98, + "learning_rate": 3.8035291712929926e-05, + "loss": 0.959, + "step": 1765 + }, + { + "epoch": 0.98, + "learning_rate": 3.797345760246982e-05, + "loss": 1.0254, + "step": 1770 + }, + { + "epoch": 0.98, + "learning_rate": 3.791151468526289e-05, + "loss": 0.9925, + "step": 1775 + }, + { + "epoch": 0.98, + "learning_rate": 3.784946348081654e-05, + "loss": 0.9516, + "step": 1780 + }, + { + "epoch": 0.99, + "learning_rate": 3.7787304509546365e-05, + "loss": 0.954, + "step": 1785 + }, + { + "epoch": 0.99, + "learning_rate": 3.7725038292771774e-05, + "loss": 0.9465, + "step": 1790 + }, + { + "epoch": 0.99, + "learning_rate": 3.766266535271167e-05, + "loss": 0.9792, + "step": 1795 + }, + { + "epoch": 1.0, + "learning_rate": 3.760018621248e-05, + "loss": 0.964, + "step": 1800 + }, + { + "epoch": 1.0, + "learning_rate": 3.75376013960814e-05, + "loss": 0.9419, + "step": 1805 + }, + { + "epoch": 1.0, + "learning_rate": 3.747491142840681e-05, + "loss": 0.9818, + "step": 1810 + }, + { + "epoch": 1.0, + "learning_rate": 3.741211683522904e-05, + "loss": 0.9153, + "step": 1815 + }, + { + "epoch": 1.01, + "learning_rate": 3.734921814319841e-05, + "loss": 0.955, + "step": 1820 + }, + { + "epoch": 1.01, + "learning_rate": 3.728621587983828e-05, + "loss": 0.9467, + "step": 1825 + }, + { + "epoch": 1.01, + "learning_rate": 3.722311057354067e-05, + "loss": 0.9816, + "step": 1830 + }, + { + "epoch": 1.01, + "learning_rate": 3.715990275356178e-05, + "loss": 0.9727, + "step": 1835 + }, + { + "epoch": 1.02, + "learning_rate": 3.7096592950017617e-05, + "loss": 0.9822, + "step": 1840 + }, + { + "epoch": 1.02, + "learning_rate": 3.703318169387947e-05, + "loss": 0.942, + "step": 1845 + }, + { + "epoch": 1.02, + "learning_rate": 3.696966951696952e-05, + "loss": 0.9306, + "step": 1850 + }, + { + "epoch": 1.03, + "learning_rate": 3.690605695195637e-05, + "loss": 0.9654, + "step": 1855 + }, + { + "epoch": 1.03, + "learning_rate": 3.684234453235054e-05, + "loss": 0.9592, + "step": 1860 + }, + { + "epoch": 1.03, + "learning_rate": 3.677853279250003e-05, + "loss": 0.988, + "step": 1865 + }, + { + "epoch": 1.03, + "learning_rate": 3.671462226758583e-05, + "loss": 0.9462, + "step": 1870 + }, + { + "epoch": 1.04, + "learning_rate": 3.665061349361742e-05, + "loss": 0.9685, + "step": 1875 + }, + { + "epoch": 1.04, + "learning_rate": 3.658650700742828e-05, + "loss": 0.9772, + "step": 1880 + }, + { + "epoch": 1.04, + "learning_rate": 3.6522303346671404e-05, + "loss": 0.9482, + "step": 1885 + }, + { + "epoch": 1.05, + "learning_rate": 3.645800304981477e-05, + "loss": 1.0069, + "step": 1890 + }, + { + "epoch": 1.05, + "learning_rate": 3.639360665613683e-05, + "loss": 0.9003, + "step": 1895 + }, + { + "epoch": 1.05, + "learning_rate": 3.632911470572197e-05, + "loss": 0.9279, + "step": 1900 + }, + { + "epoch": 1.05, + "learning_rate": 3.626452773945603e-05, + "loss": 0.9237, + "step": 1905 + }, + { + "epoch": 1.06, + "learning_rate": 3.619984629902172e-05, + "loss": 1.0086, + "step": 1910 + }, + { + "epoch": 1.06, + "learning_rate": 3.613507092689409e-05, + "loss": 0.9625, + "step": 1915 + }, + { + "epoch": 1.06, + "learning_rate": 3.607020216633599e-05, + "loss": 0.9297, + "step": 1920 + }, + { + "epoch": 1.06, + "learning_rate": 3.60052405613935e-05, + "loss": 0.9718, + "step": 1925 + }, + { + "epoch": 1.07, + "learning_rate": 3.594018665689139e-05, + "loss": 0.9512, + "step": 1930 + }, + { + "epoch": 1.07, + "learning_rate": 3.5875040998428513e-05, + "loss": 0.9923, + "step": 1935 + }, + { + "epoch": 1.07, + "learning_rate": 3.5809804132373253e-05, + "loss": 0.9518, + "step": 1940 + }, + { + "epoch": 1.08, + "learning_rate": 3.574447660585897e-05, + "loss": 0.9324, + "step": 1945 + }, + { + "epoch": 1.08, + "learning_rate": 3.5679058966779344e-05, + "loss": 0.9724, + "step": 1950 + }, + { + "epoch": 1.08, + "learning_rate": 3.561355176378384e-05, + "loss": 0.978, + "step": 1955 + }, + { + "epoch": 1.08, + "learning_rate": 3.554795554627307e-05, + "loss": 0.9893, + "step": 1960 + }, + { + "epoch": 1.09, + "learning_rate": 3.548227086439422e-05, + "loss": 0.967, + "step": 1965 + }, + { + "epoch": 1.09, + "learning_rate": 3.541649826903639e-05, + "loss": 0.9529, + "step": 1970 + }, + { + "epoch": 1.09, + "learning_rate": 3.535063831182602e-05, + "loss": 0.9506, + "step": 1975 + }, + { + "epoch": 1.09, + "learning_rate": 3.528469154512224e-05, + "loss": 0.9525, + "step": 1980 + }, + { + "epoch": 1.1, + "learning_rate": 3.521865852201223e-05, + "loss": 0.9258, + "step": 1985 + }, + { + "epoch": 1.1, + "learning_rate": 3.5152539796306596e-05, + "loss": 0.9417, + "step": 1990 + }, + { + "epoch": 1.1, + "learning_rate": 3.508633592253472e-05, + "loss": 0.9465, + "step": 1995 + }, + { + "epoch": 1.11, + "learning_rate": 3.502004745594011e-05, + "loss": 0.9494, + "step": 2000 + }, + { + "epoch": 1.11, + "learning_rate": 3.4953674952475755e-05, + "loss": 0.9709, + "step": 2005 + }, + { + "epoch": 1.11, + "learning_rate": 3.488721896879943e-05, + "loss": 0.9581, + "step": 2010 + }, + { + "epoch": 1.11, + "learning_rate": 3.4820680062269074e-05, + "loss": 0.974, + "step": 2015 + }, + { + "epoch": 1.12, + "learning_rate": 3.4754058790938046e-05, + "loss": 0.9768, + "step": 2020 + }, + { + "epoch": 1.12, + "learning_rate": 3.468735571355055e-05, + "loss": 0.982, + "step": 2025 + }, + { + "epoch": 1.12, + "learning_rate": 3.4620571389536825e-05, + "loss": 0.984, + "step": 2030 + }, + { + "epoch": 1.13, + "learning_rate": 3.455370637900856e-05, + "loss": 0.9604, + "step": 2035 + }, + { + "epoch": 1.13, + "learning_rate": 3.448676124275414e-05, + "loss": 0.897, + "step": 2040 + }, + { + "epoch": 1.13, + "learning_rate": 3.4419736542233925e-05, + "loss": 0.9968, + "step": 2045 + }, + { + "epoch": 1.13, + "learning_rate": 3.4352632839575616e-05, + "loss": 0.9479, + "step": 2050 + }, + { + "epoch": 1.14, + "learning_rate": 3.428545069756946e-05, + "loss": 0.9724, + "step": 2055 + }, + { + "epoch": 1.14, + "learning_rate": 3.42181906796636e-05, + "loss": 0.9493, + "step": 2060 + }, + { + "epoch": 1.14, + "learning_rate": 3.415085334995927e-05, + "loss": 0.9348, + "step": 2065 + }, + { + "epoch": 1.14, + "learning_rate": 3.408343927320613e-05, + "loss": 0.9702, + "step": 2070 + }, + { + "epoch": 1.15, + "learning_rate": 3.401594901479753e-05, + "loss": 0.9089, + "step": 2075 + }, + { + "epoch": 1.15, + "learning_rate": 3.394838314076572e-05, + "loss": 0.9606, + "step": 2080 + }, + { + "epoch": 1.15, + "learning_rate": 3.3880742217777115e-05, + "loss": 0.9743, + "step": 2085 + }, + { + "epoch": 1.16, + "learning_rate": 3.381302681312759e-05, + "loss": 0.9469, + "step": 2090 + }, + { + "epoch": 1.16, + "learning_rate": 3.374523749473767e-05, + "loss": 0.949, + "step": 2095 + }, + { + "epoch": 1.16, + "learning_rate": 3.367737483114779e-05, + "loss": 0.9567, + "step": 2100 + }, + { + "epoch": 1.16, + "learning_rate": 3.360943939151351e-05, + "loss": 0.9718, + "step": 2105 + }, + { + "epoch": 1.17, + "learning_rate": 3.354143174560078e-05, + "loss": 0.9626, + "step": 2110 + }, + { + "epoch": 1.17, + "learning_rate": 3.3473352463781105e-05, + "loss": 0.9346, + "step": 2115 + }, + { + "epoch": 1.17, + "learning_rate": 3.340520211702681e-05, + "loss": 0.9208, + "step": 2120 + }, + { + "epoch": 1.18, + "learning_rate": 3.333698127690623e-05, + "loss": 0.9856, + "step": 2125 + }, + { + "epoch": 1.18, + "learning_rate": 3.326869051557891e-05, + "loss": 0.9049, + "step": 2130 + }, + { + "epoch": 1.18, + "learning_rate": 3.320033040579082e-05, + "loss": 0.9222, + "step": 2135 + }, + { + "epoch": 1.18, + "learning_rate": 3.3131901520869565e-05, + "loss": 0.9648, + "step": 2140 + }, + { + "epoch": 1.19, + "learning_rate": 3.306340443471951e-05, + "loss": 0.9538, + "step": 2145 + }, + { + "epoch": 1.19, + "learning_rate": 3.299483972181708e-05, + "loss": 0.9314, + "step": 2150 + }, + { + "epoch": 1.19, + "learning_rate": 3.292620795720583e-05, + "loss": 0.9576, + "step": 2155 + }, + { + "epoch": 1.19, + "learning_rate": 3.285750971649167e-05, + "loss": 0.9499, + "step": 2160 + }, + { + "epoch": 1.2, + "learning_rate": 3.278874557583807e-05, + "loss": 0.9568, + "step": 2165 + }, + { + "epoch": 1.2, + "learning_rate": 3.271991611196117e-05, + "loss": 0.9642, + "step": 2170 + }, + { + "epoch": 1.2, + "learning_rate": 3.265102190212497e-05, + "loss": 0.9526, + "step": 2175 + }, + { + "epoch": 1.21, + "learning_rate": 3.258206352413648e-05, + "loss": 0.933, + "step": 2180 + }, + { + "epoch": 1.21, + "learning_rate": 3.2513041556340887e-05, + "loss": 0.9683, + "step": 2185 + }, + { + "epoch": 1.21, + "learning_rate": 3.244395657761671e-05, + "loss": 0.9155, + "step": 2190 + }, + { + "epoch": 1.21, + "learning_rate": 3.2374809167370924e-05, + "loss": 0.9262, + "step": 2195 + }, + { + "epoch": 1.22, + "learning_rate": 3.230559990553409e-05, + "loss": 0.9778, + "step": 2200 + }, + { + "epoch": 1.22, + "learning_rate": 3.2236329372555544e-05, + "loss": 0.9577, + "step": 2205 + }, + { + "epoch": 1.22, + "learning_rate": 3.2166998149398465e-05, + "loss": 0.9286, + "step": 2210 + }, + { + "epoch": 1.22, + "learning_rate": 3.209760681753505e-05, + "loss": 0.9634, + "step": 2215 + }, + { + "epoch": 1.23, + "learning_rate": 3.2028155958941615e-05, + "loss": 0.9467, + "step": 2220 + }, + { + "epoch": 1.23, + "learning_rate": 3.195864615609373e-05, + "loss": 0.9543, + "step": 2225 + }, + { + "epoch": 1.23, + "learning_rate": 3.1889077991961304e-05, + "loss": 0.9914, + "step": 2230 + }, + { + "epoch": 1.24, + "learning_rate": 3.181945205000373e-05, + "loss": 0.9309, + "step": 2235 + }, + { + "epoch": 1.24, + "learning_rate": 3.1749768914164955e-05, + "loss": 0.9299, + "step": 2240 + }, + { + "epoch": 1.24, + "learning_rate": 3.168002916886864e-05, + "loss": 0.9462, + "step": 2245 + }, + { + "epoch": 1.24, + "learning_rate": 3.1610233399013194e-05, + "loss": 0.948, + "step": 2250 + }, + { + "epoch": 1.25, + "learning_rate": 3.15403821899669e-05, + "loss": 0.9335, + "step": 2255 + }, + { + "epoch": 1.25, + "learning_rate": 3.147047612756302e-05, + "loss": 0.952, + "step": 2260 + }, + { + "epoch": 1.25, + "learning_rate": 3.140051579809484e-05, + "loss": 0.9532, + "step": 2265 + }, + { + "epoch": 1.26, + "learning_rate": 3.133050178831079e-05, + "loss": 0.9853, + "step": 2270 + }, + { + "epoch": 1.26, + "learning_rate": 3.12604346854095e-05, + "loss": 0.8925, + "step": 2275 + }, + { + "epoch": 1.26, + "learning_rate": 3.119031507703491e-05, + "loss": 0.942, + "step": 2280 + }, + { + "epoch": 1.26, + "learning_rate": 3.112014355127129e-05, + "loss": 0.9132, + "step": 2285 + }, + { + "epoch": 1.27, + "learning_rate": 3.104992069663835e-05, + "loss": 0.9335, + "step": 2290 + }, + { + "epoch": 1.27, + "learning_rate": 3.0979647102086273e-05, + "loss": 0.9403, + "step": 2295 + }, + { + "epoch": 1.27, + "learning_rate": 3.090932335699081e-05, + "loss": 0.9246, + "step": 2300 + }, + { + "epoch": 1.27, + "learning_rate": 3.083895005114831e-05, + "loss": 0.912, + "step": 2305 + }, + { + "epoch": 1.28, + "learning_rate": 3.076852777477079e-05, + "loss": 0.9334, + "step": 2310 + }, + { + "epoch": 1.28, + "learning_rate": 3.069805711848096e-05, + "loss": 0.933, + "step": 2315 + }, + { + "epoch": 1.28, + "learning_rate": 3.062753867330729e-05, + "loss": 0.9348, + "step": 2320 + }, + { + "epoch": 1.29, + "learning_rate": 3.055697303067905e-05, + "loss": 0.9997, + "step": 2325 + }, + { + "epoch": 1.29, + "learning_rate": 3.048636078242137e-05, + "loss": 0.9196, + "step": 2330 + }, + { + "epoch": 1.29, + "learning_rate": 3.0415702520750235e-05, + "loss": 0.9735, + "step": 2335 + }, + { + "epoch": 1.29, + "learning_rate": 3.0344998838267525e-05, + "loss": 0.9395, + "step": 2340 + }, + { + "epoch": 1.3, + "learning_rate": 3.0274250327956093e-05, + "loss": 0.9455, + "step": 2345 + }, + { + "epoch": 1.3, + "learning_rate": 3.020345758317474e-05, + "loss": 0.968, + "step": 2350 + }, + { + "epoch": 1.3, + "learning_rate": 3.0132621197653245e-05, + "loss": 0.9403, + "step": 2355 + }, + { + "epoch": 1.3, + "learning_rate": 3.0061741765487418e-05, + "loss": 0.9267, + "step": 2360 + }, + { + "epoch": 1.31, + "learning_rate": 2.9990819881134073e-05, + "loss": 0.9734, + "step": 2365 + }, + { + "epoch": 1.31, + "learning_rate": 2.9919856139406093e-05, + "loss": 0.949, + "step": 2370 + }, + { + "epoch": 1.31, + "learning_rate": 2.9848851135467386e-05, + "loss": 0.9464, + "step": 2375 + }, + { + "epoch": 1.32, + "learning_rate": 2.977780546482794e-05, + "loss": 0.9709, + "step": 2380 + }, + { + "epoch": 1.32, + "learning_rate": 2.9706719723338795e-05, + "loss": 0.9202, + "step": 2385 + }, + { + "epoch": 1.32, + "learning_rate": 2.9635594507187074e-05, + "loss": 0.9625, + "step": 2390 + }, + { + "epoch": 1.32, + "learning_rate": 2.956443041289096e-05, + "loss": 0.9307, + "step": 2395 + }, + { + "epoch": 1.33, + "learning_rate": 2.9493228037294702e-05, + "loss": 0.907, + "step": 2400 + }, + { + "epoch": 1.33, + "learning_rate": 2.9421987977563613e-05, + "loss": 0.9487, + "step": 2405 + }, + { + "epoch": 1.33, + "learning_rate": 2.935071083117907e-05, + "loss": 0.9259, + "step": 2410 + }, + { + "epoch": 1.34, + "learning_rate": 2.9279397195933457e-05, + "loss": 0.9357, + "step": 2415 + }, + { + "epoch": 1.34, + "learning_rate": 2.920804766992521e-05, + "loss": 0.9149, + "step": 2420 + }, + { + "epoch": 1.34, + "learning_rate": 2.9136662851553787e-05, + "loss": 0.9664, + "step": 2425 + }, + { + "epoch": 1.34, + "learning_rate": 2.906524333951461e-05, + "loss": 0.973, + "step": 2430 + }, + { + "epoch": 1.35, + "learning_rate": 2.899378973279409e-05, + "loss": 0.9797, + "step": 2435 + }, + { + "epoch": 1.35, + "learning_rate": 2.892230263066459e-05, + "loss": 0.9604, + "step": 2440 + }, + { + "epoch": 1.35, + "learning_rate": 2.885078263267938e-05, + "loss": 0.9147, + "step": 2445 + }, + { + "epoch": 1.35, + "learning_rate": 2.8779230338667634e-05, + "loss": 0.9827, + "step": 2450 + }, + { + "epoch": 1.36, + "learning_rate": 2.870764634872939e-05, + "loss": 0.9362, + "step": 2455 + }, + { + "epoch": 1.36, + "learning_rate": 2.86360312632305e-05, + "loss": 0.9373, + "step": 2460 + }, + { + "epoch": 1.36, + "learning_rate": 2.8564385682797622e-05, + "loss": 0.955, + "step": 2465 + }, + { + "epoch": 1.37, + "learning_rate": 2.8492710208313177e-05, + "loss": 0.9108, + "step": 2470 + }, + { + "epoch": 1.37, + "learning_rate": 2.8421005440910303e-05, + "loss": 0.9259, + "step": 2475 + }, + { + "epoch": 1.37, + "learning_rate": 2.8349271981967797e-05, + "loss": 0.9333, + "step": 2480 + }, + { + "epoch": 1.37, + "learning_rate": 2.8277510433105102e-05, + "loss": 0.9437, + "step": 2485 + }, + { + "epoch": 1.38, + "learning_rate": 2.820572139617725e-05, + "loss": 0.9189, + "step": 2490 + }, + { + "epoch": 1.38, + "learning_rate": 2.8133905473269802e-05, + "loss": 0.9522, + "step": 2495 + }, + { + "epoch": 1.38, + "learning_rate": 2.8062063266693818e-05, + "loss": 0.9352, + "step": 2500 + }, + { + "epoch": 1.39, + "learning_rate": 2.7990195378980784e-05, + "loss": 0.9232, + "step": 2505 + }, + { + "epoch": 1.39, + "learning_rate": 2.7918302412877583e-05, + "loss": 0.8807, + "step": 2510 + }, + { + "epoch": 1.39, + "learning_rate": 2.7846384971341427e-05, + "loss": 0.9231, + "step": 2515 + }, + { + "epoch": 1.39, + "learning_rate": 2.7774443657534788e-05, + "loss": 0.9253, + "step": 2520 + }, + { + "epoch": 1.4, + "learning_rate": 2.770247907482036e-05, + "loss": 0.9646, + "step": 2525 + }, + { + "epoch": 1.4, + "learning_rate": 2.763049182675599e-05, + "loss": 0.9575, + "step": 2530 + }, + { + "epoch": 1.4, + "learning_rate": 2.7558482517089617e-05, + "loss": 0.9234, + "step": 2535 + }, + { + "epoch": 1.4, + "learning_rate": 2.748645174975421e-05, + "loss": 0.9215, + "step": 2540 + }, + { + "epoch": 1.41, + "learning_rate": 2.74144001288627e-05, + "loss": 0.959, + "step": 2545 + }, + { + "epoch": 1.41, + "learning_rate": 2.7342328258702894e-05, + "loss": 0.9509, + "step": 2550 + }, + { + "epoch": 1.41, + "learning_rate": 2.727023674373246e-05, + "loss": 0.8989, + "step": 2555 + }, + { + "epoch": 1.42, + "learning_rate": 2.7198126188573807e-05, + "loss": 0.9653, + "step": 2560 + }, + { + "epoch": 1.42, + "learning_rate": 2.7125997198009028e-05, + "loss": 0.9046, + "step": 2565 + }, + { + "epoch": 1.42, + "learning_rate": 2.7053850376974848e-05, + "loss": 0.9318, + "step": 2570 + }, + { + "epoch": 1.42, + "learning_rate": 2.6981686330557516e-05, + "loss": 0.9292, + "step": 2575 + }, + { + "epoch": 1.43, + "learning_rate": 2.6909505663987756e-05, + "loss": 0.99, + "step": 2580 + }, + { + "epoch": 1.43, + "learning_rate": 2.6837308982635678e-05, + "loss": 0.9737, + "step": 2585 + }, + { + "epoch": 1.43, + "learning_rate": 2.6765096892005726e-05, + "loss": 0.9649, + "step": 2590 + }, + { + "epoch": 1.43, + "learning_rate": 2.6692869997731545e-05, + "loss": 0.9687, + "step": 2595 + }, + { + "epoch": 1.44, + "learning_rate": 2.6620628905570964e-05, + "loss": 0.9708, + "step": 2600 + }, + { + "epoch": 1.44, + "learning_rate": 2.6548374221400884e-05, + "loss": 0.9498, + "step": 2605 + }, + { + "epoch": 1.44, + "learning_rate": 2.6476106551212188e-05, + "loss": 0.9512, + "step": 2610 + }, + { + "epoch": 1.45, + "learning_rate": 2.6403826501104682e-05, + "loss": 0.964, + "step": 2615 + }, + { + "epoch": 1.45, + "learning_rate": 2.6331534677281998e-05, + "loss": 0.9321, + "step": 2620 + }, + { + "epoch": 1.45, + "learning_rate": 2.6259231686046508e-05, + "loss": 0.9032, + "step": 2625 + }, + { + "epoch": 1.45, + "learning_rate": 2.6186918133794252e-05, + "loss": 0.9543, + "step": 2630 + }, + { + "epoch": 1.46, + "learning_rate": 2.6114594627009847e-05, + "loss": 0.9355, + "step": 2635 + }, + { + "epoch": 1.46, + "learning_rate": 2.604226177226137e-05, + "loss": 0.9111, + "step": 2640 + }, + { + "epoch": 1.46, + "learning_rate": 2.596992017619534e-05, + "loss": 0.9896, + "step": 2645 + }, + { + "epoch": 1.47, + "learning_rate": 2.589757044553155e-05, + "loss": 0.9715, + "step": 2650 + }, + { + "epoch": 1.47, + "learning_rate": 2.5825213187058045e-05, + "loss": 0.8911, + "step": 2655 + }, + { + "epoch": 1.47, + "learning_rate": 2.5752849007625986e-05, + "loss": 0.9446, + "step": 2660 + }, + { + "epoch": 1.47, + "learning_rate": 2.568047851414459e-05, + "loss": 0.9453, + "step": 2665 + }, + { + "epoch": 1.48, + "learning_rate": 2.5608102313576027e-05, + "loss": 0.9244, + "step": 2670 + }, + { + "epoch": 1.48, + "learning_rate": 2.553572101293033e-05, + "loss": 0.9478, + "step": 2675 + }, + { + "epoch": 1.48, + "learning_rate": 2.546333521926031e-05, + "loss": 0.9708, + "step": 2680 + }, + { + "epoch": 1.48, + "learning_rate": 2.5390945539656445e-05, + "loss": 0.9266, + "step": 2685 + }, + { + "epoch": 1.49, + "learning_rate": 2.5318552581241822e-05, + "loss": 0.9199, + "step": 2690 + }, + { + "epoch": 1.49, + "learning_rate": 2.524615695116702e-05, + "loss": 0.9666, + "step": 2695 + }, + { + "epoch": 1.49, + "learning_rate": 2.5173759256605027e-05, + "loss": 0.9247, + "step": 2700 + }, + { + "epoch": 1.5, + "learning_rate": 2.510136010474614e-05, + "loss": 0.9534, + "step": 2705 + }, + { + "epoch": 1.5, + "learning_rate": 2.5028960102792887e-05, + "loss": 0.9502, + "step": 2710 + }, + { + "epoch": 1.5, + "learning_rate": 2.4971039897207112e-05, + "loss": 0.9331, + "step": 2715 + }, + { + "epoch": 1.5, + "learning_rate": 2.4898639895253865e-05, + "loss": 0.9806, + "step": 2720 + }, + { + "epoch": 1.51, + "learning_rate": 2.4826240743394982e-05, + "loss": 0.9525, + "step": 2725 + }, + { + "epoch": 1.51, + "learning_rate": 2.4753843048832985e-05, + "loss": 0.937, + "step": 2730 + }, + { + "epoch": 1.51, + "learning_rate": 2.4681447418758187e-05, + "loss": 0.935, + "step": 2735 + }, + { + "epoch": 1.52, + "learning_rate": 2.460905446034356e-05, + "loss": 0.9662, + "step": 2740 + }, + { + "epoch": 1.52, + "learning_rate": 2.45366647807397e-05, + "loss": 0.9249, + "step": 2745 + }, + { + "epoch": 1.52, + "learning_rate": 2.446427898706967e-05, + "loss": 0.9673, + "step": 2750 + }, + { + "epoch": 1.52, + "learning_rate": 2.439189768642398e-05, + "loss": 0.9316, + "step": 2755 + }, + { + "epoch": 1.53, + "learning_rate": 2.431952148585541e-05, + "loss": 0.9297, + "step": 2760 + }, + { + "epoch": 1.53, + "learning_rate": 2.424715099237402e-05, + "loss": 0.9278, + "step": 2765 + }, + { + "epoch": 1.53, + "learning_rate": 2.4174786812941968e-05, + "loss": 0.8954, + "step": 2770 + }, + { + "epoch": 1.53, + "learning_rate": 2.4102429554468456e-05, + "loss": 0.9586, + "step": 2775 + }, + { + "epoch": 1.54, + "learning_rate": 2.4030079823804673e-05, + "loss": 0.9119, + "step": 2780 + }, + { + "epoch": 1.54, + "learning_rate": 2.395773822773863e-05, + "loss": 0.8949, + "step": 2785 + }, + { + "epoch": 1.54, + "learning_rate": 2.3885405372990166e-05, + "loss": 0.9506, + "step": 2790 + }, + { + "epoch": 1.55, + "learning_rate": 2.3813081866205754e-05, + "loss": 0.9087, + "step": 2795 + }, + { + "epoch": 1.55, + "learning_rate": 2.3740768313953494e-05, + "loss": 0.923, + "step": 2800 + }, + { + "epoch": 1.55, + "learning_rate": 2.3668465322718004e-05, + "loss": 1.0212, + "step": 2805 + }, + { + "epoch": 1.55, + "learning_rate": 2.359617349889532e-05, + "loss": 0.9182, + "step": 2810 + }, + { + "epoch": 1.56, + "learning_rate": 2.3523893448787818e-05, + "loss": 0.9316, + "step": 2815 + }, + { + "epoch": 1.56, + "learning_rate": 2.3451625778599122e-05, + "loss": 0.94, + "step": 2820 + }, + { + "epoch": 1.56, + "learning_rate": 2.3379371094429038e-05, + "loss": 0.9181, + "step": 2825 + }, + { + "epoch": 1.56, + "learning_rate": 2.3307130002268457e-05, + "loss": 0.937, + "step": 2830 + }, + { + "epoch": 1.57, + "learning_rate": 2.3234903107994287e-05, + "loss": 0.9026, + "step": 2835 + }, + { + "epoch": 1.57, + "learning_rate": 2.3162691017364317e-05, + "loss": 0.954, + "step": 2840 + }, + { + "epoch": 1.57, + "learning_rate": 2.3090494336012253e-05, + "loss": 0.9661, + "step": 2845 + }, + { + "epoch": 1.58, + "learning_rate": 2.3018313669442483e-05, + "loss": 0.9127, + "step": 2850 + }, + { + "epoch": 1.58, + "learning_rate": 2.2946149623025158e-05, + "loss": 0.9317, + "step": 2855 + }, + { + "epoch": 1.58, + "learning_rate": 2.2874002801990978e-05, + "loss": 0.9856, + "step": 2860 + }, + { + "epoch": 1.58, + "learning_rate": 2.28018738114262e-05, + "loss": 1.0021, + "step": 2865 + }, + { + "epoch": 1.59, + "learning_rate": 2.272976325626755e-05, + "loss": 0.9655, + "step": 2870 + }, + { + "epoch": 1.59, + "learning_rate": 2.265767174129711e-05, + "loss": 0.9619, + "step": 2875 + }, + { + "epoch": 1.59, + "learning_rate": 2.2585599871137313e-05, + "loss": 0.9383, + "step": 2880 + }, + { + "epoch": 1.6, + "learning_rate": 2.251354825024579e-05, + "loss": 0.9332, + "step": 2885 + }, + { + "epoch": 1.6, + "learning_rate": 2.244151748291039e-05, + "loss": 0.9544, + "step": 2890 + }, + { + "epoch": 1.6, + "learning_rate": 2.236950817324401e-05, + "loss": 0.9343, + "step": 2895 + }, + { + "epoch": 1.6, + "learning_rate": 2.2297520925179647e-05, + "loss": 0.9189, + "step": 2900 + }, + { + "epoch": 1.61, + "learning_rate": 2.222555634246521e-05, + "loss": 0.9341, + "step": 2905 + }, + { + "epoch": 1.61, + "learning_rate": 2.215361502865858e-05, + "loss": 0.9567, + "step": 2910 + }, + { + "epoch": 1.61, + "learning_rate": 2.2081697587122423e-05, + "loss": 0.9047, + "step": 2915 + }, + { + "epoch": 1.61, + "learning_rate": 2.200980462101922e-05, + "loss": 0.9126, + "step": 2920 + }, + { + "epoch": 1.62, + "learning_rate": 2.1937936733306195e-05, + "loss": 0.9523, + "step": 2925 + }, + { + "epoch": 1.62, + "learning_rate": 2.18660945267302e-05, + "loss": 0.8802, + "step": 2930 + }, + { + "epoch": 1.62, + "learning_rate": 2.179427860382276e-05, + "loss": 0.9197, + "step": 2935 + }, + { + "epoch": 1.63, + "learning_rate": 2.1722489566894903e-05, + "loss": 0.9255, + "step": 2940 + }, + { + "epoch": 1.63, + "learning_rate": 2.1650728018032206e-05, + "loss": 0.8921, + "step": 2945 + }, + { + "epoch": 1.63, + "learning_rate": 2.15789945590897e-05, + "loss": 0.9607, + "step": 2950 + }, + { + "epoch": 1.63, + "learning_rate": 2.150728979168683e-05, + "loss": 0.9755, + "step": 2955 + }, + { + "epoch": 1.64, + "learning_rate": 2.1435614317202384e-05, + "loss": 0.9943, + "step": 2960 + }, + { + "epoch": 1.64, + "learning_rate": 2.1363968736769508e-05, + "loss": 0.9463, + "step": 2965 + }, + { + "epoch": 1.64, + "learning_rate": 2.1292353651270617e-05, + "loss": 0.9107, + "step": 2970 + }, + { + "epoch": 1.65, + "learning_rate": 2.1220769661332365e-05, + "loss": 0.9311, + "step": 2975 + }, + { + "epoch": 1.65, + "learning_rate": 2.1149217367320622e-05, + "loss": 0.9459, + "step": 2980 + }, + { + "epoch": 1.65, + "learning_rate": 2.107769736933541e-05, + "loss": 0.9439, + "step": 2985 + }, + { + "epoch": 1.65, + "learning_rate": 2.100621026720591e-05, + "loss": 0.9719, + "step": 2990 + }, + { + "epoch": 1.66, + "learning_rate": 2.093475666048539e-05, + "loss": 0.9569, + "step": 2995 + }, + { + "epoch": 1.66, + "learning_rate": 2.0863337148446222e-05, + "loss": 0.9308, + "step": 3000 + }, + { + "epoch": 1.66, + "learning_rate": 2.07919523300748e-05, + "loss": 0.9269, + "step": 3005 + }, + { + "epoch": 1.66, + "learning_rate": 2.0720602804066552e-05, + "loss": 0.9358, + "step": 3010 + }, + { + "epoch": 1.67, + "learning_rate": 2.0649289168820943e-05, + "loss": 0.9291, + "step": 3015 + }, + { + "epoch": 1.67, + "learning_rate": 2.0578012022436386e-05, + "loss": 0.969, + "step": 3020 + }, + { + "epoch": 1.67, + "learning_rate": 2.0506771962705304e-05, + "loss": 0.9319, + "step": 3025 + }, + { + "epoch": 1.68, + "learning_rate": 2.0435569587109042e-05, + "loss": 0.9574, + "step": 3030 + }, + { + "epoch": 1.68, + "learning_rate": 2.036440549281293e-05, + "loss": 0.9654, + "step": 3035 + }, + { + "epoch": 1.68, + "learning_rate": 2.0293280276661204e-05, + "loss": 0.9742, + "step": 3040 + }, + { + "epoch": 1.68, + "learning_rate": 2.0222194535172067e-05, + "loss": 0.9014, + "step": 3045 + }, + { + "epoch": 1.69, + "learning_rate": 2.0151148864532623e-05, + "loss": 0.9246, + "step": 3050 + }, + { + "epoch": 1.69, + "learning_rate": 2.0080143860593913e-05, + "loss": 0.9522, + "step": 3055 + }, + { + "epoch": 1.69, + "learning_rate": 2.0009180118865933e-05, + "loss": 0.9567, + "step": 3060 + }, + { + "epoch": 1.69, + "learning_rate": 1.9938258234512588e-05, + "loss": 0.983, + "step": 3065 + }, + { + "epoch": 1.7, + "learning_rate": 1.9867378802346764e-05, + "loss": 0.8722, + "step": 3070 + }, + { + "epoch": 1.7, + "learning_rate": 1.979654241682527e-05, + "loss": 0.9122, + "step": 3075 + }, + { + "epoch": 1.7, + "learning_rate": 1.972574967204391e-05, + "loss": 0.9362, + "step": 3080 + }, + { + "epoch": 1.71, + "learning_rate": 1.9655001161732478e-05, + "loss": 0.8944, + "step": 3085 + }, + { + "epoch": 1.71, + "learning_rate": 1.9584297479249774e-05, + "loss": 0.9329, + "step": 3090 + }, + { + "epoch": 1.71, + "learning_rate": 1.9513639217578636e-05, + "loss": 0.9707, + "step": 3095 + }, + { + "epoch": 1.71, + "learning_rate": 1.9443026969320955e-05, + "loss": 0.9367, + "step": 3100 + }, + { + "epoch": 1.72, + "learning_rate": 1.937246132669272e-05, + "loss": 0.896, + "step": 3105 + }, + { + "epoch": 1.72, + "learning_rate": 1.9301942881519047e-05, + "loss": 0.97, + "step": 3110 + }, + { + "epoch": 1.72, + "learning_rate": 1.9231472225229216e-05, + "loss": 0.9638, + "step": 3115 + }, + { + "epoch": 1.73, + "learning_rate": 1.9161049948851684e-05, + "loss": 0.9561, + "step": 3120 + }, + { + "epoch": 1.73, + "learning_rate": 1.9090676643009193e-05, + "loss": 0.9734, + "step": 3125 + }, + { + "epoch": 1.73, + "learning_rate": 1.902035289791373e-05, + "loss": 0.9651, + "step": 3130 + }, + { + "epoch": 1.73, + "learning_rate": 1.8950079303361658e-05, + "loss": 0.9489, + "step": 3135 + }, + { + "epoch": 1.74, + "learning_rate": 1.8879856448728723e-05, + "loss": 0.9893, + "step": 3140 + }, + { + "epoch": 1.74, + "learning_rate": 1.8809684922965097e-05, + "loss": 0.9549, + "step": 3145 + }, + { + "epoch": 1.74, + "learning_rate": 1.8739565314590507e-05, + "loss": 0.9196, + "step": 3150 + }, + { + "epoch": 1.74, + "learning_rate": 1.8669498211689216e-05, + "loss": 0.9568, + "step": 3155 + }, + { + "epoch": 1.75, + "learning_rate": 1.859948420190517e-05, + "loss": 0.904, + "step": 3160 + }, + { + "epoch": 1.75, + "learning_rate": 1.852952387243698e-05, + "loss": 0.9375, + "step": 3165 + }, + { + "epoch": 1.75, + "learning_rate": 1.8459617810033096e-05, + "loss": 0.9048, + "step": 3170 + }, + { + "epoch": 1.76, + "learning_rate": 1.83897666009868e-05, + "loss": 0.9514, + "step": 3175 + }, + { + "epoch": 1.76, + "learning_rate": 1.8319970831131363e-05, + "loss": 0.8855, + "step": 3180 + }, + { + "epoch": 1.76, + "learning_rate": 1.825023108583505e-05, + "loss": 0.9119, + "step": 3185 + }, + { + "epoch": 1.76, + "learning_rate": 1.818054794999628e-05, + "loss": 0.9733, + "step": 3190 + }, + { + "epoch": 1.77, + "learning_rate": 1.8110922008038705e-05, + "loss": 0.9597, + "step": 3195 + }, + { + "epoch": 1.77, + "learning_rate": 1.8041353843906275e-05, + "loss": 0.8952, + "step": 3200 + }, + { + "epoch": 1.77, + "learning_rate": 1.797184404105839e-05, + "loss": 0.8934, + "step": 3205 + }, + { + "epoch": 1.77, + "learning_rate": 1.7902393182464955e-05, + "loss": 0.9299, + "step": 3210 + }, + { + "epoch": 1.78, + "learning_rate": 1.7833001850601544e-05, + "loss": 0.9247, + "step": 3215 + }, + { + "epoch": 1.78, + "learning_rate": 1.7763670627444465e-05, + "loss": 0.9672, + "step": 3220 + }, + { + "epoch": 1.78, + "learning_rate": 1.7694400094465913e-05, + "loss": 0.9451, + "step": 3225 + }, + { + "epoch": 1.79, + "learning_rate": 1.7625190832629085e-05, + "loss": 0.9294, + "step": 3230 + }, + { + "epoch": 1.79, + "learning_rate": 1.7556043422383293e-05, + "loss": 0.9145, + "step": 3235 + }, + { + "epoch": 1.79, + "learning_rate": 1.7486958443659112e-05, + "loss": 0.9508, + "step": 3240 + }, + { + "epoch": 1.79, + "learning_rate": 1.7417936475863526e-05, + "loss": 0.8725, + "step": 3245 + }, + { + "epoch": 1.8, + "learning_rate": 1.7348978097875036e-05, + "loss": 0.9195, + "step": 3250 + }, + { + "epoch": 1.8, + "learning_rate": 1.728008388803883e-05, + "loss": 0.933, + "step": 3255 + }, + { + "epoch": 1.8, + "learning_rate": 1.7211254424161933e-05, + "loss": 0.9747, + "step": 3260 + }, + { + "epoch": 1.81, + "learning_rate": 1.7142490283508324e-05, + "loss": 0.9168, + "step": 3265 + }, + { + "epoch": 1.81, + "learning_rate": 1.707379204279418e-05, + "loss": 0.9844, + "step": 3270 + }, + { + "epoch": 1.81, + "learning_rate": 1.700516027818293e-05, + "loss": 0.9071, + "step": 3275 + }, + { + "epoch": 1.81, + "learning_rate": 1.6936595565280488e-05, + "loss": 0.9311, + "step": 3280 + }, + { + "epoch": 1.82, + "learning_rate": 1.686809847913045e-05, + "loss": 0.9419, + "step": 3285 + }, + { + "epoch": 1.82, + "learning_rate": 1.679966959420918e-05, + "loss": 0.9679, + "step": 3290 + }, + { + "epoch": 1.82, + "learning_rate": 1.67313094844211e-05, + "loss": 0.9601, + "step": 3295 + }, + { + "epoch": 1.82, + "learning_rate": 1.6663018723093774e-05, + "loss": 0.9022, + "step": 3300 + }, + { + "epoch": 1.83, + "learning_rate": 1.6594797882973196e-05, + "loss": 0.8925, + "step": 3305 + }, + { + "epoch": 1.83, + "learning_rate": 1.6526647536218894e-05, + "loss": 0.9238, + "step": 3310 + }, + { + "epoch": 1.83, + "learning_rate": 1.6458568254399225e-05, + "loss": 0.9632, + "step": 3315 + }, + { + "epoch": 1.84, + "learning_rate": 1.6390560608486496e-05, + "loss": 0.9164, + "step": 3320 + }, + { + "epoch": 1.84, + "learning_rate": 1.6322625168852217e-05, + "loss": 0.9505, + "step": 3325 + }, + { + "epoch": 1.84, + "learning_rate": 1.6254762505262338e-05, + "loss": 0.9622, + "step": 3330 + }, + { + "epoch": 1.84, + "learning_rate": 1.618697318687241e-05, + "loss": 0.9204, + "step": 3335 + }, + { + "epoch": 1.85, + "learning_rate": 1.6119257782222895e-05, + "loss": 0.9504, + "step": 3340 + }, + { + "epoch": 1.85, + "learning_rate": 1.6051616859234285e-05, + "loss": 0.9384, + "step": 3345 + }, + { + "epoch": 1.85, + "learning_rate": 1.5984050985202474e-05, + "loss": 0.9374, + "step": 3350 + }, + { + "epoch": 1.86, + "learning_rate": 1.591656072679387e-05, + "loss": 0.9937, + "step": 3355 + }, + { + "epoch": 1.86, + "learning_rate": 1.5849146650040737e-05, + "loss": 0.9587, + "step": 3360 + }, + { + "epoch": 1.86, + "learning_rate": 1.5781809320336412e-05, + "loss": 0.9312, + "step": 3365 + }, + { + "epoch": 1.86, + "learning_rate": 1.5714549302430536e-05, + "loss": 0.9343, + "step": 3370 + }, + { + "epoch": 1.87, + "learning_rate": 1.5647367160424393e-05, + "loss": 0.9419, + "step": 3375 + }, + { + "epoch": 1.87, + "learning_rate": 1.558026345776608e-05, + "loss": 0.9371, + "step": 3380 + }, + { + "epoch": 1.87, + "learning_rate": 1.551323875724587e-05, + "loss": 0.913, + "step": 3385 + }, + { + "epoch": 1.87, + "learning_rate": 1.5446293620991437e-05, + "loss": 0.9195, + "step": 3390 + }, + { + "epoch": 1.88, + "learning_rate": 1.5379428610463174e-05, + "loss": 0.9164, + "step": 3395 + }, + { + "epoch": 1.88, + "learning_rate": 1.531264428644945e-05, + "loss": 0.9256, + "step": 3400 + }, + { + "epoch": 1.88, + "learning_rate": 1.5245941209061953e-05, + "loss": 0.9658, + "step": 3405 + }, + { + "epoch": 1.89, + "learning_rate": 1.517931993773094e-05, + "loss": 0.9397, + "step": 3410 + }, + { + "epoch": 1.89, + "learning_rate": 1.5112781031200569e-05, + "loss": 0.915, + "step": 3415 + }, + { + "epoch": 1.89, + "learning_rate": 1.5046325047524251e-05, + "loss": 0.9116, + "step": 3420 + }, + { + "epoch": 1.89, + "learning_rate": 1.4979952544059888e-05, + "loss": 0.9192, + "step": 3425 + }, + { + "epoch": 1.9, + "learning_rate": 1.4913664077465289e-05, + "loss": 0.924, + "step": 3430 + }, + { + "epoch": 1.9, + "learning_rate": 1.4847460203693408e-05, + "loss": 0.9579, + "step": 3435 + }, + { + "epoch": 1.9, + "learning_rate": 1.4781341477987776e-05, + "loss": 0.9816, + "step": 3440 + }, + { + "epoch": 1.9, + "learning_rate": 1.4715308454877758e-05, + "loss": 0.9412, + "step": 3445 + }, + { + "epoch": 1.91, + "learning_rate": 1.4649361688173979e-05, + "loss": 0.9413, + "step": 3450 + }, + { + "epoch": 1.91, + "learning_rate": 1.458350173096361e-05, + "loss": 0.9425, + "step": 3455 + }, + { + "epoch": 1.91, + "learning_rate": 1.4517729135605795e-05, + "loss": 0.9132, + "step": 3460 + }, + { + "epoch": 1.92, + "learning_rate": 1.4452044453726942e-05, + "loss": 0.9455, + "step": 3465 + }, + { + "epoch": 1.92, + "learning_rate": 1.4386448236216174e-05, + "loss": 0.9543, + "step": 3470 + }, + { + "epoch": 1.92, + "learning_rate": 1.4320941033220667e-05, + "loss": 0.9118, + "step": 3475 + }, + { + "epoch": 1.92, + "learning_rate": 1.4255523394141041e-05, + "loss": 0.9425, + "step": 3480 + }, + { + "epoch": 1.93, + "learning_rate": 1.4190195867626749e-05, + "loss": 0.8958, + "step": 3485 + }, + { + "epoch": 1.93, + "learning_rate": 1.4124959001571497e-05, + "loss": 0.944, + "step": 3490 + }, + { + "epoch": 1.93, + "learning_rate": 1.4059813343108616e-05, + "loss": 0.9611, + "step": 3495 + }, + { + "epoch": 1.94, + "learning_rate": 1.3994759438606501e-05, + "loss": 0.9448, + "step": 3500 + }, + { + "epoch": 1.94, + "learning_rate": 1.3929797833664013e-05, + "loss": 0.9659, + "step": 3505 + }, + { + "epoch": 1.94, + "learning_rate": 1.3864929073105922e-05, + "loss": 0.9178, + "step": 3510 + }, + { + "epoch": 1.94, + "learning_rate": 1.3800153700978282e-05, + "loss": 0.8965, + "step": 3515 + }, + { + "epoch": 1.95, + "learning_rate": 1.373547226054398e-05, + "loss": 0.9198, + "step": 3520 + }, + { + "epoch": 1.95, + "learning_rate": 1.367088529427803e-05, + "loss": 0.9398, + "step": 3525 + }, + { + "epoch": 1.95, + "learning_rate": 1.3606393343863182e-05, + "loss": 0.9423, + "step": 3530 + }, + { + "epoch": 1.95, + "learning_rate": 1.3541996950185227e-05, + "loss": 0.9592, + "step": 3535 + }, + { + "epoch": 1.96, + "learning_rate": 1.3477696653328598e-05, + "loss": 0.9489, + "step": 3540 + }, + { + "epoch": 1.96, + "learning_rate": 1.3413492992571713e-05, + "loss": 0.963, + "step": 3545 + }, + { + "epoch": 1.96, + "learning_rate": 1.3349386506382586e-05, + "loss": 0.9449, + "step": 3550 + }, + { + "epoch": 1.97, + "learning_rate": 1.3285377732414172e-05, + "loss": 0.9043, + "step": 3555 + }, + { + "epoch": 1.97, + "learning_rate": 1.3221467207499972e-05, + "loss": 0.9362, + "step": 3560 + }, + { + "epoch": 1.97, + "learning_rate": 1.3157655467649463e-05, + "loss": 0.945, + "step": 3565 + }, + { + "epoch": 1.97, + "learning_rate": 1.3093943048043634e-05, + "loss": 0.8693, + "step": 3570 + }, + { + "epoch": 1.98, + "learning_rate": 1.3030330483030479e-05, + "loss": 0.9659, + "step": 3575 + }, + { + "epoch": 1.98, + "learning_rate": 1.2966818306120535e-05, + "loss": 0.9036, + "step": 3580 + }, + { + "epoch": 1.98, + "learning_rate": 1.2903407049982386e-05, + "loss": 0.9282, + "step": 3585 + }, + { + "epoch": 1.99, + "learning_rate": 1.2840097246438215e-05, + "loss": 0.8747, + "step": 3590 + }, + { + "epoch": 1.99, + "learning_rate": 1.277688942645934e-05, + "loss": 0.936, + "step": 3595 + }, + { + "epoch": 1.99, + "learning_rate": 1.2713784120161725e-05, + "loss": 0.8945, + "step": 3600 + }, + { + "epoch": 1.99, + "learning_rate": 1.2650781856801598e-05, + "loss": 0.9273, + "step": 3605 + }, + { + "epoch": 2.0, + "learning_rate": 1.258788316477097e-05, + "loss": 0.9632, + "step": 3610 + }, + { + "epoch": 2.0, + "learning_rate": 1.2525088571593202e-05, + "loss": 0.8641, + "step": 3615 + }, + { + "epoch": 2.0, + "learning_rate": 1.2462398603918607e-05, + "loss": 0.9314, + "step": 3620 + }, + { + "epoch": 2.0, + "learning_rate": 1.2399813787520006e-05, + "loss": 0.9169, + "step": 3625 + }, + { + "epoch": 2.01, + "learning_rate": 1.2337334647288334e-05, + "loss": 0.9089, + "step": 3630 + }, + { + "epoch": 2.01, + "learning_rate": 1.2274961707228228e-05, + "loss": 0.9039, + "step": 3635 + }, + { + "epoch": 2.01, + "learning_rate": 1.2212695490453646e-05, + "loss": 0.9378, + "step": 3640 + }, + { + "epoch": 2.02, + "learning_rate": 1.2150536519183475e-05, + "loss": 0.9297, + "step": 3645 + }, + { + "epoch": 2.02, + "learning_rate": 1.2088485314737108e-05, + "loss": 0.9488, + "step": 3650 + }, + { + "epoch": 2.02, + "learning_rate": 1.2026542397530186e-05, + "loss": 0.9625, + "step": 3655 + }, + { + "epoch": 2.02, + "learning_rate": 1.1964708287070073e-05, + "loss": 0.8874, + "step": 3660 + }, + { + "epoch": 2.03, + "learning_rate": 1.1902983501951666e-05, + "loss": 0.9224, + "step": 3665 + }, + { + "epoch": 2.03, + "learning_rate": 1.1841368559852892e-05, + "loss": 0.9442, + "step": 3670 + }, + { + "epoch": 2.03, + "learning_rate": 1.17798639775305e-05, + "loss": 0.8688, + "step": 3675 + }, + { + "epoch": 2.03, + "learning_rate": 1.1718470270815608e-05, + "loss": 0.8912, + "step": 3680 + }, + { + "epoch": 2.04, + "learning_rate": 1.1657187954609496e-05, + "loss": 0.9131, + "step": 3685 + }, + { + "epoch": 2.04, + "learning_rate": 1.1596017542879168e-05, + "loss": 0.9827, + "step": 3690 + }, + { + "epoch": 2.04, + "learning_rate": 1.1534959548653132e-05, + "loss": 0.8588, + "step": 3695 + }, + { + "epoch": 2.05, + "learning_rate": 1.147401448401706e-05, + "loss": 0.9359, + "step": 3700 + }, + { + "epoch": 2.05, + "learning_rate": 1.1413182860109491e-05, + "loss": 0.9375, + "step": 3705 + }, + { + "epoch": 2.05, + "learning_rate": 1.1352465187117562e-05, + "loss": 0.9242, + "step": 3710 + }, + { + "epoch": 2.05, + "learning_rate": 1.1291861974272703e-05, + "loss": 0.9132, + "step": 3715 + }, + { + "epoch": 2.06, + "learning_rate": 1.1231373729846393e-05, + "loss": 0.9142, + "step": 3720 + }, + { + "epoch": 2.06, + "learning_rate": 1.1171000961145883e-05, + "loss": 0.9132, + "step": 3725 + }, + { + "epoch": 2.06, + "learning_rate": 1.1110744174509952e-05, + "loss": 0.9603, + "step": 3730 + }, + { + "epoch": 2.07, + "learning_rate": 1.1050603875304622e-05, + "loss": 0.9442, + "step": 3735 + }, + { + "epoch": 2.07, + "learning_rate": 1.0990580567918979e-05, + "loss": 0.9392, + "step": 3740 + }, + { + "epoch": 2.07, + "learning_rate": 1.0930674755760908e-05, + "loss": 0.9203, + "step": 3745 + }, + { + "epoch": 2.07, + "learning_rate": 1.0870886941252872e-05, + "loss": 0.9468, + "step": 3750 + }, + { + "epoch": 2.08, + "learning_rate": 1.0811217625827705e-05, + "loss": 0.9605, + "step": 3755 + }, + { + "epoch": 2.08, + "learning_rate": 1.0751667309924399e-05, + "loss": 0.9166, + "step": 3760 + }, + { + "epoch": 2.08, + "learning_rate": 1.0692236492983918e-05, + "loss": 0.939, + "step": 3765 + }, + { + "epoch": 2.08, + "learning_rate": 1.0632925673445001e-05, + "loss": 0.9409, + "step": 3770 + }, + { + "epoch": 2.09, + "learning_rate": 1.0573735348739987e-05, + "loss": 0.9523, + "step": 3775 + }, + { + "epoch": 2.09, + "learning_rate": 1.0514666015290645e-05, + "loss": 0.8867, + "step": 3780 + }, + { + "epoch": 2.09, + "learning_rate": 1.0455718168503967e-05, + "loss": 0.9816, + "step": 3785 + }, + { + "epoch": 2.1, + "learning_rate": 1.0396892302768127e-05, + "loss": 0.9465, + "step": 3790 + }, + { + "epoch": 2.1, + "learning_rate": 1.033818891144817e-05, + "loss": 0.9352, + "step": 3795 + }, + { + "epoch": 2.1, + "learning_rate": 1.0279608486882054e-05, + "loss": 0.8877, + "step": 3800 + }, + { + "epoch": 2.1, + "learning_rate": 1.0221151520376343e-05, + "loss": 0.9098, + "step": 3805 + }, + { + "epoch": 2.11, + "learning_rate": 1.0162818502202251e-05, + "loss": 0.8687, + "step": 3810 + }, + { + "epoch": 2.11, + "learning_rate": 1.0104609921591387e-05, + "loss": 0.9256, + "step": 3815 + }, + { + "epoch": 2.11, + "learning_rate": 1.0046526266731782e-05, + "loss": 0.92, + "step": 3820 + }, + { + "epoch": 2.12, + "learning_rate": 9.988568024763673e-06, + "loss": 0.8813, + "step": 3825 + }, + { + "epoch": 2.12, + "learning_rate": 9.930735681775505e-06, + "loss": 0.891, + "step": 3830 + }, + { + "epoch": 2.12, + "learning_rate": 9.87302972279982e-06, + "loss": 0.9479, + "step": 3835 + }, + { + "epoch": 2.12, + "learning_rate": 9.815450631809191e-06, + "loss": 0.8963, + "step": 3840 + }, + { + "epoch": 2.13, + "learning_rate": 9.757998891712172e-06, + "loss": 0.9178, + "step": 3845 + }, + { + "epoch": 2.13, + "learning_rate": 9.700674984349228e-06, + "loss": 0.9072, + "step": 3850 + }, + { + "epoch": 2.13, + "learning_rate": 9.643479390488717e-06, + "loss": 0.8879, + "step": 3855 + }, + { + "epoch": 2.13, + "learning_rate": 9.586412589822846e-06, + "loss": 0.9484, + "step": 3860 + }, + { + "epoch": 2.14, + "learning_rate": 9.529475060963649e-06, + "loss": 0.9173, + "step": 3865 + }, + { + "epoch": 2.14, + "learning_rate": 9.472667281438982e-06, + "loss": 0.9318, + "step": 3870 + }, + { + "epoch": 2.14, + "learning_rate": 9.415989727688484e-06, + "loss": 0.9242, + "step": 3875 + }, + { + "epoch": 2.15, + "learning_rate": 9.359442875059631e-06, + "loss": 0.9314, + "step": 3880 + }, + { + "epoch": 2.15, + "learning_rate": 9.303027197803726e-06, + "loss": 0.9227, + "step": 3885 + }, + { + "epoch": 2.15, + "learning_rate": 9.246743169071906e-06, + "loss": 0.9253, + "step": 3890 + }, + { + "epoch": 2.15, + "learning_rate": 9.190591260911201e-06, + "loss": 0.9457, + "step": 3895 + }, + { + "epoch": 2.16, + "learning_rate": 9.134571944260554e-06, + "loss": 0.9214, + "step": 3900 + }, + { + "epoch": 2.16, + "learning_rate": 9.078685688946884e-06, + "loss": 0.9534, + "step": 3905 + }, + { + "epoch": 2.16, + "learning_rate": 9.022932963681141e-06, + "loss": 0.9236, + "step": 3910 + }, + { + "epoch": 2.16, + "learning_rate": 8.967314236054384e-06, + "loss": 0.8905, + "step": 3915 + }, + { + "epoch": 2.17, + "learning_rate": 8.911829972533817e-06, + "loss": 0.9319, + "step": 3920 + }, + { + "epoch": 2.17, + "learning_rate": 8.856480638458966e-06, + "loss": 0.8989, + "step": 3925 + }, + { + "epoch": 2.17, + "learning_rate": 8.80126669803766e-06, + "loss": 0.9234, + "step": 3930 + }, + { + "epoch": 2.18, + "learning_rate": 8.746188614342263e-06, + "loss": 0.9619, + "step": 3935 + }, + { + "epoch": 2.18, + "learning_rate": 8.691246849305653e-06, + "loss": 0.9537, + "step": 3940 + }, + { + "epoch": 2.18, + "learning_rate": 8.636441863717499e-06, + "loss": 0.9809, + "step": 3945 + }, + { + "epoch": 2.18, + "learning_rate": 8.581774117220238e-06, + "loss": 0.9219, + "step": 3950 + }, + { + "epoch": 2.19, + "learning_rate": 8.52724406830538e-06, + "loss": 0.9502, + "step": 3955 + }, + { + "epoch": 2.19, + "learning_rate": 8.472852174309514e-06, + "loss": 0.9277, + "step": 3960 + }, + { + "epoch": 2.19, + "learning_rate": 8.418598891410581e-06, + "loss": 0.8978, + "step": 3965 + }, + { + "epoch": 2.2, + "learning_rate": 8.364484674624e-06, + "loss": 0.9026, + "step": 3970 + }, + { + "epoch": 2.2, + "learning_rate": 8.31050997779885e-06, + "loss": 0.9658, + "step": 3975 + }, + { + "epoch": 2.2, + "learning_rate": 8.25667525361409e-06, + "loss": 0.9288, + "step": 3980 + }, + { + "epoch": 2.2, + "learning_rate": 8.202980953574735e-06, + "loss": 0.9009, + "step": 3985 + }, + { + "epoch": 2.21, + "learning_rate": 8.14942752800808e-06, + "loss": 0.934, + "step": 3990 + }, + { + "epoch": 2.21, + "learning_rate": 8.09601542605993e-06, + "loss": 0.9261, + "step": 3995 + }, + { + "epoch": 2.21, + "learning_rate": 8.042745095690826e-06, + "loss": 0.9086, + "step": 4000 + }, + { + "epoch": 2.21, + "learning_rate": 7.989616983672288e-06, + "loss": 0.9397, + "step": 4005 + }, + { + "epoch": 2.22, + "learning_rate": 7.936631535583055e-06, + "loss": 0.8661, + "step": 4010 + }, + { + "epoch": 2.22, + "learning_rate": 7.88378919580538e-06, + "loss": 0.9433, + "step": 4015 + }, + { + "epoch": 2.22, + "learning_rate": 7.83109040752128e-06, + "loss": 0.9473, + "step": 4020 + }, + { + "epoch": 2.23, + "learning_rate": 7.778535612708824e-06, + "loss": 0.9177, + "step": 4025 + }, + { + "epoch": 2.23, + "learning_rate": 7.726125252138417e-06, + "loss": 0.8932, + "step": 4030 + }, + { + "epoch": 2.23, + "learning_rate": 7.673859765369126e-06, + "loss": 0.9187, + "step": 4035 + }, + { + "epoch": 2.23, + "learning_rate": 7.62173959074497e-06, + "loss": 0.9009, + "step": 4040 + }, + { + "epoch": 2.24, + "learning_rate": 7.569765165391257e-06, + "loss": 0.9348, + "step": 4045 + }, + { + "epoch": 2.24, + "learning_rate": 7.517936925210917e-06, + "loss": 0.8827, + "step": 4050 + }, + { + "epoch": 2.24, + "learning_rate": 7.466255304880834e-06, + "loss": 0.9668, + "step": 4055 + }, + { + "epoch": 2.24, + "learning_rate": 7.4147207378482295e-06, + "loss": 0.934, + "step": 4060 + }, + { + "epoch": 2.25, + "learning_rate": 7.36333365632697e-06, + "loss": 0.9251, + "step": 4065 + }, + { + "epoch": 2.25, + "learning_rate": 7.312094491294033e-06, + "loss": 0.9225, + "step": 4070 + }, + { + "epoch": 2.25, + "learning_rate": 7.261003672485783e-06, + "loss": 0.9198, + "step": 4075 + }, + { + "epoch": 2.26, + "learning_rate": 7.210061628394477e-06, + "loss": 0.9023, + "step": 4080 + }, + { + "epoch": 2.26, + "learning_rate": 7.159268786264564e-06, + "loss": 0.9378, + "step": 4085 + }, + { + "epoch": 2.26, + "learning_rate": 7.108625572089209e-06, + "loss": 0.9148, + "step": 4090 + }, + { + "epoch": 2.26, + "learning_rate": 7.05813241060661e-06, + "loss": 0.9308, + "step": 4095 + }, + { + "epoch": 2.27, + "learning_rate": 7.007789725296557e-06, + "loss": 0.8979, + "step": 4100 + }, + { + "epoch": 2.27, + "learning_rate": 6.957597938376748e-06, + "loss": 0.9329, + "step": 4105 + }, + { + "epoch": 2.27, + "learning_rate": 6.907557470799358e-06, + "loss": 0.8849, + "step": 4110 + }, + { + "epoch": 2.28, + "learning_rate": 6.857668742247458e-06, + "loss": 0.8879, + "step": 4115 + }, + { + "epoch": 2.28, + "learning_rate": 6.807932171131498e-06, + "loss": 0.9132, + "step": 4120 + }, + { + "epoch": 2.28, + "learning_rate": 6.758348174585804e-06, + "loss": 0.9273, + "step": 4125 + }, + { + "epoch": 2.28, + "learning_rate": 6.7089171684650785e-06, + "loss": 0.9115, + "step": 4130 + }, + { + "epoch": 2.29, + "learning_rate": 6.659639567340914e-06, + "loss": 0.9431, + "step": 4135 + }, + { + "epoch": 2.29, + "learning_rate": 6.610515784498314e-06, + "loss": 0.9515, + "step": 4140 + }, + { + "epoch": 2.29, + "learning_rate": 6.561546231932228e-06, + "loss": 0.9069, + "step": 4145 + }, + { + "epoch": 2.29, + "learning_rate": 6.512731320344101e-06, + "loss": 0.9178, + "step": 4150 + }, + { + "epoch": 2.3, + "learning_rate": 6.464071459138405e-06, + "loss": 0.9156, + "step": 4155 + }, + { + "epoch": 2.3, + "learning_rate": 6.415567056419244e-06, + "loss": 0.9637, + "step": 4160 + }, + { + "epoch": 2.3, + "learning_rate": 6.3672185189869e-06, + "loss": 0.9446, + "step": 4165 + }, + { + "epoch": 2.31, + "learning_rate": 6.319026252334445e-06, + "loss": 0.8726, + "step": 4170 + }, + { + "epoch": 2.31, + "learning_rate": 6.270990660644313e-06, + "loss": 0.9324, + "step": 4175 + }, + { + "epoch": 2.31, + "learning_rate": 6.223112146784935e-06, + "loss": 0.9252, + "step": 4180 + }, + { + "epoch": 2.31, + "learning_rate": 6.1753911123073435e-06, + "loss": 0.8924, + "step": 4185 + }, + { + "epoch": 2.32, + "learning_rate": 6.127827957441817e-06, + "loss": 0.9344, + "step": 4190 + }, + { + "epoch": 2.32, + "learning_rate": 6.080423081094522e-06, + "loss": 0.9341, + "step": 4195 + }, + { + "epoch": 2.32, + "learning_rate": 6.033176880844133e-06, + "loss": 0.905, + "step": 4200 + }, + { + "epoch": 2.33, + "learning_rate": 5.986089752938584e-06, + "loss": 0.9231, + "step": 4205 + }, + { + "epoch": 2.33, + "learning_rate": 5.939162092291622e-06, + "loss": 0.906, + "step": 4210 + }, + { + "epoch": 2.33, + "learning_rate": 5.892394292479633e-06, + "loss": 0.8957, + "step": 4215 + }, + { + "epoch": 2.33, + "learning_rate": 5.8457867457382024e-06, + "loss": 0.867, + "step": 4220 + }, + { + "epoch": 2.34, + "learning_rate": 5.7993398429589506e-06, + "loss": 0.9022, + "step": 4225 + }, + { + "epoch": 2.34, + "learning_rate": 5.753053973686148e-06, + "loss": 0.9064, + "step": 4230 + }, + { + "epoch": 2.34, + "learning_rate": 5.7069295261135525e-06, + "loss": 0.9019, + "step": 4235 + }, + { + "epoch": 2.34, + "learning_rate": 5.66096688708104e-06, + "loss": 0.9397, + "step": 4240 + }, + { + "epoch": 2.35, + "learning_rate": 5.615166442071457e-06, + "loss": 0.8971, + "step": 4245 + }, + { + "epoch": 2.35, + "learning_rate": 5.569528575207339e-06, + "loss": 0.8902, + "step": 4250 + }, + { + "epoch": 2.35, + "learning_rate": 5.5240536692477e-06, + "loss": 0.8982, + "step": 4255 + }, + { + "epoch": 2.36, + "learning_rate": 5.4787421055848164e-06, + "loss": 0.9135, + "step": 4260 + }, + { + "epoch": 2.36, + "learning_rate": 5.433594264241043e-06, + "loss": 0.8969, + "step": 4265 + }, + { + "epoch": 2.36, + "learning_rate": 5.3886105238656055e-06, + "loss": 0.9036, + "step": 4270 + }, + { + "epoch": 2.36, + "learning_rate": 5.3437912617314425e-06, + "loss": 0.9313, + "step": 4275 + }, + { + "epoch": 2.37, + "learning_rate": 5.299136853732034e-06, + "loss": 0.9034, + "step": 4280 + }, + { + "epoch": 2.37, + "learning_rate": 5.254647674378252e-06, + "loss": 0.9673, + "step": 4285 + }, + { + "epoch": 2.37, + "learning_rate": 5.210324096795202e-06, + "loss": 0.9044, + "step": 4290 + }, + { + "epoch": 2.37, + "learning_rate": 5.166166492719124e-06, + "loss": 0.937, + "step": 4295 + }, + { + "epoch": 2.38, + "learning_rate": 5.122175232494255e-06, + "loss": 0.9095, + "step": 4300 + }, + { + "epoch": 2.38, + "learning_rate": 5.078350685069727e-06, + "loss": 0.8787, + "step": 4305 + }, + { + "epoch": 2.38, + "learning_rate": 5.03469321799647e-06, + "loss": 0.8945, + "step": 4310 + }, + { + "epoch": 2.39, + "learning_rate": 4.9912031974241376e-06, + "loss": 0.9285, + "step": 4315 + }, + { + "epoch": 2.39, + "learning_rate": 4.947880988098025e-06, + "loss": 0.9318, + "step": 4320 + }, + { + "epoch": 2.39, + "learning_rate": 4.90472695335602e-06, + "loss": 0.9171, + "step": 4325 + }, + { + "epoch": 2.39, + "learning_rate": 4.8617414551255545e-06, + "loss": 0.923, + "step": 4330 + }, + { + "epoch": 2.4, + "learning_rate": 4.818924853920545e-06, + "loss": 0.9153, + "step": 4335 + }, + { + "epoch": 2.4, + "learning_rate": 4.776277508838428e-06, + "loss": 0.907, + "step": 4340 + }, + { + "epoch": 2.4, + "learning_rate": 4.733799777557069e-06, + "loss": 0.8869, + "step": 4345 + }, + { + "epoch": 2.41, + "learning_rate": 4.691492016331842e-06, + "loss": 0.95, + "step": 4350 + }, + { + "epoch": 2.41, + "learning_rate": 4.6493545799925655e-06, + "loss": 0.9145, + "step": 4355 + }, + { + "epoch": 2.41, + "learning_rate": 4.607387821940609e-06, + "loss": 0.9175, + "step": 4360 + }, + { + "epoch": 2.41, + "learning_rate": 4.565592094145835e-06, + "loss": 0.9125, + "step": 4365 + }, + { + "epoch": 2.42, + "learning_rate": 4.523967747143745e-06, + "loss": 0.8972, + "step": 4370 + }, + { + "epoch": 2.42, + "learning_rate": 4.482515130032453e-06, + "loss": 0.9113, + "step": 4375 + }, + { + "epoch": 2.42, + "learning_rate": 4.441234590469817e-06, + "loss": 0.9447, + "step": 4380 + }, + { + "epoch": 2.42, + "learning_rate": 4.400126474670499e-06, + "loss": 0.9472, + "step": 4385 + }, + { + "epoch": 2.43, + "learning_rate": 4.359191127403059e-06, + "loss": 0.9391, + "step": 4390 + }, + { + "epoch": 2.43, + "learning_rate": 4.318428891987078e-06, + "loss": 0.9171, + "step": 4395 + }, + { + "epoch": 2.43, + "learning_rate": 4.2778401102902595e-06, + "loss": 0.9468, + "step": 4400 + }, + { + "epoch": 2.44, + "learning_rate": 4.237425122725586e-06, + "loss": 0.9034, + "step": 4405 + }, + { + "epoch": 2.44, + "learning_rate": 4.197184268248436e-06, + "loss": 0.8667, + "step": 4410 + }, + { + "epoch": 2.44, + "learning_rate": 4.157117884353765e-06, + "loss": 0.9553, + "step": 4415 + }, + { + "epoch": 2.44, + "learning_rate": 4.117226307073268e-06, + "loss": 0.931, + "step": 4420 + }, + { + "epoch": 2.45, + "learning_rate": 4.07750987097254e-06, + "loss": 0.9287, + "step": 4425 + }, + { + "epoch": 2.45, + "learning_rate": 4.037968909148326e-06, + "loss": 0.8737, + "step": 4430 + }, + { + "epoch": 2.45, + "learning_rate": 3.998603753225647e-06, + "loss": 0.9677, + "step": 4435 + }, + { + "epoch": 2.46, + "learning_rate": 3.959414733355094e-06, + "loss": 0.9194, + "step": 4440 + }, + { + "epoch": 2.46, + "learning_rate": 3.9204021782100115e-06, + "loss": 0.9424, + "step": 4445 + }, + { + "epoch": 2.46, + "learning_rate": 3.8815664149837675e-06, + "loss": 0.9402, + "step": 4450 + }, + { + "epoch": 2.46, + "learning_rate": 3.8429077693869854e-06, + "loss": 0.9196, + "step": 4455 + }, + { + "epoch": 2.47, + "learning_rate": 3.804426565644839e-06, + "loss": 0.9357, + "step": 4460 + }, + { + "epoch": 2.47, + "learning_rate": 3.7661231264943086e-06, + "loss": 0.9234, + "step": 4465 + }, + { + "epoch": 2.47, + "learning_rate": 3.7279977731814963e-06, + "loss": 0.9135, + "step": 4470 + }, + { + "epoch": 2.47, + "learning_rate": 3.690050825458913e-06, + "loss": 0.9074, + "step": 4475 + }, + { + "epoch": 2.48, + "learning_rate": 3.652282601582793e-06, + "loss": 0.8658, + "step": 4480 + }, + { + "epoch": 2.48, + "learning_rate": 3.6146934183104748e-06, + "loss": 0.9522, + "step": 4485 + }, + { + "epoch": 2.48, + "learning_rate": 3.5772835908976538e-06, + "loss": 0.937, + "step": 4490 + }, + { + "epoch": 2.49, + "learning_rate": 3.540053433095841e-06, + "loss": 0.9336, + "step": 4495 + }, + { + "epoch": 2.49, + "learning_rate": 3.503003257149637e-06, + "loss": 0.9437, + "step": 4500 + }, + { + "epoch": 2.49, + "learning_rate": 3.4661333737941976e-06, + "loss": 0.9084, + "step": 4505 + }, + { + "epoch": 2.49, + "learning_rate": 3.429444092252554e-06, + "loss": 0.9378, + "step": 4510 + }, + { + "epoch": 2.5, + "learning_rate": 3.39293572023307e-06, + "loss": 0.8999, + "step": 4515 + }, + { + "epoch": 2.5, + "learning_rate": 3.3566085639268413e-06, + "loss": 0.9417, + "step": 4520 + }, + { + "epoch": 2.5, + "learning_rate": 3.32046292800513e-06, + "loss": 0.9288, + "step": 4525 + }, + { + "epoch": 2.5, + "learning_rate": 3.2844991156168097e-06, + "loss": 0.8957, + "step": 4530 + }, + { + "epoch": 2.51, + "learning_rate": 3.2487174283858223e-06, + "loss": 0.9002, + "step": 4535 + }, + { + "epoch": 2.51, + "learning_rate": 3.2131181664086517e-06, + "loss": 0.9315, + "step": 4540 + }, + { + "epoch": 2.51, + "learning_rate": 3.1777016282517975e-06, + "loss": 0.8948, + "step": 4545 + }, + { + "epoch": 2.52, + "learning_rate": 3.142468110949287e-06, + "loss": 0.9015, + "step": 4550 + }, + { + "epoch": 2.52, + "learning_rate": 3.1074179100001737e-06, + "loss": 0.9273, + "step": 4555 + }, + { + "epoch": 2.52, + "learning_rate": 3.0725513193660404e-06, + "loss": 0.9307, + "step": 4560 + }, + { + "epoch": 2.52, + "learning_rate": 3.0378686314685934e-06, + "loss": 0.9075, + "step": 4565 + }, + { + "epoch": 2.53, + "learning_rate": 3.003370137187128e-06, + "loss": 0.8821, + "step": 4570 + }, + { + "epoch": 2.53, + "learning_rate": 2.969056125856154e-06, + "loss": 0.9245, + "step": 4575 + }, + { + "epoch": 2.53, + "learning_rate": 2.93492688526294e-06, + "loss": 0.9346, + "step": 4580 + }, + { + "epoch": 2.54, + "learning_rate": 2.900982701645111e-06, + "loss": 0.9226, + "step": 4585 + }, + { + "epoch": 2.54, + "learning_rate": 2.867223859688237e-06, + "loss": 0.9215, + "step": 4590 + }, + { + "epoch": 2.54, + "learning_rate": 2.83365064252345e-06, + "loss": 0.878, + "step": 4595 + }, + { + "epoch": 2.54, + "learning_rate": 2.800263331725078e-06, + "loss": 0.9207, + "step": 4600 + }, + { + "epoch": 2.55, + "learning_rate": 2.7670622073082657e-06, + "loss": 0.9156, + "step": 4605 + }, + { + "epoch": 2.55, + "learning_rate": 2.7340475477266507e-06, + "loss": 0.9296, + "step": 4610 + }, + { + "epoch": 2.55, + "learning_rate": 2.701219629869986e-06, + "loss": 0.9346, + "step": 4615 + }, + { + "epoch": 2.55, + "learning_rate": 2.6685787290618825e-06, + "loss": 0.9237, + "step": 4620 + }, + { + "epoch": 2.56, + "learning_rate": 2.636125119057428e-06, + "loss": 0.9162, + "step": 4625 + }, + { + "epoch": 2.56, + "learning_rate": 2.6038590720409565e-06, + "loss": 0.9145, + "step": 4630 + }, + { + "epoch": 2.56, + "learning_rate": 2.5717808586237067e-06, + "loss": 0.9776, + "step": 4635 + }, + { + "epoch": 2.57, + "learning_rate": 2.539890747841611e-06, + "loss": 0.9519, + "step": 4640 + }, + { + "epoch": 2.57, + "learning_rate": 2.5081890071529695e-06, + "loss": 0.9116, + "step": 4645 + }, + { + "epoch": 2.57, + "learning_rate": 2.4766759024362927e-06, + "loss": 0.9217, + "step": 4650 + }, + { + "epoch": 2.57, + "learning_rate": 2.445351697987988e-06, + "loss": 0.9113, + "step": 4655 + }, + { + "epoch": 2.58, + "learning_rate": 2.414216656520191e-06, + "loss": 0.9089, + "step": 4660 + }, + { + "epoch": 2.58, + "learning_rate": 2.3832710391585605e-06, + "loss": 0.9367, + "step": 4665 + }, + { + "epoch": 2.58, + "learning_rate": 2.3525151054400675e-06, + "loss": 0.9453, + "step": 4670 + }, + { + "epoch": 2.59, + "learning_rate": 2.3219491133108394e-06, + "loss": 0.9187, + "step": 4675 + }, + { + "epoch": 2.59, + "learning_rate": 2.2915733191239824e-06, + "loss": 0.911, + "step": 4680 + }, + { + "epoch": 2.59, + "learning_rate": 2.261387977637436e-06, + "loss": 0.9309, + "step": 4685 + }, + { + "epoch": 2.59, + "learning_rate": 2.2313933420118395e-06, + "loss": 0.8921, + "step": 4690 + }, + { + "epoch": 2.6, + "learning_rate": 2.2015896638084037e-06, + "loss": 0.9332, + "step": 4695 + }, + { + "epoch": 2.6, + "learning_rate": 2.171977192986813e-06, + "loss": 0.9316, + "step": 4700 + }, + { + "epoch": 2.6, + "learning_rate": 2.142556177903096e-06, + "loss": 0.9453, + "step": 4705 + }, + { + "epoch": 2.6, + "learning_rate": 2.1133268653076022e-06, + "loss": 0.937, + "step": 4710 + }, + { + "epoch": 2.61, + "learning_rate": 2.084289500342862e-06, + "loss": 0.9067, + "step": 4715 + }, + { + "epoch": 2.61, + "learning_rate": 2.0554443265415864e-06, + "loss": 0.9694, + "step": 4720 + }, + { + "epoch": 2.61, + "learning_rate": 2.0267915858245943e-06, + "loss": 0.9217, + "step": 4725 + }, + { + "epoch": 2.62, + "learning_rate": 1.998331518498797e-06, + "loss": 0.9399, + "step": 4730 + }, + { + "epoch": 2.62, + "learning_rate": 1.970064363255175e-06, + "loss": 0.9164, + "step": 4735 + }, + { + "epoch": 2.62, + "learning_rate": 1.941990357166784e-06, + "loss": 0.9336, + "step": 4740 + }, + { + "epoch": 2.62, + "learning_rate": 1.9141097356867644e-06, + "loss": 0.926, + "step": 4745 + }, + { + "epoch": 2.63, + "learning_rate": 1.8864227326463452e-06, + "loss": 0.9109, + "step": 4750 + }, + { + "epoch": 2.63, + "learning_rate": 1.8589295802529328e-06, + "loss": 0.9528, + "step": 4755 + }, + { + "epoch": 2.63, + "learning_rate": 1.8316305090881003e-06, + "loss": 0.9142, + "step": 4760 + }, + { + "epoch": 2.63, + "learning_rate": 1.8045257481057204e-06, + "loss": 0.9247, + "step": 4765 + }, + { + "epoch": 2.64, + "learning_rate": 1.7776155246299747e-06, + "loss": 0.9162, + "step": 4770 + }, + { + "epoch": 2.64, + "learning_rate": 1.7509000643535167e-06, + "loss": 0.9084, + "step": 4775 + }, + { + "epoch": 2.64, + "learning_rate": 1.7243795913355148e-06, + "loss": 0.8427, + "step": 4780 + }, + { + "epoch": 2.65, + "learning_rate": 1.6980543279998401e-06, + "loss": 0.8851, + "step": 4785 + }, + { + "epoch": 2.65, + "learning_rate": 1.671924495133126e-06, + "loss": 0.9155, + "step": 4790 + }, + { + "epoch": 2.65, + "learning_rate": 1.6459903118829777e-06, + "loss": 0.9049, + "step": 4795 + }, + { + "epoch": 2.65, + "learning_rate": 1.6202519957561114e-06, + "loss": 0.8831, + "step": 4800 + }, + { + "epoch": 2.66, + "learning_rate": 1.5947097626165252e-06, + "loss": 0.9286, + "step": 4805 + }, + { + "epoch": 2.66, + "learning_rate": 1.5693638266836952e-06, + "loss": 0.9299, + "step": 4810 + }, + { + "epoch": 2.66, + "learning_rate": 1.5442144005307774e-06, + "loss": 0.8739, + "step": 4815 + }, + { + "epoch": 2.67, + "learning_rate": 1.519261695082827e-06, + "loss": 0.9201, + "step": 4820 + }, + { + "epoch": 2.67, + "learning_rate": 1.4945059196150247e-06, + "loss": 0.9062, + "step": 4825 + }, + { + "epoch": 2.67, + "learning_rate": 1.4699472817509248e-06, + "loss": 0.9304, + "step": 4830 + }, + { + "epoch": 2.67, + "learning_rate": 1.4455859874607235e-06, + "loss": 0.9253, + "step": 4835 + }, + { + "epoch": 2.68, + "learning_rate": 1.4214222410594947e-06, + "loss": 0.9261, + "step": 4840 + }, + { + "epoch": 2.68, + "learning_rate": 1.3974562452055418e-06, + "loss": 0.8764, + "step": 4845 + }, + { + "epoch": 2.68, + "learning_rate": 1.3736882008986262e-06, + "loss": 0.8906, + "step": 4850 + }, + { + "epoch": 2.68, + "learning_rate": 1.3501183074783263e-06, + "loss": 0.9671, + "step": 4855 + }, + { + "epoch": 2.69, + "learning_rate": 1.3267467626223606e-06, + "loss": 0.8979, + "step": 4860 + }, + { + "epoch": 2.69, + "learning_rate": 1.3035737623449146e-06, + "loss": 0.9284, + "step": 4865 + }, + { + "epoch": 2.69, + "learning_rate": 1.2805995009950083e-06, + "loss": 0.9338, + "step": 4870 + }, + { + "epoch": 2.7, + "learning_rate": 1.257824171254865e-06, + "loss": 0.9474, + "step": 4875 + }, + { + "epoch": 2.7, + "learning_rate": 1.2352479641382919e-06, + "loss": 0.9307, + "step": 4880 + }, + { + "epoch": 2.7, + "learning_rate": 1.2128710689890826e-06, + "loss": 0.9578, + "step": 4885 + }, + { + "epoch": 2.7, + "learning_rate": 1.1906936734794233e-06, + "loss": 0.9123, + "step": 4890 + }, + { + "epoch": 2.71, + "learning_rate": 1.1687159636083161e-06, + "loss": 0.9157, + "step": 4895 + }, + { + "epoch": 2.71, + "learning_rate": 1.1469381237000476e-06, + "loss": 0.9135, + "step": 4900 + }, + { + "epoch": 2.71, + "learning_rate": 1.1253603364025867e-06, + "loss": 0.9073, + "step": 4905 + }, + { + "epoch": 2.71, + "learning_rate": 1.1039827826861193e-06, + "loss": 0.9198, + "step": 4910 + }, + { + "epoch": 2.72, + "learning_rate": 1.0828056418414695e-06, + "loss": 0.911, + "step": 4915 + }, + { + "epoch": 2.72, + "learning_rate": 1.06182909147865e-06, + "loss": 0.9124, + "step": 4920 + }, + { + "epoch": 2.72, + "learning_rate": 1.0410533075253248e-06, + "loss": 0.9308, + "step": 4925 + }, + { + "epoch": 2.73, + "learning_rate": 1.020478464225369e-06, + "loss": 0.9005, + "step": 4930 + }, + { + "epoch": 2.73, + "learning_rate": 1.0001047341373832e-06, + "loss": 0.9326, + "step": 4935 + }, + { + "epoch": 2.73, + "learning_rate": 9.7993228813327e-07, + "loss": 0.8905, + "step": 4940 + }, + { + "epoch": 2.73, + "learning_rate": 9.599612953967746e-07, + "loss": 0.9238, + "step": 4945 + }, + { + "epoch": 2.74, + "learning_rate": 9.401919234220902e-07, + "loss": 0.8964, + "step": 4950 + }, + { + "epoch": 2.74, + "learning_rate": 9.206243380124352e-07, + "loss": 0.9183, + "step": 4955 + }, + { + "epoch": 2.74, + "learning_rate": 9.012587032786706e-07, + "loss": 0.9074, + "step": 4960 + }, + { + "epoch": 2.75, + "learning_rate": 8.820951816379263e-07, + "loss": 0.9255, + "step": 4965 + }, + { + "epoch": 2.75, + "learning_rate": 8.631339338122324e-07, + "loss": 0.9133, + "step": 4970 + }, + { + "epoch": 2.75, + "learning_rate": 8.443751188271703e-07, + "loss": 0.9056, + "step": 4975 + }, + { + "epoch": 2.75, + "learning_rate": 8.258188940105549e-07, + "loss": 0.8905, + "step": 4980 + }, + { + "epoch": 2.76, + "learning_rate": 8.074654149910821e-07, + "loss": 0.9088, + "step": 4985 + }, + { + "epoch": 2.76, + "learning_rate": 7.893148356970748e-07, + "loss": 0.8994, + "step": 4990 + }, + { + "epoch": 2.76, + "learning_rate": 7.713673083551281e-07, + "loss": 0.8946, + "step": 4995 + }, + { + "epoch": 2.76, + "learning_rate": 7.536229834888913e-07, + "loss": 0.924, + "step": 5000 + }, + { + "epoch": 2.77, + "learning_rate": 7.360820099177712e-07, + "loss": 0.9102, + "step": 5005 + }, + { + "epoch": 2.77, + "learning_rate": 7.187445347556859e-07, + "loss": 0.9198, + "step": 5010 + }, + { + "epoch": 2.77, + "learning_rate": 7.016107034098524e-07, + "loss": 0.9004, + "step": 5015 + }, + { + "epoch": 2.78, + "learning_rate": 6.846806595795424e-07, + "loss": 0.8849, + "step": 5020 + }, + { + "epoch": 2.78, + "learning_rate": 6.679545452548924e-07, + "loss": 0.9473, + "step": 5025 + }, + { + "epoch": 2.78, + "learning_rate": 6.514325007157013e-07, + "loss": 0.9339, + "step": 5030 + }, + { + "epoch": 2.78, + "learning_rate": 6.35114664530273e-07, + "loss": 0.9172, + "step": 5035 + }, + { + "epoch": 2.79, + "learning_rate": 6.190011735542262e-07, + "loss": 0.9796, + "step": 5040 + }, + { + "epoch": 2.79, + "learning_rate": 6.030921629293778e-07, + "loss": 0.9367, + "step": 5045 + }, + { + "epoch": 2.79, + "learning_rate": 5.873877660825783e-07, + "loss": 0.9254, + "step": 5050 + }, + { + "epoch": 2.8, + "learning_rate": 5.718881147246252e-07, + "loss": 0.8976, + "step": 5055 + }, + { + "epoch": 2.8, + "learning_rate": 5.565933388491263e-07, + "loss": 0.9099, + "step": 5060 + }, + { + "epoch": 2.8, + "learning_rate": 5.415035667314328e-07, + "loss": 0.9057, + "step": 5065 + }, + { + "epoch": 2.8, + "learning_rate": 5.266189249275521e-07, + "loss": 0.8978, + "step": 5070 + }, + { + "epoch": 2.81, + "learning_rate": 5.119395382730929e-07, + "loss": 0.9172, + "step": 5075 + }, + { + "epoch": 2.81, + "learning_rate": 4.974655298822129e-07, + "loss": 0.9393, + "step": 5080 + }, + { + "epoch": 2.81, + "learning_rate": 4.831970211465892e-07, + "loss": 0.9137, + "step": 5085 + }, + { + "epoch": 2.81, + "learning_rate": 4.6913413173439723e-07, + "loss": 0.8573, + "step": 5090 + }, + { + "epoch": 2.82, + "learning_rate": 4.552769795893086e-07, + "loss": 0.9533, + "step": 5095 + }, + { + "epoch": 2.82, + "learning_rate": 4.416256809295083e-07, + "loss": 0.8693, + "step": 5100 + }, + { + "epoch": 2.82, + "learning_rate": 4.2818035024670963e-07, + "loss": 0.9531, + "step": 5105 + }, + { + "epoch": 2.83, + "learning_rate": 4.1494110030519397e-07, + "loss": 0.9077, + "step": 5110 + }, + { + "epoch": 2.83, + "learning_rate": 4.019080421408833e-07, + "loss": 0.9061, + "step": 5115 + }, + { + "epoch": 2.83, + "learning_rate": 3.8908128506037756e-07, + "loss": 0.9606, + "step": 5120 + }, + { + "epoch": 2.83, + "learning_rate": 3.7646093664007456e-07, + "loss": 0.9335, + "step": 5125 + }, + { + "epoch": 2.84, + "learning_rate": 3.640471027252346e-07, + "loss": 0.9054, + "step": 5130 + }, + { + "epoch": 2.84, + "learning_rate": 3.5183988742910903e-07, + "loss": 0.8801, + "step": 5135 + }, + { + "epoch": 2.84, + "learning_rate": 3.398393931320687e-07, + "loss": 0.913, + "step": 5140 + }, + { + "epoch": 2.84, + "learning_rate": 3.2804572048074357e-07, + "loss": 0.904, + "step": 5145 + }, + { + "epoch": 2.85, + "learning_rate": 3.164589683871705e-07, + "loss": 0.9144, + "step": 5150 + }, + { + "epoch": 2.85, + "learning_rate": 3.050792340279718e-07, + "loss": 0.9145, + "step": 5155 + }, + { + "epoch": 2.85, + "learning_rate": 2.939066128435419e-07, + "loss": 0.957, + "step": 5160 + }, + { + "epoch": 2.86, + "learning_rate": 2.829411985372399e-07, + "loss": 0.9196, + "step": 5165 + }, + { + "epoch": 2.86, + "learning_rate": 2.7218308307460916e-07, + "loss": 0.8893, + "step": 5170 + }, + { + "epoch": 2.86, + "learning_rate": 2.616323566825979e-07, + "loss": 0.9049, + "step": 5175 + }, + { + "epoch": 2.86, + "learning_rate": 2.51289107848815e-07, + "loss": 0.8977, + "step": 5180 + }, + { + "epoch": 2.87, + "learning_rate": 2.4115342332078074e-07, + "loss": 0.9187, + "step": 5185 + }, + { + "epoch": 2.87, + "learning_rate": 2.312253881051968e-07, + "loss": 0.9355, + "step": 5190 + }, + { + "epoch": 2.87, + "learning_rate": 2.2150508546723848e-07, + "loss": 0.9337, + "step": 5195 + }, + { + "epoch": 2.88, + "learning_rate": 2.119925969298553e-07, + "loss": 0.9011, + "step": 5200 + }, + { + "epoch": 2.88, + "learning_rate": 2.0268800227307982e-07, + "loss": 0.8987, + "step": 5205 + }, + { + "epoch": 2.88, + "learning_rate": 1.9359137953337548e-07, + "loss": 0.9206, + "step": 5210 + }, + { + "epoch": 2.88, + "learning_rate": 1.8470280500296199e-07, + "loss": 0.9485, + "step": 5215 + }, + { + "epoch": 2.89, + "learning_rate": 1.7602235322919102e-07, + "loss": 0.8902, + "step": 5220 + }, + { + "epoch": 2.89, + "learning_rate": 1.6755009701391045e-07, + "loss": 0.9484, + "step": 5225 + }, + { + "epoch": 2.89, + "learning_rate": 1.592861074128621e-07, + "loss": 0.9361, + "step": 5230 + }, + { + "epoch": 2.89, + "learning_rate": 1.5123045373508226e-07, + "loss": 0.9407, + "step": 5235 + }, + { + "epoch": 2.9, + "learning_rate": 1.4338320354231605e-07, + "loss": 0.8792, + "step": 5240 + }, + { + "epoch": 2.9, + "learning_rate": 1.3574442264846222e-07, + "loss": 0.9682, + "step": 5245 + }, + { + "epoch": 2.9, + "learning_rate": 1.2831417511900423e-07, + "loss": 0.8995, + "step": 5250 + }, + { + "epoch": 2.91, + "learning_rate": 1.2109252327048849e-07, + "loss": 0.8877, + "step": 5255 + }, + { + "epoch": 2.91, + "learning_rate": 1.1407952766999686e-07, + "loss": 0.8746, + "step": 5260 + }, + { + "epoch": 2.91, + "learning_rate": 1.0727524713463333e-07, + "loss": 0.9287, + "step": 5265 + }, + { + "epoch": 2.91, + "learning_rate": 1.0067973873104097e-07, + "loss": 0.9045, + "step": 5270 + }, + { + "epoch": 2.92, + "learning_rate": 9.42930577749107e-08, + "loss": 0.942, + "step": 5275 + }, + { + "epoch": 2.92, + "learning_rate": 8.811525783052888e-08, + "loss": 0.885, + "step": 5280 + }, + { + "epoch": 2.92, + "learning_rate": 8.214639071031926e-08, + "loss": 0.8947, + "step": 5285 + }, + { + "epoch": 2.93, + "learning_rate": 7.638650647442125e-08, + "loss": 0.9309, + "step": 5290 + }, + { + "epoch": 2.93, + "learning_rate": 7.083565343024845e-08, + "loss": 0.896, + "step": 5295 + }, + { + "epoch": 2.93, + "learning_rate": 6.549387813210572e-08, + "loss": 0.9132, + "step": 5300 + }, + { + "epoch": 2.93, + "learning_rate": 6.036122538078393e-08, + "loss": 0.9185, + "step": 5305 + }, + { + "epoch": 2.94, + "learning_rate": 5.543773822319631e-08, + "loss": 0.9285, + "step": 5310 + }, + { + "epoch": 2.94, + "learning_rate": 5.072345795200384e-08, + "loss": 0.9074, + "step": 5315 + }, + { + "epoch": 2.94, + "learning_rate": 4.621842410527655e-08, + "loss": 0.8915, + "step": 5320 + }, + { + "epoch": 2.94, + "learning_rate": 4.1922674466166045e-08, + "loss": 0.8647, + "step": 5325 + }, + { + "epoch": 2.95, + "learning_rate": 3.783624506257799e-08, + "loss": 0.9393, + "step": 5330 + }, + { + "epoch": 2.95, + "learning_rate": 3.395917016688344e-08, + "loss": 0.9303, + "step": 5335 + }, + { + "epoch": 2.95, + "learning_rate": 3.029148229561629e-08, + "loss": 0.8998, + "step": 5340 + }, + { + "epoch": 2.96, + "learning_rate": 2.6833212209206872e-08, + "loss": 0.8964, + "step": 5345 + }, + { + "epoch": 2.96, + "learning_rate": 2.358438891173487e-08, + "loss": 0.9528, + "step": 5350 + }, + { + "epoch": 2.96, + "learning_rate": 2.0545039650665675e-08, + "loss": 0.8977, + "step": 5355 + }, + { + "epoch": 2.96, + "learning_rate": 1.7715189916636676e-08, + "loss": 0.9175, + "step": 5360 + }, + { + "epoch": 2.97, + "learning_rate": 1.5094863443243513e-08, + "loss": 0.8991, + "step": 5365 + }, + { + "epoch": 2.97, + "learning_rate": 1.2684082206829151e-08, + "loss": 0.9203, + "step": 5370 + }, + { + "epoch": 2.97, + "learning_rate": 1.0482866426311799e-08, + "loss": 0.9347, + "step": 5375 + }, + { + "epoch": 2.97, + "learning_rate": 8.491234563010041e-09, + "loss": 0.9361, + "step": 5380 + }, + { + "epoch": 2.98, + "learning_rate": 6.709203320484636e-09, + "loss": 0.8695, + "step": 5385 + }, + { + "epoch": 2.98, + "learning_rate": 5.13678764441361e-09, + "loss": 0.981, + "step": 5390 + }, + { + "epoch": 2.98, + "learning_rate": 3.774000722439608e-09, + "loss": 0.8908, + "step": 5395 + }, + { + "epoch": 2.99, + "learning_rate": 2.6208539840894e-09, + "loss": 0.9283, + "step": 5400 + }, + { + "epoch": 2.99, + "learning_rate": 1.6773571006573062e-09, + "loss": 0.9211, + "step": 5405 + }, + { + "epoch": 2.99, + "learning_rate": 9.435179851330355e-10, + "loss": 0.8897, + "step": 5410 + }, + { + "epoch": 2.99, + "learning_rate": 4.1934279213229346e-10, + "loss": 0.9343, + "step": 5415 + }, + { + "epoch": 3.0, + "learning_rate": 1.0483591784404834e-10, + "loss": 0.931, + "step": 5420 + }, + { + "epoch": 3.0, + "step": 5424, + "total_flos": 1.7625983501520077e+18, + "train_loss": 0.9648243357649947, + "train_runtime": 56978.2271, + "train_samples_per_second": 1.523, + "train_steps_per_second": 0.095 + } + ], + "max_steps": 5424, + "num_train_epochs": 3, + "total_flos": 1.7625983501520077e+18, + "trial_name": null, + "trial_params": null +} diff --git a/training_args.bin b/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..77433459daca6603950afc463ff989e817f81ac3 --- /dev/null +++ b/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0f6a8853599a52b1f15984d75ce4b6434c335fdc67d4509d0b2ca0d8c613590a +size 3362 diff --git a/training_loss.png b/training_loss.png new file mode 100644 index 0000000000000000000000000000000000000000..b0c3594d36fb66a7b04e81abea0c49e55cbd0a73 Binary files /dev/null and b/training_loss.png differ